Old src/hotspot/share/gc/g1/g1ConcurrentMark.cpp

   1 /*
   2  * Copyright (c) 2001, 2026, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "classfile/classLoaderData.hpp"
  26 #include "classfile/classLoaderDataGraph.hpp"
  27 #include "cppstdlib/new.hpp"
  28 #include "gc/g1/g1BarrierSet.hpp"
  29 #include "gc/g1/g1BatchedTask.hpp"
  30 #include "gc/g1/g1CardSetMemory.hpp"
  31 #include "gc/g1/g1CardTableClaimTable.inline.hpp"
  32 #include "gc/g1/g1CollectedHeap.inline.hpp"
  33 #include "gc/g1/g1CollectionSetChooser.hpp"
  34 #include "gc/g1/g1CollectorState.hpp"
  35 #include "gc/g1/g1ConcurrentMark.inline.hpp"
  36 #include "gc/g1/g1ConcurrentMarkRemarkTasks.hpp"
  37 #include "gc/g1/g1ConcurrentMarkThread.inline.hpp"
  38 #include "gc/g1/g1ConcurrentRebuildAndScrub.hpp"
  39 #include "gc/g1/g1ConcurrentRefine.hpp"
  40 #include "gc/g1/g1HeapRegion.inline.hpp"
  41 #include "gc/g1/g1HeapRegionManager.hpp"
  42 #include "gc/g1/g1HeapRegionPrinter.hpp"
  43 #include "gc/g1/g1HeapRegionRemSet.inline.hpp"
  44 #include "gc/g1/g1HeapRegionSet.inline.hpp"
  45 #include "gc/g1/g1HeapVerifier.hpp"
  46 #include "gc/g1/g1OopClosures.inline.hpp"
  47 #include "gc/g1/g1Policy.hpp"
  48 #include "gc/g1/g1RegionMarkStatsCache.inline.hpp"
  49 #include "gc/g1/g1ThreadLocalData.hpp"
  50 #include "gc/g1/g1Trace.hpp"
  51 #include "gc/shared/gcId.hpp"
  52 #include "gc/shared/gcTimer.hpp"
  53 #include "gc/shared/gcTraceTime.inline.hpp"
  54 #include "gc/shared/gcVMOperations.hpp"
  55 #include "gc/shared/partialArraySplitter.inline.hpp"
  56 #include "gc/shared/partialArrayState.hpp"
  57 #include "gc/shared/partialArrayTaskStats.hpp"
  58 #include "gc/shared/referencePolicy.hpp"
  59 #include "gc/shared/suspendibleThreadSet.hpp"
  60 #include "gc/shared/taskqueue.inline.hpp"
  61 #include "gc/shared/taskTerminator.hpp"
  62 #include "gc/shared/weakProcessor.inline.hpp"
  63 #include "gc/shared/workerPolicy.hpp"
  64 #include "jvm.h"
  65 #include "logging/log.hpp"
  66 #include "memory/allocation.hpp"
  67 #include "memory/iterator.hpp"
  68 #include "memory/metaspaceUtils.hpp"
  69 #include "memory/resourceArea.hpp"
  70 #include "memory/universe.hpp"
  71 #include "nmt/memTracker.hpp"
  72 #include "oops/access.inline.hpp"
  73 #include "oops/oop.inline.hpp"
  74 #include "runtime/globals_extension.hpp"
  75 #include "runtime/handles.inline.hpp"
  76 #include "runtime/java.hpp"
  77 #include "runtime/orderAccess.hpp"
  78 #include "runtime/os.hpp"
  79 #include "runtime/prefetch.inline.hpp"
  80 #include "runtime/threads.hpp"
  81 #include "utilities/align.hpp"
  82 #include "utilities/checkedCast.hpp"
  83 #include "utilities/formatBuffer.hpp"
  84 #include "utilities/growableArray.hpp"
  85 #include "utilities/powerOfTwo.hpp"
  86 
  87 G1CMIsAliveClosure::G1CMIsAliveClosure() : _cm(nullptr) { }
  88 
  89 G1CMIsAliveClosure::G1CMIsAliveClosure(G1ConcurrentMark* cm) : _cm(cm) {
  90   assert(cm != nullptr, "must be");
  91 }
  92 
  93 void G1CMIsAliveClosure::initialize(G1ConcurrentMark* cm) {
  94   assert(cm != nullptr, "must be");
  95   assert(_cm == nullptr, "double initialize");
  96   _cm = cm;
  97 }
  98 
  99 bool G1CMBitMapClosure::do_addr(HeapWord* const addr) {
 100   assert(addr < _cm->finger(), "invariant");
 101   assert(addr >= _task->finger(), "invariant");
 102 
 103   // We move that task's local finger along.
 104   _task->move_finger_to(addr);
 105 
 106   _task->process_entry(G1TaskQueueEntry(cast_to_oop(addr)), false /* stolen */);
 107   // we only partially drain the local queue and global stack
 108   _task->drain_local_queue(true);
 109   _task->drain_global_stack(true);
 110 
 111   // if the has_aborted flag has been raised, we need to bail out of
 112   // the iteration
 113   return !_task->has_aborted();
 114 }
 115 
 116 G1CMMarkStack::G1CMMarkStack() :
 117   _chunk_allocator() {
 118   set_empty();
 119 }
 120 
 121 size_t G1CMMarkStack::capacity_alignment() {
 122   return (size_t)lcm(os::vm_allocation_granularity(), sizeof(TaskQueueEntryChunk)) / sizeof(G1TaskQueueEntry);
 123 }
 124 
 125 bool G1CMMarkStack::initialize() {
 126   guarantee(_chunk_allocator.capacity() == 0, "G1CMMarkStack already initialized.");
 127 
 128   size_t initial_capacity = MarkStackSize;
 129   size_t max_capacity = MarkStackSizeMax;
 130 
 131   size_t const TaskEntryChunkSizeInVoidStar = sizeof(TaskQueueEntryChunk) / sizeof(G1TaskQueueEntry);
 132 
 133   size_t max_num_chunks = align_up(max_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
 134   size_t initial_num_chunks = align_up(initial_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
 135 
 136   initial_num_chunks = round_up_power_of_2(initial_num_chunks);
 137   max_num_chunks = MAX2(initial_num_chunks, max_num_chunks);
 138 
 139   size_t limit = (INT_MAX - 1);
 140   max_capacity = MIN2((max_num_chunks * TaskEntryChunkSizeInVoidStar), limit);
 141   initial_capacity = MIN2((initial_num_chunks * TaskEntryChunkSizeInVoidStar), limit);
 142 
 143   FLAG_SET_ERGO(MarkStackSizeMax, max_capacity);
 144   FLAG_SET_ERGO(MarkStackSize, initial_capacity);
 145 
 146   log_trace(gc)("MarkStackSize: %uk  MarkStackSizeMax: %uk", (uint)(MarkStackSize / K), (uint)(MarkStackSizeMax / K));
 147 
 148   log_debug(gc)("Initialize mark stack with %zu chunks, maximum %zu",
 149                 initial_num_chunks, max_capacity);
 150 
 151   return _chunk_allocator.initialize(initial_num_chunks, max_num_chunks);
 152 }
 153 
 154 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::ChunkAllocator::allocate_new_chunk() {
 155   if (_size.load_relaxed() >= _max_capacity) {
 156     return nullptr;
 157   }
 158 
 159   size_t cur_idx = _size.fetch_then_add(1u);
 160 
 161   if (cur_idx >= _max_capacity) {
 162     return nullptr;
 163   }
 164 
 165   size_t bucket = get_bucket(cur_idx);
 166   if (_buckets[bucket].load_acquire() == nullptr) {
 167     if (!_should_grow) {
 168       // Prefer to restart the CM.
 169       return nullptr;
 170     }
 171 
 172     MutexLocker x(G1MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
 173     if (_buckets[bucket].load_acquire() == nullptr) {
 174       size_t desired_capacity = bucket_size(bucket) * 2;
 175       if (!try_expand_to(desired_capacity)) {
 176         return nullptr;
 177       }
 178     }
 179   }
 180 
 181   size_t bucket_idx = get_bucket_index(cur_idx);
 182   TaskQueueEntryChunk* result = ::new (&_buckets[bucket].load_relaxed()[bucket_idx]) TaskQueueEntryChunk;
 183   result->next = nullptr;
 184   return result;
 185 }
 186 
 187 G1CMMarkStack::ChunkAllocator::ChunkAllocator() :
 188   _min_capacity(0),
 189   _max_capacity(0),
 190   _capacity(0),
 191   _num_buckets(0),
 192   _should_grow(false),
 193   _buckets(nullptr),
 194   _size(0)
 195 { }
 196 
 197 bool G1CMMarkStack::ChunkAllocator::initialize(size_t initial_capacity, size_t max_capacity) {
 198   guarantee(is_power_of_2(initial_capacity), "Invalid initial_capacity");
 199 
 200   _min_capacity = initial_capacity;
 201   _max_capacity = max_capacity;
 202   _num_buckets  = get_bucket(_max_capacity) + 1;
 203 
 204   _buckets = NEW_C_HEAP_ARRAY(Atomic<TaskQueueEntryChunk*>, _num_buckets, mtGC);
 205 
 206   for (size_t i = 0; i < _num_buckets; i++) {
 207     _buckets[i].store_relaxed(nullptr);
 208   }
 209 
 210   size_t new_capacity = bucket_size(0);
 211 
 212   if (!reserve(new_capacity)) {
 213     log_warning(gc)("Failed to reserve memory for new overflow mark stack with %zu chunks and size %zuB.", new_capacity, new_capacity * sizeof(TaskQueueEntryChunk));
 214     return false;
 215   }
 216   return true;
 217 }
 218 
 219 bool G1CMMarkStack::ChunkAllocator::try_expand_to(size_t desired_capacity) {
 220   if (_capacity == _max_capacity) {
 221     log_debug(gc)("Can not expand overflow mark stack further, already at maximum capacity of %zu chunks.", _capacity);
 222     return false;
 223   }
 224 
 225   size_t old_capacity = _capacity;
 226   desired_capacity = MIN2(desired_capacity, _max_capacity);
 227 
 228   if (reserve(desired_capacity)) {
 229     log_debug(gc)("Expanded the mark stack capacity from %zu to %zu chunks",
 230                   old_capacity, desired_capacity);
 231     return true;
 232   }
 233   return false;
 234 }
 235 
 236 bool G1CMMarkStack::ChunkAllocator::try_expand() {
 237   size_t new_capacity = _capacity * 2;
 238   return try_expand_to(new_capacity);
 239 }
 240 
 241 G1CMMarkStack::ChunkAllocator::~ChunkAllocator() {
 242   if (_buckets == nullptr) {
 243     return;
 244   }
 245 
 246   for (size_t i = 0; i < _num_buckets; i++) {
 247     if (_buckets[i].load_relaxed() != nullptr) {
 248       MmapArrayAllocator<TaskQueueEntryChunk>::free(_buckets[i].load_relaxed(),  bucket_size(i));
 249       _buckets[i].store_relaxed(nullptr);
 250     }
 251   }
 252 
 253   FREE_C_HEAP_ARRAY(TaskQueueEntryChunk*, _buckets);
 254 }
 255 
 256 bool G1CMMarkStack::ChunkAllocator::reserve(size_t new_capacity) {
 257   assert(new_capacity <= _max_capacity, "Cannot expand overflow mark stack beyond the max_capacity of %zu chunks.", _max_capacity);
 258 
 259   size_t highest_bucket = get_bucket(new_capacity - 1);
 260   size_t i = get_bucket(_capacity);
 261 
 262   // Allocate all buckets associated with indexes between the current capacity (_capacity)
 263   // and the new capacity (new_capacity). This step ensures that there are no gaps in the
 264   // array and that the capacity accurately reflects the reserved memory.
 265   for (; i <= highest_bucket; i++) {
 266     if (_buckets[i].load_acquire() != nullptr) {
 267       continue; // Skip over already allocated buckets.
 268     }
 269 
 270     size_t bucket_capacity = bucket_size(i);
 271 
 272     // Trim bucket size so that we do not exceed the _max_capacity.
 273     bucket_capacity = (_capacity + bucket_capacity) <= _max_capacity ?
 274                       bucket_capacity :
 275                       _max_capacity - _capacity;
 276 
 277 
 278     TaskQueueEntryChunk* bucket_base = MmapArrayAllocator<TaskQueueEntryChunk>::allocate_or_null(bucket_capacity, mtGC);
 279 
 280     if (bucket_base == nullptr) {
 281       log_warning(gc)("Failed to reserve memory for increasing the overflow mark stack capacity with %zu chunks and size %zuB.",
 282                       bucket_capacity, bucket_capacity * sizeof(TaskQueueEntryChunk));
 283       return false;
 284     }
 285     _capacity += bucket_capacity;
 286     _buckets[i].release_store(bucket_base);
 287   }
 288   return true;
 289 }
 290 
 291 void G1CMMarkStack::expand() {
 292   _chunk_allocator.try_expand();
 293 }
 294 
 295 void G1CMMarkStack::add_chunk_to_list(Atomic<TaskQueueEntryChunk*>* list, TaskQueueEntryChunk* elem) {
 296   elem->next = list->load_relaxed();
 297   list->store_relaxed(elem);
 298 }
 299 
 300 void G1CMMarkStack::add_chunk_to_chunk_list(TaskQueueEntryChunk* elem) {
 301   MutexLocker x(G1MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
 302   add_chunk_to_list(&_chunk_list, elem);
 303   _chunks_in_chunk_list++;
 304 }
 305 
 306 void G1CMMarkStack::add_chunk_to_free_list(TaskQueueEntryChunk* elem) {
 307   MutexLocker x(G1MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
 308   add_chunk_to_list(&_free_list, elem);
 309 }
 310 
 311 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_list(Atomic<TaskQueueEntryChunk*>* list) {
 312   TaskQueueEntryChunk* result = list->load_relaxed();
 313   if (result != nullptr) {
 314     list->store_relaxed(list->load_relaxed()->next);
 315   }
 316   return result;
 317 }
 318 
 319 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_chunk_list() {
 320   MutexLocker x(G1MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
 321   TaskQueueEntryChunk* result = remove_chunk_from_list(&_chunk_list);
 322   if (result != nullptr) {
 323     _chunks_in_chunk_list--;
 324   }
 325   return result;
 326 }
 327 
 328 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_free_list() {
 329   MutexLocker x(G1MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
 330   return remove_chunk_from_list(&_free_list);
 331 }
 332 
 333 bool G1CMMarkStack::par_push_chunk(G1TaskQueueEntry* ptr_arr) {
 334   // Get a new chunk.
 335   TaskQueueEntryChunk* new_chunk = remove_chunk_from_free_list();
 336 
 337   if (new_chunk == nullptr) {
 338     // Did not get a chunk from the free list. Allocate from backing memory.
 339     new_chunk = _chunk_allocator.allocate_new_chunk();
 340 
 341     if (new_chunk == nullptr) {
 342       return false;
 343     }
 344   }
 345 
 346   Copy::conjoint_memory_atomic(ptr_arr, new_chunk->data, EntriesPerChunk * sizeof(G1TaskQueueEntry));
 347 
 348   add_chunk_to_chunk_list(new_chunk);
 349 
 350   return true;
 351 }
 352 
 353 bool G1CMMarkStack::par_pop_chunk(G1TaskQueueEntry* ptr_arr) {
 354   TaskQueueEntryChunk* cur = remove_chunk_from_chunk_list();
 355 
 356   if (cur == nullptr) {
 357     return false;
 358   }
 359 
 360   Copy::conjoint_memory_atomic(cur->data, ptr_arr, EntriesPerChunk * sizeof(G1TaskQueueEntry));
 361 
 362   add_chunk_to_free_list(cur);
 363   return true;
 364 }
 365 
 366 void G1CMMarkStack::set_empty() {
 367   _chunks_in_chunk_list = 0;
 368   _chunk_list.store_relaxed(nullptr);
 369   _free_list.store_relaxed(nullptr);
 370   _chunk_allocator.reset();
 371 }
 372 
 373 G1CMRootMemRegions::G1CMRootMemRegions(uint const max_regions) :
 374     _root_regions(MemRegion::create_array(max_regions, mtGC)),
 375     _max_regions(max_regions),
 376     _num_root_regions(0),
 377     _claimed_root_regions(0),
 378     _scan_in_progress(false),
 379     _should_abort(false) { }
 380 
 381 G1CMRootMemRegions::~G1CMRootMemRegions() {
 382   MemRegion::destroy_array(_root_regions, _max_regions);
 383 }
 384 
 385 void G1CMRootMemRegions::reset() {
 386   _num_root_regions.store_relaxed(0);
 387 }
 388 
 389 void G1CMRootMemRegions::add(HeapWord* start, HeapWord* end) {
 390   assert_at_safepoint();
 391   size_t idx = _num_root_regions.fetch_then_add(1u);
 392   assert(idx < _max_regions, "Trying to add more root MemRegions than there is space %zu", _max_regions);
 393   assert(start != nullptr && end != nullptr && start <= end, "Start (" PTR_FORMAT ") should be less or equal to "
 394          "end (" PTR_FORMAT ")", p2i(start), p2i(end));
 395   _root_regions[idx].set_start(start);
 396   _root_regions[idx].set_end(end);
 397 }
 398 
 399 void G1CMRootMemRegions::prepare_for_scan() {
 400   assert(!scan_in_progress(), "pre-condition");
 401 
 402   _scan_in_progress.store_relaxed(num_root_regions() > 0);
 403 
 404   _claimed_root_regions.store_relaxed(0);
 405   _should_abort.store_relaxed(false);
 406 }
 407 
 408 const MemRegion* G1CMRootMemRegions::claim_next() {
 409   if (_should_abort.load_relaxed()) {
 410     // If someone has set the should_abort flag, we return null to
 411     // force the caller to bail out of their loop.
 412     return nullptr;
 413   }
 414 
 415   uint local_num_root_regions = num_root_regions();
 416   if (_claimed_root_regions.load_relaxed() >= local_num_root_regions) {
 417     return nullptr;
 418   }
 419 
 420   size_t claimed_index = _claimed_root_regions.fetch_then_add(1u);
 421   if (claimed_index < local_num_root_regions) {
 422     return &_root_regions[claimed_index];
 423   }
 424   return nullptr;
 425 }
 426 
 427 uint G1CMRootMemRegions::num_root_regions() const {
 428   return (uint)_num_root_regions.load_relaxed();
 429 }
 430 
 431 bool G1CMRootMemRegions::contains(const MemRegion mr) const {
 432   uint local_num_root_regions = num_root_regions();
 433   for (uint i = 0; i < local_num_root_regions; i++) {
 434     if (_root_regions[i].equals(mr)) {
 435       return true;
 436     }
 437   }
 438   return false;
 439 }
 440 
 441 void G1CMRootMemRegions::notify_scan_done() {
 442   MutexLocker x(G1RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 443   _scan_in_progress.store_relaxed(false);
 444   G1RootRegionScan_lock->notify_all();
 445 }
 446 
 447 void G1CMRootMemRegions::cancel_scan() {
 448   notify_scan_done();
 449 }
 450 
 451 void G1CMRootMemRegions::scan_finished() {
 452   assert(scan_in_progress(), "pre-condition");
 453 
 454   if (!_should_abort.load_relaxed()) {
 455     assert(_claimed_root_regions.load_relaxed() >= num_root_regions(),
 456            "we should have claimed all root regions, claimed %zu, length = %u",
 457            _claimed_root_regions.load_relaxed(), num_root_regions());
 458   }
 459 
 460   notify_scan_done();
 461 }
 462 
 463 bool G1CMRootMemRegions::wait_until_scan_finished() {
 464   if (!scan_in_progress()) {
 465     return false;
 466   }
 467 
 468   {
 469     MonitorLocker ml(G1RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 470     while (scan_in_progress()) {
 471       ml.wait();
 472     }
 473   }
 474   return true;
 475 }
 476 
// Sets up the concurrent marking state for the heap g1h and initializes the
// mark bitmap over the reserved heap using bitmap_storage.
//
// Only part of the setup happens here: the concurrent mark thread, the
// concurrent workers, the marking tasks and the global mark stack are created
// in fully_initialize(). The per-region arrays are allocated here as raw
// memory; their elements are constructed in fully_initialize().
G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h,
                                   G1RegionToSpaceMapper* bitmap_storage) :
  _cm_thread(nullptr), // created in fully_initialize()
  _g1h(g1h),

  _mark_bitmap(),

  _heap(_g1h->reserved()),

  _root_regions(_g1h->max_num_regions()),

  _global_mark_stack(),

  _finger(nullptr), // _finger set in reset_marking_for_restart()

  _worker_id_offset(G1ConcRefinementThreads), // The refinement control thread does not refine cards, so it's just the worker threads.
  _max_num_tasks(MAX2(ConcGCThreads, ParallelGCThreads)),
  _num_active_tasks(0), // _num_active_tasks set in fully_initialize() and reset_at_marking_complete()
  _tasks(nullptr), // _tasks set inside fully_initialize()
  _task_queues(new G1CMTaskQueueSet(_max_num_tasks)),
  _terminator(_max_num_tasks, _task_queues),
  _partial_array_state_manager(new PartialArrayStateManager(_max_num_tasks)),

  _first_overflow_barrier_sync(),
  _second_overflow_barrier_sync(),

  _completed_mark_cycles(0),
  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _gc_timer_cm(new ConcurrentGCTimer()),
  _gc_tracer_cm(new G1OldTracer()),

  _remark_times(),
  _remark_mark_times(),
  _remark_weak_ref_times(),
  _cleanup_times(),

  _concurrent_workers(nullptr), // created in fully_initialize()
  _num_concurrent_workers(0),
  _max_concurrent_workers(0),

  // One element per possible region; elements are constructed in fully_initialize().
  _region_mark_stats(NEW_C_HEAP_ARRAY(G1RegionMarkStats, _g1h->max_num_regions(), mtGC)),
  _top_at_mark_starts(NEW_C_HEAP_ARRAY(Atomic<HeapWord*>, _g1h->max_num_regions(), mtGC)),
  _top_at_rebuild_starts(NEW_C_HEAP_ARRAY(Atomic<HeapWord*>, _g1h->max_num_regions(), mtGC)),
  _needs_remembered_set_rebuild(false)
{
  assert(G1CGC_lock != nullptr, "CGC_lock must be initialized");

  _mark_bitmap.initialize(g1h->reserved(), bitmap_storage);
}
 531 
// Completes the setup of concurrent marking: creates the concurrent mark
// thread, the concurrent worker threads, the global mark stack and one
// marking task (with its task queue) per possible worker, and constructs the
// elements of the per-region arrays. Returns immediately if
// is_fully_initialized() already holds.
//
// Failing to create the concurrent mark thread or to initialize the global
// mark stack goes through the VM's shutdown/exit during initialization paths.
void G1ConcurrentMark::fully_initialize() {
  if (is_fully_initialized()) {
    return;
  }

  // Create & start ConcurrentMark thread.
  _cm_thread = new G1ConcurrentMarkThread(this);
  if (_cm_thread->osthread() == nullptr) {
    vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
  }

  log_debug(gc)("ConcGCThreads: %u offset %u", ConcGCThreads, _worker_id_offset);
  log_debug(gc)("ParallelGCThreads: %u", ParallelGCThreads);

  // The concurrent worker pool is sized by ConcGCThreads.
  _max_concurrent_workers = ConcGCThreads;

  _concurrent_workers = new WorkerThreads("G1 Conc", _max_concurrent_workers);
  _concurrent_workers->initialize_workers();
  _num_concurrent_workers = _concurrent_workers->active_workers();

  if (!_global_mark_stack.initialize()) {
    vm_exit_during_initialization("Failed to allocate initial concurrent mark overflow mark stack.");
  }

  _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_num_tasks, mtGC);

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _num_active_tasks = _max_num_tasks;

  // Create one task queue and one marking task per possible worker id.
  for (uint i = 0; i < _max_num_tasks; ++i) {
    G1CMTaskQueue* task_queue = new G1CMTaskQueue();
    _task_queues->register_queue(i, task_queue);

    _tasks[i] = new G1CMTask(i, this, task_queue, _region_mark_stats);
  }

  // The per-region arrays were allocated as raw memory in the constructor;
  // construct their elements in place now.
  for (uint i = 0; i < _g1h->max_num_regions(); i++) {
    ::new (&_region_mark_stats[i]) G1RegionMarkStats{};
    ::new (&_top_at_mark_starts[i]) Atomic<HeapWord*>{};
    ::new (&_top_at_rebuild_starts[i]) Atomic<HeapWord*>{};
  }

  // Start out in the "not marking" state; this also sets _num_active_tasks
  // back to zero.
  reset_at_marking_complete();
}
 576 
 577 bool G1ConcurrentMark::in_progress() const {
 578   return is_fully_initialized() ? _cm_thread->in_progress() : false;
 579 }
 580 
 581 PartialArrayStateManager* G1ConcurrentMark::partial_array_state_manager() const {
 582   return _partial_array_state_manager;
 583 }
 584 
 585 void G1ConcurrentMark::reset() {
 586   _has_aborted.store_relaxed(false);
 587 
 588   reset_marking_for_restart();
 589 
 590   // Reset all tasks, since different phases will use different number of active
 591   // threads. So, it's easiest to have all of them ready.
 592   for (uint i = 0; i < _max_num_tasks; ++i) {
 593     _tasks[i]->reset(mark_bitmap());
 594   }
 595 
 596   uint max_num_regions = _g1h->max_num_regions();
 597   for (uint i = 0; i < max_num_regions; i++) {
 598     _top_at_rebuild_starts[i].store_relaxed(nullptr);
 599     _region_mark_stats[i].clear();
 600   }
 601 
 602   _root_regions.reset();
 603 }
 604 
 605 void G1ConcurrentMark::clear_statistics(G1HeapRegion* r) {
 606   uint region_idx = r->hrm_index();
 607   for (uint j = 0; j < _max_num_tasks; ++j) {
 608     _tasks[j]->clear_mark_stats_cache(region_idx);
 609   }
 610   _top_at_rebuild_starts[region_idx].store_relaxed(nullptr);
 611   _region_mark_stats[region_idx].clear();
 612 }
 613 
 614 void G1ConcurrentMark::humongous_object_eagerly_reclaimed(G1HeapRegion* r) {
 615   assert_at_safepoint();
 616   assert(r->is_starts_humongous(), "Got humongous continues region here");
 617 
 618   // Need to clear mark bit of the humongous object. Doing this unconditionally is fine.
 619   mark_bitmap()->clear(r->bottom());
 620 
 621   if (!_g1h->collector_state()->mark_or_rebuild_in_progress()) {
 622     return;
 623   }
 624 
 625   // Clear any statistics about the region gathered so far.
 626   _g1h->humongous_obj_regions_iterate(r,
 627                                       [&] (G1HeapRegion* r) {
 628                                         clear_statistics(r);
 629                                       });
 630 }
 631 
 632 void G1ConcurrentMark::reset_marking_for_restart() {
 633   _global_mark_stack.set_empty();
 634 
 635   // Expand the marking stack, if we have to and if we can.
 636   if (has_overflown()) {
 637     _global_mark_stack.expand();
 638 
 639     uint max_num_regions = _g1h->max_num_regions();
 640     for (uint i = 0; i < max_num_regions; i++) {
 641       _region_mark_stats[i].clear_during_overflow();
 642     }
 643   }
 644 
 645   clear_has_overflown();
 646   _finger.store_relaxed(_heap.start());
 647 
 648   for (uint i = 0; i < _max_num_tasks; ++i) {
 649     _tasks[i]->reset_for_restart();
 650   }
 651 }
 652 
 653 void G1ConcurrentMark::set_concurrency(uint active_tasks) {
 654   assert(active_tasks <= _max_num_tasks, "we should not have more");
 655 
 656   _num_active_tasks = active_tasks;
 657   // Need to update the three data structures below according to the
 658   // number of active threads for this phase.
 659   _terminator.reset_for_reuse(active_tasks);
 660   _first_overflow_barrier_sync.set_n_workers(active_tasks);
 661   _second_overflow_barrier_sync.set_n_workers(active_tasks);
 662 }
 663 
 664 void G1ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
 665   set_concurrency(active_tasks);
 666 
 667   _concurrent.store_relaxed(concurrent);
 668 
 669   if (!concurrent) {
 670     // At this point we should be in a STW phase, and completed marking.
 671     assert_at_safepoint_on_vm_thread();
 672     assert(out_of_regions(),
 673            "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
 674            p2i(finger()), p2i(_heap.end()));
 675   }
 676 }
 677 
 678 #if TASKQUEUE_STATS
 679 void G1ConcurrentMark::print_and_reset_taskqueue_stats() {
 680 
 681   _task_queues->print_and_reset_taskqueue_stats("G1ConcurrentMark Oop Queue");
 682 
 683   auto get_pa_stats = [&](uint i) {
 684     return _tasks[i]->partial_array_task_stats();
 685   };
 686 
 687   PartialArrayTaskStats::log_set(_max_num_tasks, get_pa_stats,
 688                                  "G1ConcurrentMark Partial Array Task Stats");
 689 
 690   for (uint i = 0; i < _max_num_tasks; ++i) {
 691     get_pa_stats(i)->reset();
 692   }
 693 }
 694 #endif
 695 
 696 void G1ConcurrentMark::reset_at_marking_complete() {
 697   TASKQUEUE_STATS_ONLY(print_and_reset_taskqueue_stats());
 698   // We set the global marking state to some default values when we're
 699   // not doing marking.
 700   reset_marking_for_restart();
 701   _num_active_tasks = 0;
 702 }
 703 
 704 G1ConcurrentMark::~G1ConcurrentMark() {
 705   FREE_C_HEAP_ARRAY(Atomic<HeapWord*>, _top_at_mark_starts);
 706   FREE_C_HEAP_ARRAY(Atomic<HeapWord*>, _top_at_rebuild_starts);
 707   FREE_C_HEAP_ARRAY(G1RegionMarkStats, _region_mark_stats);
 708   // The G1ConcurrentMark instance is never freed.
 709   ShouldNotReachHere();
 710 }
 711 
// Worker task that clears the mark bitmap of the heap regions, region by
// region and in chunks. When created as suspendible, workers join the
// suspendible thread set, perform yield checks between chunks and stop early
// once marking has been aborted.
class G1ClearBitMapTask : public WorkerTask {
public:
  // Size of one unit of clearing work.
  static size_t chunk_size() { return M; }

private:
  // Heap region closure used for clearing the _mark_bitmap.
  class G1ClearBitmapHRClosure : public G1HeapRegionClosure {
  private:
    G1ConcurrentMark* _cm;
    G1CMBitMap* _bitmap;
    bool _suspendible; // If suspendible, do yield checks.

    bool suspendible() {
      return _suspendible;
    }

    // True if this is a suspendible clear and the concurrent mark thread is
    // in an undo mark.
    bool is_clear_concurrent_undo() {
      return suspendible() && _cm->cm_thread()->in_undo_mark();
    }

    // For a suspendible clear, performs a yield check and returns whether
    // marking has been aborted. A non-suspendible clear never aborts.
    bool has_aborted() {
      if (suspendible()) {
        _cm->do_yield_check();
        return _cm->has_aborted();
      }
      return false;
    }

    // Returns the address up to which the bitmap of region r has to be
    // cleared, starting from the bottom of the region.
    HeapWord* region_clear_limit(G1HeapRegion* r) {
      // During a Concurrent Undo Mark cycle, the per region top_at_mark_start and
      // live_words data are current wrt to the _mark_bitmap. We use this information
      // to only clear ranges of the bitmap that require clearing.
      if (is_clear_concurrent_undo()) {
        // No need to clear bitmaps for empty regions (which includes regions we
        // did not mark through).
        if (!_cm->contains_live_object(r->hrm_index())) {
          assert(_bitmap->get_next_marked_addr(r->bottom(), r->end()) == r->end(), "Should not have marked bits");
          return r->bottom();
        }
        assert(_bitmap->get_next_marked_addr(_cm->top_at_mark_start(r), r->end()) == r->end(), "Should not have marked bits above tams");
      }
      return r->end();
    }

  public:
    G1ClearBitmapHRClosure(G1ConcurrentMark* cm, bool suspendible) :
      G1HeapRegionClosure(),
      _cm(cm),
      _bitmap(cm->mark_bitmap()),
      _suspendible(suspendible)
    { }

    // Clears the bitmap for region r from its bottom up to
    // region_clear_limit(r), one chunk at a time, and then resets the
    // region's top at mark start. Returns true if the clearing was aborted
    // (before or while working on this region), false if the region has been
    // completed.
    virtual bool do_heap_region(G1HeapRegion* r) {
      if (has_aborted()) {
        return true;
      }

      HeapWord* cur = r->bottom();
      HeapWord* const end = region_clear_limit(r);

      // chunk_size() is in bytes; cur advances in heap words.
      size_t const chunk_size_in_words = G1ClearBitMapTask::chunk_size() / HeapWordSize;

      while (cur < end) {

        // The last chunk of a region is cut off at end.
        MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
        _bitmap->clear_range(mr);

        cur += chunk_size_in_words;

        // Repeat the asserts from before the start of the closure. We will do them
        // as asserts here to minimize their overhead on the product. However, we
        // will have them as guarantees at the beginning / end of the bitmap
        // clearing to get some checking in the product.
        assert(!suspendible() || _cm->in_progress(), "invariant");
        assert(!suspendible() || !G1CollectedHeap::heap()->collector_state()->mark_or_rebuild_in_progress(), "invariant");

        // Abort iteration if necessary.
        if (has_aborted()) {
          return true;
        }
      }
      assert(cur >= end, "Must have completed iteration over the bitmap for region %u.", r->hrm_index());

      _cm->reset_top_at_mark_start(r);

      return false;
    }
  };

  G1ClearBitmapHRClosure _cl;
  G1HeapRegionClaimer _hr_claimer;
  bool _suspendible; // If the task is suspendible, workers must join the STS.

public:
  // cm is the concurrent mark instance whose bitmap is cleared, n_workers the
  // number of workers the region claimer is set up for.
  G1ClearBitMapTask(G1ConcurrentMark* cm, uint n_workers, bool suspendible) :
    WorkerTask("G1 Clear Bitmap"),
    _cl(cm, suspendible),
    _hr_claimer(n_workers),
    _suspendible(suspendible)
  { }

  // Iterates over the heap regions claimed through _hr_claimer, starting at
  // an offset depending on worker_id, applying the clearing closure.
  void work(uint worker_id) {
    SuspendibleThreadSetJoiner sts_join(_suspendible);
    G1CollectedHeap::heap()->heap_region_par_iterate_from_worker_offset(&_cl, &_hr_claimer, worker_id);
  }

  // Returns the completion state of the clearing closure.
  bool is_complete() {
    return _cl.is_complete();
  }
};
 822 
 823 void G1ConcurrentMark::clear_bitmap(WorkerThreads* workers, bool may_yield) {
 824   assert(may_yield || SafepointSynchronize::is_at_safepoint(), "Non-yielding bitmap clear only allowed at safepoint.");
 825 
 826   size_t const num_bytes_to_clear = (G1HeapRegion::GrainBytes * _g1h->num_committed_regions()) / G1CMBitMap::heap_map_factor();
 827   size_t const num_chunks = align_up(num_bytes_to_clear, G1ClearBitMapTask::chunk_size()) / G1ClearBitMapTask::chunk_size();
 828 
 829   uint const num_workers = (uint)MIN2(num_chunks, (size_t)workers->active_workers());
 830 
 831   G1ClearBitMapTask cl(this, num_workers, may_yield);
 832 
 833   log_debug(gc, ergo)("Running %s with %u workers for %zu work units.", cl.name(), num_workers, num_chunks);
 834   workers->run_task(&cl, num_workers);
 835   guarantee(may_yield || cl.is_complete(), "Must have completed iteration when not yielding.");
 836 }
 837 
 838 void G1ConcurrentMark::cleanup_for_next_mark() {
       // Prepares for the next marking cycle: clears the bitmap with the
       // concurrent workers (yielding allowed) and resets the partial array state
       // manager. The same invariants are checked before and after the work.
       // NOTE(review): the "should be initializd" messages below contain a typo;
       // left untouched here because the string is runtime output.
 839   // Make sure that the concurrent mark thread looks to still be in
 840   // the current cycle.
 841   guarantee(is_fully_initialized(), "should be initializd");
 842   guarantee(in_progress(), "invariant");
 843 
 844   // We are finishing up the current cycle by clearing the next
 845   // marking bitmap and getting it ready for the next cycle. During
 846   // this time no other cycle can start. So, let's make sure that this
 847   // is the case.
 848   guarantee(!_g1h->collector_state()->mark_or_rebuild_in_progress(), "invariant");
 849 
 850   clear_bitmap(_concurrent_workers, true);
 851 
 852   reset_partial_array_state_manager();
 853 
 854   // Repeat the asserts from above.
 855   guarantee(is_fully_initialized(), "should be initializd");
 856   guarantee(in_progress(), "invariant");
 857   guarantee(!_g1h->collector_state()->mark_or_rebuild_in_progress(), "invariant");
 858 }
 859 
 860 void G1ConcurrentMark::reset_partial_array_state_manager() {
       // Resets the partial array state manager. Every task (all _max_num_tasks
       // of them) first unregisters its partial array splitter and registers it
       // again after the reset.
       // NOTE(review): presumably the manager must have no registered splitters
       // while it is being reset - confirm against the manager's contract.
 861   for (uint i = 0; i < _max_num_tasks; ++i) {
 862     _tasks[i]->unregister_partial_array_splitter();
 863   }
 864 
 865   partial_array_state_manager()->reset();
 866 
 867   for (uint i = 0; i < _max_num_tasks; ++i) {
 868     _tasks[i]->register_partial_array_splitter();
 869   }
 870 }
 871 
 872 void G1ConcurrentMark::clear_bitmap(WorkerThreads* workers) {
       // Non-yielding bitmap clear; must be called at a safepoint on the VM
       // thread. Delegates to clear_bitmap(workers, false).
 873   assert_at_safepoint_on_vm_thread();
 874   // To avoid fragmentation the full collection requesting to clear the bitmap
 875   // might use fewer workers than available. To ensure the bitmap is cleared
 876   // as efficiently as possible the number of active workers are temporarily
 877   // increased to include all currently created workers.
 878   WithActiveWorkers update(workers, workers->created_workers());
 879   clear_bitmap(workers, false);
 880 }
 881 
 882 class G1PreConcurrentStartTask : public G1BatchedTask {
       // Batched task run by G1ConcurrentMark::pre_concurrent_start(). Its
       // constructor adds ResetMarkingStateTask as a serial subtask and
       // NoteStartOfMarkTask as a parallel subtask.
 883   // Reset marking state.
 884   class ResetMarkingStateTask;
 885   // For each region note start of marking.
 886   class NoteStartOfMarkTask;
 887 
 888 public:
 889   G1PreConcurrentStartTask(GCCause::Cause cause, G1ConcurrentMark* cm);
 890 };
 891 
 892 class G1PreConcurrentStartTask::ResetMarkingStateTask : public G1AbstractSubTask {
       // Subtask (phase G1GCPhaseTimes::ResetMarkingState) whose do_work() calls
       // G1ConcurrentMark::reset() on _cm.
 893   G1ConcurrentMark* _cm;
 894 public:
 895   ResetMarkingStateTask(G1ConcurrentMark* cm) : G1AbstractSubTask(G1GCPhaseTimes::ResetMarkingState), _cm(cm) { }
 896 
       // Fixed cost of 1.0, independent of the heap.
 897   double worker_cost() const override { return 1.0; }
 898   void do_work(uint worker_id) override;
 899 };
 900 
 901 class G1PreConcurrentStartTask::NoteStartOfMarkTask : public G1AbstractSubTask {
       // Subtask (phase G1GCPhaseTimes::NoteStartOfMark) that applies
       // NoteStartOfMarkHRClosure to the heap regions, using _claimer to hand
       // out regions to the workers.
 902   G1HeapRegionClaimer _claimer;
 903 public:
       // The claimer starts out with 0 workers; set_max_workers() supplies the
       // real worker count later.
 904   NoteStartOfMarkTask() : G1AbstractSubTask(G1GCPhaseTimes::NoteStartOfMark), _claimer(0) { }
 905 
 906   double worker_cost() const override {
 907     // The work done per region is very small, therefore we choose this magic number to cap the number
 908     // of threads used when there are few regions.
 909     const double regions_per_thread = 1000;
 910     return _claimer.n_regions() / regions_per_thread;
 911   }
 912 
 913   void set_max_workers(uint max_workers) override;
 914   void do_work(uint worker_id) override;
 915 };
 916 
 917 void G1PreConcurrentStartTask::ResetMarkingStateTask::do_work(uint worker_id) {
       // worker_id is not used: the reset is a single call on _cm.
 918   // Reset marking state.
 919   _cm->reset();
 920 }
 921 
 922 class NoteStartOfMarkHRClosure : public G1HeapRegionClosure {
       // Region closure that sets up top-at-mark-start (TAMS) for each region
       // visited. The G1ConcurrentMark instance is looked up from the heap.
 923   G1ConcurrentMark* _cm;
 924 
 925 public:
 926   NoteStartOfMarkHRClosure() : G1HeapRegionClosure(), _cm(G1CollectedHeap::heap()->concurrent_mark()) { }
 927 
 928   bool do_heap_region(G1HeapRegion* r) override {
         // TAMS is updated only for old or humongous regions that are neither
         // collection set candidates nor in the collection set; for every other
         // region it is reset.
 929     if (r->is_old_or_humongous() && !r->is_collection_set_candidate() && !r->in_collection_set()) {
 930       _cm->update_top_at_mark_start(r);
 931     } else {
 932       _cm->reset_top_at_mark_start(r);
 933     }
         // Always returns false.
 934     return false;
 935   }
 936 };
 937 
 938 void G1PreConcurrentStartTask::NoteStartOfMarkTask::do_work(uint worker_id) {
       // Each worker uses its own closure instance; regions are distributed
       // between workers through the shared _claimer.
 939   NoteStartOfMarkHRClosure start_cl;
 940   G1CollectedHeap::heap()->heap_region_par_iterate_from_worker_offset(&start_cl, &_claimer, worker_id);
 941 }
 942 
 943 void G1PreConcurrentStartTask::NoteStartOfMarkTask::set_max_workers(uint max_workers) {
       // Passes the worker count on to the region claimer, which the
       // constructor created with 0 workers.
 944   _claimer.set_n_workers(max_workers);
 945 }
 946 
 947 G1PreConcurrentStartTask::G1PreConcurrentStartTask(GCCause::Cause cause, G1ConcurrentMark* cm) :
     // Assembles the "Pre Concurrent Start" batch, timed through the heap's
     // phase times: resetting the marking state is added as a serial subtask,
     // noting the start of marking per region as a parallel subtask.
     // The cause parameter is not used by this constructor.
 948   G1BatchedTask("Pre Concurrent Start", G1CollectedHeap::heap()->phase_times()) {
 949   add_serial_task(new ResetMarkingStateTask(cm));
 950   add_parallel_task(new NoteStartOfMarkTask());
 951 }
 952 
 953 void G1ConcurrentMark::pre_concurrent_start(GCCause::Cause cause) {
       // Runs on the VM thread at a safepoint. Starts the code cache marking
       // cycle if it is not active, checks that no strong CLD claim marks are
       // left over, runs the G1PreConcurrentStartTask batch and records the GC
       // cause with the concurrent mark tracer.
 954   assert_at_safepoint_on_vm_thread();
 955 
 956   G1CollectedHeap::start_codecache_marking_cycle_if_inactive(true /* concurrent_mark_start */);
 957 
 958   ClassLoaderDataGraph::verify_claimed_marks_cleared(ClassLoaderData::_claim_strong);
 959 
 960   G1PreConcurrentStartTask cl(cause, this);
 961   G1CollectedHeap::heap()->run_batch_task(&cl);
 962 
 963   _gc_tracer_cm->set_gc_cause(cause);
 964 }
 965 
 966 
 967 void G1ConcurrentMark::post_concurrent_mark_start() {
       // Enables reference discovery for concurrent marking, activates the SATB
       // queues of all threads and prepares the root regions for scanning.
 968   // Start Concurrent Marking weak-reference discovery.
 969   ReferenceProcessor* rp = _g1h->ref_processor_cm();
 970   rp->start_discovery(false /* always_clear */);
 971 
 972   SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
 973   // This is the start of the marking cycle; we expect all
 974   // threads to have SATB queues with active set to false.
 975   satb_mq_set.set_active_all_threads(true, /* new active value */
 976                                      false /* expected_active */);
 977 
 978   _root_regions.prepare_for_scan();
 979 
 980   // update_g1_committed() will be called at the end of an evac pause
 981   // when marking is on. So, it's also called at the end of the
 982   // concurrent start pause to update the heap end, if the heap expands
 983   // during it. No need to call it here.
 984 }
 985 
 986 void G1ConcurrentMark::post_concurrent_undo_start() {
       // Counterpart of post_concurrent_mark_start() for an undo cycle: the only
       // action is to cancel the root region scan.
 987   root_regions()->cancel_scan();
 988 }
 989 
 990 /*
 991  * Notice that in the next two methods, we actually leave the STS
 992  * during the barrier sync and join it immediately afterwards. If we
 993  * do not do this, the following deadlock can occur: one thread could
 994  * be in the barrier sync code, waiting for the other thread to also
 995  * sync up, whereas another one could be trying to yield, while also
 996  * waiting for the other threads to sync up too.
 997  *
 998  * Note, however, that this code is also used during remark and in
 999  * this case we should not attempt to leave / enter the STS, otherwise
1000  * we'll either hit an assert (debug / fastdebug) or deadlock
1001  * (product). So we should only leave / enter the STS if we are
1002  * operating concurrently.
1003  *
1004  * Because the thread that does the sync barrier has left the STS, it
1005  * is possible for it to be suspended for a Full GC, or for an evacuation
1006  * pause to occur. This is actually safe, since entering the sync
1007  * barrier is one of the last things do_marking_step() does, and it
1008  * doesn't manipulate any data structures afterwards.
1009  */
1010 
1011 void G1ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
       // Enters the first overflow barrier. The STS leaver is constructed with
       // concurrent(), and is scoped to the barrier wait only. worker_id is not
       // used here.
1012   bool barrier_aborted;
1013   {
1014     SuspendibleThreadSetLeaver sts_leave(concurrent());
         // enter() returning false is treated as "barrier aborted".
1015     barrier_aborted = !_first_overflow_barrier_sync.enter();
1016   }
1017 
1018   // at this point everyone should have synced up and not be doing any
1019   // more work
1020 
       // NOTE(review): nothing follows this check, so both outcomes currently
       // return straight away; the branch only documents the intent.
1021   if (barrier_aborted) {
1022     // If the barrier aborted we ignore the overflow condition and
1023     // just abort the whole marking phase as quickly as possible.
1024     return;
1025   }
1026 }
1027 
1028 void G1ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
       // Enters the second overflow barrier; the STS leaver is constructed with
       // concurrent(). The result of enter() is ignored and worker_id is unused.
1029   SuspendibleThreadSetLeaver sts_leave(concurrent());
1030   _second_overflow_barrier_sync.enter();
1031 
1032   // at this point everything should be re-initialized and ready to go
1033 }
1034 
1035 class G1CMConcurrentMarkingTask : public WorkerTask {
       // Worker task ("Concurrent Mark") that drives the G1CMTask belonging to
       // each worker through repeated marking steps.
1036   G1ConcurrentMark*     _cm;
1037 
1038 public:
1039   void work(uint worker_id) {
1040     ResourceMark rm;
1041 
         // The worker is part of the STS for the whole duration of work().
1042     SuspendibleThreadSetJoiner sts_join;
1043 
1044     assert(worker_id < _cm->active_tasks(), "invariant");
1045 
1046     G1CMTask* task = _cm->task(worker_id);
1047     task->record_start_time();
1048     if (!_cm->has_aborted()) {
           // Keep stepping while this task's step aborted but marking as a whole
           // has not; a yield check runs after every step.
1049       do {
1050         task->do_marking_step(G1ConcMarkStepDurationMillis,
1051                               true  /* do_termination */,
1052                               false /* is_serial*/);
1053 
1054         _cm->do_yield_check();
1055       } while (!_cm->has_aborted() && task->has_aborted());
1056     }
1057     task->record_end_time();
         // A task may only be left in the aborted state if marking was aborted.
1058     guarantee(!task->has_aborted() || _cm->has_aborted(), "invariant");
1059   }
1060 
1061   G1CMConcurrentMarkingTask(G1ConcurrentMark* cm) :
1062       WorkerTask("Concurrent Mark"), _cm(cm) { }
1063 
1064   ~G1CMConcurrentMarkingTask() { }
1065 };
1066 
1067 uint G1ConcurrentMark::calc_active_marking_workers() {
       // Returns the number of workers to use for marking, in the range
       // [1, _max_concurrent_workers] (asserted below).
1068   uint result = 0;
       // With dynamic GC thread counts disabled, or ConcGCThreads set explicitly,
       // the maximum is used; otherwise WorkerPolicy picks the number.
1069   if (!UseDynamicNumberOfGCThreads || !FLAG_IS_DEFAULT(ConcGCThreads)) {
1070     result = _max_concurrent_workers;
1071   } else {
1072     result =
1073       WorkerPolicy::calc_default_active_workers(_max_concurrent_workers,
1074                                                 1, /* Minimum workers */
1075                                                 _num_concurrent_workers,
1076                                                 Threads::number_of_non_daemon_threads());
1077     // Don't scale the result down by scale_concurrent_workers() because
1078     // that scaling has already gone into "_max_concurrent_workers".
1079   }
1080   assert(result > 0 && result <= _max_concurrent_workers,
1081          "Calculated number of marking workers must be larger than zero and at most the maximum %u, but is %u",
1082          _max_concurrent_workers, result);
1083   return result;
1084 }
1085 
1086 void G1ConcurrentMark::scan_root_region(const MemRegion* region, uint worker_id) {
       // Applies G1RootRegionScanClosure to every object in
       // [region->start(), region->end()), walking the objects back to back.
1087 #ifdef ASSERT
       // Debug-only checks: the region holding the last word must be old or
       // have its TAMS at bottom, and the MemRegion must start at TAMS.
1088   HeapWord* last = region->last();
1089   G1HeapRegion* hr = _g1h->heap_region_containing(last);
1090   assert(hr->is_old() || top_at_mark_start(hr) == hr->bottom(),
1091          "Root regions must be old or survivor/eden but region %u is %s", hr->hrm_index(), hr->get_type_str());
1092   assert(top_at_mark_start(hr) == region->start(),
1093          "MemRegion start should be equal to TAMS");
1094 #endif
1095 
1096   G1RootRegionScanClosure cl(_g1h, this, worker_id);
1097 
1098   const uintx interval = PrefetchScanIntervalInBytes;
1099   HeapWord* curr = region->start();
1100   const HeapWord* end = region->end();
1101   while (curr < end) {
         // Prefetch ahead of the current position, then iterate over the object
         // at curr; the size returned is used to step to the next object.
1102     Prefetch::read(curr, interval);
1103     oop obj = cast_to_oop(curr);
1104     size_t size = obj->oop_iterate_size(&cl);
1105     assert(size == obj->size(), "sanity");
1106     curr += size;
1107   }
1108 }
1109 
1110 class G1CMRootRegionScanTask : public WorkerTask {
       // Worker task ("G1 Root Region Scan"): each worker keeps claiming root
       // regions and scanning them until none are left to claim.
1111   G1ConcurrentMark* _cm;
1112 public:
1113   G1CMRootRegionScanTask(G1ConcurrentMark* cm) :
1114     WorkerTask("G1 Root Region Scan"), _cm(cm) { }
1115 
1116   void work(uint worker_id) {
1117     G1CMRootMemRegions* root_regions = _cm->root_regions();
         // claim_next() returning nullptr ends this worker's loop.
1118     const MemRegion* region = root_regions->claim_next();
1119     while (region != nullptr) {
1120       _cm->scan_root_region(region, worker_id);
1121       region = root_regions->claim_next();
1122     }
1123   }
1124 };
1125 
1126 void G1ConcurrentMark::scan_root_regions() {
       // Scans all root regions with the concurrent workers and then marks the
       // scan as finished. Does nothing when no scan is in progress.
1127   // scan_in_progress() will have been set to true only if there was
1128   // at least one root region to scan. So, if it's false, we
1129   // should not attempt to do any further work.
1130   if (root_regions()->scan_in_progress()) {
1131     assert(!has_aborted(), "Aborting before root region scanning is finished not supported.");
1132 
1133     // Assign one worker to each root-region but subject to the max constraint.
1134     const uint num_workers = MIN2(root_regions()->num_root_regions(),
1135                                   _max_concurrent_workers);
1136 
1137     G1CMRootRegionScanTask task(this);
1138     log_debug(gc, ergo)("Running %s using %u workers for %u work units.",
1139                         task.name(), num_workers, root_regions()->num_root_regions());
1140     _concurrent_workers->run_task(&task, num_workers);
1141 
1142     // It's possible that has_aborted() is true here without actually
1143     // aborting the survivor scan earlier. This is OK as it's
1144     // mainly used for sanity checking.
1145     root_regions()->scan_finished();
1146   }
1147 }
1148 
1149 bool G1ConcurrentMark::wait_until_root_region_scan_finished() {
       // Thin forwarder; the meaning of the returned bool is defined by
       // G1CMRootMemRegions::wait_until_scan_finished().
1150   return root_regions()->wait_until_scan_finished();
1151 }
1152 
1153 void G1ConcurrentMark::add_root_region(G1HeapRegion* r) {
       // Adds the part of r between its top-at-mark-start and its current top
       // to the root regions.
1154   root_regions()->add(top_at_mark_start(r), r->top());
1155 }
1156 
1157 bool G1ConcurrentMark::is_root_region(G1HeapRegion* r) {
       // Queries the root regions with the same range add_root_region() would
       // register for r: from top-at-mark-start to the current top.
1158   return root_regions()->contains(MemRegion(top_at_mark_start(r), r->top()));
1159 }
1160 
1161 void G1ConcurrentMark::root_region_scan_abort_and_wait() {
       // Requests an abort of the root region scan, then waits until the scan
       // is reported finished. The result of the wait is ignored.
1162   root_regions()->abort();
1163   root_regions()->wait_until_scan_finished();
1164 }
1165 
1166 void G1ConcurrentMark::concurrent_cycle_start() {
       // Registers the start of the cycle with the concurrent mark timer and
       // tracer, then emits the heap-before-GC trace.
1167   _gc_timer_cm->register_gc_start();
1168 
1169   _gc_tracer_cm->report_gc_start(GCCause::_no_gc /* first parameter is not used */, _gc_timer_cm->gc_start());
1170 
1171   _g1h->trace_heap_before_gc(_gc_tracer_cm);
1172 }
1173 
1174 uint G1ConcurrentMark::completed_mark_cycles() const {
       // Relaxed load of the counter that concurrent_cycle_end() increments.
1175   return _completed_mark_cycles.load_relaxed();
1176 }
1177 
1178 void G1ConcurrentMark::concurrent_cycle_end(bool mark_cycle_completed) {
       // Ends the concurrent cycle: clears the "clear bitmap in progress" state,
       // emits the heap-after-GC trace, counts the cycle if it completed, reports
       // an abort if there was one, and closes the timer and tracer.
1179   _g1h->collector_state()->set_clear_bitmap_in_progress(false);
1180 
1181   _g1h->trace_heap_after_gc(_gc_tracer_cm);
1182 
1183   if (mark_cycle_completed) {
1184     _completed_mark_cycles.add_then_fetch(1u, memory_order_relaxed);
1185   }
1186 
1187   if (has_aborted()) {
1188     log_info(gc, marking)("Concurrent Mark Abort");
1189     _gc_tracer_cm->report_concurrent_mode_failure();
1190   }
1191 
1192   _gc_timer_cm->register_gc_end();
1193 
1194   _gc_tracer_cm->report_gc_end(_gc_timer_cm->gc_end(), _gc_timer_cm->time_partitions());
1195 }
1196 
1197 void G1ConcurrentMark::mark_from_roots() {
       // Runs the concurrent marking task: clears the restart-for-overflow flag,
       // picks and installs the worker count, sets concurrency and phase, runs
       // G1CMConcurrentMarkingTask and finally prints statistics.
1198   _restart_for_overflow.store_relaxed(false);
1199 
1200   uint active_workers = calc_active_marking_workers();
1201 
1202   // Setting active workers is not guaranteed since fewer
1203   // worker threads may currently exist and more may not be
1204   // available.
1205   active_workers = _concurrent_workers->set_active_workers(active_workers);
1206   log_info(gc, task)("Concurrent Mark Using %u of %u Workers", active_workers, _concurrent_workers->max_workers());
1207 
       // Remember the count actually granted; calc_active_marking_workers()
       // passes it to WorkerPolicy on its next call.
1208   _num_concurrent_workers = active_workers;
1209 
1210   // Parallel task terminator is set in "set_concurrency_and_phase()"
1211   set_concurrency_and_phase(active_workers, true /* concurrent */);
1212 
1213   G1CMConcurrentMarkingTask marking_task(this);
1214   _concurrent_workers->run_task(&marking_task);
1215   print_stats();
1216 }
1217 
1218 const char* G1ConcurrentMark::verify_location_string(VerifyLocation location) {
       // Maps a VerifyLocation to its display string by using the enumerator's
       // underlying value as an index into the table below. There is no bounds
       // check.
       // NOTE(review): the table order has to match the VerifyLocation
       // declaration, which is not visible here - confirm when changing either.
1219   static const char* location_strings[] = { "Remark Before",
1220                                             "Remark After",
1221                                             "Remark Overflow",
1222                                             "Cleanup Before",
1223                                             "Cleanup After" };
1224   return location_strings[static_cast<std::underlying_type_t<VerifyLocation>>(location)];
1225 }
1226 
1227 void G1ConcurrentMark::verify_during_pause(G1HeapVerifier::G1VerifyType type,
1228                                            VerifyLocation location) {
       // Verification hook for the Remark and Cleanup pauses. The optional
       // region set verification always runs; the heap verification runs only if
       // VerifyDuringGC is set and the verifier accepts the given type.
1229   G1HeapVerifier* verifier = _g1h->verifier();
1230 
1231   verifier->verify_region_sets_optional();
1232 
1233   const char* caller = verify_location_string(location);
1234 
1235   if (VerifyDuringGC && G1HeapVerifier::should_verify(type)) {
1236     GCTraceTime(Debug, gc, phases) debug(caller, _gc_timer_cm);
1237 
         // Build the "During GC (<location>)" message passed to the verifier.
1238     size_t const BufLen = 512;
1239     char buffer[BufLen];
1240 
1241     jio_snprintf(buffer, BufLen, "During GC (%s)", caller);
1242     verifier->verify(VerifyOption::G1UseConcMarking, buffer);
1243 
1244     // Only check bitmap in Remark, and not at After-Verification because the regions
1245     // already have their TAMS'es reset.
1246     if (location != VerifyLocation::RemarkAfter) {
1247       verifier->verify_bitmap_clear(true /* above_tams_only */);
1248     }
1249   }
1250 }
1251 
1252 class G1ObjectCountIsAliveClosure: public BoolObjectClosure {
       // Liveness predicate used by remark() when reporting object counts: an
       // object counts as alive unless the heap reports it dead.
1253   G1CollectedHeap* _g1h;
1254 public:
1255   G1ObjectCountIsAliveClosure(G1CollectedHeap* g1h) : _g1h(g1h) {}
1256 
1257   bool do_object_b(oop obj) {
1258     return !_g1h->is_obj_dead(obj);
1259   }
1260 };
1261 
1262 void G1ConcurrentMark::remark() {
       // Remark pause, executed on the VM thread at a safepoint. Finalizes
       // marking; if the mark stack did not overflow it goes on to process weak
       // references, optionally unload classes, deactivate the SATB queues,
       // select regions for rebuild and reset the marking state. On overflow it
       // flags a restart of concurrent marking instead. Timing statistics are
       // recorded on both paths.
1263   assert_at_safepoint_on_vm_thread();
1264 
1265   // If a full collection has happened, we should not continue. However we might
1266   // have ended up here as the Remark VM operation has been scheduled already.
1267   if (has_aborted()) {
1268     return;
1269   }
1270 
1271   G1Policy* policy = _g1h->policy();
1272   policy->record_pause_start_time();
1273 
1274   double start = os::elapsedTime();
1275 
1276   verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyLocation::RemarkBefore);
1277 
1278   {
1279     GCTraceTime(Debug, gc, phases) debug("Finalize Marking", _gc_timer_cm);
1280     finalize_marking();
1281   }
1282 
1283   double mark_work_end = os::elapsedTime();
1284 
       // Marking is only considered finished if finalize_marking() did not leave
       // the overflow flag set.
1285   bool const mark_finished = !has_overflown();
1286   if (mark_finished) {
1287     weak_refs_work();
1288 
1289     // Unload Klasses, String, Code Cache, etc.
1290     if (ClassUnloadingWithConcurrentMark) {
1291       G1CMIsAliveClosure is_alive(this);
1292       _g1h->unload_classes_and_code("Class Unloading", &is_alive, _gc_timer_cm);
1293     }
1294 
1295     SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
1296     // We're done with marking.
1297     // This is the end of the marking cycle; we expect all
1298     // threads to have SATB queues with active set to true.
1299     satb_mq_set.set_active_all_threads(false, /* new active value */
1300                                        true /* expected_active */);
1301 
1302     {
1303       GCTraceTime(Debug, gc, phases) debug("Flush Task Caches", _gc_timer_cm);
1304       flush_all_task_caches();
1305     }
1306 
1307     // All marking completed. Check bitmap now as we will start to reset TAMSes
1308     // in parallel below so that we can not do this in the After-Remark verification.
1309     _g1h->verifier()->verify_bitmap_clear(true /* above_tams_only */);
1310 
1311     {
1312       GCTraceTime(Debug, gc, phases) debug("Select For Rebuild and Reclaim Empty Regions", _gc_timer_cm);
1313 
           // The task is constructed for all active workers but run with at most
           // the number it considers desirable for the committed region count.
1314       G1UpdateRegionLivenessAndSelectForRebuildTask cl(_g1h, this, _g1h->workers()->active_workers());
1315       uint const num_workers = MIN2(G1UpdateRegionLivenessAndSelectForRebuildTask::desired_num_workers(_g1h->num_committed_regions()),
1316                                     _g1h->workers()->active_workers());
1317       log_debug(gc,ergo)("Running %s using %u workers for %u regions in heap", cl.name(), num_workers, _g1h->num_committed_regions());
1318       _g1h->workers()->run_task(&cl, num_workers);
1319 
1320       log_debug(gc, remset, tracking)("Remembered Set Tracking update regions total %u, selected %u",
1321                                         _g1h->num_committed_regions(), cl.total_selected_for_rebuild());
1322 
           // A rebuild is needed as soon as at least one region was selected.
1323       _needs_remembered_set_rebuild = (cl.total_selected_for_rebuild() > 0);
1324 
1325       if (_needs_remembered_set_rebuild) {
1326         // Prune rebuild candidates based on G1HeapWastePercent.
1327         // Improves rebuild time in addition to remembered set memory usage.
1328         G1CollectionSetChooser::build(_g1h->workers(), _g1h->num_committed_regions(), _g1h->policy()->candidates());
1329       }
1330     }
1331 
1332     if (log_is_enabled(Trace, gc, liveness)) {
1333       G1PrintRegionLivenessInfoClosure cl("Post-Marking");
1334       _g1h->heap_region_iterate(&cl);
1335     }
1336 
1337     // Potentially, some empty-regions have been reclaimed; make this a
1338     // "collection" so that pending allocation can retry before attempting a
1339     // GC pause.
1340     _g1h->increment_total_collections();
1341 
1342     // For Remark Pauses that may have been triggered by PeriodicGCs, we maintain
1343     // resizing based on MinHeapFreeRatio or MaxHeapFreeRatio. If a PeriodicGC is
1344     // triggered, it likely means there are very few regular GCs, making resizing
1345     // based on gc heuristics less effective.
1346     if (_g1h->last_gc_was_periodic()) {
1347       _g1h->resize_heap_after_full_collection(0 /* allocation_word_size */);
1348     }
1349 
1350     compute_new_sizes();
1351 
1352     verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyLocation::RemarkAfter);
1353 
1354     assert(!restart_for_overflow(), "sanity");
1355     // Completely reset the marking state (except bitmaps) since marking completed.
1356     reset_at_marking_complete();
1357 
1358     G1CollectedHeap::finish_codecache_marking_cycle();
1359 
1360     {
1361       GCTraceTime(Debug, gc, phases) debug("Report Object Count", _gc_timer_cm);
1362       G1ObjectCountIsAliveClosure is_alive(_g1h);
1363       _gc_tracer_cm->report_object_count_after_gc(&is_alive, _g1h->workers());
1364     }
1365   } else {
1366     // We overflowed.  Restart concurrent marking.
1367     _restart_for_overflow.store_relaxed(true);
1368 
1369     verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyLocation::RemarkOverflow);
1370 
1371     // Clear the marking state because we will be restarting
1372     // marking due to overflowing the global mark stack.
1373     reset_marking_for_restart();
1374   }
1375 
1376   // Statistics
       // Times are in milliseconds. The "weak ref" figure covers everything
       // between the end of finalize_marking() and this point, on either branch.
1377   double now = os::elapsedTime();
1378   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1379   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1380   _remark_times.add((now - start) * 1000.0);
1381 
1382   _g1h->update_perf_counter_cpu_time();
1383 
1384   policy->record_concurrent_mark_remark_end();
1385 }
1386 
1387 void G1ConcurrentMark::compute_new_sizes() {
       // Refreshes size bookkeeping: metaspace sizing, the heap's capacity and
       // used values, and the monitoring support's sizes. Called from remark().
1388   MetaspaceGC::compute_new_size();
1389 
1390   // Cleanup will have freed any regions completely full of garbage.
1391   // Update the soft reference policy with the new heap occupancy.
1392   Universe::heap()->update_capacity_and_used_at_gc();
1393 
1394   // We reclaimed old regions so we should calculate the sizes to make
1395   // sure we update the old gen/space data.
1396   _g1h->monitoring_support()->update_sizes();
1397 }
1398 
1399 class G1UpdateRegionsAfterRebuild : public G1HeapRegionClosure {
       // Region closure used by cleanup(): passes every visited region to the
       // policy's remembered set tracker via update_after_rebuild().
1400   G1CollectedHeap* _g1h;
1401 
1402 public:
1403   G1UpdateRegionsAfterRebuild(G1CollectedHeap* g1h) : _g1h(g1h) { }
1404 
1405   bool do_heap_region(G1HeapRegion* r) override {
1406     // Update the remset tracking state from updating to complete
1407     // if remembered sets have been rebuilt.
1408     _g1h->policy()->remset_tracker()->update_after_rebuild(r);
         // Always returns false.
1409     return false;
1410   }
1411 };
1412 
1413 void G1ConcurrentMark::cleanup() {
       // Cleanup pause, executed on the VM thread at a safepoint. Updates the
       // remembered set tracking state of all regions if a rebuild was needed,
       // with verification before and after, then reports the end of cleanup to
       // the policy.
1414   assert_at_safepoint_on_vm_thread();
1415 
1416   // If a full collection has happened, we shouldn't do this.
1417   if (has_aborted()) {
1418     return;
1419   }
1420 
1421   G1Policy* policy = _g1h->policy();
1422   policy->record_pause_start_time();
1423 
1424   double start = os::elapsedTime();
1425 
1426   verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyLocation::CleanupBefore);
1427 
1428   if (needs_remembered_set_rebuild()) {
1429     // Update the remset tracking information as well as marking all regions
1430     // as fully parsable.
1431     GCTraceTime(Debug, gc, phases) debug("Update Remembered Set Tracking After Rebuild", _gc_timer_cm);
1432     G1UpdateRegionsAfterRebuild cl(_g1h);
1433     _g1h->heap_region_iterate(&cl);
1434   } else {
1435     log_debug(gc, phases)("No Remembered Sets to update after rebuild");
1436   }
1437 
1438   verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyLocation::CleanupAfter);
1439 
1440   // Local statistics
       // Recorded in milliseconds, before the policy is notified below.
1441   _cleanup_times.add((os::elapsedTime() - start) * 1000.0);
1442 
1443   {
1444     GCTraceTime(Debug, gc, phases) debug("Finalize Concurrent Mark Cleanup", _gc_timer_cm);
1445     policy->record_concurrent_mark_cleanup_end(needs_remembered_set_rebuild());
1446   }
1447 }
1448 
1449 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
1450 // Uses the G1CMTask associated with a worker thread (for serial reference
1451 // processing the G1CMTask for worker 0 is used) to preserve (mark) and
1452 // trace referent objects.
1453 //
1454 // Using the G1CMTask and embedded local queues avoids having the worker
1455 // threads operating on the global mark stack. This reduces the risk
1456 // of overflowing the stack - which we would rather avoid at this late
1457 // state. Also using the tasks' local queues removes the potential
1458 // of the workers interfering with each other that could occur if
1459 // operating on the global stack.
1460 
1461 class G1CMKeepAliveAndDrainClosure : public OopClosure {
       // Hands each visited reference to the G1CMTask and, after every
       // _ref_counter_limit references that needed handling, runs marking steps
       // to process what has been pushed so far.
1462   G1ConcurrentMark* _cm;
1463   G1CMTask*         _task;
       // Number of handled references between two drains, and the countdown
       // towards the next drain.
1464   uint              _ref_counter_limit;
1465   uint              _ref_counter;
1466   bool              _is_serial;
1467 public:
1468   G1CMKeepAliveAndDrainClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
1469     _cm(cm), _task(task), _ref_counter_limit(G1RefProcDrainInterval),
1470     _ref_counter(_ref_counter_limit), _is_serial(is_serial) {
1471     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1472   }
1473 
1474   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1475   virtual void do_oop(      oop* p) { do_oop_work(p); }
1476 
1477   template <class T> void do_oop_work(T* p) {
         // Once the global overflow flag is set, all further references are
         // skipped.
1478     if (_cm->has_overflown()) {
1479       return;
1480     }
1481     if (!_task->deal_with_reference(p)) {
1482       // We did not add anything to the mark bitmap (or mark stack), so there is
1483       // no point trying to drain it.
1484       return;
1485     }
1486     _ref_counter--;
1487 
1488     if (_ref_counter == 0) {
1489       // We have dealt with _ref_counter_limit references, pushing them
1490       // and objects reachable from them on to the local stack (and
1491       // possibly the global stack). Call G1CMTask::do_marking_step() to
1492       // process these entries.
1493       //
1494       // We call G1CMTask::do_marking_step() in a loop, which we'll exit if
1495       // there's nothing more to do (i.e. we're done with the entries that
1496       // were pushed as a result of the G1CMTask::deal_with_reference() calls
1497       // above) or we overflow.
1498       //
1499       // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
1500       // flag while there may still be some work to do. (See the comment at
1501       // the beginning of G1CMTask::do_marking_step() for those conditions -
1502       // one of which is reaching the specified time target.) It is only
1503       // when G1CMTask::do_marking_step() returns without setting the
1504       // has_aborted() flag that the marking step has completed.
1505       do {
1506         double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1507         _task->do_marking_step(mark_step_duration_ms,
1508                                false      /* do_termination */,
1509                                _is_serial);
1510       } while (_task->has_aborted() && !_cm->has_overflown());
           // Start counting towards the next drain.
1511       _ref_counter = _ref_counter_limit;
1512     }
1513   }
1514 };
1515 
1516 // 'Drain' oop closure used by both serial and parallel reference processing.
1517 // Uses the G1CMTask associated with a given worker thread (for serial
1518 // reference processing the G1CMTask for worker 0 is used). Calls the
1519 // do_marking_step routine, with an unbelievably large timeout value,
1520 // to drain the marking data structures of the remaining entries
1521 // added by the 'keep alive' oop closure above.
1522 
1523 class G1CMDrainMarkingStackClosure : public VoidClosure {
       // do_void() repeats marking steps on _task, with termination enabled and
       // a very large time target, until a step completes without the task
       // aborting or the global overflow flag is set.
1524   G1ConcurrentMark* _cm;
1525   G1CMTask*         _task;
1526   bool              _is_serial;
1527  public:
1528   G1CMDrainMarkingStackClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
1529     _cm(cm), _task(task), _is_serial(is_serial) {
1530     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1531   }
1532 
1533   void do_void() {
1534     do {
1535       // We call G1CMTask::do_marking_step() to completely drain the local
1536       // and global marking stacks of entries pushed by the 'keep alive'
1537       // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
1538       //
1539       // G1CMTask::do_marking_step() is called in a loop, which we'll exit
1540       // if there's nothing more to do (i.e. we've completely drained the
1541       // entries that were pushed as a result of applying the 'keep alive'
1542       // closure to the entries on the discovered ref lists) or we overflow
1543       // the global marking stack.
1544       //
1545       // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
1546       // flag while there may still be some work to do. (See the comment at
1547       // the beginning of G1CMTask::do_marking_step() for those conditions -
1548       // one of which is reaching the specified time target.) It is only
1549       // when G1CMTask::do_marking_step() returns without setting the
1550       // has_aborted() flag that the marking step has completed.
1551 
1552       _task->do_marking_step(1000000000.0 /* something very large */,
1553                              true         /* do_termination */,
1554                              _is_serial);
1555     } while (_task->has_aborted() && !_cm->has_overflown());
1556   }
1557 };
1558 
1559 class G1CMRefProcProxyTask : public RefProcProxyTask {
       // Proxy task handed to the reference processor by weak_refs_work(). It
       // supplies the is-alive, keep-alive, enqueue and drain closures for each
       // worker.
1560   G1CollectedHeap& _g1h;
1561   G1ConcurrentMark& _cm;
1562 
1563 public:
1564   G1CMRefProcProxyTask(uint max_workers, G1CollectedHeap& g1h, G1ConcurrentMark &cm)
1565     : RefProcProxyTask("G1CMRefProcProxyTask", max_workers),
1566       _g1h(g1h),
1567       _cm(cm) {}
1568 
1569   void work(uint worker_id) override {
1570     assert(worker_id < _max_workers, "sanity");
1571     G1CMIsAliveClosure is_alive(&_cm);
         // In the single-threaded model always use task 0 and mark the closures
         // as serial; otherwise each worker uses the task matching its id.
1572     uint index = (_tm == RefProcThreadModel::Single) ? 0 : worker_id;
1573     G1CMKeepAliveAndDrainClosure keep_alive(&_cm, _cm.task(index), _tm == RefProcThreadModel::Single);
1574     BarrierEnqueueDiscoveredFieldClosure enqueue;
1575     G1CMDrainMarkingStackClosure complete_gc(&_cm, _cm.task(index), _tm == RefProcThreadModel::Single);
1576     _rp_task->rp_work(worker_id, &is_alive, &keep_alive, &enqueue, &complete_gc);
1577   }
1578 
1579   void prepare_run_task_hook() override {
1580     // We need to reset the concurrency level before each
1581     // proxy task execution, so that the termination protocol
1582     // and overflow handling in G1CMTask::do_marking_step() knows
1583     // how many workers to wait for.
1584     _cm.set_concurrency(_queue_count);
1585   }
1586 };
1587 
1588 void G1ConcurrentMark::weak_refs_work() {
       // Called from remark() once marking has finished: processes the
       // discovered references, aborts the VM if that overflowed the mark stack,
       // and then runs weak processing.
1589   ResourceMark rm;
1590 
1591   {
1592     GCTraceTime(Debug, gc, phases) debug("Reference Processing", _gc_timer_cm);
1593 
1594     ReferenceProcessor* rp = _g1h->ref_processor_cm();
1595 
1596     // See the comment in G1CollectedHeap::ref_processing_init()
1597     // about how reference processing currently works in G1.
1598 
1599     assert(_global_mark_stack.is_empty(), "mark stack should be empty");
1600 
1601     // Prefer to grow the stack until the max capacity.
1602     _global_mark_stack.set_should_grow();
1603 
1604     // Parallel processing task executor.
1605     G1CMRefProcProxyTask task(rp->max_num_queues(), *_g1h, *this);
1606     ReferenceProcessorPhaseTimes pt(_gc_timer_cm, rp->max_num_queues());
1607 
1608     // Process the weak references.
1609     const ReferenceProcessorStats& stats = rp->process_discovered_references(task, _g1h->workers(), pt);
1610     _gc_tracer_cm->report_gc_reference_stats(stats);
1611     pt.print_all_references();
1612 
1613     // The do_oop work routines of the keep_alive and drain_marking_stack
1614     // oop closures will set the has_overflown flag if we overflow the
1615     // global marking stack.
1616 
1617     assert(has_overflown() || _global_mark_stack.is_empty(),
1618            "Mark stack should be empty (unless it has overflown)");
1619   }
1620 
1621   if (has_overflown()) {
1622     // We can not trust g1_is_alive and the contents of the heap if the marking stack
1623     // overflowed while processing references. Exit the VM.
1624     fatal("Overflow during reference processing, can not continue. Current mark stack depth: "
1625           "%zu, MarkStackSize: %zu, MarkStackSizeMax: %zu. "
1626           "Please increase MarkStackSize and/or MarkStackSizeMax and restart.",
1627           _global_mark_stack.size(), MarkStackSize, MarkStackSizeMax);
         // NOTE(review): presumably unreachable, as fatal() is expected not to
         // return - confirm.
1628     return;
1629   }
1630 
1631   assert(_global_mark_stack.is_empty(), "Marking should have completed");
1632 
1633   {
1634     GCTraceTime(Debug, gc, phases) debug("Weak Processing", _gc_timer_cm);
1635     G1CMIsAliveClosure is_alive(this);
1636     WeakProcessor::weak_oops_do(_g1h->workers(), &is_alive, &do_nothing_cl, 1);
1637   }
1638 }
1639 
1640 class G1PrecleanYieldClosure : public YieldClosure {
1641   G1ConcurrentMark* _cm;
1642 
1643 public:
1644   G1PrecleanYieldClosure(G1ConcurrentMark* cm) : _cm(cm) { }
1645 
1646   virtual bool should_return() {
1647     return _cm->has_aborted();
1648   }
1649 
1650   virtual bool should_return_fine_grain() {
1651     _cm->do_yield_check();
1652     return _cm->has_aborted();
1653   }
1654 };
1655 
1656 void G1ConcurrentMark::preclean() {
1657   assert(G1UseReferencePrecleaning, "Precleaning must be enabled.");
1658 
1659   SuspendibleThreadSetJoiner joiner;
1660 
1661   BarrierEnqueueDiscoveredFieldClosure enqueue;
1662 
1663   set_concurrency_and_phase(1, true);
1664 
1665   G1PrecleanYieldClosure yield_cl(this);
1666 
1667   ReferenceProcessor* rp = _g1h->ref_processor_cm();
1668   // Precleaning is single threaded. Temporarily disable MT discovery.
1669   ReferenceProcessorMTDiscoveryMutator rp_mut_discovery(rp, false);
1670   rp->preclean_discovered_references(rp->is_alive_non_header(),
1671                                      &enqueue,
1672                                      &yield_cl,
1673                                      _gc_timer_cm);
1674 }
1675 
1676 // Closure for marking entries in SATB buffers.
1677 class G1CMSATBBufferClosure : public SATBBufferClosure {
1678 private:
1679   G1CMTask* _task;
1680   G1CollectedHeap* _g1h;
1681 
1682   // This is very similar to G1CMTask::deal_with_reference, but with
1683   // more relaxed requirements for the argument, so this must be more
1684   // circumspect about treating the argument as an object.
1685   void do_entry(void* entry) const {
1686     _task->increment_refs_reached();
1687     oop const obj = cast_to_oop(entry);
1688     _task->make_reference_grey(obj);
1689   }
1690 
1691 public:
1692   G1CMSATBBufferClosure(G1CMTask* task, G1CollectedHeap* g1h)
1693     : _task(task), _g1h(g1h) { }
1694 
1695   virtual void do_buffer(void** buffer, size_t size) {
1696     for (size_t i = 0; i < size; ++i) {
1697       do_entry(buffer[i]);
1698     }
1699   }
1700 };
1701 
1702 class G1RemarkThreadsClosure : public ThreadClosure {
1703   G1SATBMarkQueueSet& _qset;
1704 
1705  public:
1706   G1RemarkThreadsClosure(G1CollectedHeap* g1h, G1CMTask* task) :
1707     _qset(G1BarrierSet::satb_mark_queue_set()) {}
1708 
1709   void do_thread(Thread* thread) {
1710     // Transfer any partial buffer to the qset for completed buffer processing.
1711     _qset.flush_queue(G1ThreadLocalData::satb_mark_queue(thread));
1712   }
1713 };
1714 
1715 class G1CMRemarkTask : public WorkerTask {
1716   // For Threads::possibly_parallel_threads_do
1717   ThreadsClaimTokenScope _threads_claim_token_scope;
1718   G1ConcurrentMark* _cm;
1719 public:
1720   void work(uint worker_id) {
1721     G1CMTask* task = _cm->task(worker_id);
1722     task->record_start_time();
1723     {
1724       ResourceMark rm;
1725 
1726       G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
1727       Threads::possibly_parallel_threads_do(true /* is_par */, &threads_f);
1728     }
1729 
1730     do {
1731       task->do_marking_step(1000000000.0 /* something very large */,
1732                             true         /* do_termination       */,
1733                             false        /* is_serial            */);
1734     } while (task->has_aborted() && !_cm->has_overflown());
1735     // If we overflow, then we do not want to restart. We instead
1736     // want to abort remark and do concurrent marking again.
1737     task->record_end_time();
1738   }
1739 
1740   G1CMRemarkTask(G1ConcurrentMark* cm, uint active_workers) :
1741     WorkerTask("Par Remark"), _threads_claim_token_scope(), _cm(cm) {
1742     _cm->terminator()->reset_for_reuse(active_workers);
1743   }
1744 };
1745 
1746 void G1ConcurrentMark::finalize_marking() {
1747   ResourceMark rm;
1748 
1749   _g1h->ensure_parsability(false);
1750 
1751   // this is remark, so we'll use up all active threads
1752   uint active_workers = _g1h->workers()->active_workers();
1753   set_concurrency_and_phase(active_workers, false /* concurrent */);
1754   // Leave _parallel_marking_threads at it's
1755   // value originally calculated in the G1ConcurrentMark
1756   // constructor and pass values of the active workers
1757   // through the task.
1758 
1759   {
1760     G1CMRemarkTask remarkTask(this, active_workers);
1761     // We will start all available threads, even if we decide that the
1762     // active_workers will be fewer. The extra ones will just bail out
1763     // immediately.
1764     _g1h->workers()->run_task(&remarkTask);
1765   }
1766 
1767   SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
1768   guarantee(has_overflown() ||
1769             satb_mq_set.completed_buffers_num() == 0,
1770             "Invariant: has_overflown = %s, num buffers = %zu",
1771             BOOL_TO_STR(has_overflown()),
1772             satb_mq_set.completed_buffers_num());
1773 
1774   print_stats();
1775 }
1776 
1777 void G1ConcurrentMark::flush_all_task_caches() {
1778   size_t hits = 0;
1779   size_t misses = 0;
1780   for (uint i = 0; i < _max_num_tasks; i++) {
1781     Pair<size_t, size_t> stats = _tasks[i]->flush_mark_stats_cache();
1782     hits += stats.first;
1783     misses += stats.second;
1784   }
1785   size_t sum = hits + misses;
1786   log_debug(gc, stats)("Mark stats cache hits %zu misses %zu ratio %1.3lf",
1787                        hits, misses, percent_of(hits, sum));
1788 }
1789 
1790 void G1ConcurrentMark::clear_bitmap_for_region(G1HeapRegion* hr) {
1791   assert_at_safepoint();
1792   _mark_bitmap.clear_range(MemRegion(hr->bottom(), hr->end()));
1793 }
1794 
1795 G1HeapRegion* G1ConcurrentMark::claim_region(uint worker_id) {
1796   // "Checkpoint" the finger.
1797   HeapWord* local_finger = finger();
1798 
1799   while (local_finger < _heap.end()) {
1800     assert(_g1h->is_in_reserved(local_finger), "invariant");
1801 
1802     G1HeapRegion* curr_region = _g1h->heap_region_containing_or_null(local_finger);
1803     // Make sure that the reads below do not float before loading curr_region.
1804     OrderAccess::loadload();
1805     // Above heap_region_containing may return null as we always scan claim
1806     // until the end of the heap. In this case, just jump to the next region.
1807     HeapWord* end = curr_region != nullptr ? curr_region->end() : local_finger + G1HeapRegion::GrainWords;
1808 
1809     // Is the gap between reading the finger and doing the CAS too long?
1810     HeapWord* res = _finger.compare_exchange(local_finger, end);
1811     if (res == local_finger && curr_region != nullptr) {
1812       // We succeeded.
1813       HeapWord* bottom = curr_region->bottom();
1814       HeapWord* limit = top_at_mark_start(curr_region);
1815 
1816       log_trace(gc, marking)("Claim region %u bottom " PTR_FORMAT " tams " PTR_FORMAT, curr_region->hrm_index(), p2i(curr_region->bottom()), p2i(top_at_mark_start(curr_region)));
1817       // Notice that _finger == end cannot be guaranteed here since,
1818       // someone else might have moved the finger even further.
1819       assert(finger() >= end, "The finger should have moved forward");
1820 
1821       if (limit > bottom) {
1822         return curr_region;
1823       } else {
1824         assert(limit == bottom,
1825                "The region limit should be at bottom");
1826         // We return null and the caller should try calling
1827         // claim_region() again.
1828         return nullptr;
1829       }
1830     } else {
1831       // Read the finger again.
1832       HeapWord* next_finger = finger();
1833       assert(next_finger > local_finger, "The finger should have moved forward " PTR_FORMAT " " PTR_FORMAT, p2i(local_finger), p2i(next_finger));
1834       local_finger = next_finger;
1835     }
1836   }
1837 
1838   return nullptr;
1839 }
1840 
1841 #ifndef PRODUCT
1842 class VerifyNoCSetOops {
1843   G1CollectedHeap* _g1h;
1844   const char* _phase;
1845   int _info;
1846 
1847 public:
1848   VerifyNoCSetOops(const char* phase, int info = -1) :
1849     _g1h(G1CollectedHeap::heap()),
1850     _phase(phase),
1851     _info(info)
1852   { }
1853 
1854   void operator()(G1TaskQueueEntry task_entry) const {
1855     if (task_entry.is_partial_array_state()) {
1856       oop obj = task_entry.to_partial_array_state()->source();
1857       guarantee(_g1h->is_in_reserved(obj), "Partial Array " PTR_FORMAT " must be in heap.", p2i(obj));
1858       return;
1859     }
1860     guarantee(oopDesc::is_oop(task_entry.to_oop()),
1861               "Non-oop " PTR_FORMAT ", phase: %s, info: %d",
1862               p2i(task_entry.to_oop()), _phase, _info);
1863     G1HeapRegion* r = _g1h->heap_region_containing(task_entry.to_oop());
1864     guarantee(!(r->in_collection_set() || r->has_index_in_opt_cset()),
1865               "obj " PTR_FORMAT " from %s (%d) in region %u in (optional) collection set",
1866               p2i(task_entry.to_oop()), _phase, _info, r->hrm_index());
1867   }
1868 };
1869 
1870 void G1ConcurrentMark::verify_no_collection_set_oops() {
1871   assert(SafepointSynchronize::is_at_safepoint() || !is_init_completed(),
1872          "should be at a safepoint or initializing");
1873   if (!_g1h->collector_state()->mark_or_rebuild_in_progress()) {
1874     return;
1875   }
1876 
1877   // Verify entries on the global mark stack
1878   _global_mark_stack.iterate(VerifyNoCSetOops("Stack"));
1879 
1880   // Verify entries on the task queues
1881   for (uint i = 0; i < _max_num_tasks; ++i) {
1882     G1CMTaskQueue* queue = _task_queues->queue(i);
1883     queue->iterate(VerifyNoCSetOops("Queue", i));
1884   }
1885 
1886   // Verify the global finger
1887   HeapWord* global_finger = finger();
1888   if (global_finger != nullptr && global_finger < _heap.end()) {
1889     // Since we always iterate over all regions, we might get a null G1HeapRegion
1890     // here.
1891     G1HeapRegion* global_hr = _g1h->heap_region_containing_or_null(global_finger);
1892     guarantee(global_hr == nullptr || global_finger == global_hr->bottom(),
1893               "global finger: " PTR_FORMAT " region: " HR_FORMAT,
1894               p2i(global_finger), HR_FORMAT_PARAMS(global_hr));
1895   }
1896 
1897   // Verify the task fingers
1898   assert(_num_concurrent_workers <= _max_num_tasks, "sanity");
1899   for (uint i = 0; i < _num_concurrent_workers; ++i) {
1900     G1CMTask* task = _tasks[i];
1901     HeapWord* task_finger = task->finger();
1902     if (task_finger != nullptr && task_finger < _heap.end()) {
1903       // See above note on the global finger verification.
1904       G1HeapRegion* r = _g1h->heap_region_containing_or_null(task_finger);
1905       guarantee(r == nullptr || task_finger == r->bottom() ||
1906                 !r->in_collection_set() || !r->has_index_in_opt_cset(),
1907                 "task finger: " PTR_FORMAT " region: " HR_FORMAT,
1908                 p2i(task_finger), HR_FORMAT_PARAMS(r));
1909     }
1910   }
1911 }
1912 #endif // PRODUCT
1913 
1914 void G1ConcurrentMark::rebuild_and_scrub() {
1915   if (!needs_remembered_set_rebuild()) {
1916     log_debug(gc, marking)("Skipping Remembered Set Rebuild. No regions selected for rebuild, will only scrub");
1917   }
1918 
1919   G1ConcurrentRebuildAndScrub::rebuild_and_scrub(this, needs_remembered_set_rebuild(), _concurrent_workers);
1920 }
1921 
1922 void G1ConcurrentMark::print_stats() {
1923   if (!log_is_enabled(Debug, gc, stats)) {
1924     return;
1925   }
1926   log_debug(gc, stats)("---------------------------------------------------------------------");
1927   for (size_t i = 0; i < _num_active_tasks; ++i) {
1928     _tasks[i]->print_stats();
1929     log_debug(gc, stats)("---------------------------------------------------------------------");
1930   }
1931 }
1932 
1933 bool G1ConcurrentMark::concurrent_cycle_abort() {
1934   // If we start the compaction before the CM threads finish
1935   // scanning the root regions we might trip them over as we'll
1936   // be moving objects / updating references. So let's wait until
1937   // they are done. By telling them to abort, they should complete
1938   // early.
1939   root_region_scan_abort_and_wait();
1940 
1941   // We haven't started a concurrent cycle no need to do anything; we might have
1942   // aborted the marking because of shutting down though. In this case the marking
1943   // might have already completed the abort (leading to in_progress() below to
1944   // return false), however this still left marking state particularly in the
1945   // shared marking bitmap that must be cleaned up.
1946   // If there are multiple full gcs during shutdown we do this work repeatedly for
1947   // nothing, but this situation should be extremely rare (a full gc after shutdown
1948   // has been signalled is already rare), and this work should be negligible compared
1949   // to actual full gc work.
1950 
1951   if (!is_fully_initialized() || (!cm_thread()->in_progress() && !_g1h->concurrent_mark_is_terminating())) {
1952     return false;
1953   }
1954 
1955   reset_marking_for_restart();
1956 
1957   abort_marking_threads();
1958 
1959   SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
1960   satb_mq_set.abandon_partial_marking();
1961   // This can be called either during or outside marking, we'll read
1962   // the expected_active value from the SATB queue set.
1963   satb_mq_set.set_active_all_threads(false, /* new active value */
1964                                      satb_mq_set.is_active() /* expected_active */);
1965   return true;
1966 }
1967 
1968 void G1ConcurrentMark::abort_marking_threads() {
1969   assert(!_root_regions.scan_in_progress(), "still doing root region scan");
1970   _has_aborted.store_relaxed(true);
1971   _first_overflow_barrier_sync.abort();
1972   _second_overflow_barrier_sync.abort();
1973 }
1974 
1975 double G1ConcurrentMark::worker_threads_cpu_time_s() {
1976   class CountCpuTimeThreadClosure : public ThreadClosure {
1977   public:
1978     jlong _total_cpu_time;
1979 
1980     CountCpuTimeThreadClosure() : ThreadClosure(), _total_cpu_time(0) { }
1981 
1982     void do_thread(Thread* t) {
1983       _total_cpu_time += os::thread_cpu_time(t);
1984     }
1985   } cl;
1986 
1987   threads_do(&cl);
1988 
1989   return (double)cl._total_cpu_time / NANOSECS_PER_SEC;
1990 }
1991 
1992 static void print_ms_time_info(const char* prefix, const char* name,
1993                                NumberSeq& ns) {
1994   log_trace(gc, marking)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
1995                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
1996   if (ns.num() > 0) {
1997     log_trace(gc, marking)("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
1998                            prefix, ns.sd(), ns.maximum());
1999   }
2000 }
2001 
2002 void G1ConcurrentMark::print_summary_info() {
2003   Log(gc, marking) log;
2004   if (!log.is_trace()) {
2005     return;
2006   }
2007 
2008   log.trace(" Concurrent marking:");
2009   if (!is_fully_initialized()) {
2010     log.trace("    has not been initialized yet");
2011     return;
2012   }
2013   print_ms_time_info("  ", "remarks", _remark_times);
2014   {
2015     print_ms_time_info("     ", "final marks", _remark_mark_times);
2016     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
2017 
2018   }
2019   print_ms_time_info("  ", "cleanups", _cleanup_times);
2020   log.trace("    Finalize live data total time = %8.2f s (avg = %8.2f ms).",
2021             _cleanup_times.sum() / 1000.0, _cleanup_times.avg());
2022   log.trace("  Total stop_world time = %8.2f s.",
2023             (_remark_times.sum() + _cleanup_times.sum())/1000.0);
2024   log.trace("  Total concurrent time = %8.2f s (%8.2f s marking).",
2025             cm_thread()->total_mark_cpu_time_s(), cm_thread()->worker_threads_cpu_time_s());
2026 }
2027 
2028 void G1ConcurrentMark::threads_do(ThreadClosure* tc) const {
2029   if (is_fully_initialized()) { // they are initialized late
2030     tc->do_thread(_cm_thread);
2031     _concurrent_workers->threads_do(tc);
2032   }
2033 }
2034 
2035 void G1ConcurrentMark::print_on(outputStream* st) const {
2036   st->print_cr("Marking Bits: (CMBitMap*) " PTR_FORMAT, p2i(mark_bitmap()));
2037   _mark_bitmap.print_on(st, " Bits: ");
2038 }
2039 
2040 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) {
2041   ReferenceProcessor* result = g1h->ref_processor_cm();
2042   assert(result != nullptr, "CM reference processor should not be null");
2043   return result;
2044 }
2045 
2046 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
2047                                G1CMTask* task)
2048   : ClaimMetadataVisitingOopIterateClosure(ClassLoaderData::_claim_strong, get_cm_oop_closure_ref_processor(g1h)),
2049     _g1h(g1h), _task(task)
2050 { }
2051 
2052 void G1CMTask::setup_for_region(G1HeapRegion* hr) {
2053   assert(hr != nullptr,
2054         "claim_region() should have filtered out null regions");
2055   _curr_region  = hr;
2056   _finger       = hr->bottom();
2057   update_region_limit();
2058 }
2059 
2060 void G1CMTask::update_region_limit() {
2061   G1HeapRegion* hr = _curr_region;
2062   HeapWord* bottom = hr->bottom();
2063   HeapWord* limit = _cm->top_at_mark_start(hr);
2064 
2065   if (limit == bottom) {
2066     // The region was collected underneath our feet.
2067     // We set the finger to bottom to ensure that the bitmap
2068     // iteration that will follow this will not do anything.
2069     // (this is not a condition that holds when we set the region up,
2070     // as the region is not supposed to be empty in the first place)
2071     _finger = bottom;
2072   } else if (limit >= _region_limit) {
2073     assert(limit >= _finger, "peace of mind");
2074   } else {
2075     assert(limit < _region_limit, "only way to get here");
2076     // This can happen under some pretty unusual circumstances.  An
2077     // evacuation pause empties the region underneath our feet (TAMS
2078     // at bottom). We then do some allocation in the region (TAMS
2079     // stays at bottom), followed by the region being used as a GC
2080     // alloc region (TAMS will move to top() and the objects
2081     // originally below it will be greyed). All objects now marked in
2082     // the region are explicitly greyed, if below the global finger,
2083     // and we do not need in fact to scan anything else. So, we simply
2084     // set _finger to be limit to ensure that the bitmap iteration
2085     // doesn't do anything.
2086     _finger = limit;
2087   }
2088 
2089   _region_limit = limit;
2090 }
2091 
2092 void G1CMTask::giveup_current_region() {
2093   assert(_curr_region != nullptr, "invariant");
2094   clear_region_fields();
2095 }
2096 
2097 void G1CMTask::clear_region_fields() {
2098   // Values for these three fields that indicate that we're not
2099   // holding on to a region.
2100   _curr_region   = nullptr;
2101   _finger        = nullptr;
2102   _region_limit  = nullptr;
2103 }
2104 
2105 void G1CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
2106   if (cm_oop_closure == nullptr) {
2107     assert(_cm_oop_closure != nullptr, "invariant");
2108   } else {
2109     assert(_cm_oop_closure == nullptr, "invariant");
2110   }
2111   _cm_oop_closure = cm_oop_closure;
2112 }
2113 
2114 void G1CMTask::reset(G1CMBitMap* mark_bitmap) {
2115   guarantee(mark_bitmap != nullptr, "invariant");
2116   _mark_bitmap              = mark_bitmap;
2117   clear_region_fields();
2118 
2119   _calls                         = 0;
2120   _elapsed_time_ms               = 0.0;
2121   _termination_time_ms           = 0.0;
2122 
2123   _mark_stats_cache.reset();
2124 }
2125 
2126 void G1CMTask::reset_for_restart() {
2127   clear_region_fields();
2128   _task_queue->set_empty();
2129   TASKQUEUE_STATS_ONLY(_partial_array_splitter.stats()->reset());
2130   TASKQUEUE_STATS_ONLY(_task_queue->stats.reset());
2131 }
2132 
2133 void G1CMTask::register_partial_array_splitter() {
2134 
2135   ::new (&_partial_array_splitter) PartialArraySplitter(_cm->partial_array_state_manager(),
2136                                                         _cm->max_num_tasks(),
2137                                                         ObjArrayMarkingStride);
2138 }
2139 
2140 void G1CMTask::unregister_partial_array_splitter() {
2141   _partial_array_splitter.~PartialArraySplitter();
2142 }
2143 
2144 bool G1CMTask::should_exit_termination() {
2145   if (!regular_clock_call()) {
2146     return true;
2147   }
2148 
2149   // This is called when we are in the termination protocol. We should
2150   // quit if, for some reason, this task wants to abort or the global
2151   // stack is not empty (this means that we can get work from it).
2152   return !_cm->mark_stack_empty() || has_aborted();
2153 }
2154 
2155 void G1CMTask::reached_limit() {
2156   assert(_words_scanned >= _words_scanned_limit ||
2157          _refs_reached >= _refs_reached_limit ,
2158          "shouldn't have been called otherwise");
2159   abort_marking_if_regular_check_fail();
2160 }
2161 
2162 bool G1CMTask::regular_clock_call() {
2163   if (has_aborted()) {
2164     return false;
2165   }
2166 
2167   // First, we need to recalculate the words scanned and refs reached
2168   // limits for the next clock call.
2169   recalculate_limits();
2170 
2171   // During the regular clock call we do the following
2172 
2173   // (1) If an overflow has been flagged, then we abort.
2174   if (_cm->has_overflown()) {
2175     return false;
2176   }
2177 
2178   // If we are not concurrent (i.e. we're doing remark) we don't need
2179   // to check anything else. The other steps are only needed during
2180   // the concurrent marking phase.
2181   if (!_cm->concurrent()) {
2182     return true;
2183   }
2184 
2185   // (2) If marking has been aborted for Full GC, then we also abort.
2186   if (_cm->has_aborted()) {
2187     return false;
2188   }
2189 
2190   // (4) We check whether we should yield. If we have to, then we abort.
2191   if (SuspendibleThreadSet::should_yield()) {
2192     // We should yield. To do this we abort the task. The caller is
2193     // responsible for yielding.
2194     return false;
2195   }
2196 
2197   // (5) We check whether we've reached our time quota. If we have,
2198   // then we abort.
2199   double elapsed_time_ms = (double)(os::current_thread_cpu_time() - _start_cpu_time_ns) / NANOSECS_PER_MILLISEC;
2200   if (elapsed_time_ms > _time_target_ms) {
2201     _has_timed_out = true;
2202     return false;
2203   }
2204 
2205   // (6) Finally, we check whether there are enough completed STAB
2206   // buffers available for processing. If there are, we abort.
2207   SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
2208   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
2209     // we do need to process SATB buffers, we'll abort and restart
2210     // the marking task to do so
2211     return false;
2212   }
2213   return true;
2214 }
2215 
2216 void G1CMTask::recalculate_limits() {
2217   _real_words_scanned_limit = _words_scanned + words_scanned_period;
2218   _words_scanned_limit      = _real_words_scanned_limit;
2219 
2220   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
2221   _refs_reached_limit       = _real_refs_reached_limit;
2222 }
2223 
2224 void G1CMTask::decrease_limits() {
2225   // This is called when we believe that we're going to do an infrequent
2226   // operation which will increase the per byte scanned cost (i.e. move
2227   // entries to/from the global stack). It basically tries to decrease the
2228   // scanning limit so that the clock is called earlier.
2229 
2230   _words_scanned_limit = _real_words_scanned_limit - 3 * words_scanned_period / 4;
2231   _refs_reached_limit  = _real_refs_reached_limit - 3 * refs_reached_period / 4;
2232 }
2233 
2234 void G1CMTask::move_entries_to_global_stack() {
2235   // Local array where we'll store the entries that will be popped
2236   // from the local queue.
2237   G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2238 
2239   size_t n = 0;
2240   G1TaskQueueEntry task_entry;
2241   while (n < G1CMMarkStack::EntriesPerChunk && _task_queue->pop_local(task_entry)) {
2242     buffer[n] = task_entry;
2243     ++n;
2244   }
2245   if (n < G1CMMarkStack::EntriesPerChunk) {
2246     buffer[n] = G1TaskQueueEntry();
2247   }
2248 
2249   if (n > 0) {
2250     if (!_cm->mark_stack_push(buffer)) {
2251       set_has_aborted();
2252     }
2253   }
2254 
2255   // This operation was quite expensive, so decrease the limits.
2256   decrease_limits();
2257 }
2258 
2259 bool G1CMTask::get_entries_from_global_stack() {
2260   // Local array where we'll store the entries that will be popped
2261   // from the global stack.
2262   G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2263 
2264   if (!_cm->mark_stack_pop(buffer)) {
2265     return false;
2266   }
2267 
2268   // We did actually pop at least one entry.
2269   for (size_t i = 0; i < G1CMMarkStack::EntriesPerChunk; ++i) {
2270     G1TaskQueueEntry task_entry = buffer[i];
2271     if (task_entry.is_null()) {
2272       break;
2273     }
2274     assert(task_entry.is_partial_array_state() || oopDesc::is_oop(task_entry.to_oop()), "Element " PTR_FORMAT " must be an array slice or oop", p2i(task_entry.to_oop()));
2275     bool success = _task_queue->push(task_entry);
2276     // We only call this when the local queue is empty or under a
2277     // given target limit. So, we do not expect this push to fail.
2278     assert(success, "invariant");
2279   }
2280 
2281   // This operation was quite expensive, so decrease the limits
2282   decrease_limits();
2283   return true;
2284 }
2285 
2286 void G1CMTask::drain_local_queue(bool partially) {
2287   if (has_aborted()) {
2288     return;
2289   }
2290 
2291   // Decide what the target size is, depending whether we're going to
2292   // drain it partially (so that other tasks can steal if they run out
2293   // of things to do) or totally (at the very end).
2294   uint target_size;
2295   if (partially) {
2296     target_size = GCDrainStackTargetSize;
2297   } else {
2298     target_size = 0;
2299   }
2300 
2301   if (_task_queue->size() > target_size) {
2302     G1TaskQueueEntry entry;
2303     bool ret = _task_queue->pop_local(entry);
2304     while (ret) {
2305       process_entry(entry, false /* stolen */);
2306       if (_task_queue->size() <= target_size || has_aborted()) {
2307         ret = false;
2308       } else {
2309         ret = _task_queue->pop_local(entry);
2310       }
2311     }
2312   }
2313 }
2314 
2315 size_t G1CMTask::start_partial_array_processing(oop obj) {
2316   assert(should_be_sliced(obj), "Must be an array object %d and large %zu", obj->is_objArray(), obj->size());
2317 
2318   objArrayOop obj_array = objArrayOop(obj);
2319   size_t array_length = obj_array->length();
2320 
2321   size_t initial_chunk_size = _partial_array_splitter.start(_task_queue, obj_array, nullptr, array_length);
2322 
2323   // Mark objArray klass metadata
2324   if (_cm_oop_closure->do_metadata()) {
2325     _cm_oop_closure->do_klass(obj_array->klass());
2326   }
2327 
2328   process_array_chunk(obj_array, 0, initial_chunk_size);
2329 
2330   // Include object header size
2331   return objArrayOopDesc::object_size(checked_cast<int>(initial_chunk_size));
2332 }
2333 
2334 size_t G1CMTask::process_partial_array(const G1TaskQueueEntry& task, bool stolen) {
2335   PartialArrayState* state = task.to_partial_array_state();
2336   // Access state before release by claim().
2337   objArrayOop obj = objArrayOop(state->source());
2338 
2339   PartialArraySplitter::Claim claim =
2340     _partial_array_splitter.claim(state, _task_queue, stolen);
2341 
2342   process_array_chunk(obj, claim._start, claim._end);
2343   return heap_word_size((claim._end - claim._start) * heapOopSize);
2344 }
2345 
2346 void G1CMTask::drain_global_stack(bool partially) {
2347   if (has_aborted()) {
2348     return;
2349   }
2350 
2351   // We have a policy to drain the local queue before we attempt to
2352   // drain the global stack.
2353   assert(partially || _task_queue->size() == 0, "invariant");
2354 
2355   // Decide what the target size is, depending whether we're going to
2356   // drain it partially (so that other tasks can steal if they run out
2357   // of things to do) or totally (at the very end).
2358   // Notice that when draining the global mark stack partially, due to the racyness
2359   // of the mark stack size update we might in fact drop below the target. But,
2360   // this is not a problem.
2361   // In case of total draining, we simply process until the global mark stack is
2362   // totally empty, disregarding the size counter.
2363   if (partially) {
2364     size_t const target_size = _cm->partial_mark_stack_size_target();
2365     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
2366       if (get_entries_from_global_stack()) {
2367         drain_local_queue(partially);
2368       }
2369     }
2370   } else {
2371     while (!has_aborted() && get_entries_from_global_stack()) {
2372       drain_local_queue(partially);
2373     }
2374   }
2375 }
2376 
2377 // SATB Queue has several assumptions on whether to call the par or
2378 // non-par versions of the methods. This is why some of the code is
2379 // replicated. We should really get rid of the single-threaded version
2380 // of the code to simplify things.
     //
     // drain_satb_buffers() applies a G1CMSATBBufferClosure to completed SATB
     // buffers, one buffer per iteration, until no buffer could be processed or
     // this task aborts. _draining_satb_buffers is set for the duration of the
     // loop, and decrease_limits() is called before returning. Does nothing if
     // the task has already aborted on entry.
2381 void G1CMTask::drain_satb_buffers() {
2382   if (has_aborted()) {
2383     return;
2384   }
2385
2386   // We set this so that the regular clock knows that we're in the
2387   // middle of draining buffers and doesn't set the abort flag when it
2388   // notices that SATB buffers are available for draining. It'd be
2389   // very counter productive if it did that. :-)
2390   _draining_satb_buffers = true;
2391
2392   G1CMSATBBufferClosure satb_cl(this, _g1h);
2393   SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
2394
2395   // This keeps claiming and applying the closure to completed buffers
2396   // until we run out of buffers or we need to abort.
2397   while (!has_aborted() &&
2398          satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
         // Run the regular check after every processed buffer so that an abort
         // request is noticed between buffers.
2399     abort_marking_if_regular_check_fail();
2400   }
2401
2402   // Can't assert qset is empty here, even if not aborted.  If concurrent,
2403   // some other thread might be adding to the queue.  If not concurrent,
2404   // some other thread might have won the race for the last buffer, but
2405   // has not yet decremented the count.
2406
2407   _draining_satb_buffers = false;
2408
2409   // again, this was a potentially expensive operation, decrease the
2410   // limits to get the regular clock call early
2411   decrease_limits();
2412 }
2413 
2414 void G1CMTask::clear_mark_stats_cache(uint region_idx) {
       // Forwards to the task-local mark stats cache, resetting the entry for
       // the region with the given index.
2415   _mark_stats_cache.reset(region_idx);
2416 }
2417 
2418 Pair<size_t, size_t> G1CMTask::flush_mark_stats_cache() {
       // Evicts everything from the task-local mark stats cache and returns the
       // pair produced by evict_all().
       // NOTE(review): the meaning of the two size_t values is defined by
       // G1RegionMarkStatsCache::evict_all(), which is not visible here.
2419   return _mark_stats_cache.evict_all();
2420 }
2421 
2422 void G1CMTask::print_stats() {
       // Logs this task's marking statistics at debug level with the gc+stats
       // tags: worker id and number of calls, elapsed and termination time, the
       // accumulated step-time series, and the mark stats cache hit/miss counts.
2423   log_debug(gc, stats)("Marking Stats, task = %u, calls = %u", _worker_id, _calls);
2424   log_debug(gc, stats)("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
2425                        _elapsed_time_ms, _termination_time_ms);
2426   log_debug(gc, stats)("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms max = %1.2lfms, total = %1.2lfms",
2427                        _step_times_ms.num(),
2428                        _step_times_ms.avg(),
2429                        _step_times_ms.sd(),
2430                        _step_times_ms.maximum(),
2431                        _step_times_ms.sum());
2432   size_t const hits = _mark_stats_cache.hits();
2433   size_t const misses = _mark_stats_cache.misses();
       // NOTE(review): the value is produced by percent_of(), while the label in
       // the message says "ratio" - confirm which unit is intended.
2434   log_debug(gc, stats)("  Mark Stats Cache: hits %zu misses %zu ratio %.3f",
2435                        hits, misses, percent_of(hits, hits + misses));
2436 }
2437 
2438 bool G1ConcurrentMark::try_stealing(uint worker_id, G1TaskQueueEntry& task_entry) {
       // Asks the task queue set to steal an entry on behalf of worker_id and
       // returns its result. task_entry is passed by reference as the output
       // slot; presumably it is only meaningful when true is returned (see
       // the steal() contract, not visible here).
2439   return _task_queues->steal(worker_id, task_entry);
2440 }
2441 
2442 void G1CMTask::process_current_region(G1CMBitMapClosure& bitmap_closure) {
       // Scans the unprocessed part of the region this task currently holds,
       // i.e. [_finger, _region_limit), applying bitmap_closure to the marked
       // addresses found in the mark bitmap. The region is given up when the
       // scan completes; if the scan is interrupted, _finger is advanced past
       // the object that was being looked at so that a later call resumes
       // after it. Does nothing if the task has aborted or holds no region.
2443   if (has_aborted() || _curr_region == nullptr) {
2444     return;
2445   }
2446
2447   // This means that we're already holding on to a region.
2448   assert(_finger != nullptr, "if region is not null, then the finger "
2449          "should not be null either");
2450
2451   // We might have restarted this task after an evacuation pause
2452   // which might have evacuated the region we're holding on to
2453   // underneath our feet. Let's read its limit again to make sure
2454   // that we do not iterate over a region of the heap that
2455   // contains garbage (update_region_limit() will also move
2456   // _finger to the start of the region if it is found empty).
2457   update_region_limit();
2458   // We will start from _finger not from the start of the region,
2459   // as we might be restarting this task after aborting half-way
2460   // through scanning this region. In this case, _finger points to
2461   // the address where we last found a marked object. If this is a
2462   // fresh region, _finger points to start().
2463   MemRegion mr = MemRegion(_finger, _region_limit);
2464
2465   assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
2466          "humongous regions should go around loop once only");
2467
2468   // Some special cases:
2469   // If the memory region is empty, we can just give up the region.
2470   // If the current region is humongous then we only need to check
2471   // the bitmap for the bit associated with the start of the object,
2472   // scan the object if it's live, and give up the region.
2473   // Otherwise, let's iterate over the bitmap of the part of the region
2474   // that is left.
2475   // If the iteration is successful, give up the region.
       // In each of the three "done with the region" branches below, the
       // regular check is run right after the region has been given up.
2476   if (mr.is_empty()) {
2477     giveup_current_region();
2478     abort_marking_if_regular_check_fail();
2479   } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
2480     if (_mark_bitmap->is_marked(mr.start())) {
2481       // The object is marked - apply the closure
2482       bitmap_closure.do_addr(mr.start());
2483     }
2484     // Even if this task aborted while scanning the humongous object
2485     // we can (and should) give up the current region.
2486     giveup_current_region();
2487     abort_marking_if_regular_check_fail();
2488   } else if (_mark_bitmap->iterate(&bitmap_closure, mr)) {
2489     giveup_current_region();
2490     abort_marking_if_regular_check_fail();
2491   } else {
2492     assert(has_aborted(), "currently the only way to do so");
2493     // The only way to abort the bitmap iteration is to return
2494     // false from the do_bit() method. However, inside the
2495     // do_bit() method we move the _finger to point to the
2496     // object currently being looked at. So, if we bail out, we
2497     // have definitely set _finger to something non-null.
2498     assert(_finger != nullptr, "invariant");
2499
2500     // Region iteration was actually aborted. So now _finger
2501     // points to the address of the object we last scanned. If we
2502     // leave it there, when we restart this task, we will rescan
2503     // the object. It is easy to avoid this. We move the finger by
2504     // enough to point to the next possible object header.
2505     assert(_finger < _region_limit, "invariant");
2506     HeapWord* const new_finger = _finger + cast_to_oop(_finger)->size();
         // If skipping that object reaches or passes the limit, nothing is left
         // to scan in this region, so release it instead of moving the finger.
2507     if (new_finger >= _region_limit) {
2508       giveup_current_region();
2509     } else {
2510       move_finger_to(new_finger);
2511     }
2512   }
2513 }
2514 
2515 void G1CMTask::claim_new_region() {
       // Tries to claim a region for this task. The loop keeps asking
       // G1ConcurrentMark::claim_region() until a region has been set up, the
       // task aborts, or the marker reports that it is out of regions. On
       // success _curr_region is the claimed region.
2516   // Read the note on the claim_region() method on why it might
2517   // return null with potentially more regions available for
2518   // claiming and why we have to check out_of_regions() to determine
2519   // whether we're done or not.
2520   while (!has_aborted() && _curr_region == nullptr && !_cm->out_of_regions()) {
2521     // We are going to try to claim a new region. We should have
2522     // given up on the previous one.
2523     // Separated the asserts so that we know which one fires.
2524     assert(_curr_region  == nullptr, "invariant");
2525     assert(_finger       == nullptr, "invariant");
2526     assert(_region_limit == nullptr, "invariant");
2527     G1HeapRegion* claimed_region = _cm->claim_region(_worker_id);
2528     if (claimed_region != nullptr) {
2529       // Yes, we managed to claim one
2530       setup_for_region(claimed_region);
2531       assert(_curr_region == claimed_region, "invariant");
2532     }
2533     // It is important to call the regular clock here. It might take
2534     // a while to claim a region if, for example, we hit a large
2535     // block of empty regions. So we need to call the regular clock
2536     // method once round the loop to make sure it's called
2537     // frequently enough.
2538     abort_marking_if_regular_check_fail();
2539   }
2540 }
2541 
2542 void G1CMTask::attempt_stealing() {
       // Repeatedly steals an entry via G1ConcurrentMark::try_stealing(),
       // processes it, and then fully drains the local queue and the global
       // stack. Stops at the first failed steal or as soon as the task aborts.
2543   // We cannot check whether the global stack is empty, since other
2544   // tasks might be pushing objects to it concurrently.
2545   assert(_cm->out_of_regions() && _task_queue->size() == 0,
2546          "only way to reach here");
2547   while (!has_aborted()) {
2548     G1TaskQueueEntry entry;
2549     if (_cm->try_stealing(_worker_id, entry)) {
2550       process_entry(entry, true /* stolen */);
2551
2552       // And since we're towards the end, let's totally drain the
2553       // local queue and global stack.
2554       drain_local_queue(false);
2555       drain_global_stack(false);
2556     } else {
2557       break;
2558     }
2559   }
2560 }
2561 
2562 void G1CMTask::attempt_termination(bool is_serial) {
       // Enters the termination protocol for this task. With is_serial == true
       // the task is treated as finished without consulting the terminator;
       // otherwise the outcome of offer_termination() decides. The time spent
       // here is added to _termination_time_ms. If termination did not succeed,
       // the task is marked as aborted.
2563   // We cannot check whether the global stack is empty, since other
2564   // tasks might be concurrently pushing objects on it.
2565   // Separated the asserts so that we know which one fires.
2566   assert(_cm->out_of_regions(), "only way to reach here");
2567   assert(_task_queue->size() == 0, "only way to reach here");
2568   double termination_start_time_ms = os::elapsedTime() * 1000.0;
2569
2570   // The G1CMTask class also extends the TerminatorTerminator class,
2571   // hence its should_exit_termination() method will also decide
2572   // whether to exit the termination protocol or not.
2573   bool finished = (is_serial ||
2574                    _cm->terminator()->offer_termination(this));
2575   _termination_time_ms += (os::elapsedTime() * 1000.0 - termination_start_time_ms);
2576
2577   if (finished) {
2578     // We're all done.
2579
2580     // We can now guarantee that the global stack is empty, since
2581     // all other tasks have finished. We separated the guarantees so
2582     // that, if a condition is false, we can immediately find out
2583     // which one.
2584     guarantee(_cm->out_of_regions(), "only way to reach here");
2585     guarantee(_cm->mark_stack_empty(), "only way to reach here");
2586     guarantee(_task_queue->size() == 0, "only way to reach here");
2587     guarantee(!_cm->has_overflown(), "only way to reach here");
2588     guarantee(!has_aborted(), "should never happen if termination has completed");
2589   } else {
2590     // Apparently there's more work to do. Let's abort this task. We
2591     // will restart it and hopefully we can find more things to do.
2592     set_has_aborted();
2593   }
2594 }
2595 
2596 void G1CMTask::handle_abort(bool is_serial, double elapsed_time_ms) {
       // Post-processing for a marking step that ended with the abort flag set.
       //   is_serial       - when true, both sync barriers and the global reset
       //                     are skipped.
       //   elapsed_time_ms - duration of the step; compared against
       //                     _time_target_ms when the step timed out.
       // If the abort was not caused by a global overflow, only the timing
       // statistics are updated.
2597   if (_has_timed_out) {
2598     double diff_ms = elapsed_time_ms - _time_target_ms;
2599     // Keep statistics of how well we did with respect to hitting
2600     // our target only if we actually timed out (if we aborted for
2601     // other reasons, then the results might get skewed).
2602     _marking_step_diff_ms.add(diff_ms);
2603   }
2604
2605   if (!_cm->has_overflown()) {
2606     return;
2607   }
2608
2609   // This is the interesting one. We aborted because a global
2610   // overflow was raised. This means we have to restart the
2611   // marking phase and start iterating over regions. However, in
2612   // order to do this we have to make sure that all tasks stop
2613   // what they are doing and re-initialize in a safe manner. We
2614   // will achieve this with the use of two barrier sync points.
2615   if (!is_serial) {
2616     // We only need to enter the sync barrier if being called
2617     // from a parallel context
2618     _cm->enter_first_sync_barrier(_worker_id);
2619
2620     // When we exit this sync barrier we know that all tasks have
2621     // stopped doing marking work. So, it's now safe to
2622     // re-initialize our data structures.
2623   }
2624
       // Task-local state is reset in both the serial and the parallel case.
       // The pair returned by flush_mark_stats_cache() is not used here.
2625   clear_region_fields();
2626   flush_mark_stats_cache();
2627
2628   if (!is_serial) {
2629     // If we're executing the concurrent phase of marking, reset the marking
2630     // state; otherwise the marking state is reset after reference processing,
2631     // during the remark pause.
2632     // If we reset here as a result of an overflow during the remark we will
2633     // see assertion failures from any subsequent set_concurrency_and_phase()
2634     // calls.
2635     if (_cm->concurrent() && _worker_id == 0) {
2636       // Worker 0 is responsible for clearing the global data structures because
2637       // of an overflow. During STW we should not clear the overflow flag (in
2638       // G1ConcurrentMark::reset_marking_state()) since we rely on it being true when we exit
2639       // this method to abort the pause and restart concurrent marking.
           // NOTE(review): the comment above names reset_marking_state(), while the
           // call below is reset_marking_for_restart() - confirm which is meant.
2640       _cm->reset_marking_for_restart();
2641
2642       log_info(gc, marking)("Concurrent Mark reset for overflow");
2643     }
2644
2645     // ...and enter the second barrier.
2646     _cm->enter_second_sync_barrier(_worker_id);
2647   }
2648 }
2649 
2650 /*****************************************************************************
2651 
2652     The do_marking_step(time_target_ms, ...) method is the building
2653     block of the parallel marking framework. It can be called in parallel
2654     with other invocations of do_marking_step() on different tasks
2655     (but only one per task, obviously) and concurrently with the
2656     mutator threads, or during remark, hence it eliminates the need
2657     for two versions of the code. When called during remark, it will
2658     pick up from where the task left off during the concurrent marking
2659     phase. Interestingly, tasks are also claimable during evacuation
2660     pauses too, since do_marking_step() ensures that it aborts before
2661     it needs to yield.
2662 
2663     The data structures that it uses to do marking work are the
2664     following:
2665 
2666       (1) Marking Bitmap. If there are grey objects that appear only
2667       on the bitmap (this happens either when dealing with an overflow
2668       or when the concurrent start pause has simply marked the roots
2669       and didn't push them on the stack), then tasks claim heap
2670       regions whose bitmap they then scan to find grey objects. A
2671       global finger indicates where the end of the last claimed region
2672       is. A local finger indicates how far into the region a task has
2673       scanned. The two fingers are used to determine how to grey an
2674       object (i.e. whether simply marking it is OK, as it will be
2675       visited by a task in the future, or whether it needs to be also
2676       pushed on a stack).
2677 
2678       (2) Local Queue. The local queue of the task which is accessed
2679       reasonably efficiently by the task. Other tasks can steal from
2680       it when they run out of work. Throughout the marking phase, a
2681       task attempts to keep its local queue short but not totally
2682       empty, so that entries are available for stealing by other
2683       tasks. Only when there is no more work, a task will totally
2684       drain its local queue.
2685 
2686       (3) Global Mark Stack. This handles local queue overflow. During
2687       marking only sets of entries are moved between it and the local
2688       queues, as access to it requires a mutex and more fine-grain
2689       interaction with it which might cause contention. If it
2690       overflows, then the marking phase should restart and iterate
2691       over the bitmap to identify grey objects. Throughout the marking
2692       phase, tasks attempt to keep the global mark stack at a small
2693       length but not totally empty, so that entries are available for
2694       popping by other tasks. Only when there is no more work, tasks
2695       will totally drain the global mark stack.
2696 
2697       (4) SATB Buffer Queue. This is where completed SATB buffers are
2698       made available. Buffers are regularly removed from this queue
2699       and scanned for roots, so that the queue doesn't get too
2700       long. During remark, all completed buffers are processed, as
2701       well as the filled in parts of any uncompleted buffers.
2702 
2703     The do_marking_step() method tries to abort when the time target
2704     has been reached. There are a few other cases when the
2705     do_marking_step() method also aborts:
2706 
2707       (1) When the marking phase has been aborted (after a Full GC).
2708 
2709       (2) When a global overflow (on the global stack) has been
2710       triggered. Before the task aborts, it will actually sync up with
2711       the other tasks to ensure that all the marking data structures
2712       (local queues, stacks, fingers etc.)  are re-initialized so that
2713       when do_marking_step() completes, the marking phase can
2714       immediately restart.
2715 
2716       (3) When enough completed SATB buffers are available. The
2717       do_marking_step() method only tries to drain SATB buffers right
2718       at the beginning. So, if enough buffers are available, the
2719       marking step aborts and the SATB buffers are processed at
2720       the beginning of the next invocation.
2721 
2722       (4) To yield. When we have to yield, we abort and yield
2723       right at the end of do_marking_step(). This saves us from a lot
2724       of hassle as, by yielding, we might allow a Full GC. If this
2725       happens then objects will be compacted underneath our feet, the
2726       heap might shrink, etc. We avoid checking for this by just
2727       aborting and doing the yield right at the end.
2728 
2729     From the above it follows that the do_marking_step() method should
2730     be called in a loop (or, otherwise, regularly) until it completes.
2731 
2732     If a marking step completes without its has_aborted() flag being
2733     true, it means it has completed the current marking phase (and
2734     also all other marking tasks have done so and have all synced up).
2735 
2736     A method called regular_clock_call() is invoked "regularly" (in
2737     sub ms intervals) throughout marking. It is this clock method that
2738     checks all the abort conditions which were mentioned above and
2739     decides when the task should abort. A work-based scheme is used to
2740     trigger this clock method: when the number of object words the
2741     marking phase has scanned or the number of references the marking
2742     phase has visited reach a given limit. Additional invocations of
2743     the clock method have been planted in a few other strategic places
2744     too. The initial reason for the clock method was to avoid calling
2745     cpu time gathering too regularly, as it is quite expensive. So,
2746     once it was in place, it was natural to piggy-back all the other
2747     conditions on it too and not constantly check them throughout the code.
2748 
2749     If do_termination is true then do_marking_step will enter its
2750     termination protocol.
2751 
2752     The value of is_serial must be true when do_marking_step is being
2753     called serially (i.e. by the VMThread) and do_marking_step should
2754     skip any synchronization in the termination and overflow code.
2755     Examples include the serial remark code and the serial reference
2756     processing closures.
2757 
2758     The value of is_serial must be false when do_marking_step is
2759     being called by any of the worker threads.
2760     Examples include the concurrent marking code (CMMarkingTask),
2761     the MT remark code, and the MT reference processing closures.
2762 
2763  *****************************************************************************/
2764 
2765 void G1CMTask::do_marking_step(double time_target_ms,
2766                                bool do_termination,
2767                                bool is_serial) {
       // Runs one marking step for this task.
       //   time_target_ms - requested step duration, at least 1ms; the effective
       //                    target is reduced by the predicted overshoot.
       //   do_termination - when true, the termination protocol is attempted at
       //                    the end of a step that has not aborted.
       //   is_serial      - when true, stealing is disabled and is_serial is
       //                    forwarded to attempt_termination() and handle_abort().
       // Whether the step completed or aborted is reported through has_aborted().
2768   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
2769
2770   _start_cpu_time_ns = os::current_thread_cpu_time();
2771
2772   // If do_stealing is true then do_marking_step will attempt to
2773   // steal work from the other G1CMTasks. It only makes sense to
2774   // enable stealing when the termination protocol is enabled
2775   // and do_marking_step() is not being called serially.
2776   bool do_stealing = do_termination && !is_serial;
2777
       // _marking_step_diff_ms collects (elapsed - target) for steps that timed
       // out (see handle_abort()); its zero-bounded prediction is subtracted
       // from the requested target.
2778   G1Predictions const& predictor = _g1h->policy()->predictor();
2779   double diff_prediction_ms = predictor.predict_zero_bounded(&_marking_step_diff_ms);
2780   _time_target_ms = time_target_ms - diff_prediction_ms;
2781
2782   // set up the variables that are used in the work-based scheme to
2783   // call the regular clock method
2784   _words_scanned = 0;
2785   _refs_reached  = 0;
2786   recalculate_limits();
2787
2788   // clear all flags
2789   clear_has_aborted();
2790   _has_timed_out = false;
2791   _draining_satb_buffers = false;
2792
2793   ++_calls;
2794
2795   // Set up the bitmap and oop closures. Anything that uses them is
2796   // eventually called from this method, so it is OK to allocate these
2797   // as locals in this frame.
2798   G1CMBitMapClosure bitmap_closure(this, _cm);
2799   G1CMOopClosure cm_oop_closure(_g1h, this);
2800   set_cm_oop_closure(&cm_oop_closure);
2801
2802   if (_cm->has_overflown()) {
2803     // This can happen if the mark stack overflows during a GC pause
2804     // and this task, after a yield point, restarts. We have to abort
2805     // as we need to get into the overflow protocol which happens
2806     // right at the end of this task.
2807     set_has_aborted();
2808   }
2809
2810   // First drain any available SATB buffers. After this, we will not
2811   // look at SATB buffers before the next invocation of this method.
2812   // If enough completed SATB buffers are queued up, the regular clock
2813   // will abort this task so that it restarts.
2814   drain_satb_buffers();
2815   // ...then partially drain the local queue and the global stack
2816   drain_local_queue(true);
2817   drain_global_stack(true);
2818
       // Main loop: finish the region currently held, partially drain, then
       // claim the next region; repeat while a region is held and the task
       // has not aborted.
2819   do {
2820     process_current_region(bitmap_closure);
2821     // At this point we have either completed iterating over the
2822     // region we were holding on to, or we have aborted.
2823
2824     // We then partially drain the local queue and the global stack.
2825     drain_local_queue(true);
2826     drain_global_stack(true);
2827
2828     claim_new_region();
2829
2830     assert(has_aborted() || _curr_region != nullptr || _cm->out_of_regions(),
2831            "at this point we should be out of regions");
2832   } while ( _curr_region != nullptr && !has_aborted());
2833
2834   // We cannot check whether the global stack is empty, since other
2835   // tasks might be pushing objects to it concurrently.
2836   assert(has_aborted() || _cm->out_of_regions(),
2837          "at this point we should be out of regions");
2838   // Try to reduce the number of available SATB buffers so that
2839   // remark has less work to do.
2840   drain_satb_buffers();
2841
2842   // Since we've done everything else, we can now totally drain the
2843   // local queue and global stack.
2844   drain_local_queue(false);
2845   drain_global_stack(false);
2846
2847   // Attempt at work stealing from other task's queues.
2848   if (do_stealing && !has_aborted()) {
2849     // We have not aborted. This means that we have finished all that
2850     // we could. Let's try to do some stealing...
2851     attempt_stealing();
2852   }
2853
2854   // We still haven't aborted. Now, let's try to get into the
2855   // termination protocol.
2856   if (do_termination && !has_aborted()) {
2857     attempt_termination(is_serial);
2858   }
2859
2860   // Mainly for debugging purposes to make sure that a pointer to the
2861   // closure which was allocated in this frame doesn't
2862   // escape it by accident.
2863   set_cm_oop_closure(nullptr);
       // The step duration is the difference between two readings of
       // os::current_thread_cpu_time(), converted from nanoseconds to ms.
2864   jlong end_cpu_time_ns = os::current_thread_cpu_time();
2865   double elapsed_time_ms = (double)(end_cpu_time_ns - _start_cpu_time_ns) / NANOSECS_PER_MILLISEC;
2866   // Update the step history.
2867   _step_times_ms.add(elapsed_time_ms);
2868
2869   if (has_aborted()) {
2870     // The task was aborted for some reason.
2871     handle_abort(is_serial, elapsed_time_ms);
2872   }
2873 }
2874 
2875 G1CMTask::G1CMTask(uint worker_id,
2876                    G1ConcurrentMark* cm,
2877                    G1CMTaskQueue* task_queue,
2878                    G1RegionMarkStats* mark_stats) :
       // Builds a marking task bound to the given worker id, concurrent marker,
       // local task queue and region mark stats. Counters, limits, flags and
       // region fields start out zero/false/null; the mark bitmap pointer is
       // left null here. _partial_array_splitter reads _cm in its initializer,
       // so it relies on _cm being initialized first.
2879   _worker_id(worker_id),
2880   _g1h(G1CollectedHeap::heap()),
2881   _cm(cm),
2882   _mark_bitmap(nullptr),
2883   _task_queue(task_queue),
2884   _partial_array_splitter(_cm->partial_array_state_manager(), _cm->max_num_tasks(), ObjArrayMarkingStride),
2885   _mark_stats_cache(mark_stats, G1RegionMarkStatsCache::RegionMarkStatsCacheSize),
2886   _calls(0),
2887   _time_target_ms(0.0),
2888   _start_cpu_time_ns(0),
2889   _cm_oop_closure(nullptr),
2890   _curr_region(nullptr),
2891   _finger(nullptr),
2892   _region_limit(nullptr),
2893   _words_scanned(0),
2894   _words_scanned_limit(0),
2895   _real_words_scanned_limit(0),
2896   _refs_reached(0),
2897   _refs_reached_limit(0),
2898   _real_refs_reached_limit(0),
2899   _has_aborted(false),
2900   _has_timed_out(false),
2901   _draining_satb_buffers(false),
2902   _step_times_ms(),
2903   _elapsed_time_ms(0.0),
2904   _termination_time_ms(0.0),
2905   _marking_step_diff_ms()
2906 {
2907   guarantee(task_queue != nullptr, "invariant");
2908
       // Seed the timeout-overshoot series with one 0.5 sample; presumably so
       // that the first prediction in do_marking_step() has data - TODO confirm.
2909   _marking_step_diff_ms.add(0.5);
2910 }
2911 
2912 // These are formatting macros that are used below to ensure
2913 // consistent formatting. The *_H_* versions are used to format the
2914 // header for a particular value and they should be kept consistent
2915 // with the corresponding macro. Also note that most of the macros add
2916 // the necessary white space (as a prefix) which makes them a bit
2917 // easier to compose.
2918
2919 // All the output lines are prefixed with this string to be able to
2920 // identify them easily in a large log file.
2921 #define G1PPRL_LINE_PREFIX            "###"
2922
     // Address range printed as "<start>-<end>". The header column width
     // depends on _LP64 (37 vs. 21 characters).
2923 #define G1PPRL_ADDR_BASE_FORMAT    " " PTR_FORMAT "-" PTR_FORMAT
2924 #ifdef _LP64
2925 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
2926 #else // _LP64
2927 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
2928 #endif // _LP64
2929
2930 // For per-region info
2931 #define G1PPRL_TYPE_FORMAT            "   %-4s"
2932 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
2933 #define G1PPRL_STATE_FORMAT           "   %-5s"
2934 #define G1PPRL_STATE_H_FORMAT         "   %5s"
2935 #define G1PPRL_BYTE_FORMAT            "  %9zu"
2936 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
2937 #define G1PPRL_DOUBLE_FORMAT          "%14.1f"
2938 #define G1PPRL_GCEFF_H_FORMAT         "  %14s"
     // Group id, group length, gc efficiency and liveness columns. These are
     // used below for the collection set candidate group rows.
2939 #define G1PPRL_GID_H_FORMAT           "  %9s"
2940 #define G1PPRL_GID_FORMAT             "  " UINT32_FORMAT_W(9)
2941 #define G1PPRL_LEN_FORMAT             "  " UINT32_FORMAT_W(14)
2942 #define G1PPRL_LEN_H_FORMAT           "  %14s"
2943 #define G1PPRL_GID_GCEFF_FORMAT       "  %14.1f"
2944 #define G1PPRL_GID_LIVENESS_FORMAT    "  %9.2f"
2945
2946 // For summary info
     // Each macro takes the label as 'tag'. G1PPRL_SUM_ADDR_FORMAT and
     // G1PPRL_SUM_MB_PERC_FORMAT consume two arguments each (start/end
     // address, and MB value plus percentage); the other two consume one.
2947 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  " tag ":" G1PPRL_ADDR_BASE_FORMAT
2948 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  " tag ": %zu"
2949 #define G1PPRL_SUM_MB_FORMAT(tag)      "  " tag ": %1.2f MB"
2950 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"
2951 
2952 G1PrintRegionLivenessInfoClosure::G1PrintRegionLivenessInfoClosure(const char* phase_name) :
       // Zeroes all running totals and, if trace logging for gc+liveness is
       // enabled, prints the report header: the phase line (phase_name and the
       // current elapsed time), the reserved heap range with the region size,
       // and two rows of column headings for the per-region lines.
2953   _total_used_bytes(0),
2954   _total_capacity_bytes(0),
2955   _total_live_bytes(0),
2956   _total_remset_bytes(0),
2957   _total_code_roots_bytes(0)
2958 {
2959   if (!log_is_enabled(Trace, gc, liveness)) {
2960     return;
2961   }
2962
2963   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2964   MemRegion reserved = g1h->reserved();
2965   double now = os::elapsedTime();
2966
2967   // Print the header of the output.
2968   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
2969   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP"
2970                           G1PPRL_SUM_ADDR_FORMAT("reserved")
2971                           G1PPRL_SUM_BYTE_FORMAT("region-size"),
2972                           p2i(reserved.start()), p2i(reserved.end()),
2973                           G1HeapRegion::GrainBytes);
2974   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
       // First heading row: column names. The column order has to match the
       // per-region line printed by do_heap_region().
2975   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
2976                           G1PPRL_TYPE_H_FORMAT
2977                           G1PPRL_ADDR_BASE_H_FORMAT
2978                           G1PPRL_BYTE_H_FORMAT
2979                           G1PPRL_BYTE_H_FORMAT
2980                           G1PPRL_STATE_H_FORMAT
2981                           G1PPRL_BYTE_H_FORMAT
2982                           G1PPRL_GID_H_FORMAT,
2983                           "type", "address-range",
2984                           "used", "live",
2985                           "state", "code-roots",
2986                           "group-id");
       // Second heading row: units for the byte-valued columns.
2987   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
2988                           G1PPRL_TYPE_H_FORMAT
2989                           G1PPRL_ADDR_BASE_H_FORMAT
2990                           G1PPRL_BYTE_H_FORMAT
2991                           G1PPRL_BYTE_H_FORMAT
2992                           G1PPRL_STATE_H_FORMAT
2993                           G1PPRL_BYTE_H_FORMAT
2994                           G1PPRL_GID_H_FORMAT,
2995                           "", "",
2996                           "(bytes)", "(bytes)",
2997                           "", "(bytes)", "");
2998 }
2999 
3000 bool G1PrintRegionLivenessInfoClosure::do_heap_region(G1HeapRegion* r) {
       // Per-region callback: adds the region's sizes to the running totals and
       // prints one trace line for it. When trace logging for gc+liveness is
       // disabled, nothing is accumulated or printed. Always returns false
       // (presumably "continue iterating" - see the closure base class).
3001   if (!log_is_enabled(Trace, gc, liveness)) {
3002     return false;
3003   }
3004
3005   const char* type       = r->get_type_str();
3006   HeapWord* bottom       = r->bottom();
3007   HeapWord* end          = r->end();
3008   size_t capacity_bytes  = r->capacity();
3009   size_t used_bytes      = r->used();
3010   size_t live_bytes      = r->live_bytes();
3011   size_t remset_bytes    = r->rem_set()->mem_size();
3012   size_t code_roots_bytes = r->rem_set()->code_roots_mem_size();
3013   const char* remset_type = r->rem_set()->get_short_state_str();
       // Regions whose remembered set has no collection set group are reported
       // with the NoRemSetId placeholder.
3014   uint cset_group_id     = r->rem_set()->has_cset_group()
3015                          ? r->rem_set()->cset_group_id()
3016                          : G1CSetCandidateGroup::NoRemSetId;
3017
       // capacity_bytes and remset_bytes only feed the totals; they are not
       // part of the per-region line below.
3018   _total_used_bytes      += used_bytes;
3019   _total_capacity_bytes  += capacity_bytes;
3020   _total_live_bytes      += live_bytes;
3021   _total_remset_bytes    += remset_bytes;
3022   _total_code_roots_bytes += code_roots_bytes;
3023
3024   // Print a line for this particular region.
3025   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3026                         G1PPRL_TYPE_FORMAT
3027                         G1PPRL_ADDR_BASE_FORMAT
3028                         G1PPRL_BYTE_FORMAT
3029                         G1PPRL_BYTE_FORMAT
3030                         G1PPRL_STATE_FORMAT
3031                         G1PPRL_BYTE_FORMAT
3032                         G1PPRL_GID_FORMAT,
3033                         type, p2i(bottom), p2i(end),
3034                         used_bytes, live_bytes,
3035                         remset_type, code_roots_bytes,
3036                         cset_group_id);
3037
3038   return false;
3039 }
3040 
3041 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
       // Prints the end of the report when trace logging for gc+liveness is
       // enabled: the collection set candidate groups, then a summary line
       // with total capacity, used, live, remset and code-roots sizes in MB.
       // Used and live are also shown as a percentage of capacity.
3042   if (!log_is_enabled(Trace, gc, liveness)) {
3043     return;
3044   }
3045
3046   G1CollectedHeap* g1h = G1CollectedHeap::heap();
3047   _total_remset_bytes += g1h->card_set_freelist_pool()->mem_size();
3048   // add static memory usages to remembered set sizes
3049   _total_remset_bytes += G1HeapRegionRemSet::static_mem_size();
3050
       // Must run before the summary is printed: the group logging presumably
       // adds each group's card set size to _total_remset_bytes (see
       // log_cset_candidate_group_add_total()).
3051   log_cset_candidate_groups();
3052
3053   // Print the footer of the output.
3054   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3055   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3056                          " SUMMARY"
3057                          G1PPRL_SUM_MB_FORMAT("capacity")
3058                          G1PPRL_SUM_MB_PERC_FORMAT("used")
3059                          G1PPRL_SUM_MB_PERC_FORMAT("live")
3060                          G1PPRL_SUM_MB_FORMAT("remset")
3061                          G1PPRL_SUM_MB_FORMAT("code-roots"),
3062                          bytes_to_mb(_total_capacity_bytes),
3063                          bytes_to_mb(_total_used_bytes),
3064                          percent_of(_total_used_bytes, _total_capacity_bytes),
3065                          bytes_to_mb(_total_live_bytes),
3066                          percent_of(_total_live_bytes, _total_capacity_bytes),
3067                          bytes_to_mb(_total_remset_bytes),
3068                          bytes_to_mb(_total_code_roots_bytes));
3069 }
3070 
3071 void G1PrintRegionLivenessInfoClosure::log_cset_candidate_group_add_total(G1CSetCandidateGroup* group, const char* type) {
3072   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3073                           G1PPRL_GID_FORMAT
3074                           G1PPRL_LEN_FORMAT
3075                           G1PPRL_GID_GCEFF_FORMAT
3076                           G1PPRL_GID_LIVENESS_FORMAT
3077                           G1PPRL_BYTE_FORMAT
3078                           G1PPRL_TYPE_H_FORMAT,
3079                           group->group_id(),
3080                           group->length(),
3081                           group->length() > 0 ? group->gc_efficiency() : 0.0,
3082                           group->length() > 0 ? group->liveness_percent() : 0.0,
3083                           group->card_set()->mem_size(),
3084                           type);
3085   _total_remset_bytes += group->card_set()->mem_size();
3086 }
3087 
3088 void G1PrintRegionLivenessInfoClosure::log_cset_candidate_grouplist(G1CSetCandidateGroupList& gl, const char* type) {
3089   for (G1CSetCandidateGroup* group : gl) {
3090     log_cset_candidate_group_add_total(group, type);
3091   }
3092 }
3093 
3094 void G1PrintRegionLivenessInfoClosure::log_cset_candidate_groups() {
3095   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3096   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" Collection Set Candidate Groups");
3097   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX " Types: Y=Young, M=From Marking Regions, R=Retained Regions");
3098   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3099                           G1PPRL_GID_H_FORMAT
3100                           G1PPRL_LEN_H_FORMAT
3101                           G1PPRL_GCEFF_H_FORMAT
3102                           G1PPRL_BYTE_H_FORMAT
3103                           G1PPRL_BYTE_H_FORMAT
3104                           G1PPRL_TYPE_H_FORMAT,
3105                           "groud-id", "num-regions",
3106                           "gc-eff", "liveness",
3107                           "remset", "type");
3108 
3109   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3110                           G1PPRL_GID_H_FORMAT
3111                           G1PPRL_LEN_H_FORMAT
3112                           G1PPRL_GCEFF_H_FORMAT
3113                           G1PPRL_BYTE_H_FORMAT
3114                           G1PPRL_BYTE_H_FORMAT
3115                           G1PPRL_TYPE_H_FORMAT,
3116                           "", "",
3117                           "(bytes/ms)", "%",
3118                           "(bytes)", "");
3119 
3120   G1CollectedHeap* g1h = G1CollectedHeap::heap();
3121 
3122   log_cset_candidate_group_add_total(g1h->young_regions_cset_group(), "Y");
3123 
3124   G1CollectionSetCandidates* candidates = g1h->policy()->candidates();
3125   log_cset_candidate_grouplist(candidates->from_marking_groups(), "M");
3126   log_cset_candidate_grouplist(candidates->retained_groups(), "R");
3127 }