1 /*
   2  * Copyright (c) 2001, 2026, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "classfile/classLoaderData.hpp"
  26 #include "classfile/classLoaderDataGraph.hpp"
  27 #include "cppstdlib/new.hpp"
  28 #include "gc/g1/g1BarrierSet.hpp"
  29 #include "gc/g1/g1BatchedTask.hpp"
  30 #include "gc/g1/g1CardSetMemory.hpp"
  31 #include "gc/g1/g1CardTableClaimTable.inline.hpp"
  32 #include "gc/g1/g1CollectedHeap.inline.hpp"
  33 #include "gc/g1/g1CollectionSetChooser.hpp"
  34 #include "gc/g1/g1CollectorState.hpp"
  35 #include "gc/g1/g1ConcurrentMark.inline.hpp"
  36 #include "gc/g1/g1ConcurrentMarkRemarkTasks.hpp"
  37 #include "gc/g1/g1ConcurrentMarkThread.inline.hpp"
  38 #include "gc/g1/g1ConcurrentRebuildAndScrub.hpp"
  39 #include "gc/g1/g1ConcurrentRefine.hpp"
  40 #include "gc/g1/g1HeapRegion.inline.hpp"
  41 #include "gc/g1/g1HeapRegionManager.hpp"
  42 #include "gc/g1/g1HeapRegionPrinter.hpp"
  43 #include "gc/g1/g1HeapRegionRemSet.inline.hpp"
  44 #include "gc/g1/g1HeapRegionSet.inline.hpp"
  45 #include "gc/g1/g1HeapVerifier.hpp"
  46 #include "gc/g1/g1OopClosures.inline.hpp"
  47 #include "gc/g1/g1Policy.hpp"
  48 #include "gc/g1/g1RegionMarkStatsCache.inline.hpp"
  49 #include "gc/g1/g1ThreadLocalData.hpp"
  50 #include "gc/g1/g1Trace.hpp"
  51 #include "gc/shared/gcId.hpp"
  52 #include "gc/shared/gcTimer.hpp"
  53 #include "gc/shared/gcTraceTime.inline.hpp"
  54 #include "gc/shared/gcVMOperations.hpp"
  55 #include "gc/shared/partialArraySplitter.inline.hpp"
  56 #include "gc/shared/partialArrayState.hpp"
  57 #include "gc/shared/partialArrayTaskStats.hpp"
  58 #include "gc/shared/referencePolicy.hpp"
  59 #include "gc/shared/suspendibleThreadSet.hpp"
  60 #include "gc/shared/taskqueue.inline.hpp"
  61 #include "gc/shared/taskTerminator.hpp"
  62 #include "gc/shared/weakProcessor.inline.hpp"
  63 #include "gc/shared/workerPolicy.hpp"
  64 #include "jvm.h"
  65 #include "logging/log.hpp"
  66 #include "memory/allocation.hpp"
  67 #include "memory/iterator.hpp"
  68 #include "memory/metaspaceUtils.hpp"
  69 #include "memory/resourceArea.hpp"
  70 #include "memory/universe.hpp"
  71 #include "nmt/memTracker.hpp"
  72 #include "oops/access.inline.hpp"
  73 #include "oops/oop.inline.hpp"
  74 #include "oops/oopCast.inline.hpp"
  75 #include "runtime/globals_extension.hpp"
  76 #include "runtime/handles.inline.hpp"
  77 #include "runtime/java.hpp"
  78 #include "runtime/orderAccess.hpp"
  79 #include "runtime/os.hpp"
  80 #include "runtime/prefetch.inline.hpp"
  81 #include "runtime/threads.hpp"
  82 #include "utilities/align.hpp"
  83 #include "utilities/checkedCast.hpp"
  84 #include "utilities/formatBuffer.hpp"
  85 #include "utilities/growableArray.hpp"
  86 #include "utilities/powerOfTwo.hpp"
  87 
// Default-constructed closure is unbound; initialize() must be called before use.
G1CMIsAliveClosure::G1CMIsAliveClosure() : _cm(nullptr) { }
  89 
// Constructs a closure bound to the given (non-null) concurrent mark instance.
G1CMIsAliveClosure::G1CMIsAliveClosure(G1ConcurrentMark* cm) : _cm(cm) {
  assert(cm != nullptr, "must be");
}
  93 
// Late binding of the closure to the concurrent mark instance.
// May only be called once, and only on an unbound closure.
void G1CMIsAliveClosure::initialize(G1ConcurrentMark* cm) {
  assert(cm != nullptr, "must be");
  assert(_cm == nullptr, "double initialize");
  _cm = cm;
}
  99 
// Process one marked object found in the bitmap at 'addr': advance the task's
// local finger past it, scan the object, and partially drain the task-local
// queue and global mark stack to bound their size.
// Returns false (terminating the bitmap iteration) once the task has aborted.
bool G1CMBitMapClosure::do_addr(HeapWord* const addr) {
  assert(addr < _cm->finger(), "invariant");
  assert(addr >= _task->finger(), "invariant");

  // We move that task's local finger along.
  _task->move_finger_to(addr);

  _task->process_entry(G1TaskQueueEntry(cast_to_oop(addr)), false /* stolen */);
  // we only partially drain the local queue and global stack
  _task->drain_local_queue(true);
  _task->drain_global_stack(true);

  // if the has_aborted flag has been raised, we need to bail out of
  // the iteration
  return !_task->has_aborted();
}
 116 
// The mark stack starts empty; backing memory is reserved later in initialize().
G1CMMarkStack::G1CMMarkStack() :
  _chunk_allocator() {
  set_empty();
}
 121 
 122 size_t G1CMMarkStack::capacity_alignment() {
 123   return (size_t)lcm(os::vm_allocation_granularity(), sizeof(TaskQueueEntryChunk)) / sizeof(G1TaskQueueEntry);
 124 }
 125 
// Size the global mark stack from MarkStackSize/MarkStackSizeMax (both in
// entries), convert to whole chunks, and set up the chunk allocator. The
// (possibly rounded) sizes are written back to the flags ergonomically.
// Returns false if the initial backing memory cannot be reserved.
bool G1CMMarkStack::initialize() {
  guarantee(_chunk_allocator.capacity() == 0, "G1CMMarkStack already initialized.");

  // Capacities below are in task queue entries.
  size_t initial_capacity = MarkStackSize;
  size_t max_capacity = MarkStackSizeMax;

  // Number of entries one chunk accounts for.
  size_t const TaskEntryChunkSizeInVoidStar = sizeof(TaskQueueEntryChunk) / sizeof(G1TaskQueueEntry);

  size_t max_num_chunks = align_up(max_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
  size_t initial_num_chunks = align_up(initial_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;

  // The allocator requires a power-of-2 initial chunk count; never let the
  // maximum drop below the (rounded-up) initial count.
  initial_num_chunks = round_up_power_of_2(initial_num_chunks);
  max_num_chunks = MAX2(initial_num_chunks, max_num_chunks);

  // Clamp entry counts to an int-representable range.
  size_t limit = (INT_MAX - 1);
  max_capacity = MIN2((max_num_chunks * TaskEntryChunkSizeInVoidStar), limit);
  initial_capacity = MIN2((initial_num_chunks * TaskEntryChunkSizeInVoidStar), limit);

  FLAG_SET_ERGO(MarkStackSizeMax, max_capacity);
  FLAG_SET_ERGO(MarkStackSize, initial_capacity);

  log_trace(gc)("MarkStackSize: %uk  MarkStackSizeMax: %uk", (uint)(MarkStackSize / K), (uint)(MarkStackSizeMax / K));

  // NOTE(review): first value is in chunks, second in entries — the message
  // mixes units; confirm whether max_num_chunks was intended here.
  log_debug(gc)("Initialize mark stack with %zu chunks, maximum %zu",
                initial_num_chunks, max_capacity);

  return _chunk_allocator.initialize(initial_num_chunks, max_num_chunks);
}
 154 
// Hand out the next unused TaskQueueEntryChunk from the backing buckets,
// lazily expanding the backing memory (when allowed) if the target bucket
// has not been allocated yet. Returns null when at maximum capacity, when
// growth is disallowed (marking will be restarted instead), or when
// expansion fails.
// Thread-safe: chunk indices are claimed with an atomic fetch-and-add and
// expansion is serialized under G1MarkStackChunkList_lock.
G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::ChunkAllocator::allocate_new_chunk() {
  if (_size.load_relaxed() >= _max_capacity) {
    return nullptr;
  }

  size_t cur_idx = _size.fetch_then_add(1u);

  // Re-check after claiming: another thread may have taken the last slot
  // between the quick test above and our fetch_then_add.
  if (cur_idx >= _max_capacity) {
    return nullptr;
  }

  size_t bucket = get_bucket(cur_idx);
  if (_buckets[bucket].load_acquire() == nullptr) {
    if (!_should_grow) {
      // Prefer to restart the CM.
      return nullptr;
    }

    // Check-lock-recheck so that only one thread performs the expansion.
    MutexLocker x(G1MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
    if (_buckets[bucket].load_acquire() == nullptr) {
      size_t desired_capacity = bucket_size(bucket) * 2;
      if (!try_expand_to(desired_capacity)) {
        return nullptr;
      }
    }
  }

  // Placement-new the chunk in its slot within the bucket's backing array.
  size_t bucket_idx = get_bucket_index(cur_idx);
  TaskQueueEntryChunk* result = ::new (&_buckets[bucket].load_relaxed()[bucket_idx]) TaskQueueEntryChunk;
  result->next = nullptr;
  return result;
}
 187 
// Empty allocator; initialize() sets up the bucket directory and reserves
// the initial capacity.
G1CMMarkStack::ChunkAllocator::ChunkAllocator() :
  _min_capacity(0),
  _max_capacity(0),
  _capacity(0),
  _num_buckets(0),
  _should_grow(false),
  _buckets(nullptr),
  _size(0)
{ }
 197 
// Set up the bucket directory covering up to max_capacity chunks and reserve
// the memory for the first bucket. initial_capacity (in chunks) must be a
// power of two. Returns false if the initial reservation fails.
bool G1CMMarkStack::ChunkAllocator::initialize(size_t initial_capacity, size_t max_capacity) {
  guarantee(is_power_of_2(initial_capacity), "Invalid initial_capacity");

  _min_capacity = initial_capacity;
  _max_capacity = max_capacity;
  _num_buckets  = get_bucket(_max_capacity) + 1;

  _buckets = NEW_C_HEAP_ARRAY(Atomic<TaskQueueEntryChunk*>, _num_buckets, mtGC);

  for (size_t i = 0; i < _num_buckets; i++) {
    _buckets[i].store_relaxed(nullptr);
  }

  // Reserve backing memory for the first bucket up front.
  size_t new_capacity = bucket_size(0);

  if (!reserve(new_capacity)) {
    log_warning(gc)("Failed to reserve memory for new overflow mark stack with %zu chunks and size %zuB.", new_capacity, new_capacity * sizeof(TaskQueueEntryChunk));
    return false;
  }
  return true;
}
 219 
 220 bool G1CMMarkStack::ChunkAllocator::try_expand_to(size_t desired_capacity) {
 221   if (_capacity == _max_capacity) {
 222     log_debug(gc)("Can not expand overflow mark stack further, already at maximum capacity of %zu chunks.", _capacity);
 223     return false;
 224   }
 225 
 226   size_t old_capacity = _capacity;
 227   desired_capacity = MIN2(desired_capacity, _max_capacity);
 228 
 229   if (reserve(desired_capacity)) {
 230     log_debug(gc)("Expanded the mark stack capacity from %zu to %zu chunks",
 231                   old_capacity, desired_capacity);
 232     return true;
 233   }
 234   return false;
 235 }
 236 
 237 bool G1CMMarkStack::ChunkAllocator::try_expand() {
 238   size_t new_capacity = _capacity * 2;
 239   return try_expand_to(new_capacity);
 240 }
 241 
 242 G1CMMarkStack::ChunkAllocator::~ChunkAllocator() {
 243   if (_buckets == nullptr) {
 244     return;
 245   }
 246 
 247   for (size_t i = 0; i < _num_buckets; i++) {
 248     if (_buckets[i].load_relaxed() != nullptr) {
 249       MmapArrayAllocator<TaskQueueEntryChunk>::free(_buckets[i].load_relaxed(),  bucket_size(i));
 250       _buckets[i].store_relaxed(nullptr);
 251     }
 252   }
 253 
 254   FREE_C_HEAP_ARRAY(TaskQueueEntryChunk*, _buckets);
 255 }
 256 
// Reserve backing memory so the allocator covers new_capacity chunks, by
// mmap-allocating every not-yet-present bucket from the current capacity up
// to the bucket containing chunk index new_capacity - 1. _capacity grows as
// buckets are added; each bucket is published with a release store so that
// the load_acquire readers in allocate_new_chunk() see initialized memory.
// Returns false if any allocation fails (earlier buckets remain reserved).
bool G1CMMarkStack::ChunkAllocator::reserve(size_t new_capacity) {
  assert(new_capacity <= _max_capacity, "Cannot expand overflow mark stack beyond the max_capacity of %zu chunks.", _max_capacity);

  size_t highest_bucket = get_bucket(new_capacity - 1);
  size_t i = get_bucket(_capacity);

  // Allocate all buckets associated with indexes between the current capacity (_capacity)
  // and the new capacity (new_capacity). This step ensures that there are no gaps in the
  // array and that the capacity accurately reflects the reserved memory.
  for (; i <= highest_bucket; i++) {
    if (_buckets[i].load_acquire() != nullptr) {
      continue; // Skip over already allocated buckets.
    }

    size_t bucket_capacity = bucket_size(i);

    // Trim bucket size so that we do not exceed the _max_capacity.
    bucket_capacity = (_capacity + bucket_capacity) <= _max_capacity ?
                      bucket_capacity :
                      _max_capacity - _capacity;


    TaskQueueEntryChunk* bucket_base = MmapArrayAllocator<TaskQueueEntryChunk>::allocate_or_null(bucket_capacity, mtGC);

    if (bucket_base == nullptr) {
      log_warning(gc)("Failed to reserve memory for increasing the overflow mark stack capacity with %zu chunks and size %zuB.",
                      bucket_capacity, bucket_capacity * sizeof(TaskQueueEntryChunk));
      return false;
    }
    _capacity += bucket_capacity;
    _buckets[i].release_store(bucket_base);
  }
  return true;
}
 291 
// Best-effort doubling of the mark stack's backing memory; failure is not
// fatal here (callers cope by restarting marking on overflow).
void G1CMMarkStack::expand() {
  _chunk_allocator.try_expand();
}
 295 
 296 void G1CMMarkStack::add_chunk_to_list(Atomic<TaskQueueEntryChunk*>* list, TaskQueueEntryChunk* elem) {
 297   elem->next = list->load_relaxed();
 298   list->store_relaxed(elem);
 299 }
 300 
// Push a filled chunk onto the global chunk list; the list and its length
// counter are protected by G1MarkStackChunkList_lock.
void G1CMMarkStack::add_chunk_to_chunk_list(TaskQueueEntryChunk* elem) {
  MutexLocker x(G1MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
  add_chunk_to_list(&_chunk_list, elem);
  _chunks_in_chunk_list++;
}
 306 
// Recycle an emptied chunk onto the free list, guarded by
// G1MarkStackFreeList_lock.
void G1CMMarkStack::add_chunk_to_free_list(TaskQueueEntryChunk* elem) {
  MutexLocker x(G1MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
  add_chunk_to_list(&_free_list, elem);
}
 311 
 312 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_list(Atomic<TaskQueueEntryChunk*>* list) {
 313   TaskQueueEntryChunk* result = list->load_relaxed();
 314   if (result != nullptr) {
 315     list->store_relaxed(list->load_relaxed()->next);
 316   }
 317   return result;
 318 }
 319 
// Pop a chunk of entries from the global chunk list, keeping the length
// counter in sync. Returns null if the list is empty.
G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_chunk_list() {
  MutexLocker x(G1MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
  TaskQueueEntryChunk* result = remove_chunk_from_list(&_chunk_list);
  if (result != nullptr) {
    _chunks_in_chunk_list--;
  }
  return result;
}
 328 
// Grab a recycled chunk from the free list, or null if none is available.
G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_free_list() {
  MutexLocker x(G1MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
  return remove_chunk_from_list(&_free_list);
}
 333 
// Push EntriesPerChunk entries from ptr_arr onto the global mark stack.
// Prefers a recycled chunk from the free list; otherwise allocates new
// backing memory. Returns false when no chunk can be obtained, i.e. the
// mark stack has overflown.
bool G1CMMarkStack::par_push_chunk(G1TaskQueueEntry* ptr_arr) {
  // Get a new chunk.
  TaskQueueEntryChunk* new_chunk = remove_chunk_from_free_list();

  if (new_chunk == nullptr) {
    // Did not get a chunk from the free list. Allocate from backing memory.
    new_chunk = _chunk_allocator.allocate_new_chunk();

    if (new_chunk == nullptr) {
      return false;
    }
  }

  Copy::conjoint_memory_atomic(ptr_arr, new_chunk->data, EntriesPerChunk * sizeof(G1TaskQueueEntry));

  add_chunk_to_chunk_list(new_chunk);

  return true;
}
 353 
// Pop EntriesPerChunk entries from the global mark stack into ptr_arr and
// recycle the emptied chunk onto the free list. Returns false when the
// stack is empty.
bool G1CMMarkStack::par_pop_chunk(G1TaskQueueEntry* ptr_arr) {
  TaskQueueEntryChunk* cur = remove_chunk_from_chunk_list();

  if (cur == nullptr) {
    return false;
  }

  Copy::conjoint_memory_atomic(cur->data, ptr_arr, EntriesPerChunk * sizeof(G1TaskQueueEntry));

  add_chunk_to_free_list(cur);
  return true;
}
 366 
// Discard all stack contents by resetting the chunk and free lists and the
// allocator's claimed-chunk state. Callers must ensure exclusive access.
void G1CMMarkStack::set_empty() {
  _chunks_in_chunk_list = 0;
  _chunk_list.store_relaxed(nullptr);
  _free_list.store_relaxed(nullptr);
  _chunk_allocator.reset();
}
 373 
// Preallocates space for up to max_regions root MemRegions; regions are
// registered later via add().
G1CMRootMemRegions::G1CMRootMemRegions(uint const max_regions) :
    _root_regions(MemRegion::create_array(max_regions, mtGC)),
    _max_regions(max_regions),
    _num_root_regions(0),
    _claimed_root_regions(0),
    _scan_in_progress(false),
    _should_abort(false) { }
 381 
// Releases the MemRegion array allocated in the constructor.
G1CMRootMemRegions::~G1CMRootMemRegions() {
  MemRegion::destroy_array(_root_regions, _max_regions);
}
 385 
// Forget all registered root regions for the next cycle.
void G1CMRootMemRegions::reset() {
  _num_root_regions.store_relaxed(0);
}
 389 
// Record [start, end) as a root region to be scanned later. Only called at a
// safepoint; slots are claimed with an atomic add so multiple threads may
// register regions concurrently.
void G1CMRootMemRegions::add(HeapWord* start, HeapWord* end) {
  assert_at_safepoint();
  size_t idx = _num_root_regions.fetch_then_add(1u);
  assert(idx < _max_regions, "Trying to add more root MemRegions than there is space %zu", _max_regions);
  assert(start != nullptr && end != nullptr && start <= end, "Start (" PTR_FORMAT ") should be less or equal to "
         "end (" PTR_FORMAT ")", p2i(start), p2i(end));
  _root_regions[idx].set_start(start);
  _root_regions[idx].set_end(end);
}
 399 
// Arm root region scanning for a new cycle: scanning counts as "in progress"
// only if at least one root region was registered. Resets the claim counter
// and abort flag.
void G1CMRootMemRegions::prepare_for_scan() {
  assert(!scan_in_progress(), "pre-condition");

  _scan_in_progress.store_relaxed(num_root_regions() > 0);

  _claimed_root_regions.store_relaxed(0);
  _should_abort.store_relaxed(false);
}
 408 
// Claim the next unscanned root region, or null when all regions have been
// claimed or the scan is aborting. Lock-free: regions are claimed via an
// atomic fetch-and-add on the claim counter.
const MemRegion* G1CMRootMemRegions::claim_next() {
  if (_should_abort.load_relaxed()) {
    // If someone has set the should_abort flag, we return null to
    // force the caller to bail out of their loop.
    return nullptr;
  }

  // Quick exit before attempting a claim.
  uint local_num_root_regions = num_root_regions();
  if (_claimed_root_regions.load_relaxed() >= local_num_root_regions) {
    return nullptr;
  }

  // The fetch-and-add may overshoot when racing; only indices within range
  // represent a successful claim.
  size_t claimed_index = _claimed_root_regions.fetch_then_add(1u);
  if (claimed_index < local_num_root_regions) {
    return &_root_regions[claimed_index];
  }
  return nullptr;
}
 427 
// Number of registered root regions. The counter is bounded by _max_regions
// (a uint, asserted in add()), so the narrowing cast is safe.
uint G1CMRootMemRegions::num_root_regions() const {
  return (uint)_num_root_regions.load_relaxed();
}
 431 
 432 bool G1CMRootMemRegions::contains(const MemRegion mr) const {
 433   uint local_num_root_regions = num_root_regions();
 434   for (uint i = 0; i < local_num_root_regions; i++) {
 435     if (_root_regions[i].equals(mr)) {
 436       return true;
 437     }
 438   }
 439   return false;
 440 }
 441 
// Mark the scan as finished and wake all threads blocked in
// wait_until_scan_finished().
void G1CMRootMemRegions::notify_scan_done() {
  MutexLocker x(G1RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
  _scan_in_progress.store_relaxed(false);
  G1RootRegionScan_lock->notify_all();
}
 447 
// Cancelling behaves exactly like a completed scan: clear the in-progress
// flag and wake waiters.
void G1CMRootMemRegions::cancel_scan() {
  notify_scan_done();
}
 451 
// Called when root region scanning completes normally. Unless the scan was
// asked to abort, all registered regions must have been claimed by now.
void G1CMRootMemRegions::scan_finished() {
  assert(scan_in_progress(), "pre-condition");

  if (!_should_abort.load_relaxed()) {
    assert(_claimed_root_regions.load_relaxed() >= num_root_regions(),
           "we should have claimed all root regions, claimed %zu, length = %u",
           _claimed_root_regions.load_relaxed(), num_root_regions());
  }

  notify_scan_done();
}
 463 
// Block until root region scanning is done. Returns false immediately if no
// scan is in progress, true after actually having waited.
bool G1CMRootMemRegions::wait_until_scan_finished() {
  if (!scan_in_progress()) {
    return false;
  }

  {
    // Re-check the flag under the lock; notify_scan_done() clears it while
    // holding the same lock, so we cannot miss the wakeup.
    MonitorLocker ml(G1RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    while (scan_in_progress()) {
      ml.wait();
    }
  }
  return true;
}
 477 
// Construct the concurrent mark subsystem. Only cheap, always-needed state is
// set up here (including the mark bitmap backed by bitmap_storage); thread,
// worker and task creation is deferred to fully_initialize().
G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h,
                                   G1RegionToSpaceMapper* bitmap_storage) :
  _cm_thread(nullptr),
  _g1h(g1h),

  _mark_bitmap(),

  _heap(_g1h->reserved()),

  _root_regions(_g1h->max_num_regions()),

  _global_mark_stack(),

  _finger(nullptr), // _finger set in set_non_marking_state

  _worker_id_offset(G1ConcRefinementThreads), // The refinement control thread does not refine cards, so it's just the worker threads.
  _max_num_tasks(MAX2(ConcGCThreads, ParallelGCThreads)),
  _num_active_tasks(0), // _num_active_tasks set in set_non_marking_state()
  _tasks(nullptr), // _tasks set inside late_init()
  _task_queues(new G1CMTaskQueueSet(_max_num_tasks)),
  _terminator(_max_num_tasks, _task_queues),
  _partial_array_state_manager(new PartialArrayStateManager(_max_num_tasks)),

  _first_overflow_barrier_sync(),
  _second_overflow_barrier_sync(),

  _completed_mark_cycles(0),
  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _gc_timer_cm(new ConcurrentGCTimer()),
  _gc_tracer_cm(new G1OldTracer()),

  // _verbose_level set below

  _remark_times(),
  _remark_mark_times(),
  _remark_weak_ref_times(),
  _cleanup_times(),

  _concurrent_workers(nullptr),
  _num_concurrent_workers(0),
  _max_concurrent_workers(0),

  // Per-region arrays sized for the maximum region count; constructed
  // in-place later in fully_initialize().
  _region_mark_stats(NEW_C_HEAP_ARRAY(G1RegionMarkStats, _g1h->max_num_regions(), mtGC)),
  _top_at_mark_starts(NEW_C_HEAP_ARRAY(Atomic<HeapWord*>, _g1h->max_num_regions(), mtGC)),
  _top_at_rebuild_starts(NEW_C_HEAP_ARRAY(Atomic<HeapWord*>, _g1h->max_num_regions(), mtGC)),
  _needs_remembered_set_rebuild(false)
{
  assert(G1CGC_lock != nullptr, "CGC_lock must be initialized");

  _mark_bitmap.initialize(g1h->reserved(), bitmap_storage);
}
 532 
// Late initialization: create the concurrent mark thread, the concurrent
// worker threads, the global mark stack and the per-worker marking tasks.
// Idempotent: returns immediately when already fully initialized.
void G1ConcurrentMark::fully_initialize() {
  if (is_fully_initialized()) {
    return;
  }

  // Create & start ConcurrentMark thread.
  _cm_thread = new G1ConcurrentMarkThread(this);
  if (_cm_thread->osthread() == nullptr) {
    vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
  }

  log_debug(gc)("ConcGCThreads: %u offset %u", ConcGCThreads, _worker_id_offset);
  log_debug(gc)("ParallelGCThreads: %u", ParallelGCThreads);

  _max_concurrent_workers = ConcGCThreads;

  _concurrent_workers = new WorkerThreads("G1 Conc", _max_concurrent_workers);
  _concurrent_workers->initialize_workers();
  _num_concurrent_workers = _concurrent_workers->active_workers();

  if (!_global_mark_stack.initialize()) {
    vm_exit_during_initialization("Failed to allocate initial concurrent mark overflow mark stack.");
  }

  _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_num_tasks, mtGC);

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _num_active_tasks = _max_num_tasks;

  for (uint i = 0; i < _max_num_tasks; ++i) {
    G1CMTaskQueue* task_queue = new G1CMTaskQueue();
    _task_queues->register_queue(i, task_queue);

    _tasks[i] = new G1CMTask(i, this, task_queue, _region_mark_stats);
  }

  // Value-initialize the per-region arrays (allocated raw in the constructor)
  // in-place via placement new.
  uint max_num_regions = _g1h->max_num_regions();
  ::new (_region_mark_stats) G1RegionMarkStats[max_num_regions]{};
  ::new (_top_at_mark_starts) Atomic<HeapWord*>[max_num_regions]{};
  ::new (_top_at_rebuild_starts) Atomic<HeapWord*>[max_num_regions]{};

  reset_at_marking_complete();
}
 576 
 577 bool G1ConcurrentMark::in_progress() const {
 578   return is_fully_initialized() ? _cm_thread->in_progress() : false;
 579 }
 580 
// Accessor for the shared partial-array state manager used by the tasks.
PartialArrayStateManager* G1ConcurrentMark::partial_array_state_manager() const {
  return _partial_array_state_manager;
}
 584 
// Accessor for the concurrent mark thread; only valid after fully_initialize().
G1ConcurrentMarkThread* G1ConcurrentMark::cm_thread() const {
  assert(is_fully_initialized(), "must be");
  return _cm_thread;
}
 589 
// Reset all global marking state for a fresh marking cycle: clear the abort
// flag, restart-related state, all tasks and all per-region statistics.
void G1ConcurrentMark::reset() {
  _has_aborted.store_relaxed(false);

  reset_marking_for_restart();

  // Reset all tasks, since different phases will use different number of active
  // threads. So, it's easiest to have all of them ready.
  for (uint i = 0; i < _max_num_tasks; ++i) {
    _tasks[i]->reset(mark_bitmap());
  }

  // Re-initialize the rebuild-start array in-place and clear per-region stats.
  uint max_num_regions = _g1h->max_num_regions();
  ::new (_top_at_rebuild_starts) Atomic<HeapWord*>[max_num_regions]{};
  for (uint i = 0; i < max_num_regions; i++) {
    _region_mark_stats[i].clear();
  }

  _root_regions.reset();
}
 609 
// Forget all marking statistics collected for region r, including each
// task's cached stats, the region's rebuild-start and its mark stats.
void G1ConcurrentMark::clear_statistics(G1HeapRegion* r) {
  uint region_idx = r->hrm_index();
  for (uint j = 0; j < _max_num_tasks; ++j) {
    _tasks[j]->clear_mark_stats_cache(region_idx);
  }
  _top_at_rebuild_starts[region_idx].store_relaxed(nullptr);
  _region_mark_stats[region_idx].clear();
}
 618 
// Called when a humongous object is eagerly reclaimed at a safepoint: unmark
// its start and, if marking/rebuild is running, drop any statistics gathered
// for all regions the object spans.
void G1ConcurrentMark::humongous_object_eagerly_reclaimed(G1HeapRegion* r) {
  assert_at_safepoint();
  assert(r->is_starts_humongous(), "Got humongous continues region here");

  // Need to clear mark bit of the humongous object. Doing this unconditionally is fine.
  mark_bitmap()->clear(r->bottom());

  if (!_g1h->collector_state()->mark_or_rebuild_in_progress()) {
    return;
  }

  // Clear any statistics about the region gathered so far.
  _g1h->humongous_obj_regions_iterate(r,
                                      [&] (G1HeapRegion* r) {
                                        clear_statistics(r);
                                      });
}
 636 
// Prepare global marking state for (re)starting marking: empty the global
// mark stack (growing it if the previous attempt overflowed), clear the
// per-region overflow state, reset the global finger and all tasks.
void G1ConcurrentMark::reset_marking_for_restart() {
  _global_mark_stack.set_empty();

  // Expand the marking stack, if we have to and if we can.
  if (has_overflown()) {
    _global_mark_stack.expand();

    uint max_num_regions = _g1h->max_num_regions();
    for (uint i = 0; i < max_num_regions; i++) {
      _region_mark_stats[i].clear_during_overflow();
    }
  }

  clear_has_overflown();
  // Scanning resumes from the bottom of the reserved heap.
  _finger.store_relaxed(_heap.start());

  for (uint i = 0; i < _max_num_tasks; ++i) {
    _tasks[i]->reset_for_restart();
  }
}
 657 
// Set the number of tasks active in the upcoming phase and resize the
// terminator and both overflow barriers accordingly.
void G1ConcurrentMark::set_concurrency(uint active_tasks) {
  assert(active_tasks <= _max_num_tasks, "we should not have more");

  _num_active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator.reset_for_reuse(active_tasks);
  _first_overflow_barrier_sync.set_n_workers(active_tasks);
  _second_overflow_barrier_sync.set_n_workers(active_tasks);
}
 668 
// Like set_concurrency(), but also records whether the next phase runs
// concurrently. A non-concurrent phase (e.g. remark) must be at a safepoint
// with the whole heap already covered by the global finger.
void G1ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
  set_concurrency(active_tasks);

  _concurrent.store_relaxed(concurrent);

  if (!concurrent) {
    // At this point we should be in a STW phase, and completed marking.
    assert_at_safepoint_on_vm_thread();
    assert(out_of_regions(),
           "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
           p2i(finger()), p2i(_heap.end()));
  }
}
 682 
#if TASKQUEUE_STATS
// Debug-build only: log and reset the task queue and partial-array task
// statistics accumulated during the last marking.
void G1ConcurrentMark::print_and_reset_taskqueue_stats() {

  _task_queues->print_and_reset_taskqueue_stats("G1ConcurrentMark Oop Queue");

  auto get_pa_stats = [&](uint i) {
    return _tasks[i]->partial_array_task_stats();
  };

  PartialArrayTaskStats::log_set(_max_num_tasks, get_pa_stats,
                                 "G1ConcurrentMark Partial Array Task Stats");

  for (uint i = 0; i < _max_num_tasks; ++i) {
    get_pa_stats(i)->reset();
  }
}
#endif
 700 
// Return the global marking state to its idle (not-marking) configuration.
void G1ConcurrentMark::reset_at_marking_complete() {
  TASKQUEUE_STATS_ONLY(print_and_reset_taskqueue_stats());
  // We set the global marking state to some default values when we're
  // not doing marking.
  reset_marking_for_restart();
  _num_active_tasks = 0;
}
 708 
// Never executed at runtime: the G1ConcurrentMark instance is never freed,
// which is enforced by the ShouldNotReachHere() below.
G1ConcurrentMark::~G1ConcurrentMark() {
  FREE_C_HEAP_ARRAY(Atomic<HeapWord*>, _top_at_mark_starts);
  FREE_C_HEAP_ARRAY(Atomic<HeapWord*>, _top_at_rebuild_starts);
  FREE_C_HEAP_ARRAY(G1RegionMarkStats, _region_mark_stats);
  // The G1ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}
 716 
// Worker task that clears the concurrent mark bitmap in fixed-size chunks.
// When 'suspendible', workers join the suspendible thread set and perform
// yield/abort checks between chunks so clearing can run concurrently.
class G1ClearBitMapTask : public WorkerTask {
public:
  // Amount of bitmap (in bytes) cleared per work unit between yield checks.
  static size_t chunk_size() { return M; }

private:
  // Heap region closure used for clearing the _mark_bitmap.
  class G1ClearBitmapHRClosure : public G1HeapRegionClosure {
    G1ConcurrentMark* _cm;
    G1CMBitMap* _bitmap;
    bool _suspendible; // If suspendible, do yield checks.

    bool suspendible() {
      return _suspendible;
    }

    bool is_clear_concurrent_undo() {
      return suspendible() && _cm->cm_thread()->in_undo_mark();
    }

    // Yield to a pending safepoint (if suspendible) and report whether
    // concurrent marking has been aborted.
    bool has_aborted() {
      if (suspendible()) {
        _cm->do_yield_check();
        return _cm->has_aborted();
      }
      return false;
    }

    // Upper bound of the address range that actually needs its bitmap cleared.
    HeapWord* region_clear_limit(G1HeapRegion* r) {
      // During a Concurrent Undo Mark cycle, the per region top_at_mark_start and
      // live_words data are current wrt to the _mark_bitmap. We use this information
      // to only clear ranges of the bitmap that require clearing.
      if (is_clear_concurrent_undo()) {
        // No need to clear bitmaps for empty regions (which includes regions we
        // did not mark through).
        if (!_cm->contains_live_object(r->hrm_index())) {
          assert(_bitmap->get_next_marked_addr(r->bottom(), r->end()) == r->end(), "Should not have marked bits");
          return r->bottom();
        }
        assert(_bitmap->get_next_marked_addr(_cm->top_at_mark_start(r), r->end()) == r->end(), "Should not have marked bits above tams");
      }
      return r->end();
    }

  public:
    G1ClearBitmapHRClosure(G1ConcurrentMark* cm, bool suspendible) :
      G1HeapRegionClosure(),
      _cm(cm),
      _bitmap(cm->mark_bitmap()),
      _suspendible(suspendible)
    { }

    // Clear r's bitmap in chunk_size() steps, checking for abort between
    // chunks. Returning true terminates the region iteration early.
    virtual bool do_heap_region(G1HeapRegion* r) {
      if (has_aborted()) {
        return true;
      }

      HeapWord* cur = r->bottom();
      HeapWord* const end = region_clear_limit(r);

      size_t const chunk_size_in_words = G1ClearBitMapTask::chunk_size() / HeapWordSize;

      while (cur < end) {

        MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
        _bitmap->clear_range(mr);

        cur += chunk_size_in_words;

        // Repeat the asserts from before the start of the closure. We will do them
        // as asserts here to minimize their overhead on the product. However, we
        // will have them as guarantees at the beginning / end of the bitmap
        // clearing to get some checking in the product.
        assert(!suspendible() || _cm->in_progress(), "invariant");
        assert(!suspendible() || !G1CollectedHeap::heap()->collector_state()->mark_or_rebuild_in_progress(), "invariant");

        // Abort iteration if necessary.
        if (has_aborted()) {
          return true;
        }
      }
      assert(cur >= end, "Must have completed iteration over the bitmap for region %u.", r->hrm_index());

      _cm->reset_top_at_mark_start(r);

      return false;
    }
  };

  G1ClearBitmapHRClosure _cl;
  G1HeapRegionClaimer _hr_claimer;
  bool _suspendible; // If the task is suspendible, workers must join the STS.

public:
  G1ClearBitMapTask(G1ConcurrentMark* cm, uint n_workers, bool suspendible) :
    WorkerTask("G1 Clear Bitmap"),
    _cl(cm, suspendible),
    _hr_claimer(n_workers),
    _suspendible(suspendible)
  { }

  void work(uint worker_id) {
    SuspendibleThreadSetJoiner sts_join(_suspendible);
    G1CollectedHeap::heap()->heap_region_par_iterate_from_worker_offset(&_cl, &_hr_claimer, worker_id);
  }
};
 822 
// Clear the mark bitmap with as many workers as are useful for the committed
// heap size (one per chunk_size() of bitmap, at most). With may_yield the
// workers join the suspendible thread set and clearing may be aborted;
// without it, the clear must run at a safepoint.
void G1ConcurrentMark::clear_bitmap(WorkerThreads* workers, bool may_yield) {
  assert(may_yield || SafepointSynchronize::is_at_safepoint(), "Non-yielding bitmap clear only allowed at safepoint.");

  size_t const num_bytes_to_clear = (G1HeapRegion::GrainBytes * _g1h->num_committed_regions()) / G1CMBitMap::heap_map_factor();
  size_t const num_chunks = align_up(num_bytes_to_clear, G1ClearBitMapTask::chunk_size()) / G1ClearBitMapTask::chunk_size();

  uint const num_workers = (uint)MIN2(num_chunks, (size_t)workers->active_workers());

  G1ClearBitMapTask cl(this, num_workers, may_yield);

  log_debug(gc, ergo)("Running %s with %u workers for %zu work units.", cl.name(), num_workers, num_chunks);
  workers->run_task(&cl, num_workers);
}
 836 
 837 void G1ConcurrentMark::cleanup_for_next_mark() {
 838   // Make sure that the concurrent mark thread looks to still be in
 839   // the current cycle.
 840   guarantee(is_fully_initialized(), "should be initializd");
 841   guarantee(in_progress(), "invariant");
 842 
 843   // We are finishing up the current cycle by clearing the next
 844   // marking bitmap and getting it ready for the next cycle. During
 845   // this time no other cycle can start. So, let's make sure that this
 846   // is the case.
 847   guarantee(!_g1h->collector_state()->mark_or_rebuild_in_progress(), "invariant");
 848 
 849   clear_bitmap(_concurrent_workers, true);
 850 
 851   reset_partial_array_state_manager();
 852 
 853   // Repeat the asserts from above.
 854   guarantee(is_fully_initialized(), "should be initializd");
 855   guarantee(in_progress(), "invariant");
 856   guarantee(!_g1h->collector_state()->mark_or_rebuild_in_progress(), "invariant");
 857 }
 858 
 859 void G1ConcurrentMark::reset_partial_array_state_manager() {
 860   for (uint i = 0; i < _max_num_tasks; ++i) {
 861     _tasks[i]->unregister_partial_array_splitter();
 862   }
 863 
 864   partial_array_state_manager()->reset();
 865 
 866   for (uint i = 0; i < _max_num_tasks; ++i) {
 867     _tasks[i]->register_partial_array_splitter();
 868   }
 869 }
 870 
 871 void G1ConcurrentMark::clear_bitmap(WorkerThreads* workers) {
 872   assert_at_safepoint_on_vm_thread();
 873   // To avoid fragmentation the full collection requesting to clear the bitmap
 874   // might use fewer workers than available. To ensure the bitmap is cleared
 875   // as efficiently as possible the number of active workers are temporarily
 876   // increased to include all currently created workers.
 877   WithActiveWorkers update(workers, workers->created_workers());
 878   clear_bitmap(workers, false);
 879 }
 880 
// Batched task run at the Concurrent Start pause before concurrent marking
// begins: resets the global marking state and notes mark start per region.
class G1PreConcurrentStartTask : public G1BatchedTask {
  // Reset marking state.
  class ResetMarkingStateTask;
  // For each region note start of marking.
  class NoteStartOfMarkTask;

public:
  G1PreConcurrentStartTask(GCCause::Cause cause, G1ConcurrentMark* cm);
};
 890 
// Serial sub-task resetting the global marking state (see do_work definition
// further below).
class G1PreConcurrentStartTask::ResetMarkingStateTask : public G1AbstractSubTask {
  G1ConcurrentMark* _cm;
public:
  ResetMarkingStateTask(G1ConcurrentMark* cm) : G1AbstractSubTask(G1GCPhaseTimes::ResetMarkingState), _cm(cm) { }

  // Serial work: constant cost independent of worker count.
  double worker_cost() const override { return 1.0; }
  void do_work(uint worker_id) override;
};
 899 
 900 class G1PreConcurrentStartTask::NoteStartOfMarkTask : public G1AbstractSubTask {
 901 
 902   class NoteStartOfMarkHRClosure : public G1HeapRegionClosure {
 903     G1ConcurrentMark* _cm;
 904 
 905   public:
 906     NoteStartOfMarkHRClosure() : G1HeapRegionClosure(), _cm(G1CollectedHeap::heap()->concurrent_mark()) { }
 907 
 908     bool do_heap_region(G1HeapRegion* r) override {
 909       if (r->is_old_or_humongous() && !r->is_collection_set_candidate() && !r->in_collection_set()) {
 910         _cm->update_top_at_mark_start(r);
 911       } else {
 912         _cm->reset_top_at_mark_start(r);
 913       }
 914       return false;
 915     }
 916   } _region_cl;
 917 
 918   G1HeapRegionClaimer _claimer;
 919 public:
 920   NoteStartOfMarkTask() : G1AbstractSubTask(G1GCPhaseTimes::NoteStartOfMark), _region_cl(), _claimer(0) { }
 921 
 922   double worker_cost() const override {
 923     // The work done per region is very small, therefore we choose this magic number to cap the number
 924     // of threads used when there are few regions.
 925     const double regions_per_thread = 1000;
 926     return _claimer.n_regions() / regions_per_thread;
 927   }
 928 
 929   void set_max_workers(uint max_workers) override {
 930     _claimer.set_n_workers(max_workers);
 931   }
 932 
 933   void do_work(uint worker_id) override {
 934     G1CollectedHeap::heap()->heap_region_par_iterate_from_worker_offset(&_region_cl, &_claimer, worker_id);
 935   }
 936 };
 937 
// Executed by a single worker: completely reset the marking state for the
// upcoming concurrent cycle.
void G1PreConcurrentStartTask::ResetMarkingStateTask::do_work(uint worker_id) {
  // Reset marking state.
  _cm->reset();
}
 942 
 943 G1PreConcurrentStartTask::G1PreConcurrentStartTask(GCCause::Cause cause, G1ConcurrentMark* cm) :
 944   G1BatchedTask("Pre Concurrent Start", G1CollectedHeap::heap()->phase_times()) {
 945   add_serial_task(new ResetMarkingStateTask(cm));
 946   add_parallel_task(new NoteStartOfMarkTask());
 947 };
 948 
// Executed during the Concurrent Start pause: start the code cache marking
// cycle, then reset marking state and note mark start for all regions.
void G1ConcurrentMark::pre_concurrent_start(GCCause::Cause cause) {
  assert_at_safepoint_on_vm_thread();

  G1CollectedHeap::start_codecache_marking_cycle_if_inactive(true /* concurrent_mark_start */);

  ClassLoaderDataGraph::verify_claimed_marks_cleared(ClassLoaderData::_claim_strong);

  G1PreConcurrentStartTask cl(cause, this);
  G1CollectedHeap::heap()->run_batch_task(&cl);

  _gc_tracer_cm->set_gc_cause(cause);
}
 961 
// Prepare for and kick off a full concurrent marking cycle on the
// concurrent mark thread.
void G1ConcurrentMark::start_full_concurrent_cycle() {
  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = _g1h->ref_processor_cm();
  rp->start_discovery(false /* always_clear */);

  SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
  // This is the start of the marking cycle; we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // concurrent start pause to update the heap end, if the heap expands
  // during it. No need to call it here.

  // Signal the thread to start work.
  cm_thread()->start_full_mark();
}
 983 
// Start an undo cycle: cancel any pending root region scan and signal the
// concurrent mark thread to undo the aborted concurrent start.
void G1ConcurrentMark::start_undo_concurrent_cycle() {
  root_regions()->cancel_scan();

  // Signal the thread to start work.
  cm_thread()->start_undo_mark();
}
 990 
// Mark the concurrent mark thread idle after a cycle has completed.
void G1ConcurrentMark::notify_concurrent_cycle_completed() {
  cm_thread()->set_idle();
}
 994 
// Stop the concurrent mark thread at VM shutdown; only meaningful if marking
// support was fully initialized.
void G1ConcurrentMark::stop() {
  if (is_fully_initialized()) {
    cm_thread()->stop();
  }
}
1000 
1001 /*
1002  * Notice that in the next two methods, we actually leave the STS
1003  * during the barrier sync and join it immediately afterwards. If we
1004  * do not do this, the following deadlock can occur: one thread could
1005  * be in the barrier sync code, waiting for the other thread to also
1006  * sync up, whereas another one could be trying to yield, while also
1007  * waiting for the other threads to sync up too.
1008  *
1009  * Note, however, that this code is also used during remark and in
1010  * this case we should not attempt to leave / enter the STS, otherwise
1011  * we'll either hit an assert (debug / fastdebug) or deadlock
1012  * (product). So we should only leave / enter the STS if we are
1013  * operating concurrently.
1014  *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible that a Full GC or an evacuation pause could occur while
 * it is suspended. This is actually safe, since entering the sync
 * barrier is one of the last things do_marking_step() does, and it
1019  * doesn't manipulate any data structures afterwards.
1020  */
1021 
void G1ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
  bool barrier_aborted;
  {
    // Leave the STS while blocked on the barrier sync (only when operating
    // concurrently); see the comment block above these methods.
    SuspendibleThreadSetLeaver sts_leave(concurrent());
    barrier_aborted = !_first_overflow_barrier_sync.enter();
  }

  // at this point everyone should have synced up and not be doing any
  // more work

  if (barrier_aborted) {
    // If the barrier aborted we ignore the overflow condition and
    // just abort the whole marking phase as quickly as possible.
    // NOTE(review): nothing follows this branch, so this early return is
    // currently a no-op retained for clarity.
    return;
  }
}
1038 
void G1ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
  // Leave the STS while blocked on the barrier sync (only when operating
  // concurrently); see the comment block above these methods.
  SuspendibleThreadSetLeaver sts_leave(concurrent());
  _second_overflow_barrier_sync.enter();

  // at this point everything should be re-initialized and ready to go
}
1045 
1046 class G1CMConcurrentMarkingTask : public WorkerTask {
1047   G1ConcurrentMark*     _cm;
1048 
1049 public:
1050   void work(uint worker_id) {
1051     ResourceMark rm;
1052 
1053     SuspendibleThreadSetJoiner sts_join;
1054 
1055     assert(worker_id < _cm->active_tasks(), "invariant");
1056 
1057     G1CMTask* task = _cm->task(worker_id);
1058     task->record_start_time();
1059     if (!_cm->has_aborted()) {
1060       do {
1061         task->do_marking_step(G1ConcMarkStepDurationMillis,
1062                               true  /* do_termination */,
1063                               false /* is_serial*/);
1064 
1065         _cm->do_yield_check();
1066       } while (!_cm->has_aborted() && task->has_aborted());
1067     }
1068     task->record_end_time();
1069     guarantee(!task->has_aborted() || _cm->has_aborted(), "invariant");
1070   }
1071 
1072   G1CMConcurrentMarkingTask(G1ConcurrentMark* cm) :
1073       WorkerTask("Concurrent Mark"), _cm(cm) { }
1074 
1075   ~G1CMConcurrentMarkingTask() { }
1076 };
1077 
1078 uint G1ConcurrentMark::calc_active_marking_workers() {
1079   uint result = 0;
1080   if (!UseDynamicNumberOfGCThreads || !FLAG_IS_DEFAULT(ConcGCThreads)) {
1081     result = _max_concurrent_workers;
1082   } else {
1083     result =
1084       WorkerPolicy::calc_default_active_workers(_max_concurrent_workers,
1085                                                 1, /* Minimum workers */
1086                                                 _num_concurrent_workers,
1087                                                 Threads::number_of_non_daemon_threads());
1088     // Don't scale the result down by scale_concurrent_workers() because
1089     // that scaling has already gone into "_max_concurrent_workers".
1090   }
1091   assert(result > 0 && result <= _max_concurrent_workers,
1092          "Calculated number of marking workers must be larger than zero and at most the maximum %u, but is %u",
1093          _max_concurrent_workers, result);
1094   return result;
1095 }
1096 
1097 void G1ConcurrentMark::scan_root_region(const MemRegion* region, uint worker_id) {
1098 #ifdef ASSERT
1099   HeapWord* last = region->last();
1100   G1HeapRegion* hr = _g1h->heap_region_containing(last);
1101   assert(hr->is_old() || top_at_mark_start(hr) == hr->bottom(),
1102          "Root regions must be old or survivor/eden but region %u is %s", hr->hrm_index(), hr->get_type_str());
1103   assert(top_at_mark_start(hr) == region->start(),
1104          "MemRegion start should be equal to TAMS");
1105 #endif
1106 
1107   G1RootRegionScanClosure cl(_g1h, this, worker_id);
1108 
1109   const uintx interval = PrefetchScanIntervalInBytes;
1110   HeapWord* curr = region->start();
1111   const HeapWord* end = region->end();
1112   while (curr < end) {
1113     Prefetch::read(curr, interval);
1114     oop obj = cast_to_oop(curr);
1115     size_t size = obj->oop_iterate_size(&cl);
1116     assert(size == obj->size(), "sanity");
1117     curr += size;
1118   }
1119 }
1120 
1121 class G1CMRootRegionScanTask : public WorkerTask {
1122   G1ConcurrentMark* _cm;
1123 public:
1124   G1CMRootRegionScanTask(G1ConcurrentMark* cm) :
1125     WorkerTask("G1 Root Region Scan"), _cm(cm) { }
1126 
1127   void work(uint worker_id) {
1128     G1CMRootMemRegions* root_regions = _cm->root_regions();
1129     const MemRegion* region = root_regions->claim_next();
1130     while (region != nullptr) {
1131       _cm->scan_root_region(region, worker_id);
1132       region = root_regions->claim_next();
1133     }
1134   }
1135 };
1136 
1137 void G1ConcurrentMark::scan_root_regions() {
1138   // scan_in_progress() will have been set to true only if there was
1139   // at least one root region to scan. So, if it's false, we
1140   // should not attempt to do any further work.
1141   if (root_regions()->scan_in_progress()) {
1142     assert(!has_aborted(), "Aborting before root region scanning is finished not supported.");
1143 
1144     // Assign one worker to each root-region but subject to the max constraint.
1145     const uint num_workers = MIN2(root_regions()->num_root_regions(),
1146                                   _max_concurrent_workers);
1147 
1148     G1CMRootRegionScanTask task(this);
1149     log_debug(gc, ergo)("Running %s using %u workers for %u work units.",
1150                         task.name(), num_workers, root_regions()->num_root_regions());
1151     _concurrent_workers->run_task(&task, num_workers);
1152 
1153     // It's possible that has_aborted() is true here without actually
1154     // aborting the survivor scan earlier. This is OK as it's
1155     // mainly used for sanity checking.
1156     root_regions()->scan_finished();
1157   }
1158 }
1159 
// Block until root region scanning has finished; forwards the result of
// G1CMRootMemRegions::wait_until_scan_finished().
bool G1ConcurrentMark::wait_until_root_region_scan_finished() {
  return root_regions()->wait_until_scan_finished();
}
1163 
// Register the area [TAMS, top) of the given region as a root region to scan.
void G1ConcurrentMark::add_root_region(G1HeapRegion* r) {
  root_regions()->add(top_at_mark_start(r), r->top());
}
1167 
// Returns whether the [TAMS, top) area of the given region is currently
// registered as a root region.
bool G1ConcurrentMark::is_root_region(G1HeapRegion* r) {
  return root_regions()->contains(MemRegion(top_at_mark_start(r), r->top()));
}
1171 
// Abort root region scanning and wait for any in-progress scanning to stop.
void G1ConcurrentMark::root_region_scan_abort_and_wait() {
  root_regions()->abort();
  root_regions()->wait_until_scan_finished();
}
1176 
// Register the start of a concurrent mark cycle with timer and tracer, and
// record the pre-GC heap state.
void G1ConcurrentMark::concurrent_cycle_start() {
  _gc_timer_cm->register_gc_start();

  _gc_tracer_cm->report_gc_start(GCCause::_no_gc /* first parameter is not used */, _gc_timer_cm->gc_start());

  _g1h->trace_heap_before_gc(_gc_tracer_cm);
}
1184 
// Number of completed mark cycles so far; read with relaxed ordering.
uint G1ConcurrentMark::completed_mark_cycles() const {
  return _completed_mark_cycles.load_relaxed();
}
1188 
// Bookkeeping at the end of a concurrent cycle: update the completed-cycle
// counter, emit tracing events, and register the GC end with the timer.
void G1ConcurrentMark::concurrent_cycle_end(bool mark_cycle_completed) {
  _g1h->collector_state()->set_clear_bitmap_in_progress(false);

  _g1h->trace_heap_after_gc(_gc_tracer_cm);

  if (mark_cycle_completed) {
    _completed_mark_cycles.add_then_fetch(1u, memory_order_relaxed);
  }

  if (has_aborted()) {
    // The cycle was aborted, e.g. by a Full GC; report concurrent mode failure.
    log_info(gc, marking)("Concurrent Mark Abort");
    _gc_tracer_cm->report_concurrent_mode_failure();
  }

  _gc_timer_cm->register_gc_end();

  _gc_tracer_cm->report_gc_end(_gc_timer_cm->gc_end(), _gc_timer_cm->time_partitions());
}
1207 
// Run the concurrent marking phase proper with the calculated number of
// concurrent workers.
void G1ConcurrentMark::mark_from_roots() {
  _restart_for_overflow.store_relaxed(false);

  uint active_workers = calc_active_marking_workers();

  // Setting active workers is not guaranteed since fewer
  // worker threads may currently exist and more may not be
  // available.
  active_workers = _concurrent_workers->set_active_workers(active_workers);
  log_info(gc, task)("Concurrent Mark Using %u of %u Workers", active_workers, _concurrent_workers->max_workers());

  _num_concurrent_workers = active_workers;

  // Parallel task terminator is set in "set_concurrency_and_phase()"
  set_concurrency_and_phase(active_workers, true /* concurrent */);

  G1CMConcurrentMarkingTask marking_task(this);
  _concurrent_workers->run_task(&marking_task);
  print_stats();
}
1228 
1229 const char* G1ConcurrentMark::verify_location_string(VerifyLocation location) {
1230   static const char* location_strings[] = { "Remark Before",
1231                                             "Remark After",
1232                                             "Remark Overflow",
1233                                             "Cleanup Before",
1234                                             "Cleanup After" };
1235   return location_strings[static_cast<std::underlying_type_t<VerifyLocation>>(location)];
1236 }
1237 
1238 void G1ConcurrentMark::verify_during_pause(G1HeapVerifier::G1VerifyType type,
1239                                            VerifyLocation location) {
1240   G1HeapVerifier* verifier = _g1h->verifier();
1241 
1242   verifier->verify_region_sets_optional();
1243 
1244   const char* caller = verify_location_string(location);
1245 
1246   if (VerifyDuringGC && G1HeapVerifier::should_verify(type)) {
1247     GCTraceTime(Debug, gc, phases) debug(caller, _gc_timer_cm);
1248 
1249     size_t const BufLen = 512;
1250     char buffer[BufLen];
1251 
1252     jio_snprintf(buffer, BufLen, "During GC (%s)", caller);
1253     verifier->verify(VerifyOption::G1UseConcMarking, buffer);
1254 
1255     // Only check bitmap in Remark, and not at After-Verification because the regions
1256     // already have their TAMS'es reset.
1257     if (location != VerifyLocation::RemarkAfter) {
1258       verifier->verify_bitmap_clear(true /* above_tams_only */);
1259     }
1260   }
1261 }
1262 
1263 class G1ObjectCountIsAliveClosure: public BoolObjectClosure {
1264   G1CollectedHeap* _g1h;
1265 public:
1266   G1ObjectCountIsAliveClosure(G1CollectedHeap* g1h) : _g1h(g1h) {}
1267 
1268   bool do_object_b(oop obj) {
1269     return !_g1h->is_obj_dead(obj);
1270   }
1271 };
1272 
// The Remark pause: finish marking (finalize, reference processing, class
// unloading), select regions for remembered set rebuild and reclaim
// completely empty regions. If the global mark stack overflowed, set up a
// restart of concurrent marking instead.
void G1ConcurrentMark::remark() {
  assert_at_safepoint_on_vm_thread();

  // If a full collection has happened, we should not continue. However we might
  // have ended up here as the Remark VM operation has been scheduled already.
  if (has_aborted()) {
    return;
  }

  G1Policy* policy = _g1h->policy();
  policy->record_pause_start_time();

  double start = os::elapsedTime();

  verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyLocation::RemarkBefore);

  {
    GCTraceTime(Debug, gc, phases) debug("Finalize Marking", _gc_timer_cm);
    finalize_marking();
  }

  double mark_work_end = os::elapsedTime();

  bool const mark_finished = !has_overflown();
  if (mark_finished) {
    weak_refs_work();

    // Unload Klasses, String, Code Cache, etc.
    if (ClassUnloadingWithConcurrentMark) {
      G1CMIsAliveClosure is_alive(this);
      _g1h->unload_classes_and_code("Class Unloading", &is_alive, _gc_timer_cm);
    }

    SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle; we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    {
      GCTraceTime(Debug, gc, phases) debug("Flush Task Caches", _gc_timer_cm);
      flush_all_task_caches();
    }

    // All marking completed. Check bitmap now because we will start to reset
    // TAMSes in parallel below, which means we cannot do this check in the
    // After-Remark verification.
    _g1h->verifier()->verify_bitmap_clear(true /* above_tams_only */);

    {
      GCTraceTime(Debug, gc, phases) debug("Select For Rebuild and Reclaim Empty Regions", _gc_timer_cm);

      G1UpdateRegionLivenessAndSelectForRebuildTask cl(_g1h, this, _g1h->workers()->active_workers());
      uint const num_workers = MIN2(G1UpdateRegionLivenessAndSelectForRebuildTask::desired_num_workers(_g1h->num_committed_regions()),
                                    _g1h->workers()->active_workers());
      log_debug(gc,ergo)("Running %s using %u workers for %u regions in heap", cl.name(), num_workers, _g1h->num_committed_regions());
      _g1h->workers()->run_task(&cl, num_workers);

      log_debug(gc, remset, tracking)("Remembered Set Tracking update regions total %u, selected %u",
                                        _g1h->num_committed_regions(), cl.total_selected_for_rebuild());

      _needs_remembered_set_rebuild = (cl.total_selected_for_rebuild() > 0);

      if (_needs_remembered_set_rebuild) {
        // Prune rebuild candidates based on G1HeapWastePercent.
        // Improves rebuild time in addition to remembered set memory usage.
        G1CollectionSetChooser::build(_g1h->workers(), _g1h->num_committed_regions(), _g1h->policy()->candidates());
      }
    }

    if (log_is_enabled(Trace, gc, liveness)) {
      G1PrintRegionLivenessInfoClosure cl("Post-Marking");
      _g1h->heap_region_iterate(&cl);
    }

    // Potentially, some empty-regions have been reclaimed; make this a
    // "collection" so that pending allocation can retry before attempting a
    // GC pause.
    _g1h->increment_total_collections();

    // For Remark Pauses that may have been triggered by PeriodicGCs, we maintain
    // resizing based on MinHeapFreeRatio or MaxHeapFreeRatio. If a PeriodicGC is
    // triggered, it likely means there are very few regular GCs, making resizing
    // based on gc heuristics less effective.
    if (_g1h->last_gc_was_periodic()) {
      _g1h->resize_heap_after_full_collection(0 /* allocation_word_size */);
    }

    compute_new_sizes();

    verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyLocation::RemarkAfter);

    assert(!restart_for_overflow(), "sanity");
    // Completely reset the marking state (except bitmaps) since marking completed.
    reset_at_marking_complete();

    G1CollectedHeap::finish_codecache_marking_cycle();

    {
      GCTraceTime(Debug, gc, phases) debug("Report Object Count", _gc_timer_cm);
      G1ObjectCountIsAliveClosure is_alive(_g1h);
      _gc_tracer_cm->report_object_count_after_gc(&is_alive, _g1h->workers());
    }
  } else {
    // We overflowed.  Restart concurrent marking.
    _restart_for_overflow.store_relaxed(true);

    verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyLocation::RemarkOverflow);

    // Clear the marking state because we will be restarting
    // marking due to overflowing the global mark stack.
    reset_marking_for_restart();
  }

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  _g1h->update_perf_counter_cpu_time();

  policy->record_concurrent_mark_remark_end();
}
1397 
// Recompute metaspace and heap monitoring sizes after marking/cleanup has
// potentially freed regions.
void G1ConcurrentMark::compute_new_sizes() {
  MetaspaceGC::compute_new_size();

  // Cleanup will have freed any regions completely full of garbage.
  // Update the soft reference policy with the new heap occupancy.
  Universe::heap()->update_capacity_and_used_at_gc();

  // We reclaimed old regions so we should calculate the sizes to make
  // sure we update the old gen/space data.
  _g1h->monitoring_support()->update_sizes();
}
1409 
// Closure transitioning each region's remembered set tracking state after
// the remembered set rebuild phase has completed.
class G1UpdateRegionsAfterRebuild : public G1HeapRegionClosure {
  G1CollectedHeap* _g1h;

public:
  G1UpdateRegionsAfterRebuild(G1CollectedHeap* g1h) : _g1h(g1h) { }

  bool do_heap_region(G1HeapRegion* r) override {
    // Update the remset tracking state from updating to complete
    // if remembered sets have been rebuilt.
    _g1h->policy()->remset_tracker()->update_after_rebuild(r);
    // Continue iterating over all regions.
    return false;
  }
};
1423 
// The Cleanup pause executed after the (optional) remembered set rebuild
// phase: update per-region remset tracking state and record pause times.
void G1ConcurrentMark::cleanup() {
  assert_at_safepoint_on_vm_thread();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    return;
  }

  G1Policy* policy = _g1h->policy();
  policy->record_pause_start_time();

  double start = os::elapsedTime();

  verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyLocation::CleanupBefore);

  if (needs_remembered_set_rebuild()) {
    // Update the remset tracking information as well as marking all regions
    // as fully parsable.
    GCTraceTime(Debug, gc, phases) debug("Update Remembered Set Tracking After Rebuild", _gc_timer_cm);
    G1UpdateRegionsAfterRebuild cl(_g1h);
    _g1h->heap_region_iterate(&cl);
  } else {
    log_debug(gc, phases)("No Remembered Sets to update after rebuild");
  }

  verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyLocation::CleanupAfter);

  // Local statistics
  _cleanup_times.add((os::elapsedTime() - start) * 1000.0);

  {
    GCTraceTime(Debug, gc, phases) debug("Finalize Concurrent Mark Cleanup", _gc_timer_cm);
    policy->record_concurrent_mark_cleanup_end(needs_remembered_set_rebuild());
  }
}
1459 
// 'Keep Alive' oop closure used by both serial and parallel reference processing.
1461 // Uses the G1CMTask associated with a worker thread (for serial reference
1462 // processing the G1CMTask for worker 0 is used) to preserve (mark) and
1463 // trace referent objects.
1464 //
1465 // Using the G1CMTask and embedded local queues avoids having the worker
1466 // threads operating on the global mark stack. This reduces the risk
1467 // of overflowing the stack - which we would rather avoid at this late
1468 // state. Also using the tasks' local queues removes the potential
1469 // of the workers interfering with each other that could occur if
1470 // operating on the global stack.
1471 
class G1CMKeepAliveAndDrainClosure : public OopClosure {
  G1ConcurrentMark* _cm;
  G1CMTask*         _task;              // Task whose local queues are used for marking.
  uint              _ref_counter_limit; // Number of references to process before draining.
  uint              _ref_counter;       // Countdown until the next drain.
  bool              _is_serial;         // True when running serial reference processing.
public:
  G1CMKeepAliveAndDrainClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
    _cm(cm), _task(task), _ref_counter_limit(G1RefProcDrainInterval),
    _ref_counter(_ref_counter_limit), _is_serial(is_serial) {
    assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
  }

  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop(      oop* p) { do_oop_work(p); }

  // Mark the referenced object via the task, periodically draining the
  // task's queues every _ref_counter_limit successful marks.
  template <class T> void do_oop_work(T* p) {
    if (_cm->has_overflown()) {
      // Marking will restart anyway; no point in doing more work here.
      return;
    }
    if (!_task->deal_with_reference(p)) {
      // We did not add anything to the mark bitmap (or mark stack), so there is
      // no point trying to drain it.
      return;
    }
    _ref_counter--;

    if (_ref_counter == 0) {
      // We have dealt with _ref_counter_limit references, pushing them
      // and objects reachable from them on to the local stack (and
      // possibly the global stack). Call G1CMTask::do_marking_step() to
      // process these entries.
      //
      // We call G1CMTask::do_marking_step() in a loop, which we'll exit if
      // there's nothing more to do (i.e. we're done with the entries that
      // were pushed as a result of the G1CMTask::deal_with_reference() calls
      // above) or we overflow.
      //
      // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
      // flag while there may still be some work to do. (See the comment at
      // the beginning of G1CMTask::do_marking_step() for those conditions -
      // one of which is reaching the specified time target.) It is only
      // when G1CMTask::do_marking_step() returns without setting the
      // has_aborted() flag that the marking step has completed.
      do {
        double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
        _task->do_marking_step(mark_step_duration_ms,
                               false      /* do_termination */,
                               _is_serial);
      } while (_task->has_aborted() && !_cm->has_overflown());
      _ref_counter = _ref_counter_limit;
    }
  }
};
1526 
1527 // 'Drain' oop closure used by both serial and parallel reference processing.
1528 // Uses the G1CMTask associated with a given worker thread (for serial
1529 // reference processing the G1CMtask for worker 0 is used). Calls the
1530 // do_marking_step routine, with an unbelievably large timeout value,
1531 // to drain the marking data structures of the remaining entries
1532 // added by the 'keep alive' oop closure above.
1533 
class G1CMDrainMarkingStackClosure : public VoidClosure {
  G1ConcurrentMark* _cm;
  G1CMTask*         _task;      // Task whose queues are drained.
  bool              _is_serial; // True when running serial reference processing.
 public:
  G1CMDrainMarkingStackClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
    _cm(cm), _task(task), _is_serial(is_serial) {
    assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
  }

  // Completely drain the task's local queues and the global mark stack.
  void do_void() {
    do {
      // We call G1CMTask::do_marking_step() to completely drain the local
      // and global marking stacks of entries pushed by the 'keep alive'
      // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
      //
      // G1CMTask::do_marking_step() is called in a loop, which we'll exit
      // if there's nothing more to do (i.e. we've completely drained the
      // entries that were pushed as a result of applying the 'keep alive'
      // closure to the entries on the discovered ref lists) or we overflow
      // the global marking stack.
      //
      // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
      // flag while there may still be some work to do. (See the comment at
      // the beginning of G1CMTask::do_marking_step() for those conditions -
      // one of which is reaching the specified time target.) It is only
      // when G1CMTask::do_marking_step() returns without setting the
      // has_aborted() flag that the marking step has completed.

      _task->do_marking_step(1000000000.0 /* something very large */,
                             true         /* do_termination */,
                             _is_serial);
    } while (_task->has_aborted() && !_cm->has_overflown());
  }
};
1569 
// Proxy task adapting the G1 concurrent mark closures to the reference
// processor's worker interface.
class G1CMRefProcProxyTask : public RefProcProxyTask {
  G1CollectedHeap& _g1h; // NOTE(review): appears unused within this class - confirm before removing.
  G1ConcurrentMark& _cm;

public:
  G1CMRefProcProxyTask(uint max_workers, G1CollectedHeap& g1h, G1ConcurrentMark &cm)
    : RefProcProxyTask("G1CMRefProcProxyTask", max_workers),
      _g1h(g1h),
      _cm(cm) {}

  void work(uint worker_id) override {
    assert(worker_id < _max_workers, "sanity");
    G1CMIsAliveClosure is_alive(&_cm);
    // In the single-threaded model all work is done by the task of worker 0.
    uint index = (_tm == RefProcThreadModel::Single) ? 0 : worker_id;
    G1CMKeepAliveAndDrainClosure keep_alive(&_cm, _cm.task(index), _tm == RefProcThreadModel::Single);
    BarrierEnqueueDiscoveredFieldClosure enqueue;
    G1CMDrainMarkingStackClosure complete_gc(&_cm, _cm.task(index), _tm == RefProcThreadModel::Single);
    _rp_task->rp_work(worker_id, &is_alive, &keep_alive, &enqueue, &complete_gc);
  }

  void prepare_run_task_hook() override {
    // We need to reset the concurrency level before each
    // proxy task execution, so that the termination protocol
    // and overflow handling in G1CMTask::do_marking_step() knows
    // how many workers to wait for.
    _cm.set_concurrency(_queue_count);
  }
};
1598 
// Process references discovered during concurrent marking, then run general
// weak-root processing. A global mark stack overflow while processing
// references is fatal, since the liveness information can no longer be
// trusted at that point.
void G1ConcurrentMark::weak_refs_work() {
  ResourceMark rm;

  {
    GCTraceTime(Debug, gc, phases) debug("Reference Processing", _gc_timer_cm);

    ReferenceProcessor* rp = _g1h->ref_processor_cm();

    // See the comment in G1CollectedHeap::ref_processing_init()
    // about how reference processing currently works in G1.

    assert(_global_mark_stack.is_empty(), "mark stack should be empty");

    // Prefer to grow the stack until the max capacity.
    _global_mark_stack.set_should_grow();

    // Parallel processing task executor.
    G1CMRefProcProxyTask task(rp->max_num_queues(), *_g1h, *this);
    ReferenceProcessorPhaseTimes pt(_gc_timer_cm, rp->max_num_queues());

    // Process the weak references.
    const ReferenceProcessorStats& stats = rp->process_discovered_references(task, _g1h->workers(), pt);
    _gc_tracer_cm->report_gc_reference_stats(stats);
    pt.print_all_references();

    // The do_oop work routines of the keep_alive and drain_marking_stack
    // oop closures will set the has_overflown flag if we overflow the
    // global marking stack.

    assert(has_overflown() || _global_mark_stack.is_empty(),
           "Mark stack should be empty (unless it has overflown)");
  }

  if (has_overflown()) {
    // We can not trust g1_is_alive and the contents of the heap if the marking stack
    // overflowed while processing references. Exit the VM.
    fatal("Overflow during reference processing, can not continue. Current mark stack depth: "
          "%zu, MarkStackSize: %zu, MarkStackSizeMax: %zu. "
          "Please increase MarkStackSize and/or MarkStackSizeMax and restart.",
          _global_mark_stack.size(), MarkStackSize, MarkStackSizeMax);
    return;
  }

  assert(_global_mark_stack.is_empty(), "Marking should have completed");

  {
    GCTraceTime(Debug, gc, phases) debug("Weak Processing", _gc_timer_cm);
    G1CMIsAliveClosure is_alive(this);
    WeakProcessor::weak_oops_do(_g1h->workers(), &is_alive, &do_nothing_cl, 1);
  }
}
1650 
1651 class G1PrecleanYieldClosure : public YieldClosure {
1652   G1ConcurrentMark* _cm;
1653 
1654 public:
1655   G1PrecleanYieldClosure(G1ConcurrentMark* cm) : _cm(cm) { }
1656 
1657   virtual bool should_return() {
1658     return _cm->has_aborted();
1659   }
1660 
1661   virtual bool should_return_fine_grain() {
1662     _cm->do_yield_check();
1663     return _cm->has_aborted();
1664   }
1665 };
1666 
// Preclean discovered references concurrently to reduce the amount of
// reference processing work left for the remark pause. Runs as part of the
// suspendible thread set so it yields to safepoints via the yield closure.
void G1ConcurrentMark::preclean() {
  assert(G1UseReferencePrecleaning, "Precleaning must be enabled.");

  SuspendibleThreadSetJoiner joiner;

  BarrierEnqueueDiscoveredFieldClosure enqueue;

  // One active "worker", running concurrently with the mutators.
  set_concurrency_and_phase(1, true);

  G1PrecleanYieldClosure yield_cl(this);

  ReferenceProcessor* rp = _g1h->ref_processor_cm();
  // Precleaning is single threaded. Temporarily disable MT discovery.
  ReferenceProcessorMTDiscoveryMutator rp_mut_discovery(rp, false);
  rp->preclean_discovered_references(rp->is_alive_non_header(),
                                     &enqueue,
                                     &yield_cl,
                                     _gc_timer_cm);
}
1686 
1687 // Closure for marking entries in SATB buffers.
1688 class G1CMSATBBufferClosure : public SATBBufferClosure {
1689 private:
1690   G1CMTask* _task;
1691   G1CollectedHeap* _g1h;
1692 
1693   // This is very similar to G1CMTask::deal_with_reference, but with
1694   // more relaxed requirements for the argument, so this must be more
1695   // circumspect about treating the argument as an object.
1696   void do_entry(void* entry) const {
1697     _task->increment_refs_reached();
1698     oop const obj = cast_to_oop(entry);
1699     _task->make_reference_grey(obj);
1700   }
1701 
1702 public:
1703   G1CMSATBBufferClosure(G1CMTask* task, G1CollectedHeap* g1h)
1704     : _task(task), _g1h(g1h) { }
1705 
1706   virtual void do_buffer(void** buffer, size_t size) {
1707     for (size_t i = 0; i < size; ++i) {
1708       do_entry(buffer[i]);
1709     }
1710   }
1711 };
1712 
// Thread closure used during remark: flushes each thread's partial SATB
// buffer into the global queue set so that subsequent completed-buffer
// processing sees all outstanding SATB entries.
class G1RemarkThreadsClosure : public ThreadClosure {
  G1SATBMarkQueueSet& _qset;

 public:
  // NOTE(review): the g1h and task arguments are unused here; only the
  // global SATB queue set is captured. The signature is kept for the
  // caller in G1CMRemarkTask::work().
  G1RemarkThreadsClosure(G1CollectedHeap* g1h, G1CMTask* task) :
    _qset(G1BarrierSet::satb_mark_queue_set()) {}

  void do_thread(Thread* thread) {
    // Transfer any partial buffer to the qset for completed buffer processing.
    _qset.flush_queue(G1ThreadLocalData::satb_mark_queue(thread));
  }
};
1725 
// Worker task finishing marking during the remark pause: each worker first
// flushes the threads' partial SATB buffers, then runs do_marking_step()
// to completion (or until global mark stack overflow).
class G1CMRemarkTask : public WorkerTask {
  // For Threads::possibly_parallel_threads_do
  ThreadsClaimTokenScope _threads_claim_token_scope;
  G1ConcurrentMark* _cm;
public:
  void work(uint worker_id) {
    G1CMTask* task = _cm->task(worker_id);
    task->record_start_time();
    {
      ResourceMark rm;

      // Flush per-thread SATB buffers so do_marking_step() below can
      // process them as completed buffers.
      G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
      Threads::possibly_parallel_threads_do(true /* is_par */, &threads_f);
    }

    // Keep restarting the marking step until it completes without
    // aborting (see the abort conditions in do_marking_step()).
    do {
      task->do_marking_step(1000000000.0 /* something very large */,
                            true         /* do_termination       */,
                            false        /* is_serial            */);
    } while (task->has_aborted() && !_cm->has_overflown());
    // If we overflow, then we do not want to restart. We instead
    // want to abort remark and do concurrent marking again.
    task->record_end_time();
  }

  G1CMRemarkTask(G1ConcurrentMark* cm, uint active_workers) :
    WorkerTask("Par Remark"), _threads_claim_token_scope(), _cm(cm) {
    // The terminator must know the number of workers taking part.
    _cm->terminator()->reset_for_reuse(active_workers);
  }
};
1756 
// Finish marking during the remark pause by running G1CMRemarkTask on all
// active workers, then verify that all completed SATB buffers have been
// consumed (unless the global mark stack overflowed).
void G1ConcurrentMark::finalize_marking() {
  ResourceMark rm;

  _g1h->ensure_parsability(false);

  // this is remark, so we'll use up all active threads
  uint active_workers = _g1h->workers()->active_workers();
  set_concurrency_and_phase(active_workers, false /* concurrent */);
  // Leave _parallel_marking_threads at it's
  // value originally calculated in the G1ConcurrentMark
  // constructor and pass values of the active workers
  // through the task.

  {
    G1CMRemarkTask remarkTask(this, active_workers);
    // We will start all available threads, even if we decide that the
    // active_workers will be fewer. The extra ones will just bail out
    // immediately.
    _g1h->workers()->run_task(&remarkTask);
  }

  // After a successful remark no completed SATB buffers may remain;
  // an overflow aborts remark, in which case leftovers are expected.
  SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
  guarantee(has_overflown() ||
            satb_mq_set.completed_buffers_num() == 0,
            "Invariant: has_overflown = %s, num buffers = %zu",
            BOOL_TO_STR(has_overflown()),
            satb_mq_set.completed_buffers_num());

  print_stats();
}
1787 
1788 void G1ConcurrentMark::flush_all_task_caches() {
1789   size_t hits = 0;
1790   size_t misses = 0;
1791   for (uint i = 0; i < _max_num_tasks; i++) {
1792     Pair<size_t, size_t> stats = _tasks[i]->flush_mark_stats_cache();
1793     hits += stats.first;
1794     misses += stats.second;
1795   }
1796   size_t sum = hits + misses;
1797   log_debug(gc, stats)("Mark stats cache hits %zu misses %zu ratio %1.3lf",
1798                        hits, misses, percent_of(hits, sum));
1799 }
1800 
1801 void G1ConcurrentMark::clear_bitmap_for_region(G1HeapRegion* hr) {
1802   assert_at_safepoint();
1803   _mark_bitmap.clear_range(MemRegion(hr->bottom(), hr->end()));
1804 }
1805 
// Claim the next region to scan by atomically advancing the global finger
// past it. Returns the claimed region, or null if either the claim raced
// with another worker or the claimed region has nothing to scan (caller
// should retry). Lock-free: multiple workers may race on the finger CAS.
G1HeapRegion* G1ConcurrentMark::claim_region(uint worker_id) {
  // "Checkpoint" the finger.
  HeapWord* local_finger = finger();

  while (local_finger < _heap.end()) {
    assert(_g1h->is_in_reserved(local_finger), "invariant");

    G1HeapRegion* curr_region = _g1h->heap_region_containing_or_null(local_finger);
    // Make sure that the reads below do not float before loading curr_region.
    OrderAccess::loadload();
    // Above heap_region_containing may return null as we always scan claim
    // until the end of the heap. In this case, just jump to the next region.
    HeapWord* end = curr_region != nullptr ? curr_region->end() : local_finger + G1HeapRegion::GrainWords;

    // Is the gap between reading the finger and doing the CAS too long?
    HeapWord* res = _finger.compare_exchange(local_finger, end);
    if (res == local_finger && curr_region != nullptr) {
      // We succeeded.
      HeapWord* bottom = curr_region->bottom();
      HeapWord* limit = top_at_mark_start(curr_region);

      log_trace(gc, marking)("Claim region %u bottom " PTR_FORMAT " tams " PTR_FORMAT, curr_region->hrm_index(), p2i(curr_region->bottom()), p2i(top_at_mark_start(curr_region)));
      // Notice that _finger == end cannot be guaranteed here since,
      // someone else might have moved the finger even further.
      assert(finger() >= end, "The finger should have moved forward");

      if (limit > bottom) {
        // There are objects below TAMS to scan in this region.
        return curr_region;
      } else {
        assert(limit == bottom,
               "The region limit should be at bottom");
        // Nothing to scan (TAMS == bottom).
        // We return null and the caller should try calling
        // claim_region() again.
        return nullptr;
      }
    } else {
      // CAS lost (or region was null): re-read the finger and retry.
      HeapWord* next_finger = finger();
      assert(next_finger > local_finger, "The finger should have moved forward " PTR_FORMAT " " PTR_FORMAT, p2i(local_finger), p2i(next_finger));
      local_finger = next_finger;
    }
  }

  // The finger is past the end of the heap; no regions left to claim.
  return nullptr;
}
1851 
1852 #ifndef PRODUCT
// Debug-only functor applied to mark stack / task queue entries: checks that
// each entry is a valid heap oop (or partial array state) and does not refer
// into the (optional) collection set.
class VerifyNoCSetOops {
  G1CollectedHeap* _g1h;
  const char* _phase;   // Label for failure messages ("Stack", "Queue", ...).
  int _info;            // Extra context (e.g. queue index), -1 if unused.

public:
  VerifyNoCSetOops(const char* phase, int info = -1) :
    _g1h(G1CollectedHeap::heap()),
    _phase(phase),
    _info(info)
  { }

  void operator()(G1TaskQueueEntry task_entry) const {
    if (task_entry.is_partial_array_state()) {
      // Partial array states only need their source object to be in the heap.
      oop obj = task_entry.to_partial_array_state()->source();
      guarantee(_g1h->is_in_reserved(obj), "Partial Array " PTR_FORMAT " must be in heap.", p2i(obj));
      return;
    }
    guarantee(oopDesc::is_oop(task_entry.to_oop()),
              "Non-oop " PTR_FORMAT ", phase: %s, info: %d",
              p2i(task_entry.to_oop()), _phase, _info);
    G1HeapRegion* r = _g1h->heap_region_containing(task_entry.to_oop());
    guarantee(!(r->in_collection_set() || r->has_index_in_opt_cset()),
              "obj " PTR_FORMAT " from %s (%d) in region %u in (optional) collection set",
              p2i(task_entry.to_oop()), _phase, _info, r->hrm_index());
  }
};
1880 
// Debug-only verification that no marking data structure (global mark stack,
// task queues, global and per-task fingers) refers into the collection set.
// Only meaningful while marking or rebuild is in progress.
void G1ConcurrentMark::verify_no_collection_set_oops() {
  assert(SafepointSynchronize::is_at_safepoint() || !is_init_completed(),
         "should be at a safepoint or initializing");
  if (!_g1h->collector_state()->mark_or_rebuild_in_progress()) {
    return;
  }

  // Verify entries on the global mark stack
  _global_mark_stack.iterate(VerifyNoCSetOops("Stack"));

  // Verify entries on the task queues
  for (uint i = 0; i < _max_num_tasks; ++i) {
    G1CMTaskQueue* queue = _task_queues->queue(i);
    queue->iterate(VerifyNoCSetOops("Queue", i));
  }

  // Verify the global finger
  HeapWord* global_finger = finger();
  if (global_finger != nullptr && global_finger < _heap.end()) {
    // Since we always iterate over all regions, we might get a null G1HeapRegion
    // here.
    G1HeapRegion* global_hr = _g1h->heap_region_containing_or_null(global_finger);
    guarantee(global_hr == nullptr || global_finger == global_hr->bottom(),
              "global finger: " PTR_FORMAT " region: " HR_FORMAT,
              p2i(global_finger), HR_FORMAT_PARAMS(global_hr));
  }

  // Verify the task fingers
  assert(_num_concurrent_workers <= _max_num_tasks, "sanity");
  for (uint i = 0; i < _num_concurrent_workers; ++i) {
    G1CMTask* task = _tasks[i];
    HeapWord* task_finger = task->finger();
    if (task_finger != nullptr && task_finger < _heap.end()) {
      // See above note on the global finger verification.
      G1HeapRegion* r = _g1h->heap_region_containing_or_null(task_finger);
      // NOTE(review): this condition is noticeably weaker than the entry
      // check in VerifyNoCSetOops (it only fails when the region is both
      // in the cset AND in the optional cset) — confirm this disjunction
      // is intentional rather than a missing De Morgan.
      guarantee(r == nullptr || task_finger == r->bottom() ||
                !r->in_collection_set() || !r->has_index_in_opt_cset(),
                "task finger: " PTR_FORMAT " region: " HR_FORMAT,
                p2i(task_finger), HR_FORMAT_PARAMS(r));
    }
  }
}
1923 #endif // PRODUCT
1924 
1925 void G1ConcurrentMark::rebuild_and_scrub() {
1926   if (!needs_remembered_set_rebuild()) {
1927     log_debug(gc, marking)("Skipping Remembered Set Rebuild. No regions selected for rebuild, will only scrub");
1928   }
1929 
1930   G1ConcurrentRebuildAndScrub::rebuild_and_scrub(this, needs_remembered_set_rebuild(), _concurrent_workers);
1931 }
1932 
// Log per-task marking statistics for all active tasks, separated by
// divider lines. No-op unless debug-level gc+stats logging is enabled.
void G1ConcurrentMark::print_stats() {
  if (!log_is_enabled(Debug, gc, stats)) {
    return;
  }
  log_debug(gc, stats)("---------------------------------------------------------------------");
  for (size_t i = 0; i < _num_active_tasks; ++i) {
    _tasks[i]->print_stats();
    log_debug(gc, stats)("---------------------------------------------------------------------");
  }
}
1943 
// Abort an in-progress concurrent marking cycle (e.g. for a Full GC).
// Returns true if there was a cycle whose state had to be torn down,
// false if there was nothing to abort.
bool G1ConcurrentMark::concurrent_cycle_abort() {
  // If we start the compaction before the CM threads finish
  // scanning the root regions we might trip them over as we'll
  // be moving objects / updating references. So let's wait until
  // they are done. By telling them to abort, they should complete
  // early.
  root_region_scan_abort_and_wait();

  // We haven't started a concurrent cycle no need to do anything; we might have
  // aborted the marking because of shutting down though. In this case the marking
  // might have already completed the abort (leading to in_progress() below to
  // return false), however this still left marking state particularly in the
  // shared marking bitmap that must be cleaned up.
  // If there are multiple full gcs during shutdown we do this work repeatedly for
  // nothing, but this situation should be extremely rare (a full gc after shutdown
  // has been signalled is already rare), and this work should be negligible compared
  // to actual full gc work.

  if (!is_fully_initialized() || (!cm_thread()->in_progress() && !cm_thread()->should_terminate())) {
    return false;
  }

  reset_marking_for_restart();

  abort_marking_threads();

  // Discard any SATB data gathered so far and deactivate the SATB barrier.
  SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
  satb_mq_set.abandon_partial_marking();
  // This can be called either during or outside marking, we'll read
  // the expected_active value from the SATB queue set.
  satb_mq_set.set_active_all_threads(false, /* new active value */
                                     satb_mq_set.is_active() /* expected_active */);
  return true;
}
1978 
// Signal all marking tasks to abort, and abort the overflow barriers so
// that workers blocked in the termination/overflow protocol wake up and
// can observe the abort flag.
void G1ConcurrentMark::abort_marking_threads() {
  assert(!_root_regions.scan_in_progress(), "still doing root region scan");
  _has_aborted.store_relaxed(true);
  _first_overflow_barrier_sync.abort();
  _second_overflow_barrier_sync.abort();
}
1985 
1986 double G1ConcurrentMark::worker_threads_cpu_time_s() {
1987   class CountCpuTimeThreadClosure : public ThreadClosure {
1988   public:
1989     jlong _total_cpu_time;
1990 
1991     CountCpuTimeThreadClosure() : ThreadClosure(), _total_cpu_time(0) { }
1992 
1993     void do_thread(Thread* t) {
1994       _total_cpu_time += os::thread_cpu_time(t);
1995     }
1996   } cl;
1997 
1998   threads_do(&cl);
1999 
2000   return (double)cl._total_cpu_time / NANOSECS_PER_SEC;
2001 }
2002 
// Log a one-line summary (count, total in seconds, average in ms) of the
// given time series; standard deviation and maximum are added when the
// series is non-empty.
static void print_ms_time_info(const char* prefix, const char* name,
                               NumberSeq& ns) {
  log_trace(gc, marking)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
                         prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
  if (ns.num() > 0) {
    log_trace(gc, marking)("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
                           prefix, ns.sd(), ns.maximum());
  }
}
2012 
// Log a trace-level summary of concurrent marking times: remark phases
// (final marks, weak refs), cleanups, and total stop-the-world versus
// concurrent time. No-op unless trace-level gc+marking logging is enabled.
void G1ConcurrentMark::print_summary_info() {
  Log(gc, marking) log;
  if (!log.is_trace()) {
    return;
  }

  log.trace(" Concurrent marking:");
  if (!is_fully_initialized()) {
    // Timing data only exists once marking has been initialized.
    log.trace("    has not been initialized yet");
    return;
  }
  print_ms_time_info("  ", "remarks", _remark_times);
  {
    print_ms_time_info("     ", "final marks", _remark_mark_times);
    print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);

  }
  print_ms_time_info("  ", "cleanups", _cleanup_times);
  log.trace("    Finalize live data total time = %8.2f s (avg = %8.2f ms).",
            _cleanup_times.sum() / 1000.0, _cleanup_times.avg());
  log.trace("  Total stop_world time = %8.2f s.",
            (_remark_times.sum() + _cleanup_times.sum())/1000.0);
  log.trace("  Total concurrent time = %8.2f s (%8.2f s marking).",
            cm_thread()->total_mark_cpu_time_s(), cm_thread()->worker_threads_cpu_time_s());
}
2038 
2039 void G1ConcurrentMark::threads_do(ThreadClosure* tc) const {
2040   if (is_fully_initialized()) { // they are initialized late
2041     tc->do_thread(_cm_thread);
2042     _concurrent_workers->threads_do(tc);
2043   }
2044 }
2045 
// Print the mark bitmap address and contents to the given stream.
void G1ConcurrentMark::print_on(outputStream* st) const {
  st->print_cr("Marking Bits: (CMBitMap*) " PTR_FORMAT, p2i(mark_bitmap()));
  _mark_bitmap.print_on(st, " Bits: ");
}
2050 
2051 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) {
2052   ReferenceProcessor* result = g1h->ref_processor_cm();
2053   assert(result != nullptr, "CM reference processor should not be null");
2054   return result;
2055 }
2056 
// Oop closure used by a marking task; claims metadata strongly and uses the
// concurrent-marking reference processor for discovery.
G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
                               G1CMTask* task)
  : ClaimMetadataVisitingOopIterateClosure(ClassLoaderData::_claim_strong, get_cm_oop_closure_ref_processor(g1h)),
    _g1h(g1h), _task(task)
{ }
2062 
// Start working on the given region: remember it, position the task-local
// finger at its bottom and compute the scan limit.
void G1CMTask::setup_for_region(G1HeapRegion* hr) {
  assert(hr != nullptr,
        "claim_region() should have filtered out null regions");
  _curr_region  = hr;
  _finger       = hr->bottom();
  update_region_limit();
}
2070 
// Recompute the scan limit of the current region from its TAMS, handling
// the cases where the region was emptied or re-used underneath us by an
// evacuation pause. May move the task-local finger so that the following
// bitmap iteration does nothing.
void G1CMTask::update_region_limit() {
  G1HeapRegion* hr = _curr_region;
  HeapWord* bottom = hr->bottom();
  HeapWord* limit = _cm->top_at_mark_start(hr);

  if (limit == bottom) {
    // The region was collected underneath our feet.
    // We set the finger to bottom to ensure that the bitmap
    // iteration that will follow this will not do anything.
    // (this is not a condition that holds when we set the region up,
    // as the region is not supposed to be empty in the first place)
    _finger = bottom;
  } else if (limit >= _region_limit) {
    assert(limit >= _finger, "peace of mind");
  } else {
    assert(limit < _region_limit, "only way to get here");
    // This can happen under some pretty unusual circumstances.  An
    // evacuation pause empties the region underneath our feet (TAMS
    // at bottom). We then do some allocation in the region (TAMS
    // stays at bottom), followed by the region being used as a GC
    // alloc region (TAMS will move to top() and the objects
    // originally below it will be greyed). All objects now marked in
    // the region are explicitly greyed, if below the global finger,
    // and we do not need in fact to scan anything else. So, we simply
    // set _finger to be limit to ensure that the bitmap iteration
    // doesn't do anything.
    _finger = limit;
  }

  _region_limit = limit;
}
2102 
// Abandon the region currently being scanned (e.g. on abort), clearing all
// per-region state so another task (or a retry) can pick it up.
void G1CMTask::giveup_current_region() {
  assert(_curr_region != nullptr, "invariant");
  clear_region_fields();
}
2107 
2108 void G1CMTask::clear_region_fields() {
2109   // Values for these three fields that indicate that we're not
2110   // holding on to a region.
2111   _curr_region   = nullptr;
2112   _finger        = nullptr;
2113   _region_limit  = nullptr;
2114 }
2115 
2116 void G1CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
2117   if (cm_oop_closure == nullptr) {
2118     assert(_cm_oop_closure != nullptr, "invariant");
2119   } else {
2120     assert(_cm_oop_closure == nullptr, "invariant");
2121   }
2122   _cm_oop_closure = cm_oop_closure;
2123 }
2124 
// Reset the task for a new marking cycle: adopt the (possibly new) mark
// bitmap, drop any per-region state and zero the per-cycle counters and
// the mark stats cache.
void G1CMTask::reset(G1CMBitMap* mark_bitmap) {
  guarantee(mark_bitmap != nullptr, "invariant");
  _mark_bitmap              = mark_bitmap;
  clear_region_fields();

  _calls                         = 0;
  _elapsed_time_ms               = 0.0;
  _termination_time_ms           = 0.0;

  _mark_stats_cache.reset();
}
2136 
// Reset the task for a marking restart (after overflow): drop per-region
// state, empty the local task queue and reset task queue statistics.
void G1CMTask::reset_for_restart() {
  clear_region_fields();
  _task_queue->set_empty();
  TASKQUEUE_STATS_ONLY(_partial_array_splitter.stats()->reset());
  TASKQUEUE_STATS_ONLY(_task_queue->stats.reset());
}
2143 
// (Re-)construct the embedded partial array splitter in place using
// placement new; paired with unregister_partial_array_splitter(), which
// explicitly destroys it.
void G1CMTask::register_partial_array_splitter() {

  ::new (&_partial_array_splitter) PartialArraySplitter(_cm->partial_array_state_manager(),
                                                        _cm->max_num_tasks(),
                                                        ObjArrayMarkingStride);
}
2150 
// Explicitly destroy the embedded partial array splitter; the storage is
// re-used by the next register_partial_array_splitter() call.
void G1CMTask::unregister_partial_array_splitter() {
  _partial_array_splitter.~PartialArraySplitter();
}
2154 
2155 bool G1CMTask::should_exit_termination() {
2156   if (!regular_clock_call()) {
2157     return true;
2158   }
2159 
2160   // This is called when we are in the termination protocol. We should
2161   // quit if, for some reason, this task wants to abort or the global
2162   // stack is not empty (this means that we can get work from it).
2163   return !_cm->mark_stack_empty() || has_aborted();
2164 }
2165 
// Invoked when the words-scanned or refs-reached counter hits its limit;
// runs the regular clock check and aborts the marking step if it fails.
void G1CMTask::reached_limit() {
  assert(_words_scanned >= _words_scanned_limit ||
         _refs_reached >= _refs_reached_limit ,
         "shouldn't have been called otherwise");
  abort_marking_if_regular_check_fail();
}
2172 
// The periodic "clock" of a marking step. Returns false when the step
// should abort (overflow, full-gc abort, pending yield, time quota
// exceeded, or SATB buffers needing processing), true to continue.
bool G1CMTask::regular_clock_call() {
  if (has_aborted()) {
    return false;
  }

  // First, we need to recalculate the words scanned and refs reached
  // limits for the next clock call.
  recalculate_limits();

  // During the regular clock call we do the following

  // (1) If an overflow has been flagged, then we abort.
  if (_cm->has_overflown()) {
    return false;
  }

  // If we are not concurrent (i.e. we're doing remark) we don't need
  // to check anything else. The other steps are only needed during
  // the concurrent marking phase.
  if (!_cm->concurrent()) {
    return true;
  }

  // (2) If marking has been aborted for Full GC, then we also abort.
  if (_cm->has_aborted()) {
    return false;
  }

  // (3) We check whether we should yield. If we have to, then we abort.
  if (SuspendibleThreadSet::should_yield()) {
    // We should yield. To do this we abort the task. The caller is
    // responsible for yielding.
    return false;
  }

  // (4) We check whether we've reached our time quota. If we have,
  // then we abort.
  double elapsed_time_ms = (double)(os::current_thread_cpu_time() - _start_cpu_time_ns) / NANOSECS_PER_MILLISEC;
  if (elapsed_time_ms > _time_target_ms) {
    _has_timed_out = true;
    return false;
  }

  // (5) Finally, we check whether there are enough completed SATB
  // buffers available for processing. If there are, we abort.
  SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
  if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
    // we do need to process SATB buffers, we'll abort and restart
    // the marking task to do so
    return false;
  }
  return true;
}
2226 
// Set both the "real" and effective limits one period beyond the current
// counters. decrease_limits() may later pull the effective limits in to
// force an earlier clock call.
void G1CMTask::recalculate_limits() {
  _real_words_scanned_limit = _words_scanned + words_scanned_period;
  _words_scanned_limit      = _real_words_scanned_limit;

  _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
  _refs_reached_limit       = _real_refs_reached_limit;
}
2234 
void G1CMTask::decrease_limits() {
  // This is called when we believe that we're going to do an infrequent
  // operation which will increase the per byte scanned cost (i.e. move
  // entries to/from the global stack). It basically tries to decrease the
  // scanning limit so that the clock is called earlier.

  // Pull each effective limit back to one quarter of a period past the
  // point where the real limit was set.
  _words_scanned_limit = _real_words_scanned_limit - 3 * words_scanned_period / 4;
  _refs_reached_limit  = _real_refs_reached_limit - 3 * refs_reached_period / 4;
}
2244 
// Move up to one chunk's worth of entries from the local task queue to the
// global mark stack. Sets the abort flag if the global stack push fails
// (overflow).
void G1CMTask::move_entries_to_global_stack() {
  // Local array where we'll store the entries that will be popped
  // from the local queue.
  G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];

  size_t n = 0;
  G1TaskQueueEntry task_entry;
  while (n < G1CMMarkStack::EntriesPerChunk && _task_queue->pop_local(task_entry)) {
    buffer[n] = task_entry;
    ++n;
  }
  if (n < G1CMMarkStack::EntriesPerChunk) {
    // Null-terminate a partially filled chunk.
    buffer[n] = G1TaskQueueEntry();
  }

  if (n > 0) {
    if (!_cm->mark_stack_push(buffer)) {
      set_has_aborted();
    }
  }

  // This operation was quite expensive, so decrease the limits.
  decrease_limits();
}
2269 
// Pop one chunk of entries from the global mark stack and push them onto
// the local task queue. Returns false if the global stack was empty.
bool G1CMTask::get_entries_from_global_stack() {
  // Local array where we'll store the entries that will be popped
  // from the global stack.
  G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];

  if (!_cm->mark_stack_pop(buffer)) {
    return false;
  }

  // We did actually pop at least one entry.
  for (size_t i = 0; i < G1CMMarkStack::EntriesPerChunk; ++i) {
    G1TaskQueueEntry task_entry = buffer[i];
    if (task_entry.is_null()) {
      // A null entry terminates a partially filled chunk.
      break;
    }
    assert(task_entry.is_partial_array_state() || oopDesc::is_oop(task_entry.to_oop()), "Element " PTR_FORMAT " must be an array slice or oop", p2i(task_entry.to_oop()));
    bool success = _task_queue->push(task_entry);
    // We only call this when the local queue is empty or under a
    // given target limit. So, we do not expect this push to fail.
    assert(success, "invariant");
  }

  // This operation was quite expensive, so decrease the limits
  decrease_limits();
  return true;
}
2296 
2297 void G1CMTask::drain_local_queue(bool partially) {
2298   if (has_aborted()) {
2299     return;
2300   }
2301 
2302   // Decide what the target size is, depending whether we're going to
2303   // drain it partially (so that other tasks can steal if they run out
2304   // of things to do) or totally (at the very end).
2305   uint target_size;
2306   if (partially) {
2307     target_size = GCDrainStackTargetSize;
2308   } else {
2309     target_size = 0;
2310   }
2311 
2312   if (_task_queue->size() > target_size) {
2313     G1TaskQueueEntry entry;
2314     bool ret = _task_queue->pop_local(entry);
2315     while (ret) {
2316       process_entry(entry, false /* stolen */);
2317       if (_task_queue->size() <= target_size || has_aborted()) {
2318         ret = false;
2319       } else {
2320         ret = _task_queue->pop_local(entry);
2321       }
2322     }
2323   }
2324 }
2325 
// Begin sliced processing of a large object array: create the partial array
// state, mark the array's klass metadata, and process the initial chunk.
// Returns the number of words accounted as scanned for this first step
// (initial chunk plus object header).
size_t G1CMTask::start_partial_array_processing(oop obj) {
  assert(should_be_sliced(obj), "Must be an array object %d and large %zu", obj->is_objArray(), obj->size());

  objArrayOop obj_array = oop_cast<objArrayOop>(obj);
  size_t array_length = obj_array->length();

  // Publishes remaining chunks to the task queue for self/others to claim.
  size_t initial_chunk_size = _partial_array_splitter.start(_task_queue, obj_array, nullptr, array_length);

  // Mark objArray klass metadata
  if (_cm_oop_closure->do_metadata()) {
    _cm_oop_closure->do_klass(obj_array->klass());
  }

  process_array_chunk(obj_array, 0, initial_chunk_size);

  // Include object header size
  if (obj_array->is_refArray()) {
    return refArrayOopDesc::object_size(checked_cast<int>(initial_chunk_size));
  } else {
    FlatArrayKlass* fak = FlatArrayKlass::cast(obj_array->klass());
    return flatArrayOopDesc::object_size(fak->layout_helper(), checked_cast<int>(initial_chunk_size));
  }
}
2349 
// Claim and process one chunk of a partially processed object array.
// Returns the number of words accounted as scanned for the claimed chunk.
size_t G1CMTask::process_partial_array(const G1TaskQueueEntry& task, bool stolen) {
  PartialArrayState* state = task.to_partial_array_state();
  // Access state before release by claim().
  objArrayOop obj = oop_cast<objArrayOop>(state->source());

  PartialArraySplitter::Claim claim =
    _partial_array_splitter.claim(state, _task_queue, stolen);

  process_array_chunk(obj, claim._start, claim._end);

  // Convert the number of claimed elements to a word size.
  if (obj->is_refArray()) {
    return heap_word_size((claim._end - claim._start) * heapOopSize);
  } else {
    assert(obj->is_flatArray(), "Must be!");
    size_t element_byte_size = FlatArrayKlass::cast(obj->klass())->element_byte_size();
    size_t nof_elements = claim._end - claim._start;
    return heap_word_size(nof_elements * element_byte_size);
  }
}
2369 
// Transfer entries from the global mark stack to the local queue and
// process them, either down to a target size (partial) or until the global
// stack is empty (total). The local queue must already be drained when
// doing a total drain.
void G1CMTask::drain_global_stack(bool partially) {
  if (has_aborted()) {
    return;
  }

  // We have a policy to drain the local queue before we attempt to
  // drain the global stack.
  assert(partially || _task_queue->size() == 0, "invariant");

  // Decide what the target size is, depending whether we're going to
  // drain it partially (so that other tasks can steal if they run out
  // of things to do) or totally (at the very end).
  // Notice that when draining the global mark stack partially, due to the racyness
  // of the mark stack size update we might in fact drop below the target. But,
  // this is not a problem.
  // In case of total draining, we simply process until the global mark stack is
  // totally empty, disregarding the size counter.
  if (partially) {
    size_t const target_size = _cm->partial_mark_stack_size_target();
    while (!has_aborted() && _cm->mark_stack_size() > target_size) {
      if (get_entries_from_global_stack()) {
        drain_local_queue(partially);
      }
    }
  } else {
    while (!has_aborted() && get_entries_from_global_stack()) {
      drain_local_queue(partially);
    }
  }
}
2400 
// SATB Queue has several assumptions on whether to call the par or
// non-par versions of the methods. This is why some of the code is
// replicated. We should really get rid of the single-threaded version
// of the code to simplify things.
2405 void G1CMTask::drain_satb_buffers() {
2406   if (has_aborted()) {
2407     return;
2408   }
2409 
2410   // We set this so that the regular clock knows that we're in the
2411   // middle of draining buffers and doesn't set the abort flag when it
2412   // notices that SATB buffers are available for draining. It'd be
2413   // very counter productive if it did that. :-)
2414   _draining_satb_buffers = true;
2415 
2416   G1CMSATBBufferClosure satb_cl(this, _g1h);
2417   SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
2418 
2419   // This keeps claiming and applying the closure to completed buffers
2420   // until we run out of buffers or we need to abort.
2421   while (!has_aborted() &&
2422          satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
2423     abort_marking_if_regular_check_fail();
2424   }
2425 
2426   // Can't assert qset is empty here, even if not aborted.  If concurrent,
2427   // some other thread might be adding to the queue.  If not concurrent,
2428   // some other thread might have won the race for the last buffer, but
2429   // has not yet decremented the count.
2430 
2431   _draining_satb_buffers = false;
2432 
2433   // again, this was a potentially expensive operation, decrease the
2434   // limits to get the regular clock call early
2435   decrease_limits();
2436 }
2437 
// Reset the cached mark statistics entry for the given region.
void G1CMTask::clear_mark_stats_cache(uint region_idx) {
  _mark_stats_cache.reset(region_idx);
}
2441 
// Evict all entries from this task's mark statistics cache into the global
// statistics, returning the pair of counters reported by the cache
// (presumably hits/misses — see G1RegionMarkStatsCache::evict_all).
Pair<size_t, size_t> G1CMTask::flush_mark_stats_cache() {
  return _mark_stats_cache.evict_all();
}
2445 
2446 void G1CMTask::print_stats() {
2447   log_debug(gc, stats)("Marking Stats, task = %u, calls = %u", _worker_id, _calls);
2448   log_debug(gc, stats)("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
2449                        _elapsed_time_ms, _termination_time_ms);
2450   log_debug(gc, stats)("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms max = %1.2lfms, total = %1.2lfms",
2451                        _step_times_ms.num(),
2452                        _step_times_ms.avg(),
2453                        _step_times_ms.sd(),
2454                        _step_times_ms.maximum(),
2455                        _step_times_ms.sum());
2456   size_t const hits = _mark_stats_cache.hits();
2457   size_t const misses = _mark_stats_cache.misses();
2458   log_debug(gc, stats)("  Mark Stats Cache: hits %zu misses %zu ratio %.3f",
2459                        hits, misses, percent_of(hits, hits + misses));
2460 }
2461 
// Attempt to steal a single entry from another task's queue on behalf of
// the given worker; on success the entry is stored in task_entry and true
// is returned.
bool G1ConcurrentMark::try_stealing(uint worker_id, G1TaskQueueEntry& task_entry) {
  return _task_queues->steal(worker_id, task_entry);
}
2465 
// Scan the region this task currently holds (_curr_region) by iterating its
// mark bitmap between the local finger and the region limit, greying objects
// via bitmap_closure. Gives up the region when it is fully scanned (or is a
// humongous region, which is handled in one step); if the bitmap iteration
// is aborted, advances _finger past the last scanned object so a restart
// does not rescan it. No-op if this task has aborted or holds no region.
void G1CMTask::process_current_region(G1CMBitMapClosure& bitmap_closure) {
  if (has_aborted() || _curr_region == nullptr) {
    return;
  }

  // This means that we're already holding on to a region.
  assert(_finger != nullptr, "if region is not null, then the finger "
         "should not be null either");

  // We might have restarted this task after an evacuation pause
  // which might have evacuated the region we're holding on to
  // underneath our feet. Let's read its limit again to make sure
  // that we do not iterate over a region of the heap that
  // contains garbage (update_region_limit() will also move
  // _finger to the start of the region if it is found empty).
  update_region_limit();
  // We will start from _finger not from the start of the region,
  // as we might be restarting this task after aborting half-way
  // through scanning this region. In this case, _finger points to
  // the address where we last found a marked object. If this is a
  // fresh region, _finger points to start().
  MemRegion mr = MemRegion(_finger, _region_limit);

  assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
         "humongous regions should go around loop once only");

  // Some special cases:
  // If the memory region is empty, we can just give up the region.
  // If the current region is humongous then we only need to check
  // the bitmap for the bit associated with the start of the object,
  // scan the object if it's live, and give up the region.
  // Otherwise, let's iterate over the bitmap of the part of the region
  // that is left.
  // If the iteration is successful, give up the region.
  if (mr.is_empty()) {
    giveup_current_region();
    abort_marking_if_regular_check_fail();
  } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
    if (_mark_bitmap->is_marked(mr.start())) {
      // The object is marked - apply the closure
      bitmap_closure.do_addr(mr.start());
    }
    // Even if this task aborted while scanning the humongous object
    // we can (and should) give up the current region.
    giveup_current_region();
    abort_marking_if_regular_check_fail();
  } else if (_mark_bitmap->iterate(&bitmap_closure, mr)) {
    giveup_current_region();
    abort_marking_if_regular_check_fail();
  } else {
    assert(has_aborted(), "currently the only way to do so");
    // The only way to abort the bitmap iteration is to return
    // false from the do_bit() method. However, inside the
    // do_bit() method we move the _finger to point to the
    // object currently being looked at. So, if we bail out, we
    // have definitely set _finger to something non-null.
    assert(_finger != nullptr, "invariant");

    // Region iteration was actually aborted. So now _finger
    // points to the address of the object we last scanned. If we
    // leave it there, when we restart this task, we will rescan
    // the object. It is easy to avoid this. We move the finger by
    // enough to point to the next possible object header.
    assert(_finger < _region_limit, "invariant");
    HeapWord* const new_finger = _finger + cast_to_oop(_finger)->size();
    if (new_finger >= _region_limit) {
      giveup_current_region();
    } else {
      move_finger_to(new_finger);
    }
  }
}
2538 
2539 void G1CMTask::claim_new_region() {
2540   // Read the note on the claim_region() method on why it might
2541   // return null with potentially more regions available for
2542   // claiming and why we have to check out_of_regions() to determine
2543   // whether we're done or not.
2544   while (!has_aborted() && _curr_region == nullptr && !_cm->out_of_regions()) {
2545     // We are going to try to claim a new region. We should have
2546     // given up on the previous one.
2547     // Separated the asserts so that we know which one fires.
2548     assert(_curr_region  == nullptr, "invariant");
2549     assert(_finger       == nullptr, "invariant");
2550     assert(_region_limit == nullptr, "invariant");
2551     G1HeapRegion* claimed_region = _cm->claim_region(_worker_id);
2552     if (claimed_region != nullptr) {
2553       // Yes, we managed to claim one
2554       setup_for_region(claimed_region);
2555       assert(_curr_region == claimed_region, "invariant");
2556     }
2557     // It is important to call the regular clock here. It might take
2558     // a while to claim a region if, for example, we hit a large
2559     // block of empty regions. So we need to call the regular clock
2560     // method once round the loop to make sure it's called
2561     // frequently enough.
2562     abort_marking_if_regular_check_fail();
2563   }
2564 }
2565 
2566 void G1CMTask::attempt_stealing() {
2567   // We cannot check whether the global stack is empty, since other
2568   // tasks might be pushing objects to it concurrently.
2569   assert(_cm->out_of_regions() && _task_queue->size() == 0,
2570          "only way to reach here");
2571   while (!has_aborted()) {
2572     G1TaskQueueEntry entry;
2573     if (_cm->try_stealing(_worker_id, entry)) {
2574       process_entry(entry, true /* stolen */);
2575 
2576       // And since we're towards the end, let's totally drain the
2577       // local queue and global stack.
2578       drain_local_queue(false);
2579       drain_global_stack(false);
2580     } else {
2581       break;
2582     }
2583   }
2584 }
2585 
2586 void G1CMTask::attempt_termination(bool is_serial) {
2587   // We cannot check whether the global stack is empty, since other
2588   // tasks might be concurrently pushing objects on it.
2589   // Separated the asserts so that we know which one fires.
2590   assert(_cm->out_of_regions(), "only way to reach here");
2591   assert(_task_queue->size() == 0, "only way to reach here");
2592   double termination_start_time_ms = os::elapsedTime() * 1000.0;
2593 
2594   // The G1CMTask class also extends the TerminatorTerminator class,
2595   // hence its should_exit_termination() method will also decide
2596   // whether to exit the termination protocol or not.
2597   bool finished = (is_serial ||
2598                    _cm->terminator()->offer_termination(this));
2599   _termination_time_ms += (os::elapsedTime() * 1000.0 - termination_start_time_ms);
2600 
2601   if (finished) {
2602     // We're all done.
2603 
2604     // We can now guarantee that the global stack is empty, since
2605     // all other tasks have finished. We separated the guarantees so
2606     // that, if a condition is false, we can immediately find out
2607     // which one.
2608     guarantee(_cm->out_of_regions(), "only way to reach here");
2609     guarantee(_cm->mark_stack_empty(), "only way to reach here");
2610     guarantee(_task_queue->size() == 0, "only way to reach here");
2611     guarantee(!_cm->has_overflown(), "only way to reach here");
2612     guarantee(!has_aborted(), "should never happen if termination has completed");
2613   } else {
2614     // Apparently there's more work to do. Let's abort this task. We
2615     // will restart it and hopefully we can find more things to do.
2616     set_has_aborted();
2617   }
2618 }
2619 
// Post-abort bookkeeping for do_marking_step(): records timing statistics if
// the abort was caused by a timeout; if the abort was caused by a global
// mark-stack overflow, synchronizes with the other tasks (via two barriers
// in the parallel case) and re-initializes this task's region fields and
// statistics cache so that marking can restart safely.
void G1CMTask::handle_abort(bool is_serial, double elapsed_time_ms) {
  if (_has_timed_out) {
    double diff_ms = elapsed_time_ms - _time_target_ms;
    // Keep statistics of how well we did with respect to hitting
    // our target only if we actually timed out (if we aborted for
    // other reasons, then the results might get skewed).
    _marking_step_diff_ms.add(diff_ms);
  }

  if (!_cm->has_overflown()) {
    return;
  }

  // This is the interesting one. We aborted because a global
  // overflow was raised. This means we have to restart the
  // marking phase and start iterating over regions. However, in
  // order to do this we have to make sure that all tasks stop
  // what they are doing and re-initialize in a safe manner. We
  // will achieve this with the use of two barrier sync points.
  if (!is_serial) {
    // We only need to enter the sync barrier if being called
    // from a parallel context
    _cm->enter_first_sync_barrier(_worker_id);

    // When we exit this sync barrier we know that all tasks have
    // stopped doing marking work. So, it's now safe to
    // re-initialize our data structures.
  }

  clear_region_fields();
  flush_mark_stats_cache();

  if (!is_serial) {
    // If we're executing the concurrent phase of marking, reset the marking
    // state; otherwise the marking state is reset after reference processing,
    // during the remark pause.
    // If we reset here as a result of an overflow during the remark we will
    // see assertion failures from any subsequent set_concurrency_and_phase()
    // calls.
    if (_cm->concurrent() && _worker_id == 0) {
      // Worker 0 is responsible for clearing the global data structures because
      // of an overflow. During STW we should not clear the overflow flag (in
      // G1ConcurrentMark::reset_marking_state()) since we rely on it being true when we exit
      // method to abort the pause and restart concurrent marking.
      _cm->reset_marking_for_restart();

      log_info(gc, marking)("Concurrent Mark reset for overflow");
    }

    // ...and enter the second barrier.
    _cm->enter_second_sync_barrier(_worker_id);
  }
}
2673 
2674 /*****************************************************************************
2675 
2676     The do_marking_step(time_target_ms, ...) method is the building
2677     block of the parallel marking framework. It can be called in parallel
2678     with other invocations of do_marking_step() on different tasks
2679     (but only one per task, obviously) and concurrently with the
2680     mutator threads, or during remark, hence it eliminates the need
2681     for two versions of the code. When called during remark, it will
2682     pick up from where the task left off during the concurrent marking
2683     phase. Interestingly, tasks are also claimable during evacuation
2684     pauses too, since do_marking_step() ensures that it aborts before
2685     it needs to yield.
2686 
2687     The data structures that it uses to do marking work are the
2688     following:
2689 
2690       (1) Marking Bitmap. If there are grey objects that appear only
2691       on the bitmap (this happens either when dealing with an overflow
2692       or when the concurrent start pause has simply marked the roots
2693       and didn't push them on the stack), then tasks claim heap
2694       regions whose bitmap they then scan to find grey objects. A
2695       global finger indicates where the end of the last claimed region
2696       is. A local finger indicates how far into the region a task has
2697       scanned. The two fingers are used to determine how to grey an
2698       object (i.e. whether simply marking it is OK, as it will be
2699       visited by a task in the future, or whether it needs to be also
2700       pushed on a stack).
2701 
2702       (2) Local Queue. The local queue of the task which is accessed
2703       reasonably efficiently by the task. Other tasks can steal from
2704       it when they run out of work. Throughout the marking phase, a
2705       task attempts to keep its local queue short but not totally
2706       empty, so that entries are available for stealing by other
2707       tasks. Only when there is no more work, a task will totally
2708       drain its local queue.
2709 
2710       (3) Global Mark Stack. This handles local queue overflow. During
2711       marking only sets of entries are moved between it and the local
2712       queues, as access to it requires a mutex and more fine-grain
2713       interaction with it which might cause contention. If it
2714       overflows, then the marking phase should restart and iterate
2715       over the bitmap to identify grey objects. Throughout the marking
2716       phase, tasks attempt to keep the global mark stack at a small
2717       length but not totally empty, so that entries are available for
2718       popping by other tasks. Only when there is no more work, tasks
2719       will totally drain the global mark stack.
2720 
2721       (4) SATB Buffer Queue. This is where completed SATB buffers are
2722       made available. Buffers are regularly removed from this queue
2723       and scanned for roots, so that the queue doesn't get too
2724       long. During remark, all completed buffers are processed, as
2725       well as the filled in parts of any uncompleted buffers.
2726 
2727     The do_marking_step() method tries to abort when the time target
2728     has been reached. There are a few other cases when the
2729     do_marking_step() method also aborts:
2730 
2731       (1) When the marking phase has been aborted (after a Full GC).
2732 
2733       (2) When a global overflow (on the global stack) has been
2734       triggered. Before the task aborts, it will actually sync up with
2735       the other tasks to ensure that all the marking data structures
2736       (local queues, stacks, fingers etc.)  are re-initialized so that
2737       when do_marking_step() completes, the marking phase can
2738       immediately restart.
2739 
2740       (3) When enough completed SATB buffers are available. The
2741       do_marking_step() method only tries to drain SATB buffers right
2742       at the beginning. So, if enough buffers are available, the
2743       marking step aborts and the SATB buffers are processed at
2744       the beginning of the next invocation.
2745 
      (4) To yield. When we have to yield, we abort and yield
      right at the end of do_marking_step(). This saves us from a lot
      of hassle as, by yielding we might allow a Full GC. If this
      happens then objects will be compacted underneath our feet, the
      heap might shrink, etc. We save checking for this by just
      aborting and doing the yield right at the end.
2752 
2753     From the above it follows that the do_marking_step() method should
2754     be called in a loop (or, otherwise, regularly) until it completes.
2755 
2756     If a marking step completes without its has_aborted() flag being
2757     true, it means it has completed the current marking phase (and
2758     also all other marking tasks have done so and have all synced up).
2759 
2760     A method called regular_clock_call() is invoked "regularly" (in
2761     sub ms intervals) throughout marking. It is this clock method that
2762     checks all the abort conditions which were mentioned above and
2763     decides when the task should abort. A work-based scheme is used to
2764     trigger this clock method: when the number of object words the
2765     marking phase has scanned or the number of references the marking
2766     phase has visited reach a given limit. Additional invocations to
2767     the method clock have been planted in a few other strategic places
2768     too. The initial reason for the clock method was to avoid calling
2769     cpu time gathering too regularly, as it is quite expensive. So,
2770     once it was in place, it was natural to piggy-back all the other
2771     conditions on it too and not constantly check them throughout the code.
2772 
2773     If do_termination is true then do_marking_step will enter its
2774     termination protocol.
2775 
2776     The value of is_serial must be true when do_marking_step is being
2777     called serially (i.e. by the VMThread) and do_marking_step should
2778     skip any synchronization in the termination and overflow code.
2779     Examples include the serial remark code and the serial reference
2780     processing closures.
2781 
2782     The value of is_serial must be false when do_marking_step is
2783     being called by any of the worker threads.
2784     Examples include the concurrent marking code (CMMarkingTask),
2785     the MT remark code, and the MT reference processing closures.
2786 
2787  *****************************************************************************/
2788 
// The central building block of the parallel marking framework; see the
// large comment block above for the full description of its behavior,
// abort conditions and the meaning of do_termination/is_serial.
void G1CMTask::do_marking_step(double time_target_ms,
                               bool do_termination,
                               bool is_serial) {
  assert(time_target_ms >= 1.0, "minimum granularity is 1ms");

  // Step times are measured in this thread's CPU time, not wall-clock time.
  _start_cpu_time_ns = os::current_thread_cpu_time();

  // If do_stealing is true then do_marking_step will attempt to
  // steal work from the other G1CMTasks. It only makes sense to
  // enable stealing when the termination protocol is enabled
  // and do_marking_step() is not being called serially.
  bool do_stealing = do_termination && !is_serial;

  // Shrink the time target by the predicted overshoot from previous steps.
  G1Predictions const& predictor = _g1h->policy()->predictor();
  double diff_prediction_ms = predictor.predict_zero_bounded(&_marking_step_diff_ms);
  _time_target_ms = time_target_ms - diff_prediction_ms;

  // set up the variables that are used in the work-based scheme to
  // call the regular clock method
  _words_scanned = 0;
  _refs_reached  = 0;
  recalculate_limits();

  // clear all flags
  clear_has_aborted();
  _has_timed_out = false;
  _draining_satb_buffers = false;

  ++_calls;

  // Set up the bitmap and oop closures. Anything that uses them is
  // eventually called from this method, so it is OK to allocate these
  // statically.
  G1CMBitMapClosure bitmap_closure(this, _cm);
  G1CMOopClosure cm_oop_closure(_g1h, this);
  set_cm_oop_closure(&cm_oop_closure);

  if (_cm->has_overflown()) {
    // This can happen if the mark stack overflows during a GC pause
    // and this task, after a yield point, restarts. We have to abort
    // as we need to get into the overflow protocol which happens
    // right at the end of this task.
    set_has_aborted();
  }

  // First drain any available SATB buffers. After this, we will not
  // look at SATB buffers before the next invocation of this method.
  // If enough completed SATB buffers are queued up, the regular clock
  // will abort this task so that it restarts.
  drain_satb_buffers();
  // ...then partially drain the local queue and the global stack
  drain_local_queue(true);
  drain_global_stack(true);

  // Main work loop: scan the held region, keep the queues short, then
  // claim another region, until we run out of regions or abort.
  do {
    process_current_region(bitmap_closure);
    // At this point we have either completed iterating over the
    // region we were holding on to, or we have aborted.

    // We then partially drain the local queue and the global stack.
    drain_local_queue(true);
    drain_global_stack(true);

    claim_new_region();

    assert(has_aborted() || _curr_region != nullptr || _cm->out_of_regions(),
           "at this point we should be out of regions");
  } while ( _curr_region != nullptr && !has_aborted());

  // We cannot check whether the global stack is empty, since other
  // tasks might be pushing objects to it concurrently.
  assert(has_aborted() || _cm->out_of_regions(),
         "at this point we should be out of regions");
  // Try to reduce the number of available SATB buffers so that
  // remark has less work to do.
  drain_satb_buffers();

  // Since we've done everything else, we can now totally drain the
  // local queue and global stack.
  drain_local_queue(false);
  drain_global_stack(false);

  // Attempt at work stealing from other task's queues.
  if (do_stealing && !has_aborted()) {
    // We have not aborted. This means that we have finished all that
    // we could. Let's try to do some stealing...
    attempt_stealing();
  }

  // We still haven't aborted. Now, let's try to get into the
  // termination protocol.
  if (do_termination && !has_aborted()) {
    attempt_termination(is_serial);
  }

  // Mainly for debugging purposes to make sure that a pointer to the
  // closure which was statically allocated in this frame doesn't
  // escape it by accident.
  set_cm_oop_closure(nullptr);
  jlong end_cpu_time_ns = os::current_thread_cpu_time();
  // Elapsed time here is CPU time consumed by this step, in milliseconds.
  double elapsed_time_ms = (double)(end_cpu_time_ns - _start_cpu_time_ns) / NANOSECS_PER_MILLISEC;
  // Update the step history.
  _step_times_ms.add(elapsed_time_ms);

  if (has_aborted()) {
    // The task was aborted for some reason.
    handle_abort(is_serial, elapsed_time_ms);
  }
}
2898 
// Construct a marking task for the given worker, wiring it up to the shared
// concurrent mark state, its work queue, and the per-region mark statistics.
G1CMTask::G1CMTask(uint worker_id,
                   G1ConcurrentMark* cm,
                   G1CMTaskQueue* task_queue,
                   G1RegionMarkStats* mark_stats) :
  _worker_id(worker_id),
  _g1h(G1CollectedHeap::heap()),
  _cm(cm),
  _mark_bitmap(nullptr),
  _task_queue(task_queue),
  _partial_array_splitter(_cm->partial_array_state_manager(), _cm->max_num_tasks(), ObjArrayMarkingStride),
  _mark_stats_cache(mark_stats, G1RegionMarkStatsCache::RegionMarkStatsCacheSize),
  _calls(0),
  _time_target_ms(0.0),
  _start_cpu_time_ns(0),
  _cm_oop_closure(nullptr),
  _curr_region(nullptr),
  _finger(nullptr),
  _region_limit(nullptr),
  _words_scanned(0),
  _words_scanned_limit(0),
  _real_words_scanned_limit(0),
  _refs_reached(0),
  _refs_reached_limit(0),
  _real_refs_reached_limit(0),
  _has_aborted(false),
  _has_timed_out(false),
  _draining_satb_buffers(false),
  _step_times_ms(),
  _elapsed_time_ms(0.0),
  _termination_time_ms(0.0),
  _marking_step_diff_ms()
{
  guarantee(task_queue != nullptr, "invariant");

  // Seed the overshoot prediction so the very first marking step already
  // shaves a small amount off its time target.
  _marking_step_diff_ms.add(0.5);
}
2935 
2936 // These are formatting macros that are used below to ensure
2937 // consistent formatting. The *_H_* versions are used to format the
2938 // header for a particular value and they should be kept consistent
2939 // with the corresponding macro. Also note that most of the macros add
2940 // the necessary white space (as a prefix) which makes them a bit
2941 // easier to compose.
2942 
2943 // All the output lines are prefixed with this string to be able to
2944 // identify them easily in a large log file.
2945 #define G1PPRL_LINE_PREFIX            "###"
2946 
2947 #define G1PPRL_ADDR_BASE_FORMAT    " " PTR_FORMAT "-" PTR_FORMAT
2948 #ifdef _LP64
2949 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
2950 #else // _LP64
2951 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
2952 #endif // _LP64
2953 
2954 // For per-region info
2955 #define G1PPRL_TYPE_FORMAT            "   %-4s"
2956 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
2957 #define G1PPRL_STATE_FORMAT           "   %-5s"
2958 #define G1PPRL_STATE_H_FORMAT         "   %5s"
2959 #define G1PPRL_BYTE_FORMAT            "  %9zu"
2960 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
2961 #define G1PPRL_DOUBLE_FORMAT          "%14.1f"
2962 #define G1PPRL_GCEFF_H_FORMAT         "  %14s"
2963 #define G1PPRL_GID_H_FORMAT           "  %9s"
2964 #define G1PPRL_GID_FORMAT             "  " UINT32_FORMAT_W(9)
2965 #define G1PPRL_LEN_FORMAT             "  " UINT32_FORMAT_W(14)
2966 #define G1PPRL_LEN_H_FORMAT           "  %14s"
2967 #define G1PPRL_GID_GCEFF_FORMAT       "  %14.1f"
2968 #define G1PPRL_GID_LIVENESS_FORMAT    "  %9.2f"
2969 
2970 // For summary info
2971 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  " tag ":" G1PPRL_ADDR_BASE_FORMAT
2972 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  " tag ": %zu"
2973 #define G1PPRL_SUM_MB_FORMAT(tag)      "  " tag ": %1.2f MB"
2974 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"
2975 
// Print the header of the per-region liveness report (phase line, heap
// summary line and column headers); the per-region lines are produced by
// do_heap_region() and the footer by the destructor. Does nothing unless
// trace-level (gc, liveness) logging is enabled.
G1PrintRegionLivenessInfoClosure::G1PrintRegionLivenessInfoClosure(const char* phase_name) :
  _total_used_bytes(0),
  _total_capacity_bytes(0),
  _total_live_bytes(0),
  _total_remset_bytes(0),
  _total_code_roots_bytes(0)
{
  if (!log_is_enabled(Trace, gc, liveness)) {
    return;
  }

  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  MemRegion reserved = g1h->reserved();
  double now = os::elapsedTime();

  // Print the header of the output.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP"
                          G1PPRL_SUM_ADDR_FORMAT("reserved")
                          G1PPRL_SUM_BYTE_FORMAT("region-size"),
                          p2i(reserved.start()), p2i(reserved.end()),
                          G1HeapRegion::GrainBytes);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
  // First header row: column names.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          G1PPRL_TYPE_H_FORMAT
                          G1PPRL_ADDR_BASE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_STATE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_GID_H_FORMAT,
                          "type", "address-range",
                          "used", "live",
                          "state", "code-roots",
                          "group-id");
  // Second header row: units for each column.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          G1PPRL_TYPE_H_FORMAT
                          G1PPRL_ADDR_BASE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_STATE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_GID_H_FORMAT,
                          "", "",
                          "(bytes)", "(bytes)",
                          "", "(bytes)", "");
}
3023 
3024 bool G1PrintRegionLivenessInfoClosure::do_heap_region(G1HeapRegion* r) {
3025   if (!log_is_enabled(Trace, gc, liveness)) {
3026     return false;
3027   }
3028 
3029   const char* type       = r->get_type_str();
3030   HeapWord* bottom       = r->bottom();
3031   HeapWord* end          = r->end();
3032   size_t capacity_bytes  = r->capacity();
3033   size_t used_bytes      = r->used();
3034   size_t live_bytes      = r->live_bytes();
3035   size_t remset_bytes    = r->rem_set()->mem_size();
3036   size_t code_roots_bytes = r->rem_set()->code_roots_mem_size();
3037   const char* remset_type = r->rem_set()->get_short_state_str();
3038   uint cset_group_id     = r->rem_set()->has_cset_group()
3039                          ? r->rem_set()->cset_group_id()
3040                          : G1CSetCandidateGroup::NoRemSetId;
3041 
3042   _total_used_bytes      += used_bytes;
3043   _total_capacity_bytes  += capacity_bytes;
3044   _total_live_bytes      += live_bytes;
3045   _total_remset_bytes    += remset_bytes;
3046   _total_code_roots_bytes += code_roots_bytes;
3047 
3048   // Print a line for this particular region.
3049   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3050                         G1PPRL_TYPE_FORMAT
3051                         G1PPRL_ADDR_BASE_FORMAT
3052                         G1PPRL_BYTE_FORMAT
3053                         G1PPRL_BYTE_FORMAT
3054                         G1PPRL_STATE_FORMAT
3055                         G1PPRL_BYTE_FORMAT
3056                         G1PPRL_GID_FORMAT,
3057                         type, p2i(bottom), p2i(end),
3058                         used_bytes, live_bytes,
3059                         remset_type, code_roots_bytes,
3060                         cset_group_id);
3061 
3062   return false;
3063 }
3064 
// Print the footer of the per-region liveness report: the collection-set
// candidate group lines and a summary of the accumulated totals. Does
// nothing unless trace-level (gc, liveness) logging is enabled.
G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
  if (!log_is_enabled(Trace, gc, liveness)) {
    return;
  }

  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  _total_remset_bytes += g1h->card_set_freelist_pool()->mem_size();
  // add static memory usages to remembered set sizes
  _total_remset_bytes += G1HeapRegionRemSet::static_mem_size();

  log_cset_candidate_groups();

  // Print the footer of the output.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                         " SUMMARY"
                         G1PPRL_SUM_MB_FORMAT("capacity")
                         G1PPRL_SUM_MB_PERC_FORMAT("used")
                         G1PPRL_SUM_MB_PERC_FORMAT("live")
                         G1PPRL_SUM_MB_FORMAT("remset")
                         G1PPRL_SUM_MB_FORMAT("code-roots"),
                         bytes_to_mb(_total_capacity_bytes),
                         bytes_to_mb(_total_used_bytes),
                         percent_of(_total_used_bytes, _total_capacity_bytes),
                         bytes_to_mb(_total_live_bytes),
                         percent_of(_total_live_bytes, _total_capacity_bytes),
                         bytes_to_mb(_total_remset_bytes),
                         bytes_to_mb(_total_code_roots_bytes));
}
3094 
3095 void G1PrintRegionLivenessInfoClosure::log_cset_candidate_group_add_total(G1CSetCandidateGroup* group, const char* type) {
3096   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3097                           G1PPRL_GID_FORMAT
3098                           G1PPRL_LEN_FORMAT
3099                           G1PPRL_GID_GCEFF_FORMAT
3100                           G1PPRL_GID_LIVENESS_FORMAT
3101                           G1PPRL_BYTE_FORMAT
3102                           G1PPRL_TYPE_H_FORMAT,
3103                           group->group_id(),
3104                           group->length(),
3105                           group->length() > 0 ? group->gc_efficiency() : 0.0,
3106                           group->length() > 0 ? group->liveness_percent() : 0.0,
3107                           group->card_set()->mem_size(),
3108                           type);
3109   _total_remset_bytes += group->card_set()->mem_size();
3110 }
3111 
3112 void G1PrintRegionLivenessInfoClosure::log_cset_candidate_grouplist(G1CSetCandidateGroupList& gl, const char* type) {
3113   for (G1CSetCandidateGroup* group : gl) {
3114     log_cset_candidate_group_add_total(group, type);
3115   }
3116 }
3117 
3118 void G1PrintRegionLivenessInfoClosure::log_cset_candidate_groups() {
3119   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3120   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" Collection Set Candidate Groups");
3121   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX " Types: Y=Young, M=From Marking Regions, R=Retained Regions");
3122   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3123                           G1PPRL_GID_H_FORMAT
3124                           G1PPRL_LEN_H_FORMAT
3125                           G1PPRL_GCEFF_H_FORMAT
3126                           G1PPRL_BYTE_H_FORMAT
3127                           G1PPRL_BYTE_H_FORMAT
3128                           G1PPRL_TYPE_H_FORMAT,
3129                           "groud-id", "num-regions",
3130                           "gc-eff", "liveness",
3131                           "remset", "type");
3132 
3133   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3134                           G1PPRL_GID_H_FORMAT
3135                           G1PPRL_LEN_H_FORMAT
3136                           G1PPRL_GCEFF_H_FORMAT
3137                           G1PPRL_BYTE_H_FORMAT
3138                           G1PPRL_BYTE_H_FORMAT
3139                           G1PPRL_TYPE_H_FORMAT,
3140                           "", "",
3141                           "(bytes/ms)", "%",
3142                           "(bytes)", "");
3143 
3144   G1CollectedHeap* g1h = G1CollectedHeap::heap();
3145 
3146   log_cset_candidate_group_add_total(g1h->young_regions_cset_group(), "Y");
3147 
3148   G1CollectionSetCandidates* candidates = g1h->policy()->candidates();
3149   log_cset_candidate_grouplist(candidates->from_marking_groups(), "M");
3150   log_cset_candidate_grouplist(candidates->retained_groups(), "R");
3151 }