1 /*
2 * Copyright (c) 2001, 2026, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "classfile/classLoaderData.hpp"
26 #include "classfile/classLoaderDataGraph.hpp"
27 #include "cppstdlib/new.hpp"
28 #include "gc/g1/g1BarrierSet.hpp"
29 #include "gc/g1/g1BatchedTask.hpp"
30 #include "gc/g1/g1CardSetMemory.hpp"
31 #include "gc/g1/g1CardTableClaimTable.inline.hpp"
32 #include "gc/g1/g1CollectedHeap.inline.hpp"
33 #include "gc/g1/g1CollectionSetChooser.hpp"
34 #include "gc/g1/g1CollectorState.hpp"
35 #include "gc/g1/g1ConcurrentMark.inline.hpp"
36 #include "gc/g1/g1ConcurrentMarkRemarkTasks.hpp"
37 #include "gc/g1/g1ConcurrentMarkThread.inline.hpp"
38 #include "gc/g1/g1ConcurrentRebuildAndScrub.hpp"
39 #include "gc/g1/g1ConcurrentRefine.hpp"
40 #include "gc/g1/g1HeapRegion.inline.hpp"
41 #include "gc/g1/g1HeapRegionManager.hpp"
42 #include "gc/g1/g1HeapRegionPrinter.hpp"
43 #include "gc/g1/g1HeapRegionRemSet.inline.hpp"
44 #include "gc/g1/g1HeapRegionSet.inline.hpp"
45 #include "gc/g1/g1HeapVerifier.hpp"
46 #include "gc/g1/g1OopClosures.inline.hpp"
47 #include "gc/g1/g1Policy.hpp"
48 #include "gc/g1/g1RegionMarkStatsCache.inline.hpp"
49 #include "gc/g1/g1ThreadLocalData.hpp"
50 #include "gc/g1/g1Trace.hpp"
51 #include "gc/shared/gcId.hpp"
52 #include "gc/shared/gcTimer.hpp"
53 #include "gc/shared/gcTraceTime.inline.hpp"
54 #include "gc/shared/gcVMOperations.hpp"
55 #include "gc/shared/partialArraySplitter.inline.hpp"
56 #include "gc/shared/partialArrayState.hpp"
57 #include "gc/shared/partialArrayTaskStats.hpp"
58 #include "gc/shared/referencePolicy.hpp"
59 #include "gc/shared/suspendibleThreadSet.hpp"
60 #include "gc/shared/taskqueue.inline.hpp"
61 #include "gc/shared/taskTerminator.hpp"
62 #include "gc/shared/weakProcessor.inline.hpp"
63 #include "gc/shared/workerPolicy.hpp"
64 #include "jvm.h"
65 #include "logging/log.hpp"
66 #include "memory/allocation.hpp"
67 #include "memory/iterator.hpp"
68 #include "memory/metaspaceUtils.hpp"
69 #include "memory/resourceArea.hpp"
70 #include "memory/universe.hpp"
71 #include "nmt/memTracker.hpp"
72 #include "oops/access.inline.hpp"
73 #include "oops/oop.inline.hpp"
74 #include "oops/oopCast.inline.hpp"
75 #include "runtime/globals_extension.hpp"
76 #include "runtime/handles.inline.hpp"
77 #include "runtime/java.hpp"
78 #include "runtime/orderAccess.hpp"
79 #include "runtime/os.hpp"
80 #include "runtime/prefetch.inline.hpp"
81 #include "runtime/threads.hpp"
82 #include "utilities/align.hpp"
83 #include "utilities/checkedCast.hpp"
84 #include "utilities/formatBuffer.hpp"
85 #include "utilities/growableArray.hpp"
86 #include "utilities/powerOfTwo.hpp"
87
// Default-construct in an unbound state; initialize() must be called before use.
G1CMIsAliveClosure::G1CMIsAliveClosure() : _cm(nullptr) { }
89
// Construct a closure bound to the given (non-null) concurrent mark instance.
G1CMIsAliveClosure::G1CMIsAliveClosure(G1ConcurrentMark* cm) : _cm(cm) {
  assert(cm != nullptr, "must be");
}
93
// Late binding of the concurrent mark instance for default-constructed
// closures. May be called at most once.
void G1CMIsAliveClosure::initialize(G1ConcurrentMark* cm) {
  assert(cm != nullptr, "must be");
  assert(_cm == nullptr, "double initialize");
  _cm = cm;
}
99
// Process one marked address found while iterating the mark bitmap.
// Returns false to abort the iteration if the owning task has aborted.
bool G1CMBitMapClosure::do_addr(HeapWord* const addr) {
  // The address must lie between the task's local finger and the global finger.
  assert(addr < _cm->finger(), "invariant");
  assert(addr >= _task->finger(), "invariant");

  // We move that task's local finger along.
  _task->move_finger_to(addr);

  _task->process_entry(G1TaskQueueEntry(cast_to_oop(addr)), false /* stolen */);
  // we only partially drain the local queue and global stack
  _task->drain_local_queue(true);
  _task->drain_global_stack(true);

  // if the has_aborted flag has been raised, we need to bail out of
  // the iteration
  return !_task->has_aborted();
}
116
G1CMMarkStack::G1CMMarkStack() :
  _chunk_allocator() {
  // Start with empty chunk and free lists; backing memory is reserved
  // later in initialize().
  set_empty();
}
121
122 size_t G1CMMarkStack::capacity_alignment() {
123 return (size_t)lcm(os::vm_allocation_granularity(), sizeof(TaskQueueEntryChunk)) / sizeof(G1TaskQueueEntry);
124 }
125
// Derive the mark stack sizing from the MarkStackSize/MarkStackSizeMax
// flags (rounding both to whole chunks), publish the rounded values back
// into the flags, and initialize the chunk allocator. Returns false if the
// initial backing memory could not be reserved.
bool G1CMMarkStack::initialize() {
  guarantee(_chunk_allocator.capacity() == 0, "G1CMMarkStack already initialized.");

  size_t initial_capacity = MarkStackSize;
  size_t max_capacity = MarkStackSizeMax;

  // Number of G1TaskQueueEntry elements per chunk.
  size_t const TaskEntryChunkSizeInVoidStar = sizeof(TaskQueueEntryChunk) / sizeof(G1TaskQueueEntry);

  // Convert the entry capacities into whole numbers of chunks.
  size_t max_num_chunks = align_up(max_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
  size_t initial_num_chunks = align_up(initial_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;

  // The allocator requires a power-of-2 initial chunk count; the maximum
  // must be at least as large as the rounded-up initial count.
  initial_num_chunks = round_up_power_of_2(initial_num_chunks);
  max_num_chunks = MAX2(initial_num_chunks, max_num_chunks);

  // Cap the resulting entry counts so they stay below INT_MAX.
  size_t limit = (INT_MAX - 1);
  max_capacity = MIN2((max_num_chunks * TaskEntryChunkSizeInVoidStar), limit);
  initial_capacity = MIN2((initial_num_chunks * TaskEntryChunkSizeInVoidStar), limit);

  // Publish the rounded values so the flags reflect the actual sizing.
  FLAG_SET_ERGO(MarkStackSizeMax, max_capacity);
  FLAG_SET_ERGO(MarkStackSize, initial_capacity);

  log_trace(gc)("MarkStackSize: %uk MarkStackSizeMax: %uk", (uint)(MarkStackSize / K), (uint)(MarkStackSizeMax / K));

  log_debug(gc)("Initialize mark stack with %zu chunks, maximum %zu",
                initial_num_chunks, max_capacity);

  return _chunk_allocator.initialize(initial_num_chunks, max_num_chunks);
}
154
// Hand out the next chunk from the backing memory, growing the bucket
// array on demand when growing is allowed. Returns null when the stack is
// at maximum capacity, growing is disabled, or expansion fails.
G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::ChunkAllocator::allocate_new_chunk() {
  // Cheap racy pre-check; the fetch_then_add below is the authoritative claim.
  if (_size.load_relaxed() >= _max_capacity) {
    return nullptr;
  }

  size_t cur_idx = _size.fetch_then_add(1u);

  // Another thread may have claimed the last chunk between the pre-check
  // and our increment.
  if (cur_idx >= _max_capacity) {
    return nullptr;
  }

  size_t bucket = get_bucket(cur_idx);
  if (_buckets[bucket].load_acquire() == nullptr) {
    if (!_should_grow) {
      // Prefer to restart the CM.
      return nullptr;
    }

    // Double-checked locking: re-test under the lock so that only one
    // thread performs the expansion for this bucket.
    MutexLocker x(G1MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
    if (_buckets[bucket].load_acquire() == nullptr) {
      size_t desired_capacity = bucket_size(bucket) * 2;
      if (!try_expand_to(desired_capacity)) {
        return nullptr;
      }
    }
  }

  size_t bucket_idx = get_bucket_index(cur_idx);
  // Placement-new the chunk into its slot within the bucket's backing array.
  TaskQueueEntryChunk* result = ::new (&_buckets[bucket].load_relaxed()[bucket_idx]) TaskQueueEntryChunk;
  result->next = nullptr;
  return result;
}
187
// All-zero construction; real setup happens in initialize().
G1CMMarkStack::ChunkAllocator::ChunkAllocator() :
  _min_capacity(0),
  _max_capacity(0),
  _capacity(0),
  _num_buckets(0),
  _should_grow(false),
  _buckets(nullptr),
  _size(0)
{ }
197
198 bool G1CMMarkStack::ChunkAllocator::initialize(size_t initial_capacity, size_t max_capacity) {
199 guarantee(is_power_of_2(initial_capacity), "Invalid initial_capacity");
200
201 _min_capacity = initial_capacity;
202 _max_capacity = max_capacity;
203 _num_buckets = get_bucket(_max_capacity) + 1;
204
205 _buckets = NEW_C_HEAP_ARRAY(Atomic<TaskQueueEntryChunk*>, _num_buckets, mtGC);
206
207 for (size_t i = 0; i < _num_buckets; i++) {
208 _buckets[i].store_relaxed(nullptr);
209 }
210
211 size_t new_capacity = bucket_size(0);
212
213 if (!reserve(new_capacity)) {
214 log_warning(gc)("Failed to reserve memory for new overflow mark stack with %zu chunks and size %zuB.", new_capacity, new_capacity * sizeof(TaskQueueEntryChunk));
215 return false;
216 }
217 return true;
218 }
219
220 bool G1CMMarkStack::ChunkAllocator::try_expand_to(size_t desired_capacity) {
221 if (_capacity == _max_capacity) {
222 log_debug(gc)("Can not expand overflow mark stack further, already at maximum capacity of %zu chunks.", _capacity);
223 return false;
224 }
225
226 size_t old_capacity = _capacity;
227 desired_capacity = MIN2(desired_capacity, _max_capacity);
228
229 if (reserve(desired_capacity)) {
230 log_debug(gc)("Expanded the mark stack capacity from %zu to %zu chunks",
231 old_capacity, desired_capacity);
232 return true;
233 }
234 return false;
235 }
236
237 bool G1CMMarkStack::ChunkAllocator::try_expand() {
238 size_t new_capacity = _capacity * 2;
239 return try_expand_to(new_capacity);
240 }
241
242 G1CMMarkStack::ChunkAllocator::~ChunkAllocator() {
243 if (_buckets == nullptr) {
244 return;
245 }
246
247 for (size_t i = 0; i < _num_buckets; i++) {
248 if (_buckets[i].load_relaxed() != nullptr) {
249 MmapArrayAllocator<TaskQueueEntryChunk>::free(_buckets[i].load_relaxed(), bucket_size(i));
250 _buckets[i].store_relaxed(nullptr);
251 }
252 }
253
254 FREE_C_HEAP_ARRAY(TaskQueueEntryChunk*, _buckets);
255 }
256
// Reserve backing memory so the allocator can hold new_capacity chunks,
// allocating every not-yet-present bucket up to the one covering the last
// chunk index. Returns false if any bucket allocation fails (the buckets
// allocated so far are kept).
bool G1CMMarkStack::ChunkAllocator::reserve(size_t new_capacity) {
  assert(new_capacity <= _max_capacity, "Cannot expand overflow mark stack beyond the max_capacity of %zu chunks.", _max_capacity);

  size_t highest_bucket = get_bucket(new_capacity - 1);
  size_t i = get_bucket(_capacity);

  // Allocate all buckets associated with indexes between the current capacity (_capacity)
  // and the new capacity (new_capacity). This step ensures that there are no gaps in the
  // array and that the capacity accurately reflects the reserved memory.
  for (; i <= highest_bucket; i++) {
    if (_buckets[i].load_acquire() != nullptr) {
      continue; // Skip over already allocated buckets.
    }

    size_t bucket_capacity = bucket_size(i);

    // Trim bucket size so that we do not exceed the _max_capacity.
    bucket_capacity = (_capacity + bucket_capacity) <= _max_capacity ?
                      bucket_capacity :
                      _max_capacity - _capacity;


    TaskQueueEntryChunk* bucket_base = MmapArrayAllocator<TaskQueueEntryChunk>::allocate_or_null(bucket_capacity, mtGC);

    if (bucket_base == nullptr) {
      log_warning(gc)("Failed to reserve memory for increasing the overflow mark stack capacity with %zu chunks and size %zuB.",
                      bucket_capacity, bucket_capacity * sizeof(TaskQueueEntryChunk));
      return false;
    }
    _capacity += bucket_capacity;
    // Release-publish the bucket so racing readers (load_acquire) observe
    // fully allocated backing memory.
    _buckets[i].release_store(bucket_base);
  }
  return true;
}
291
// Best-effort capacity doubling; expansion failure is tolerated (the
// allocator logs the condition).
void G1CMMarkStack::expand() {
  _chunk_allocator.try_expand();
}
295
// Push elem onto the given singly-linked list. The callers in this file
// (add_chunk_to_chunk_list/add_chunk_to_free_list) hold the corresponding
// list lock, hence the relaxed accesses.
void G1CMMarkStack::add_chunk_to_list(Atomic<TaskQueueEntryChunk*>* list, TaskQueueEntryChunk* elem) {
  elem->next = list->load_relaxed();
  list->store_relaxed(elem);
}
300
// Add a filled chunk to the list of chunks holding live mark stack data,
// keeping the chunk count in sync.
void G1CMMarkStack::add_chunk_to_chunk_list(TaskQueueEntryChunk* elem) {
  MutexLocker x(G1MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
  add_chunk_to_list(&_chunk_list, elem);
  _chunks_in_chunk_list++;
}
306
// Return an empty chunk to the free list for later reuse.
void G1CMMarkStack::add_chunk_to_free_list(TaskQueueEntryChunk* elem) {
  MutexLocker x(G1MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
  add_chunk_to_list(&_free_list, elem);
}
311
312 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_list(Atomic<TaskQueueEntryChunk*>* list) {
313 TaskQueueEntryChunk* result = list->load_relaxed();
314 if (result != nullptr) {
315 list->store_relaxed(list->load_relaxed()->next);
316 }
317 return result;
318 }
319
// Pop the most recently added data chunk, keeping the chunk count in sync.
// Returns null if no data chunks remain.
G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_chunk_list() {
  MutexLocker x(G1MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
  TaskQueueEntryChunk* result = remove_chunk_from_list(&_chunk_list);
  if (result != nullptr) {
    _chunks_in_chunk_list--;
  }
  return result;
}
328
// Take a reusable chunk off the free list, or null if none is available.
G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_free_list() {
  MutexLocker x(G1MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
  return remove_chunk_from_list(&_free_list);
}
333
334 bool G1CMMarkStack::par_push_chunk(G1TaskQueueEntry* ptr_arr) {
335 // Get a new chunk.
336 TaskQueueEntryChunk* new_chunk = remove_chunk_from_free_list();
337
338 if (new_chunk == nullptr) {
339 // Did not get a chunk from the free list. Allocate from backing memory.
340 new_chunk = _chunk_allocator.allocate_new_chunk();
341
342 if (new_chunk == nullptr) {
343 return false;
344 }
345 }
346
347 Copy::conjoint_memory_atomic(ptr_arr, new_chunk->data, EntriesPerChunk * sizeof(G1TaskQueueEntry));
348
349 add_chunk_to_chunk_list(new_chunk);
350
351 return true;
352 }
353
354 bool G1CMMarkStack::par_pop_chunk(G1TaskQueueEntry* ptr_arr) {
355 TaskQueueEntryChunk* cur = remove_chunk_from_chunk_list();
356
357 if (cur == nullptr) {
358 return false;
359 }
360
361 Copy::conjoint_memory_atomic(cur->data, ptr_arr, EntriesPerChunk * sizeof(G1TaskQueueEntry));
362
363 add_chunk_to_free_list(cur);
364 return true;
365 }
366
// Drop all chunks and reset the allocator's fill level. Uses relaxed
// stores, so callers must ensure no concurrent pushes/pops are in flight.
void G1CMMarkStack::set_empty() {
  _chunks_in_chunk_list = 0;
  _chunk_list.store_relaxed(nullptr);
  _free_list.store_relaxed(nullptr);
  _chunk_allocator.reset();
}
373
// Preallocate space for up to max_regions root MemRegions; entries are
// recorded via add() and handed out for scanning via claim_next().
G1CMRootMemRegions::G1CMRootMemRegions(uint const max_regions) :
  _root_regions(MemRegion::create_array(max_regions, mtGC)),
  _max_regions(max_regions),
  _num_root_regions(0),
  _claimed_root_regions(0),
  _scan_in_progress(false),
  _should_abort(false) { }
381
// Release the backing MemRegion array.
G1CMRootMemRegions::~G1CMRootMemRegions() {
  MemRegion::destroy_array(_root_regions, _max_regions);
}
385
// Forget all recorded root regions.
void G1CMRootMemRegions::reset() {
  _num_root_regions.store_relaxed(0);
}
389
// Record [start, end) as a root region to scan. Only called at a
// safepoint; the slot is claimed with an atomic bump so multiple threads
// may add concurrently.
void G1CMRootMemRegions::add(HeapWord* start, HeapWord* end) {
  assert_at_safepoint();
  size_t idx = _num_root_regions.fetch_then_add(1u);
  assert(idx < _max_regions, "Trying to add more root MemRegions than there is space %zu", _max_regions);
  assert(start != nullptr && end != nullptr && start <= end, "Start (" PTR_FORMAT ") should be less or equal to "
         "end (" PTR_FORMAT ")", p2i(start), p2i(end));
  _root_regions[idx].set_start(start);
  _root_regions[idx].set_end(end);
}
399
// Arm the claiming machinery for a new scan round. The scan only counts
// as "in progress" when there is at least one region to scan.
void G1CMRootMemRegions::prepare_for_scan() {
  assert(!scan_in_progress(), "pre-condition");

  _scan_in_progress.store_relaxed(num_root_regions() > 0);

  _claimed_root_regions.store_relaxed(0);
  _should_abort.store_relaxed(false);
}
408
// Claim the next unclaimed root region, or return null when everything is
// claimed or the scan is being aborted. Safe to call concurrently from
// multiple worker threads.
const MemRegion* G1CMRootMemRegions::claim_next() {
  if (_should_abort.load_relaxed()) {
    // If someone has set the should_abort flag, we return null to
    // force the caller to bail out of their loop.
    return nullptr;
  }

  uint local_num_root_regions = num_root_regions();
  if (_claimed_root_regions.load_relaxed() >= local_num_root_regions) {
    return nullptr;
  }

  // The atomic bump may race past the end; only indices inside the
  // recorded range constitute a successful claim.
  size_t claimed_index = _claimed_root_regions.fetch_then_add(1u);
  if (claimed_index < local_num_root_regions) {
    return &_root_regions[claimed_index];
  }
  return nullptr;
}
427
// Number of root regions recorded so far.
uint G1CMRootMemRegions::num_root_regions() const {
  return (uint)_num_root_regions.load_relaxed();
}
431
432 bool G1CMRootMemRegions::contains(const MemRegion mr) const {
433 uint local_num_root_regions = num_root_regions();
434 for (uint i = 0; i < local_num_root_regions; i++) {
435 if (_root_regions[i].equals(mr)) {
436 return true;
437 }
438 }
439 return false;
440 }
441
// Mark the scan as finished and wake any threads blocked in
// wait_until_scan_finished().
void G1CMRootMemRegions::notify_scan_done() {
  MutexLocker x(G1RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
  _scan_in_progress.store_relaxed(false);
  G1RootRegionScan_lock->notify_all();
}
447
// Abort an in-progress scan; waiters are released as if it had completed.
void G1CMRootMemRegions::cancel_scan() {
  notify_scan_done();
}
451
// Called when root region scanning completed. Unless aborting, all
// regions must have been claimed by now.
void G1CMRootMemRegions::scan_finished() {
  assert(scan_in_progress(), "pre-condition");

  if (!_should_abort.load_relaxed()) {
    assert(_claimed_root_regions.load_relaxed() >= num_root_regions(),
           "we should have claimed all root regions, claimed %zu, length = %u",
           _claimed_root_regions.load_relaxed(), num_root_regions());
  }

  notify_scan_done();
}
463
// Block until root region scanning is done. Returns true if this call had
// to wait, false if no scan was in progress.
bool G1CMRootMemRegions::wait_until_scan_finished() {
  if (!scan_in_progress()) {
    return false;
  }

  {
    // Standard monitor wait loop; notify_scan_done() signals completion.
    MonitorLocker ml(G1RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    while (scan_in_progress()) {
      ml.wait();
    }
  }
  return true;
}
477
// Construct the global concurrent marking state. Heavy-weight pieces
// (marking thread, worker gang, per-task structures) are created later in
// fully_initialize(); here we size data structures off the maximum region
// count and hook the mark bitmap up to its backing storage.
G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h,
                                   G1RegionToSpaceMapper* bitmap_storage) :
  _cm_thread(nullptr),
  _g1h(g1h),

  _mark_bitmap(),

  _heap(_g1h->reserved()),

  _root_regions(_g1h->max_num_regions()),

  _global_mark_stack(),

  _finger(nullptr), // _finger set in set_non_marking_state

  _worker_id_offset(G1ConcRefinementThreads), // The refinement control thread does not refine cards, so it's just the worker threads.
  _max_num_tasks(MAX2(ConcGCThreads, ParallelGCThreads)),
  _num_active_tasks(0), // _num_active_tasks set in set_non_marking_state()
  _tasks(nullptr), // _tasks set inside late_init()
  _task_queues(new G1CMTaskQueueSet(_max_num_tasks)),
  _terminator(_max_num_tasks, _task_queues),
  _partial_array_state_manager(new PartialArrayStateManager(_max_num_tasks)),

  _first_overflow_barrier_sync(),
  _second_overflow_barrier_sync(),

  _completed_mark_cycles(0),
  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _gc_timer_cm(new ConcurrentGCTimer()),
  _gc_tracer_cm(new G1OldTracer()),

  // _verbose_level set below

  _remark_times(),
  _remark_mark_times(),
  _remark_weak_ref_times(),
  _cleanup_times(),

  _concurrent_workers(nullptr),
  _num_concurrent_workers(0),
  _max_concurrent_workers(0),

  // Raw arrays; the elements are value-initialized in fully_initialize().
  _region_mark_stats(NEW_C_HEAP_ARRAY(G1RegionMarkStats, _g1h->max_num_regions(), mtGC)),
  _top_at_mark_starts(NEW_C_HEAP_ARRAY(Atomic<HeapWord*>, _g1h->max_num_regions(), mtGC)),
  _top_at_rebuild_starts(NEW_C_HEAP_ARRAY(Atomic<HeapWord*>, _g1h->max_num_regions(), mtGC)),
  _needs_remembered_set_rebuild(false)
{
  assert(G1CGC_lock != nullptr, "CGC_lock must be initialized");

  _mark_bitmap.initialize(g1h->reserved(), bitmap_storage);
}
532
// Complete the construction started in the constructor: create and start
// the marking thread, the concurrent worker gang, the global mark stack,
// the per-worker tasks and queues, and value-initialize the per-region
// statistics arrays. Does nothing if already fully initialized.
void G1ConcurrentMark::fully_initialize() {
  if (is_fully_initialized()) {
    return;
  }

  // Create & start ConcurrentMark thread.
  _cm_thread = new G1ConcurrentMarkThread(this);
  if (_cm_thread->osthread() == nullptr) {
    vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
  }

  log_debug(gc)("ConcGCThreads: %u offset %u", ConcGCThreads, _worker_id_offset);
  log_debug(gc)("ParallelGCThreads: %u", ParallelGCThreads);

  _max_concurrent_workers = ConcGCThreads;

  _concurrent_workers = new WorkerThreads("G1 Conc", _max_concurrent_workers);
  _concurrent_workers->initialize_workers();
  _num_concurrent_workers = _concurrent_workers->active_workers();

  if (!_global_mark_stack.initialize()) {
    vm_exit_during_initialization("Failed to allocate initial concurrent mark overflow mark stack.");
  }

  _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_num_tasks, mtGC);

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _num_active_tasks = _max_num_tasks;

  for (uint i = 0; i < _max_num_tasks; ++i) {
    G1CMTaskQueue* task_queue = new G1CMTaskQueue();
    _task_queues->register_queue(i, task_queue);

    _tasks[i] = new G1CMTask(i, this, task_queue, _region_mark_stats);
  }

  // Value-initialize (zero/null) the raw arrays allocated in the constructor.
  uint max_num_regions = _g1h->max_num_regions();
  ::new (_region_mark_stats) G1RegionMarkStats[max_num_regions]{};
  ::new (_top_at_mark_starts) Atomic<HeapWord*>[max_num_regions]{};
  ::new (_top_at_rebuild_starts) Atomic<HeapWord*>[max_num_regions]{};

  reset_at_marking_complete();
}
576
577 bool G1ConcurrentMark::in_progress() const {
578 return is_fully_initialized() ? _cm_thread->in_progress() : false;
579 }
580
// Accessor for the shared partial-array state manager used by the tasks.
PartialArrayStateManager* G1ConcurrentMark::partial_array_state_manager() const {
  return _partial_array_state_manager;
}
584
// The concurrent mark thread; only valid after fully_initialize().
G1ConcurrentMarkThread* G1ConcurrentMark::cm_thread() const {
  assert(is_fully_initialized(), "must be");
  return _cm_thread;
}
589
// Reset the global marking state in preparation for a new marking cycle.
void G1ConcurrentMark::reset() {
  _has_aborted.store_relaxed(false);

  reset_marking_for_restart();

  // Reset all tasks, since different phases will use different number of active
  // threads. So, it's easiest to have all of them ready.
  for (uint i = 0; i < _max_num_tasks; ++i) {
    _tasks[i]->reset(mark_bitmap());
  }

  // Re-initialize the top-at-rebuild-start entries and clear the
  // per-region mark statistics.
  uint max_num_regions = _g1h->max_num_regions();
  ::new (_top_at_rebuild_starts) Atomic<HeapWord*>[max_num_regions]{};
  for (uint i = 0; i < max_num_regions; i++) {
    _region_mark_stats[i].clear();
  }

  _root_regions.reset();
}
609
// Clear all marking statistics associated with the given region: the
// per-task caches, the top-at-rebuild-start entry, and the global
// per-region statistics.
void G1ConcurrentMark::clear_statistics(G1HeapRegion* r) {
  uint region_idx = r->hrm_index();
  for (uint j = 0; j < _max_num_tasks; ++j) {
    _tasks[j]->clear_mark_stats_cache(region_idx);
  }
  _top_at_rebuild_starts[region_idx].store_relaxed(nullptr);
  _region_mark_stats[region_idx].clear();
}
618
// A humongous object has been eagerly reclaimed: clear its mark bit and,
// if marking or rebuild is running, drop the statistics gathered for all
// regions the object spanned.
void G1ConcurrentMark::humongous_object_eagerly_reclaimed(G1HeapRegion* r) {
  assert_at_safepoint();
  assert(r->is_starts_humongous(), "Got humongous continues region here");

  // Need to clear mark bit of the humongous object. Doing this unconditionally is fine.
  mark_bitmap()->clear(r->bottom());

  // Statistics are only maintained while a mark or rebuild is in progress.
  if (!_g1h->collector_state()->mark_or_rebuild_in_progress()) {
    return;
  }

  // Clear any statistics about the region gathered so far.
  _g1h->humongous_obj_regions_iterate(r,
                                      [&] (G1HeapRegion* r) {
                                        clear_statistics(r);
                                      });
}
636
// Prepare the marking state for (re-)starting marking work, either for a
// fresh cycle or after a mark stack overflow forced a restart.
void G1ConcurrentMark::reset_marking_for_restart() {
  _global_mark_stack.set_empty();

  // Expand the marking stack, if we have to and if we can.
  if (has_overflown()) {
    _global_mark_stack.expand();

    // Per-region statistics accumulated so far are unreliable after an
    // overflow; discard them.
    uint max_num_regions = _g1h->max_num_regions();
    for (uint i = 0; i < max_num_regions; i++) {
      _region_mark_stats[i].clear_during_overflow();
    }
  }

  clear_has_overflown();
  // Restart the global finger at the bottom of the heap.
  _finger.store_relaxed(_heap.start());

  for (uint i = 0; i < _max_num_tasks; ++i) {
    _tasks[i]->reset_for_restart();
  }
}
657
// Record the number of tasks active in the upcoming phase and size the
// terminator and the two overflow barriers accordingly.
void G1ConcurrentMark::set_concurrency(uint active_tasks) {
  assert(active_tasks <= _max_num_tasks, "we should not have more");

  _num_active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator.reset_for_reuse(active_tasks);
  _first_overflow_barrier_sync.set_n_workers(active_tasks);
  _second_overflow_barrier_sync.set_n_workers(active_tasks);
}
668
// Set the active task count and whether the upcoming phase runs
// concurrently. STW phases additionally verify that marking has consumed
// all regions.
void G1ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
  set_concurrency(active_tasks);

  _concurrent.store_relaxed(concurrent);

  if (!concurrent) {
    // At this point we should be in a STW phase, and completed marking.
    assert_at_safepoint_on_vm_thread();
    assert(out_of_regions(),
           "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
           p2i(finger()), p2i(_heap.end()));
  }
}
682
683 #if TASKQUEUE_STATS
// Log and then reset the task queue and partial-array statistics
// (compiled only when TASKQUEUE_STATS is enabled).
void G1ConcurrentMark::print_and_reset_taskqueue_stats() {

  _task_queues->print_and_reset_taskqueue_stats("G1ConcurrentMark Oop Queue");

  auto get_pa_stats = [&](uint i) {
    return _tasks[i]->partial_array_task_stats();
  };

  PartialArrayTaskStats::log_set(_max_num_tasks, get_pa_stats,
                                 "G1ConcurrentMark Partial Array Task Stats");

  for (uint i = 0; i < _max_num_tasks; ++i) {
    get_pa_stats(i)->reset();
  }
}
699 #endif
700
// Return the global marking state to its idle defaults once a marking
// cycle is complete (or was never started).
void G1ConcurrentMark::reset_at_marking_complete() {
  TASKQUEUE_STATS_ONLY(print_and_reset_taskqueue_stats());
  // We set the global marking state to some default values when we're
  // not doing marking.
  reset_marking_for_restart();
  _num_active_tasks = 0;
}
708
// Never expected to run: the G1ConcurrentMark instance lives for the
// lifetime of the VM.
G1ConcurrentMark::~G1ConcurrentMark() {
  FREE_C_HEAP_ARRAY(Atomic<HeapWord*>, _top_at_mark_starts);
  FREE_C_HEAP_ARRAY(Atomic<HeapWord*>, _top_at_rebuild_starts);
  FREE_C_HEAP_ARRAY(G1RegionMarkStats, _region_mark_stats);
  // The G1ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}
716
// Worker task that clears the concurrent marking bitmap in chunks of
// chunk_size() bytes per step, optionally yielding to safepoints between
// steps (suspendible mode).
class G1ClearBitMapTask : public WorkerTask {
public:
  static size_t chunk_size() { return M; }

private:
  // Heap region closure used for clearing the _mark_bitmap.
  class G1ClearBitmapHRClosure : public G1HeapRegionClosure {
    G1ConcurrentMark* _cm;
    G1CMBitMap* _bitmap;
    bool _suspendible; // If suspendible, do yield checks.

    bool suspendible() {
      return _suspendible;
    }

    bool is_clear_concurrent_undo() {
      return suspendible() && _cm->cm_thread()->in_undo_mark();
    }

    // Performs a yield check (suspendible mode only) and reports whether
    // marking has been aborted in the meantime.
    bool has_aborted() {
      if (suspendible()) {
        _cm->do_yield_check();
        return _cm->has_aborted();
      }
      return false;
    }

    // Upper bound of the bitmap range that actually needs clearing for
    // region r; r->end() in the general case.
    HeapWord* region_clear_limit(G1HeapRegion* r) {
      // During a Concurrent Undo Mark cycle, the per region top_at_mark_start and
      // live_words data are current wrt to the _mark_bitmap. We use this information
      // to only clear ranges of the bitmap that require clearing.
      if (is_clear_concurrent_undo()) {
        // No need to clear bitmaps for empty regions (which includes regions we
        // did not mark through).
        if (!_cm->contains_live_object(r->hrm_index())) {
          assert(_bitmap->get_next_marked_addr(r->bottom(), r->end()) == r->end(), "Should not have marked bits");
          return r->bottom();
        }
        assert(_bitmap->get_next_marked_addr(_cm->top_at_mark_start(r), r->end()) == r->end(), "Should not have marked bits above tams");
      }
      return r->end();
    }

  public:
    G1ClearBitmapHRClosure(G1ConcurrentMark* cm, bool suspendible) :
      G1HeapRegionClosure(),
      _cm(cm),
      _bitmap(cm->mark_bitmap()),
      _suspendible(suspendible)
    { }

    // Clear the region's bitmap range in chunk-sized steps, yielding and
    // checking for abort between steps in suspendible mode. Returning
    // true terminates the region iteration early.
    virtual bool do_heap_region(G1HeapRegion* r) {
      if (has_aborted()) {
        return true;
      }

      HeapWord* cur = r->bottom();
      HeapWord* const end = region_clear_limit(r);

      size_t const chunk_size_in_words = G1ClearBitMapTask::chunk_size() / HeapWordSize;

      while (cur < end) {

        MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
        _bitmap->clear_range(mr);

        cur += chunk_size_in_words;

        // Repeat the asserts from before the start of the closure. We will do them
        // as asserts here to minimize their overhead on the product. However, we
        // will have them as guarantees at the beginning / end of the bitmap
        // clearing to get some checking in the product.
        assert(!suspendible() || _cm->in_progress(), "invariant");
        assert(!suspendible() || !G1CollectedHeap::heap()->collector_state()->mark_or_rebuild_in_progress(), "invariant");

        // Abort iteration if necessary.
        if (has_aborted()) {
          return true;
        }
      }
      assert(cur >= end, "Must have completed iteration over the bitmap for region %u.", r->hrm_index());

      _cm->reset_top_at_mark_start(r);

      return false;
    }
  };

  G1ClearBitmapHRClosure _cl;
  G1HeapRegionClaimer _hr_claimer;
  bool _suspendible; // If the task is suspendible, workers must join the STS.

public:
  G1ClearBitMapTask(G1ConcurrentMark* cm, uint n_workers, bool suspendible) :
    WorkerTask("G1 Clear Bitmap"),
    _cl(cm, suspendible),
    _hr_claimer(n_workers),
    _suspendible(suspendible)
  { }

  void work(uint worker_id) {
    SuspendibleThreadSetJoiner sts_join(_suspendible);
    G1CollectedHeap::heap()->heap_region_par_iterate_from_worker_offset(&_cl, &_hr_claimer, worker_id);
  }
};
822
823 void G1ConcurrentMark::clear_bitmap(WorkerThreads* workers, bool may_yield) {
824 assert(may_yield || SafepointSynchronize::is_at_safepoint(), "Non-yielding bitmap clear only allowed at safepoint.");
825
826 size_t const num_bytes_to_clear = (G1HeapRegion::GrainBytes * _g1h->num_committed_regions()) / G1CMBitMap::heap_map_factor();
827 size_t const num_chunks = align_up(num_bytes_to_clear, G1ClearBitMapTask::chunk_size()) / G1ClearBitMapTask::chunk_size();
828
829 uint const num_workers = (uint)MIN2(num_chunks, (size_t)workers->active_workers());
830
831 G1ClearBitMapTask cl(this, num_workers, may_yield);
832
833 log_debug(gc, ergo)("Running %s with %u workers for %zu work units.", cl.name(), num_workers, num_chunks);
834 workers->run_task(&cl, num_workers);
835 }
836
837 void G1ConcurrentMark::cleanup_for_next_mark() {
838 // Make sure that the concurrent mark thread looks to still be in
839 // the current cycle.
840 guarantee(is_fully_initialized(), "should be initializd");
841 guarantee(in_progress(), "invariant");
842
843 // We are finishing up the current cycle by clearing the next
844 // marking bitmap and getting it ready for the next cycle. During
845 // this time no other cycle can start. So, let's make sure that this
846 // is the case.
847 guarantee(!_g1h->collector_state()->mark_or_rebuild_in_progress(), "invariant");
848
849 clear_bitmap(_concurrent_workers, true);
850
851 reset_partial_array_state_manager();
852
853 // Repeat the asserts from above.
854 guarantee(is_fully_initialized(), "should be initializd");
855 guarantee(in_progress(), "invariant");
856 guarantee(!_g1h->collector_state()->mark_or_rebuild_in_progress(), "invariant");
857 }
858
// Recycle the partial-array scanning state: every task must unregister
// its splitter before the manager is reset, then re-register afterwards.
void G1ConcurrentMark::reset_partial_array_state_manager() {
  for (uint i = 0; i < _max_num_tasks; ++i) {
    _tasks[i]->unregister_partial_array_splitter();
  }

  partial_array_state_manager()->reset();

  for (uint i = 0; i < _max_num_tasks; ++i) {
    _tasks[i]->register_partial_array_splitter();
  }
}
870
// STW variant of bitmap clearing, used on the VM thread (e.g. by full
// collections); temporarily activates all created workers for speed.
void G1ConcurrentMark::clear_bitmap(WorkerThreads* workers) {
  assert_at_safepoint_on_vm_thread();
  // To avoid fragmentation the full collection requesting to clear the bitmap
  // might use fewer workers than available. To ensure the bitmap is cleared
  // as efficiently as possible the number of active workers are temporarily
  // increased to include all currently created workers.
  WithActiveWorkers update(workers, workers->created_workers());
  clear_bitmap(workers, false);
}
880
// Batched STW task run before a concurrent start: resets the global
// marking state (serially) and notes the start of marking per region
// (in parallel).
class G1PreConcurrentStartTask : public G1BatchedTask {
  // Reset marking state.
  class ResetMarkingStateTask;
  // For each region note start of marking.
  class NoteStartOfMarkTask;

public:
  G1PreConcurrentStartTask(GCCause::Cause cause, G1ConcurrentMark* cm);
};
890
// Serial sub-task that resets the global concurrent marking state.
class G1PreConcurrentStartTask::ResetMarkingStateTask : public G1AbstractSubTask {
  G1ConcurrentMark* _cm;
public:
  ResetMarkingStateTask(G1ConcurrentMark* cm) : G1AbstractSubTask(G1GCPhaseTimes::ResetMarkingState), _cm(cm) { }

  // Serial task: always a single worker's worth of work.
  double worker_cost() const override { return 1.0; }
  void do_work(uint worker_id) override;
};
899
// Parallel sub-task that records, per region, the top-at-mark-start value
// used by the upcoming marking cycle.
class G1PreConcurrentStartTask::NoteStartOfMarkTask : public G1AbstractSubTask {

  class NoteStartOfMarkHRClosure : public G1HeapRegionClosure {
    G1ConcurrentMark* _cm;

  public:
    NoteStartOfMarkHRClosure() : G1HeapRegionClosure(), _cm(G1CollectedHeap::heap()->concurrent_mark()) { }

    bool do_heap_region(G1HeapRegion* r) override {
      // Only old/humongous regions outside the collection set (and not
      // candidates for it) keep a real top-at-mark-start; all others are reset.
      if (r->is_old_or_humongous() && !r->is_collection_set_candidate() && !r->in_collection_set()) {
        _cm->update_top_at_mark_start(r);
      } else {
        _cm->reset_top_at_mark_start(r);
      }
      return false;
    }
  } _region_cl;

  G1HeapRegionClaimer _claimer;
public:
  NoteStartOfMarkTask() : G1AbstractSubTask(G1GCPhaseTimes::NoteStartOfMark), _region_cl(), _claimer(0) { }

  double worker_cost() const override {
    // The work done per region is very small, therefore we choose this magic number to cap the number
    // of threads used when there are few regions.
    const double regions_per_thread = 1000;
    return _claimer.n_regions() / regions_per_thread;
  }

  void set_max_workers(uint max_workers) override {
    _claimer.set_n_workers(max_workers);
  }

  void do_work(uint worker_id) override {
    G1CollectedHeap::heap()->heap_region_par_iterate_from_worker_offset(&_region_cl, &_claimer, worker_id);
  }
};
937
// Serial reset of the global marking state before the cycle starts.
void G1PreConcurrentStartTask::ResetMarkingStateTask::do_work(uint worker_id) {
  // Reset marking state.
  _cm->reset();
}
942
943 G1PreConcurrentStartTask::G1PreConcurrentStartTask(GCCause::Cause cause, G1ConcurrentMark* cm) :
944 G1BatchedTask("Pre Concurrent Start", G1CollectedHeap::heap()->phase_times()) {
945 add_serial_task(new ResetMarkingStateTask(cm));
946 add_parallel_task(new NoteStartOfMarkTask());
947 };
948
// Safepoint work executed just before a concurrent mark cycle starts:
// begins the code cache marking cycle, checks that CLD claim marks are
// clear, and runs the G1PreConcurrentStartTask batch.
void G1ConcurrentMark::pre_concurrent_start(GCCause::Cause cause) {
  assert_at_safepoint_on_vm_thread();

  G1CollectedHeap::start_codecache_marking_cycle_if_inactive(true /* concurrent_mark_start */);

  ClassLoaderDataGraph::verify_claimed_marks_cleared(ClassLoaderData::_claim_strong);

  G1PreConcurrentStartTask cl(cause, this);
  G1CollectedHeap::heap()->run_batch_task(&cl);

  _gc_tracer_cm->set_gc_cause(cause);
}
961
// Starts a full concurrent marking cycle: enables concurrent reference
// discovery, activates all SATB queues, prepares root region scanning and
// finally signals the concurrent mark thread to begin.
void G1ConcurrentMark::start_full_concurrent_cycle() {
  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = _g1h->ref_processor_cm();
  rp->start_discovery(false /* always_clear */);

  SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
  // This is the start of the marking cycle; we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // concurrent start pause to update the heap end, if the heap expands
  // during it. No need to call it here.

  // Signal the thread to start work.
  cm_thread()->start_full_mark();
}
983
// Starts an undo cycle: cancels root region scanning and signals the
// concurrent mark thread to undo the effects of the concurrent start pause.
void G1ConcurrentMark::start_undo_concurrent_cycle() {
  root_regions()->cancel_scan();

  // Signal the thread to start work.
  cm_thread()->start_undo_mark();
}
990
// Called when the concurrent cycle has completed; returns the concurrent
// mark thread to its idle state.
void G1ConcurrentMark::notify_concurrent_cycle_completed() {
  cm_thread()->set_idle();
}
994
// Stops the concurrent mark thread; only if initialization fully completed,
// otherwise there is no running thread to stop.
void G1ConcurrentMark::stop() {
  if (is_fully_initialized()) {
    cm_thread()->stop();
  }
}
1000
1001 /*
1002 * Notice that in the next two methods, we actually leave the STS
1003 * during the barrier sync and join it immediately afterwards. If we
1004 * do not do this, the following deadlock can occur: one thread could
1005 * be in the barrier sync code, waiting for the other thread to also
1006 * sync up, whereas another one could be trying to yield, while also
1007 * waiting for the other threads to sync up too.
1008 *
1009 * Note, however, that this code is also used during remark and in
1010 * this case we should not attempt to leave / enter the STS, otherwise
1011 * we'll either hit an assert (debug / fastdebug) or deadlock
1012 * (product). So we should only leave / enter the STS if we are
1013 * operating concurrently.
1014 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible for it to be suspended while a Full GC or an evacuation
 * pause occurs. This is actually safe, since entering the sync
 * barrier is one of the last things do_marking_step() does, and it
 * doesn't manipulate any data structures afterwards.
1020 */
1021
// First rendezvous of all marking workers after a mark stack overflow,
// before the marking state is reset for restart. See the comment block
// above about STS handling around the barrier.
void G1ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
  bool barrier_aborted;
  {
    // Leave the STS only while blocked in the barrier; during remark
    // concurrent() is false and we stay joined.
    SuspendibleThreadSetLeaver sts_leave(concurrent());
    barrier_aborted = !_first_overflow_barrier_sync.enter();
  }

  // at this point everyone should have synced up and not be doing any
  // more work

  if (barrier_aborted) {
    // If the barrier aborted we ignore the overflow condition and
    // just abort the whole marking phase as quickly as possible.
    return;
  }
}
1038
// Second rendezvous of all marking workers, entered after the marking state
// has been reset for restart. See the comment block above about STS handling.
void G1ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
  SuspendibleThreadSetLeaver sts_leave(concurrent());
  _second_overflow_barrier_sync.enter();

  // at this point everything should be re-initialized and ready to go
}
1045
1046 class G1CMConcurrentMarkingTask : public WorkerTask {
1047 G1ConcurrentMark* _cm;
1048
1049 public:
1050 void work(uint worker_id) {
1051 ResourceMark rm;
1052
1053 SuspendibleThreadSetJoiner sts_join;
1054
1055 assert(worker_id < _cm->active_tasks(), "invariant");
1056
1057 G1CMTask* task = _cm->task(worker_id);
1058 task->record_start_time();
1059 if (!_cm->has_aborted()) {
1060 do {
1061 task->do_marking_step(G1ConcMarkStepDurationMillis,
1062 true /* do_termination */,
1063 false /* is_serial*/);
1064
1065 _cm->do_yield_check();
1066 } while (!_cm->has_aborted() && task->has_aborted());
1067 }
1068 task->record_end_time();
1069 guarantee(!task->has_aborted() || _cm->has_aborted(), "invariant");
1070 }
1071
1072 G1CMConcurrentMarkingTask(G1ConcurrentMark* cm) :
1073 WorkerTask("Concurrent Mark"), _cm(cm) { }
1074
1075 ~G1CMConcurrentMarkingTask() { }
1076 };
1077
1078 uint G1ConcurrentMark::calc_active_marking_workers() {
1079 uint result = 0;
1080 if (!UseDynamicNumberOfGCThreads || !FLAG_IS_DEFAULT(ConcGCThreads)) {
1081 result = _max_concurrent_workers;
1082 } else {
1083 result =
1084 WorkerPolicy::calc_default_active_workers(_max_concurrent_workers,
1085 1, /* Minimum workers */
1086 _num_concurrent_workers,
1087 Threads::number_of_non_daemon_threads());
1088 // Don't scale the result down by scale_concurrent_workers() because
1089 // that scaling has already gone into "_max_concurrent_workers".
1090 }
1091 assert(result > 0 && result <= _max_concurrent_workers,
1092 "Calculated number of marking workers must be larger than zero and at most the maximum %u, but is %u",
1093 _max_concurrent_workers, result);
1094 return result;
1095 }
1096
// Scans all objects in the given root MemRegion, which starts at the owning
// region's TAMS, applying the root region scan closure to each.
void G1ConcurrentMark::scan_root_region(const MemRegion* region, uint worker_id) {
#ifdef ASSERT
  HeapWord* last = region->last();
  G1HeapRegion* hr = _g1h->heap_region_containing(last);
  assert(hr->is_old() || top_at_mark_start(hr) == hr->bottom(),
         "Root regions must be old or survivor/eden but region %u is %s", hr->hrm_index(), hr->get_type_str());
  assert(top_at_mark_start(hr) == region->start(),
         "MemRegion start should be equal to TAMS");
#endif

  G1RootRegionScanClosure cl(_g1h, this, worker_id);

  const uintx interval = PrefetchScanIntervalInBytes;
  HeapWord* curr = region->start();
  const HeapWord* end = region->end();
  // Walk the objects linearly, prefetching ahead to hide memory latency.
  while (curr < end) {
    Prefetch::read(curr, interval);
    oop obj = cast_to_oop(curr);
    size_t size = obj->oop_iterate_size(&cl);
    assert(size == obj->size(), "sanity");
    curr += size;
  }
}
1120
1121 class G1CMRootRegionScanTask : public WorkerTask {
1122 G1ConcurrentMark* _cm;
1123 public:
1124 G1CMRootRegionScanTask(G1ConcurrentMark* cm) :
1125 WorkerTask("G1 Root Region Scan"), _cm(cm) { }
1126
1127 void work(uint worker_id) {
1128 G1CMRootMemRegions* root_regions = _cm->root_regions();
1129 const MemRegion* region = root_regions->claim_next();
1130 while (region != nullptr) {
1131 _cm->scan_root_region(region, worker_id);
1132 region = root_regions->claim_next();
1133 }
1134 }
1135 };
1136
// Concurrently scans all registered root regions, using at most one worker
// per root region. No-op if there is nothing to scan.
void G1ConcurrentMark::scan_root_regions() {
  // scan_in_progress() will have been set to true only if there was
  // at least one root region to scan. So, if it's false, we
  // should not attempt to do any further work.
  if (root_regions()->scan_in_progress()) {
    assert(!has_aborted(), "Aborting before root region scanning is finished not supported.");

    // Assign one worker to each root-region but subject to the max constraint.
    const uint num_workers = MIN2(root_regions()->num_root_regions(),
                                  _max_concurrent_workers);

    G1CMRootRegionScanTask task(this);
    log_debug(gc, ergo)("Running %s using %u workers for %u work units.",
                        task.name(), num_workers, root_regions()->num_root_regions());
    _concurrent_workers->run_task(&task, num_workers);

    // It's possible that has_aborted() is true here without actually
    // aborting the survivor scan earlier. This is OK as it's
    // mainly used for sanity checking.
    root_regions()->scan_finished();
  }
}
1159
// Blocks until root region scanning has finished; forwards to the root
// region set.
bool G1ConcurrentMark::wait_until_root_region_scan_finished() {
  return root_regions()->wait_until_scan_finished();
}
1163
// Registers the [TAMS, top) part of the given region as a root region to be
// scanned before marking proper starts.
void G1ConcurrentMark::add_root_region(G1HeapRegion* r) {
  root_regions()->add(top_at_mark_start(r), r->top());
}
1167
// Returns whether the [TAMS, top) part of the given region is currently
// registered as a root region.
bool G1ConcurrentMark::is_root_region(G1HeapRegion* r) {
  return root_regions()->contains(MemRegion(top_at_mark_start(r), r->top()));
}
1171
// Aborts root region scanning and waits for any in-flight scanning to drain.
void G1ConcurrentMark::root_region_scan_abort_and_wait() {
  root_regions()->abort();
  root_regions()->wait_until_scan_finished();
}
1176
// Reports the start of the concurrent mark cycle to timing and tracing.
void G1ConcurrentMark::concurrent_cycle_start() {
  _gc_timer_cm->register_gc_start();

  _gc_tracer_cm->report_gc_start(GCCause::_no_gc /* first parameter is not used */, _gc_timer_cm->gc_start());

  _g1h->trace_heap_before_gc(_gc_tracer_cm);
}
1184
// Number of completed mark cycles so far; read with relaxed ordering.
uint G1ConcurrentMark::completed_mark_cycles() const {
  return _completed_mark_cycles.load_relaxed();
}
1188
// Reports the end of the concurrent cycle to timing/tracing and updates
// cycle bookkeeping; mark_cycle_completed indicates whether marking itself
// completed (and the completed-cycles counter should advance).
void G1ConcurrentMark::concurrent_cycle_end(bool mark_cycle_completed) {
  _g1h->collector_state()->set_clear_bitmap_in_progress(false);

  _g1h->trace_heap_after_gc(_gc_tracer_cm);

  if (mark_cycle_completed) {
    _completed_mark_cycles.add_then_fetch(1u, memory_order_relaxed);
  }

  if (has_aborted()) {
    // Marking was aborted, e.g. by a full collection.
    log_info(gc, marking)("Concurrent Mark Abort");
    _gc_tracer_cm->report_concurrent_mode_failure();
  }

  _gc_timer_cm->register_gc_end();

  _gc_tracer_cm->report_gc_end(_gc_timer_cm->gc_end(), _gc_timer_cm->time_partitions());
}
1207
// Performs the concurrent marking phase proper, running the marking task on
// the calculated number of concurrent workers.
void G1ConcurrentMark::mark_from_roots() {
  _restart_for_overflow.store_relaxed(false);

  uint active_workers = calc_active_marking_workers();

  // Setting active workers is not guaranteed since fewer
  // worker threads may currently exist and more may not be
  // available.
  active_workers = _concurrent_workers->set_active_workers(active_workers);
  log_info(gc, task)("Concurrent Mark Using %u of %u Workers", active_workers, _concurrent_workers->max_workers());

  _num_concurrent_workers = active_workers;

  // Parallel task terminator is set in "set_concurrency_and_phase()"
  set_concurrency_and_phase(active_workers, true /* concurrent */);

  G1CMConcurrentMarkingTask marking_task(this);
  _concurrent_workers->run_task(&marking_task);
  print_stats();
}
1228
// Maps a VerifyLocation to a human-readable string. The array entries must
// be kept in sync with the declaration order of the VerifyLocation enum.
const char* G1ConcurrentMark::verify_location_string(VerifyLocation location) {
  static const char* location_strings[] = { "Remark Before",
                                            "Remark After",
                                            "Remark Overflow",
                                            "Cleanup Before",
                                            "Cleanup After" };
  return location_strings[static_cast<std::underlying_type_t<VerifyLocation>>(location)];
}
1237
// Runs optional heap verification during marking pauses, tagging the output
// with the given verification location.
void G1ConcurrentMark::verify_during_pause(G1HeapVerifier::G1VerifyType type,
                                           VerifyLocation location) {
  G1HeapVerifier* verifier = _g1h->verifier();

  verifier->verify_region_sets_optional();

  const char* caller = verify_location_string(location);

  if (VerifyDuringGC && G1HeapVerifier::should_verify(type)) {
    GCTraceTime(Debug, gc, phases) debug(caller, _gc_timer_cm);

    size_t const BufLen = 512;
    char buffer[BufLen];

    jio_snprintf(buffer, BufLen, "During GC (%s)", caller);
    verifier->verify(VerifyOption::G1UseConcMarking, buffer);

    // Only check bitmap in Remark, and not at After-Verification because the regions
    // already have their TAMS'es reset.
    if (location != VerifyLocation::RemarkAfter) {
      verifier->verify_bitmap_clear(true /* above_tams_only */);
    }
  }
}
1262
1263 class G1ObjectCountIsAliveClosure: public BoolObjectClosure {
1264 G1CollectedHeap* _g1h;
1265 public:
1266 G1ObjectCountIsAliveClosure(G1CollectedHeap* g1h) : _g1h(g1h) {}
1267
1268 bool do_object_b(oop obj) {
1269 return !_g1h->is_obj_dead(obj);
1270 }
1271 };
1272
// The Remark pause. If marking finished (no mark stack overflow): processes
// weak references, unloads classes/code, selects regions for remembered set
// rebuild and reclaims empty regions. On overflow it instead resets state so
// that concurrent marking restarts.
void G1ConcurrentMark::remark() {
  assert_at_safepoint_on_vm_thread();

  // If a full collection has happened, we should not continue. However we might
  // have ended up here as the Remark VM operation has been scheduled already.
  if (has_aborted()) {
    return;
  }

  G1Policy* policy = _g1h->policy();
  policy->record_pause_start_time();

  double start = os::elapsedTime();

  verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyLocation::RemarkBefore);

  {
    // Finish marking: drain remaining SATB buffers and marking stacks.
    GCTraceTime(Debug, gc, phases) debug("Finalize Marking", _gc_timer_cm);
    finalize_marking();
  }

  double mark_work_end = os::elapsedTime();

  bool const mark_finished = !has_overflown();
  if (mark_finished) {
    weak_refs_work();

    // Unload Klasses, String, Code Cache, etc.
    if (ClassUnloadingWithConcurrentMark) {
      G1CMIsAliveClosure is_alive(this);
      _g1h->unload_classes_and_code("Class Unloading", &is_alive, _gc_timer_cm);
    }

    SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle; we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    {
      GCTraceTime(Debug, gc, phases) debug("Flush Task Caches", _gc_timer_cm);
      flush_all_task_caches();
    }

    // All marking completed. Check bitmap now as we will start to reset TAMSes
    // in parallel below so that we can not do this in the After-Remark verification.
    _g1h->verifier()->verify_bitmap_clear(true /* above_tams_only */);

    {
      GCTraceTime(Debug, gc, phases) debug("Select For Rebuild and Reclaim Empty Regions", _gc_timer_cm);

      G1UpdateRegionLivenessAndSelectForRebuildTask cl(_g1h, this, _g1h->workers()->active_workers());
      uint const num_workers = MIN2(G1UpdateRegionLivenessAndSelectForRebuildTask::desired_num_workers(_g1h->num_committed_regions()),
                                    _g1h->workers()->active_workers());
      log_debug(gc,ergo)("Running %s using %u workers for %u regions in heap", cl.name(), num_workers, _g1h->num_committed_regions());
      _g1h->workers()->run_task(&cl, num_workers);

      log_debug(gc, remset, tracking)("Remembered Set Tracking update regions total %u, selected %u",
                                      _g1h->num_committed_regions(), cl.total_selected_for_rebuild());

      // Remember whether the concurrent rebuild phase has any work to do.
      _needs_remembered_set_rebuild = (cl.total_selected_for_rebuild() > 0);

      if (_needs_remembered_set_rebuild) {
        // Prune rebuild candidates based on G1HeapWastePercent.
        // Improves rebuild time in addition to remembered set memory usage.
        G1CollectionSetChooser::build(_g1h->workers(), _g1h->num_committed_regions(), _g1h->policy()->candidates());
      }
    }

    if (log_is_enabled(Trace, gc, liveness)) {
      G1PrintRegionLivenessInfoClosure cl("Post-Marking");
      _g1h->heap_region_iterate(&cl);
    }

    // Potentially, some empty-regions have been reclaimed; make this a
    // "collection" so that pending allocation can retry before attempting a
    // GC pause.
    _g1h->increment_total_collections();

    // For Remark Pauses that may have been triggered by PeriodicGCs, we maintain
    // resizing based on MinHeapFreeRatio or MaxHeapFreeRatio. If a PeriodicGC is
    // triggered, it likely means there are very few regular GCs, making resizing
    // based on gc heuristics less effective.
    if (_g1h->last_gc_was_periodic()) {
      _g1h->resize_heap_after_full_collection(0 /* allocation_word_size */);
    }

    compute_new_sizes();

    verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyLocation::RemarkAfter);

    assert(!restart_for_overflow(), "sanity");
    // Completely reset the marking state (except bitmaps) since marking completed.
    reset_at_marking_complete();

    G1CollectedHeap::finish_codecache_marking_cycle();

    {
      GCTraceTime(Debug, gc, phases) debug("Report Object Count", _gc_timer_cm);
      G1ObjectCountIsAliveClosure is_alive(_g1h);
      _gc_tracer_cm->report_object_count_after_gc(&is_alive, _g1h->workers());
    }
  } else {
    // We overflowed. Restart concurrent marking.
    _restart_for_overflow.store_relaxed(true);

    verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyLocation::RemarkOverflow);

    // Clear the marking state because we will be restarting
    // marking due to overflowing the global mark stack.
    reset_marking_for_restart();
  }

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  _g1h->update_perf_counter_cpu_time();

  policy->record_concurrent_mark_remark_end();
}
1397
// Recomputes metaspace and heap monitoring sizes after regions may have
// been reclaimed during the Remark pause.
void G1ConcurrentMark::compute_new_sizes() {
  MetaspaceGC::compute_new_size();

  // Cleanup will have freed any regions completely full of garbage.
  // Update the soft reference policy with the new heap occupancy.
  Universe::heap()->update_capacity_and_used_at_gc();

  // We reclaimed old regions so we should calculate the sizes to make
  // sure we update the old gen/space data.
  _g1h->monitoring_support()->update_sizes();
}
1409
// Region closure transitioning remembered set tracking state after the
// remembered set rebuild phase has finished.
class G1UpdateRegionsAfterRebuild : public G1HeapRegionClosure {
  G1CollectedHeap* _g1h;

public:
  G1UpdateRegionsAfterRebuild(G1CollectedHeap* g1h) : _g1h(g1h) { }

  bool do_heap_region(G1HeapRegion* r) override {
    // Update the remset tracking state from updating to complete
    // if remembered sets have been rebuilt.
    _g1h->policy()->remset_tracker()->update_after_rebuild(r);
    return false; // Never abort the iteration.
  }
};
1423
// The Cleanup pause: finalizes remembered set tracking after the rebuild
// phase (if one was needed) and performs verification plus policy
// bookkeeping at the end of the marking cycle.
void G1ConcurrentMark::cleanup() {
  assert_at_safepoint_on_vm_thread();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    return;
  }

  G1Policy* policy = _g1h->policy();
  policy->record_pause_start_time();

  double start = os::elapsedTime();

  verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyLocation::CleanupBefore);

  if (needs_remembered_set_rebuild()) {
    // Update the remset tracking information as well as marking all regions
    // as fully parsable.
    GCTraceTime(Debug, gc, phases) debug("Update Remembered Set Tracking After Rebuild", _gc_timer_cm);
    G1UpdateRegionsAfterRebuild cl(_g1h);
    _g1h->heap_region_iterate(&cl);
  } else {
    log_debug(gc, phases)("No Remembered Sets to update after rebuild");
  }

  verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyLocation::CleanupAfter);

  // Local statistics
  _cleanup_times.add((os::elapsedTime() - start) * 1000.0);

  {
    GCTraceTime(Debug, gc, phases) debug("Finalize Concurrent Mark Cleanup", _gc_timer_cm);
    policy->record_concurrent_mark_cleanup_end(needs_remembered_set_rebuild());
  }
}
1459
// 'Keep Alive' oop closure used by both serial and parallel reference processing.
1461 // Uses the G1CMTask associated with a worker thread (for serial reference
1462 // processing the G1CMTask for worker 0 is used) to preserve (mark) and
1463 // trace referent objects.
1464 //
1465 // Using the G1CMTask and embedded local queues avoids having the worker
1466 // threads operating on the global mark stack. This reduces the risk
1467 // of overflowing the stack - which we would rather avoid at this late
1468 // state. Also using the tasks' local queues removes the potential
1469 // of the workers interfering with each other that could occur if
1470 // operating on the global stack.
1471
// Keeps referents alive by marking them through the worker's G1CMTask,
// periodically draining the task's local structures. See the comment block
// above for the rationale.
class G1CMKeepAliveAndDrainClosure : public OopClosure {
  G1ConcurrentMark* _cm;
  G1CMTask* _task;
  uint _ref_counter_limit; // Drain interval, from G1RefProcDrainInterval.
  uint _ref_counter;       // Countdown until the next drain.
  bool _is_serial;         // True when used by single-threaded reference processing.
public:
  G1CMKeepAliveAndDrainClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
    _cm(cm), _task(task), _ref_counter_limit(G1RefProcDrainInterval),
    _ref_counter(_ref_counter_limit), _is_serial(is_serial) {
    assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
  }

  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop( oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    // On overflow the whole marking will be redone; no point in more work.
    if (_cm->has_overflown()) {
      return;
    }
    if (!_task->deal_with_reference(p)) {
      // We did not add anything to the mark bitmap (or mark stack), so there is
      // no point trying to drain it.
      return;
    }
    _ref_counter--;

    if (_ref_counter == 0) {
      // We have dealt with _ref_counter_limit references, pushing them
      // and objects reachable from them on to the local stack (and
      // possibly the global stack). Call G1CMTask::do_marking_step() to
      // process these entries.
      //
      // We call G1CMTask::do_marking_step() in a loop, which we'll exit if
      // there's nothing more to do (i.e. we're done with the entries that
      // were pushed as a result of the G1CMTask::deal_with_reference() calls
      // above) or we overflow.
      //
      // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
      // flag while there may still be some work to do. (See the comment at
      // the beginning of G1CMTask::do_marking_step() for those conditions -
      // one of which is reaching the specified time target.) It is only
      // when G1CMTask::do_marking_step() returns without setting the
      // has_aborted() flag that the marking step has completed.
      do {
        double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
        _task->do_marking_step(mark_step_duration_ms,
                               false /* do_termination */,
                               _is_serial);
      } while (_task->has_aborted() && !_cm->has_overflown());
      _ref_counter = _ref_counter_limit;
    }
  }
};
1526
1527 // 'Drain' oop closure used by both serial and parallel reference processing.
1528 // Uses the G1CMTask associated with a given worker thread (for serial
// reference processing the G1CMTask for worker 0 is used). Calls the
1530 // do_marking_step routine, with an unbelievably large timeout value,
1531 // to drain the marking data structures of the remaining entries
1532 // added by the 'keep alive' oop closure above.
1533
// Completely drains the worker's marking data structures; see the comment
// block above.
class G1CMDrainMarkingStackClosure : public VoidClosure {
  G1ConcurrentMark* _cm;
  G1CMTask* _task;
  bool _is_serial; // True when used by single-threaded reference processing.
public:
  G1CMDrainMarkingStackClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
    _cm(cm), _task(task), _is_serial(is_serial) {
    assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
  }

  void do_void() {
    do {
      // We call G1CMTask::do_marking_step() to completely drain the local
      // and global marking stacks of entries pushed by the 'keep alive'
      // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
      //
      // G1CMTask::do_marking_step() is called in a loop, which we'll exit
      // if there's nothing more to do (i.e. we've completely drained the
      // entries that were pushed as a result of applying the 'keep alive'
      // closure to the entries on the discovered ref lists) or we overflow
      // the global marking stack.
      //
      // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
      // flag while there may still be some work to do. (See the comment at
      // the beginning of G1CMTask::do_marking_step() for those conditions -
      // one of which is reaching the specified time target.) It is only
      // when G1CMTask::do_marking_step() returns without setting the
      // has_aborted() flag that the marking step has completed.

      _task->do_marking_step(1000000000.0 /* something very large */,
                             true /* do_termination */,
                             _is_serial);
    } while (_task->has_aborted() && !_cm->has_overflown());
  }
};
1569
// Proxy task adapting the G1 concurrent mark closures to the shared
// reference processing framework, for both single- and multi-threaded
// execution models.
class G1CMRefProcProxyTask : public RefProcProxyTask {
  G1CollectedHeap& _g1h;
  G1ConcurrentMark& _cm;

public:
  G1CMRefProcProxyTask(uint max_workers, G1CollectedHeap& g1h, G1ConcurrentMark &cm)
    : RefProcProxyTask("G1CMRefProcProxyTask", max_workers),
      _g1h(g1h),
      _cm(cm) {}

  void work(uint worker_id) override {
    assert(worker_id < _max_workers, "sanity");
    G1CMIsAliveClosure is_alive(&_cm);
    // In the single-threaded model everything runs on the task for worker 0.
    uint index = (_tm == RefProcThreadModel::Single) ? 0 : worker_id;
    G1CMKeepAliveAndDrainClosure keep_alive(&_cm, _cm.task(index), _tm == RefProcThreadModel::Single);
    BarrierEnqueueDiscoveredFieldClosure enqueue;
    G1CMDrainMarkingStackClosure complete_gc(&_cm, _cm.task(index), _tm == RefProcThreadModel::Single);
    _rp_task->rp_work(worker_id, &is_alive, &keep_alive, &enqueue, &complete_gc);
  }

  void prepare_run_task_hook() override {
    // We need to reset the concurrency level before each
    // proxy task execution, so that the termination protocol
    // and overflow handling in G1CMTask::do_marking_step() knows
    // how many workers to wait for.
    _cm.set_concurrency(_queue_count);
  }
};
1598
1599 void G1ConcurrentMark::weak_refs_work() {
1600 ResourceMark rm;
1601
1602 {
1603 GCTraceTime(Debug, gc, phases) debug("Reference Processing", _gc_timer_cm);
1604
1605 ReferenceProcessor* rp = _g1h->ref_processor_cm();
1606
1607 // See the comment in G1CollectedHeap::ref_processing_init()
1608 // about how reference processing currently works in G1.
1609
1610 assert(_global_mark_stack.is_empty(), "mark stack should be empty");
1611
1612 // Prefer to grow the stack until the max capacity.
1613 _global_mark_stack.set_should_grow();
1614
1615 // Parallel processing task executor.
1616 G1CMRefProcProxyTask task(rp->max_num_queues(), *_g1h, *this);
1617 ReferenceProcessorPhaseTimes pt(_gc_timer_cm, rp->max_num_queues());
1618
1619 // Process the weak references.
1620 const ReferenceProcessorStats& stats = rp->process_discovered_references(task, _g1h->workers(), pt);
1621 _gc_tracer_cm->report_gc_reference_stats(stats);
1622 pt.print_all_references();
1623
1624 // The do_oop work routines of the keep_alive and drain_marking_stack
1625 // oop closures will set the has_overflown flag if we overflow the
1626 // global marking stack.
1627
1628 assert(has_overflown() || _global_mark_stack.is_empty(),
1629 "Mark stack should be empty (unless it has overflown)");
1630 }
1631
1632 if (has_overflown()) {
1633 // We can not trust g1_is_alive and the contents of the heap if the marking stack
1634 // overflowed while processing references. Exit the VM.
1635 fatal("Overflow during reference processing, can not continue. Current mark stack depth: "
1636 "%zu, MarkStackSize: %zu, MarkStackSizeMax: %zu. "
1637 "Please increase MarkStackSize and/or MarkStackSizeMax and restart.",
1638 _global_mark_stack.size(), MarkStackSize, MarkStackSizeMax);
1639 return;
1640 }
1641
1642 assert(_global_mark_stack.is_empty(), "Marking should have completed");
1643
1644 {
1645 GCTraceTime(Debug, gc, phases) debug("Weak Processing", _gc_timer_cm);
1646 G1CMIsAliveClosure is_alive(this);
1647 WeakProcessor::weak_oops_do(_g1h->workers(), &is_alive, &do_nothing_cl, 1);
1648 }
1649 }
1650
1651 class G1PrecleanYieldClosure : public YieldClosure {
1652 G1ConcurrentMark* _cm;
1653
1654 public:
1655 G1PrecleanYieldClosure(G1ConcurrentMark* cm) : _cm(cm) { }
1656
1657 virtual bool should_return() {
1658 return _cm->has_aborted();
1659 }
1660
1661 virtual bool should_return_fine_grain() {
1662 _cm->do_yield_check();
1663 return _cm->has_aborted();
1664 }
1665 };
1666
// Concurrently precleans discovered references to reduce the reference
// processing work left for the Remark pause. Runs single-threaded inside
// the suspendible thread set.
void G1ConcurrentMark::preclean() {
  assert(G1UseReferencePrecleaning, "Precleaning must be enabled.");

  SuspendibleThreadSetJoiner joiner;

  BarrierEnqueueDiscoveredFieldClosure enqueue;

  // Single-threaded, concurrent phase.
  set_concurrency_and_phase(1, true);

  G1PrecleanYieldClosure yield_cl(this);

  ReferenceProcessor* rp = _g1h->ref_processor_cm();
  // Precleaning is single threaded. Temporarily disable MT discovery.
  ReferenceProcessorMTDiscoveryMutator rp_mut_discovery(rp, false);
  rp->preclean_discovered_references(rp->is_alive_non_header(),
                                     &enqueue,
                                     &yield_cl,
                                     _gc_timer_cm);
}
1686
1687 // Closure for marking entries in SATB buffers.
1688 class G1CMSATBBufferClosure : public SATBBufferClosure {
1689 private:
1690 G1CMTask* _task;
1691 G1CollectedHeap* _g1h;
1692
1693 // This is very similar to G1CMTask::deal_with_reference, but with
1694 // more relaxed requirements for the argument, so this must be more
1695 // circumspect about treating the argument as an object.
1696 void do_entry(void* entry) const {
1697 _task->increment_refs_reached();
1698 oop const obj = cast_to_oop(entry);
1699 _task->make_reference_grey(obj);
1700 }
1701
1702 public:
1703 G1CMSATBBufferClosure(G1CMTask* task, G1CollectedHeap* g1h)
1704 : _task(task), _g1h(g1h) { }
1705
1706 virtual void do_buffer(void** buffer, size_t size) {
1707 for (size_t i = 0; i < size; ++i) {
1708 do_entry(buffer[i]);
1709 }
1710 }
1711 };
1712
1713 class G1RemarkThreadsClosure : public ThreadClosure {
1714 G1SATBMarkQueueSet& _qset;
1715
1716 public:
1717 G1RemarkThreadsClosure(G1CollectedHeap* g1h, G1CMTask* task) :
1718 _qset(G1BarrierSet::satb_mark_queue_set()) {}
1719
1720 void do_thread(Thread* thread) {
1721 // Transfer any partial buffer to the qset for completed buffer processing.
1722 _qset.flush_queue(G1ThreadLocalData::satb_mark_queue(thread));
1723 }
1724 };
1725
// Parallel remark task: first flushes all threads' partial SATB buffers,
// then finishes marking by draining SATB queues and marking stacks.
class G1CMRemarkTask : public WorkerTask {
  // For Threads::possibly_parallel_threads_do
  ThreadsClaimTokenScope _threads_claim_token_scope;
  G1ConcurrentMark* _cm;
public:
  void work(uint worker_id) {
    G1CMTask* task = _cm->task(worker_id);
    task->record_start_time();
    {
      ResourceMark rm;

      // Flush each thread's partial SATB buffer for processing below.
      G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
      Threads::possibly_parallel_threads_do(true /* is_par */, &threads_f);
    }

    do {
      // Unbounded time target: run until completely done or overflow.
      task->do_marking_step(1000000000.0 /* something very large */,
                            true /* do_termination */,
                            false /* is_serial */);
    } while (task->has_aborted() && !_cm->has_overflown());
    // If we overflow, then we do not want to restart. We instead
    // want to abort remark and do concurrent marking again.
    task->record_end_time();
  }

  G1CMRemarkTask(G1ConcurrentMark* cm, uint active_workers) :
    WorkerTask("Par Remark"), _threads_claim_token_scope(), _cm(cm) {
    _cm->terminator()->reset_for_reuse(active_workers);
  }
};
1756
// Finishes marking during the Remark pause: processes remaining SATB
// buffers and drains the marking stacks using all active workers.
void G1ConcurrentMark::finalize_marking() {
  ResourceMark rm;

  _g1h->ensure_parsability(false);

  // this is remark, so we'll use up all active threads
  uint active_workers = _g1h->workers()->active_workers();
  set_concurrency_and_phase(active_workers, false /* concurrent */);
  // Leave _parallel_marking_threads at its
  // value originally calculated in the G1ConcurrentMark
  // constructor and pass values of the active workers
  // through the task.

  {
    G1CMRemarkTask remarkTask(this, active_workers);
    // We will start all available threads, even if we decide that the
    // active_workers will be fewer. The extra ones will just bail out
    // immediately.
    _g1h->workers()->run_task(&remarkTask);
  }

  SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
  // Unless marking overflowed, all completed SATB buffers must be processed.
  guarantee(has_overflown() ||
            satb_mq_set.completed_buffers_num() == 0,
            "Invariant: has_overflown = %s, num buffers = %zu",
            BOOL_TO_STR(has_overflown()),
            satb_mq_set.completed_buffers_num());

  print_stats();
}
1787
1788 void G1ConcurrentMark::flush_all_task_caches() {
1789 size_t hits = 0;
1790 size_t misses = 0;
1791 for (uint i = 0; i < _max_num_tasks; i++) {
1792 Pair<size_t, size_t> stats = _tasks[i]->flush_mark_stats_cache();
1793 hits += stats.first;
1794 misses += stats.second;
1795 }
1796 size_t sum = hits + misses;
1797 log_debug(gc, stats)("Mark stats cache hits %zu misses %zu ratio %1.3lf",
1798 hits, misses, percent_of(hits, sum));
1799 }
1800
// Clears the mark bitmap bits covering the whole of the given region.
// Must run inside a safepoint, as asserted, since the bitmap is cleared
// non-atomically with respect to concurrent markers.
void G1ConcurrentMark::clear_bitmap_for_region(G1HeapRegion* hr) {
  assert_at_safepoint();
  _mark_bitmap.clear_range(MemRegion(hr->bottom(), hr->end()));
}
1805
// Claims the next region to scan by atomically advancing the global finger
// one region at a time. Returns the claimed region, or null either because
// the finger reached the end of the heap or because the claimed region has
// nothing to scan (TAMS == bottom) — in the latter case the caller should
// just call claim_region() again.
G1HeapRegion* G1ConcurrentMark::claim_region(uint worker_id) {
  // "Checkpoint" the finger.
  HeapWord* local_finger = finger();

  while (local_finger < _heap.end()) {
    assert(_g1h->is_in_reserved(local_finger), "invariant");

    G1HeapRegion* curr_region = _g1h->heap_region_containing_or_null(local_finger);
    // Make sure that the reads below do not float before loading curr_region.
    OrderAccess::loadload();
    // Above heap_region_containing may return null as we always scan claim
    // until the end of the heap. In this case, just jump to the next region.
    HeapWord* end = curr_region != nullptr ? curr_region->end() : local_finger + G1HeapRegion::GrainWords;

    // Is the gap between reading the finger and doing the CAS too long?
    HeapWord* res = _finger.compare_exchange(local_finger, end);
    if (res == local_finger && curr_region != nullptr) {
      // We succeeded.
      HeapWord* bottom = curr_region->bottom();
      HeapWord* limit = top_at_mark_start(curr_region);

      log_trace(gc, marking)("Claim region %u bottom " PTR_FORMAT " tams " PTR_FORMAT, curr_region->hrm_index(), p2i(curr_region->bottom()), p2i(top_at_mark_start(curr_region)));
      // Notice that _finger == end cannot be guaranteed here since,
      // someone else might have moved the finger even further.
      assert(finger() >= end, "The finger should have moved forward");

      if (limit > bottom) {
        // There are objects below TAMS to scan in this region.
        return curr_region;
      } else {
        assert(limit == bottom,
               "The region limit should be at bottom");
        // We return null and the caller should try calling
        // claim_region() again.
        return nullptr;
      }
    } else {
      // CAS lost (or the slot had no region): re-read the finger and retry.
      HeapWord* next_finger = finger();
      assert(next_finger > local_finger, "The finger should have moved forward " PTR_FORMAT " " PTR_FORMAT, p2i(local_finger), p2i(next_finger));
      local_finger = next_finger;
    }
  }

  return nullptr;
}
1851
1852 #ifndef PRODUCT
// Debug-only closure applied to mark stack / task queue entries verifying
// that no entry refers to an object in the (optional) collection set.
// _phase/_info identify the data structure being verified for error output.
class VerifyNoCSetOops {
  G1CollectedHeap* _g1h;
  const char* _phase;
  int _info;

public:
  VerifyNoCSetOops(const char* phase, int info = -1) :
    _g1h(G1CollectedHeap::heap()),
    _phase(phase),
    _info(info)
  { }

  void operator()(G1TaskQueueEntry task_entry) const {
    if (task_entry.is_partial_array_state()) {
      // Partial array states are only checked to be within the heap.
      oop obj = task_entry.to_partial_array_state()->source();
      guarantee(_g1h->is_in_reserved(obj), "Partial Array " PTR_FORMAT " must be in heap.", p2i(obj));
      return;
    }
    guarantee(oopDesc::is_oop(task_entry.to_oop()),
              "Non-oop " PTR_FORMAT ", phase: %s, info: %d",
              p2i(task_entry.to_oop()), _phase, _info);
    // Neither the regular nor the optional collection set may contain
    // objects referenced from marking data structures.
    G1HeapRegion* r = _g1h->heap_region_containing(task_entry.to_oop());
    guarantee(!(r->in_collection_set() || r->has_index_in_opt_cset()),
              "obj " PTR_FORMAT " from %s (%d) in region %u in (optional) collection set",
              p2i(task_entry.to_oop()), _phase, _info, r->hrm_index());
  }
};
1880
// Debug-only verification that no marking data structure (global mark
// stack, task queues, global and per-task fingers) refers into the
// (optional) collection set. Only meaningful while marking or remembered
// set rebuilding is in progress.
void G1ConcurrentMark::verify_no_collection_set_oops() {
  assert(SafepointSynchronize::is_at_safepoint() || !is_init_completed(),
         "should be at a safepoint or initializing");
  if (!_g1h->collector_state()->mark_or_rebuild_in_progress()) {
    return;
  }

  // Verify entries on the global mark stack
  _global_mark_stack.iterate(VerifyNoCSetOops("Stack"));

  // Verify entries on the task queues
  for (uint i = 0; i < _max_num_tasks; ++i) {
    G1CMTaskQueue* queue = _task_queues->queue(i);
    queue->iterate(VerifyNoCSetOops("Queue", i));
  }

  // Verify the global finger
  HeapWord* global_finger = finger();
  if (global_finger != nullptr && global_finger < _heap.end()) {
    // Since we always iterate over all regions, we might get a null G1HeapRegion
    // here.
    G1HeapRegion* global_hr = _g1h->heap_region_containing_or_null(global_finger);
    guarantee(global_hr == nullptr || global_finger == global_hr->bottom(),
              "global finger: " PTR_FORMAT " region: " HR_FORMAT,
              p2i(global_finger), HR_FORMAT_PARAMS(global_hr));
  }

  // Verify the task fingers
  assert(_num_concurrent_workers <= _max_num_tasks, "sanity");
  for (uint i = 0; i < _num_concurrent_workers; ++i) {
    G1CMTask* task = _tasks[i];
    HeapWord* task_finger = task->finger();
    if (task_finger != nullptr && task_finger < _heap.end()) {
      // See above note on the global finger verification.
      G1HeapRegion* r = _g1h->heap_region_containing_or_null(task_finger);
      guarantee(r == nullptr || task_finger == r->bottom() ||
                !r->in_collection_set() || !r->has_index_in_opt_cset(),
                "task finger: " PTR_FORMAT " region: " HR_FORMAT,
                p2i(task_finger), HR_FORMAT_PARAMS(r));
    }
  }
}
1923 #endif // PRODUCT
1924
1925 void G1ConcurrentMark::rebuild_and_scrub() {
1926 if (!needs_remembered_set_rebuild()) {
1927 log_debug(gc, marking)("Skipping Remembered Set Rebuild. No regions selected for rebuild, will only scrub");
1928 }
1929
1930 G1ConcurrentRebuildAndScrub::rebuild_and_scrub(this, needs_remembered_set_rebuild(), _concurrent_workers);
1931 }
1932
1933 void G1ConcurrentMark::print_stats() {
1934 if (!log_is_enabled(Debug, gc, stats)) {
1935 return;
1936 }
1937 log_debug(gc, stats)("---------------------------------------------------------------------");
1938 for (size_t i = 0; i < _num_active_tasks; ++i) {
1939 _tasks[i]->print_stats();
1940 log_debug(gc, stats)("---------------------------------------------------------------------");
1941 }
1942 }
1943
// Aborts an in-progress concurrent marking cycle (e.g. because a Full GC
// is about to run). Returns true if there was marking state to abort,
// false if no cycle was in progress.
bool G1ConcurrentMark::concurrent_cycle_abort() {
  // If we start the compaction before the CM threads finish
  // scanning the root regions we might trip them over as we'll
  // be moving objects / updating references. So let's wait until
  // they are done. By telling them to abort, they should complete
  // early.
  root_region_scan_abort_and_wait();

  // We haven't started a concurrent cycle no need to do anything; we might have
  // aborted the marking because of shutting down though. In this case the marking
  // might have already completed the abort (leading to in_progress() below to
  // return false), however this still left marking state particularly in the
  // shared marking bitmap that must be cleaned up.
  // If there are multiple full gcs during shutdown we do this work repeatedly for
  // nothing, but this situation should be extremely rare (a full gc after shutdown
  // has been signalled is already rare), and this work should be negligible compared
  // to actual full gc work.

  if (!is_fully_initialized() || (!cm_thread()->in_progress() && !cm_thread()->should_terminate())) {
    return false;
  }

  reset_marking_for_restart();

  abort_marking_threads();

  // Discard all SATB data gathered so far and deactivate the SATB queues.
  SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
  satb_mq_set.abandon_partial_marking();
  // This can be called either during or outside marking, we'll read
  // the expected_active value from the SATB queue set.
  satb_mq_set.set_active_all_threads(false, /* new active value */
                                     satb_mq_set.is_active() /* expected_active */);
  return true;
}
1978
// Signals all marking threads to abort: sets the abort flag and breaks
// both overflow synchronization barriers so no thread stays blocked there.
void G1ConcurrentMark::abort_marking_threads() {
  assert(!_root_regions.scan_in_progress(), "still doing root region scan");
  // Set the flag before aborting the barriers so waking threads observe it.
  _has_aborted.store_relaxed(true);
  _first_overflow_barrier_sync.abort();
  _second_overflow_barrier_sync.abort();
}
1985
1986 double G1ConcurrentMark::worker_threads_cpu_time_s() {
1987 class CountCpuTimeThreadClosure : public ThreadClosure {
1988 public:
1989 jlong _total_cpu_time;
1990
1991 CountCpuTimeThreadClosure() : ThreadClosure(), _total_cpu_time(0) { }
1992
1993 void do_thread(Thread* t) {
1994 _total_cpu_time += os::thread_cpu_time(t);
1995 }
1996 } cl;
1997
1998 threads_do(&cl);
1999
2000 return (double)cl._total_cpu_time / NANOSECS_PER_SEC;
2001 }
2002
2003 static void print_ms_time_info(const char* prefix, const char* name,
2004 NumberSeq& ns) {
2005 log_trace(gc, marking)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
2006 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
2007 if (ns.num() > 0) {
2008 log_trace(gc, marking)("%s [std. dev = %8.2f ms, max = %8.2f ms]",
2009 prefix, ns.sd(), ns.maximum());
2010 }
2011 }
2012
2013 void G1ConcurrentMark::print_summary_info() {
2014 Log(gc, marking) log;
2015 if (!log.is_trace()) {
2016 return;
2017 }
2018
2019 log.trace(" Concurrent marking:");
2020 if (!is_fully_initialized()) {
2021 log.trace(" has not been initialized yet");
2022 return;
2023 }
2024 print_ms_time_info(" ", "remarks", _remark_times);
2025 {
2026 print_ms_time_info(" ", "final marks", _remark_mark_times);
2027 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times);
2028
2029 }
2030 print_ms_time_info(" ", "cleanups", _cleanup_times);
2031 log.trace(" Finalize live data total time = %8.2f s (avg = %8.2f ms).",
2032 _cleanup_times.sum() / 1000.0, _cleanup_times.avg());
2033 log.trace(" Total stop_world time = %8.2f s.",
2034 (_remark_times.sum() + _cleanup_times.sum())/1000.0);
2035 log.trace(" Total concurrent time = %8.2f s (%8.2f s marking).",
2036 cm_thread()->total_mark_cpu_time_s(), cm_thread()->worker_threads_cpu_time_s());
2037 }
2038
// Applies tc to the concurrent mark thread and all concurrent worker
// threads. A no-op until marking is fully initialized.
void G1ConcurrentMark::threads_do(ThreadClosure* tc) const {
  if (is_fully_initialized()) { // they are initialized late
    tc->do_thread(_cm_thread);
    _concurrent_workers->threads_do(tc);
  }
}
2045
// Prints the address and contents summary of the mark bitmap (hs_err /
// debugging output).
void G1ConcurrentMark::print_on(outputStream* st) const {
  st->print_cr("Marking Bits: (CMBitMap*) " PTR_FORMAT, p2i(mark_bitmap()));
  _mark_bitmap.print_on(st, " Bits: ");
}
2050
2051 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) {
2052 ReferenceProcessor* result = g1h->ref_processor_cm();
2053 assert(result != nullptr, "CM reference processor should not be null");
2054 return result;
2055 }
2056
// Oop closure used by marking; claims metadata strongly and discovers
// references via the CM reference processor.
G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
                               G1CMTask* task)
  : ClaimMetadataVisitingOopIterateClosure(ClassLoaderData::_claim_strong, get_cm_oop_closure_ref_processor(g1h)),
    _g1h(g1h), _task(task)
{ }
2062
// Makes hr the task's current region: positions the local finger at the
// region's bottom and computes the scan limit (TAMS).
void G1CMTask::setup_for_region(G1HeapRegion* hr) {
  assert(hr != nullptr,
         "claim_region() should have filtered out null regions");
  _curr_region = hr;
  _finger = hr->bottom();
  update_region_limit();
}
2070
// Re-reads the current region's TAMS and updates _region_limit (and
// possibly _finger) accordingly. Needed because an evacuation pause may
// have changed the region underneath us since the region was claimed.
void G1CMTask::update_region_limit() {
  G1HeapRegion* hr = _curr_region;
  HeapWord* bottom = hr->bottom();
  HeapWord* limit = _cm->top_at_mark_start(hr);

  if (limit == bottom) {
    // The region was collected underneath our feet.
    // We set the finger to bottom to ensure that the bitmap
    // iteration that will follow this will not do anything.
    // (this is not a condition that holds when we set the region up,
    // as the region is not supposed to be empty in the first place)
    _finger = bottom;
  } else if (limit >= _region_limit) {
    assert(limit >= _finger, "peace of mind");
  } else {
    assert(limit < _region_limit, "only way to get here");
    // This can happen under some pretty unusual circumstances. An
    // evacuation pause empties the region underneath our feet (TAMS
    // at bottom). We then do some allocation in the region (TAMS
    // stays at bottom), followed by the region being used as a GC
    // alloc region (TAMS will move to top() and the objects
    // originally below it will be greyed). All objects now marked in
    // the region are explicitly greyed, if below the global finger,
    // and we do not need in fact to scan anything else. So, we simply
    // set _finger to be limit to ensure that the bitmap iteration
    // doesn't do anything.
    _finger = limit;
  }

  _region_limit = limit;
}
2102
// Releases the task's current region (after finishing or aborting its
// scan) by clearing all region-tracking fields.
void G1CMTask::giveup_current_region() {
  assert(_curr_region != nullptr, "invariant");
  clear_region_fields();
}
2107
void G1CMTask::clear_region_fields() {
  // Values for these three fields that indicate that we're not
  // holding on to a region.
  _curr_region = nullptr;
  _finger = nullptr;
  _region_limit = nullptr;
}
2115
2116 void G1CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
2117 if (cm_oop_closure == nullptr) {
2118 assert(_cm_oop_closure != nullptr, "invariant");
2119 } else {
2120 assert(_cm_oop_closure == nullptr, "invariant");
2121 }
2122 _cm_oop_closure = cm_oop_closure;
2123 }
2124
// Resets the task for a new marking cycle: installs the bitmap to use,
// clears region-tracking fields, timing counters and the stats cache.
void G1CMTask::reset(G1CMBitMap* mark_bitmap) {
  guarantee(mark_bitmap != nullptr, "invariant");
  _mark_bitmap = mark_bitmap;
  clear_region_fields();

  _calls = 0;
  _elapsed_time_ms = 0.0;
  _termination_time_ms = 0.0;

  _mark_stats_cache.reset();
}
2136
// Prepares the task for a marking restart (after overflow): drops the
// current region, empties the local queue and resets queue statistics.
void G1CMTask::reset_for_restart() {
  clear_region_fields();
  _task_queue->set_empty();
  TASKQUEUE_STATS_ONLY(_partial_array_splitter.stats()->reset());
  TASKQUEUE_STATS_ONLY(_task_queue->stats.reset());
}
2143
// (Re)constructs the task's partial array splitter in place via placement
// new; paired with unregister_partial_array_splitter() which runs the
// destructor explicitly.
void G1CMTask::register_partial_array_splitter() {

  ::new (&_partial_array_splitter) PartialArraySplitter(_cm->partial_array_state_manager(),
                                                        _cm->max_num_tasks(),
                                                        ObjArrayMarkingStride);
}
2150
// Explicitly destroys the splitter constructed by
// register_partial_array_splitter().
void G1CMTask::unregister_partial_array_splitter() {
  _partial_array_splitter.~PartialArraySplitter();
}
2154
2155 bool G1CMTask::should_exit_termination() {
2156 if (!regular_clock_call()) {
2157 return true;
2158 }
2159
2160 // This is called when we are in the termination protocol. We should
2161 // quit if, for some reason, this task wants to abort or the global
2162 // stack is not empty (this means that we can get work from it).
2163 return !_cm->mark_stack_empty() || has_aborted();
2164 }
2165
// Called when the words-scanned or refs-reached limit is hit; runs the
// regular clock and aborts marking if the clock says so.
void G1CMTask::reached_limit() {
  assert(_words_scanned >= _words_scanned_limit ||
         _refs_reached >= _refs_reached_limit ,
         "shouldn't have been called otherwise");
  abort_marking_if_regular_check_fail();
}
2172
// Periodic check run during marking. Returns false when the marking step
// should be aborted (overflow, full-gc abort, yield request, time quota
// exceeded, or pending SATB buffers), true to keep going.
bool G1CMTask::regular_clock_call() {
  if (has_aborted()) {
    return false;
  }

  // First, we need to recalculate the words scanned and refs reached
  // limits for the next clock call.
  recalculate_limits();

  // During the regular clock call we do the following

  // (1) If an overflow has been flagged, then we abort.
  if (_cm->has_overflown()) {
    return false;
  }

  // If we are not concurrent (i.e. we're doing remark) we don't need
  // to check anything else. The other steps are only needed during
  // the concurrent marking phase.
  if (!_cm->concurrent()) {
    return true;
  }

  // (2) If marking has been aborted for Full GC, then we also abort.
  if (_cm->has_aborted()) {
    return false;
  }

  // (4) We check whether we should yield. If we have to, then we abort.
  // (Note: there is intentionally no step (3) — the numbering is historical.)
  if (SuspendibleThreadSet::should_yield()) {
    // We should yield. To do this we abort the task. The caller is
    // responsible for yielding.
    return false;
  }

  // (5) We check whether we've reached our time quota. If we have,
  // then we abort.
  double elapsed_time_ms = (double)(os::current_thread_cpu_time() - _start_cpu_time_ns) / NANOSECS_PER_MILLISEC;
  if (elapsed_time_ms > _time_target_ms) {
    _has_timed_out = true;
    return false;
  }

  // (6) Finally, we check whether there are enough completed SATB
  // buffers available for processing. If there are, we abort.
  SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
  if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
    // we do need to process SATB buffers, we'll abort and restart
    // the marking task to do so
    return false;
  }
  return true;
}
2226
2227 void G1CMTask::recalculate_limits() {
2228 _real_words_scanned_limit = _words_scanned + words_scanned_period;
2229 _words_scanned_limit = _real_words_scanned_limit;
2230
2231 _real_refs_reached_limit = _refs_reached + refs_reached_period;
2232 _refs_reached_limit = _real_refs_reached_limit;
2233 }
2234
void G1CMTask::decrease_limits() {
  // This is called when we believe that we're going to do an infrequent
  // operation which will increase the per byte scanned cost (i.e. move
  // entries to/from the global stack). It basically tries to decrease the
  // scanning limit so that the clock is called earlier.

  // Pull both working limits back by 3/4 of a period, so only 1/4 of a
  // period remains before the next regular clock call.
  _words_scanned_limit = _real_words_scanned_limit - 3 * words_scanned_period / 4;
  _refs_reached_limit = _real_refs_reached_limit - 3 * refs_reached_period / 4;
}
2244
2245 void G1CMTask::move_entries_to_global_stack() {
2246 // Local array where we'll store the entries that will be popped
2247 // from the local queue.
2248 G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2249
2250 size_t n = 0;
2251 G1TaskQueueEntry task_entry;
2252 while (n < G1CMMarkStack::EntriesPerChunk && _task_queue->pop_local(task_entry)) {
2253 buffer[n] = task_entry;
2254 ++n;
2255 }
2256 if (n < G1CMMarkStack::EntriesPerChunk) {
2257 buffer[n] = G1TaskQueueEntry();
2258 }
2259
2260 if (n > 0) {
2261 if (!_cm->mark_stack_push(buffer)) {
2262 set_has_aborted();
2263 }
2264 }
2265
2266 // This operation was quite expensive, so decrease the limits.
2267 decrease_limits();
2268 }
2269
// Pops one chunk from the global mark stack and pushes its (possibly
// null-terminated) entries onto the local task queue. Returns false if
// the global stack was empty.
bool G1CMTask::get_entries_from_global_stack() {
  // Local array where we'll store the entries that will be popped
  // from the global stack.
  G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];

  if (!_cm->mark_stack_pop(buffer)) {
    return false;
  }

  // We did actually pop at least one entry.
  for (size_t i = 0; i < G1CMMarkStack::EntriesPerChunk; ++i) {
    G1TaskQueueEntry task_entry = buffer[i];
    if (task_entry.is_null()) {
      // A null entry marks the end of a partially filled chunk.
      break;
    }
    assert(task_entry.is_partial_array_state() || oopDesc::is_oop(task_entry.to_oop()), "Element " PTR_FORMAT " must be an array slice or oop", p2i(task_entry.to_oop()));
    bool success = _task_queue->push(task_entry);
    // We only call this when the local queue is empty or under a
    // given target limit. So, we do not expect this push to fail.
    assert(success, "invariant");
  }

  // This operation was quite expensive, so decrease the limits
  decrease_limits();
  return true;
}
2296
2297 void G1CMTask::drain_local_queue(bool partially) {
2298 if (has_aborted()) {
2299 return;
2300 }
2301
2302 // Decide what the target size is, depending whether we're going to
2303 // drain it partially (so that other tasks can steal if they run out
2304 // of things to do) or totally (at the very end).
2305 uint target_size;
2306 if (partially) {
2307 target_size = GCDrainStackTargetSize;
2308 } else {
2309 target_size = 0;
2310 }
2311
2312 if (_task_queue->size() > target_size) {
2313 G1TaskQueueEntry entry;
2314 bool ret = _task_queue->pop_local(entry);
2315 while (ret) {
2316 process_entry(entry, false /* stolen */);
2317 if (_task_queue->size() <= target_size || has_aborted()) {
2318 ret = false;
2319 } else {
2320 ret = _task_queue->pop_local(entry);
2321 }
2322 }
2323 }
2324 }
2325
// Begins sliced processing of a large object array: registers it with the
// partial array splitter, marks its klass metadata, and processes the
// initial chunk. Returns the number of words accounted to this step
// (chunk elements plus the object header).
size_t G1CMTask::start_partial_array_processing(oop obj) {
  assert(should_be_sliced(obj), "Must be an array object %d and large %zu", obj->is_objArray(), obj->size());

  objArrayOop obj_array = oop_cast<objArrayOop>(obj);
  size_t array_length = obj_array->length();

  // The splitter enqueues partial array states for the rest of the array
  // and tells us how much to process right away.
  size_t initial_chunk_size = _partial_array_splitter.start(_task_queue, obj_array, nullptr, array_length);

  // Mark objArray klass metadata
  if (_cm_oop_closure->do_metadata()) {
    _cm_oop_closure->do_klass(obj_array->klass());
  }

  process_array_chunk(obj_array, 0, initial_chunk_size);

  // Include object header size
  // NOTE(review): the non-refArray path assumes the array is a flat array
  // (FlatArrayKlass::cast would fail otherwise) — presumably refArray and
  // flatArray are the only sliced array kinds here; confirm against
  // should_be_sliced().
  if (obj_array->is_refArray()) {
    return refArrayOopDesc::object_size(checked_cast<int>(initial_chunk_size));
  } else {
    FlatArrayKlass* fak = FlatArrayKlass::cast(obj_array->klass());
    return flatArrayOopDesc::object_size(fak->layout_helper(), checked_cast<int>(initial_chunk_size));
  }
}
2349
// Processes one claimed chunk of a partially processed object array.
// Returns the number of words covered by the claimed element range.
size_t G1CMTask::process_partial_array(const G1TaskQueueEntry& task, bool stolen) {
  PartialArrayState* state = task.to_partial_array_state();
  // Access state before release by claim().
  objArrayOop obj = oop_cast<objArrayOop>(state->source());

  // claim() returns the [start, end) element range this task should scan
  // and may enqueue further partial states.
  PartialArraySplitter::Claim claim =
    _partial_array_splitter.claim(state, _task_queue, stolen);

  process_array_chunk(obj, claim._start, claim._end);

  if (obj->is_refArray()) {
    // Reference arrays: each element is one heap oop.
    return heap_word_size((claim._end - claim._start) * heapOopSize);
  } else {
    assert(obj->is_flatArray(), "Must be!");
    // Flat (inline-type) arrays: element size comes from the klass.
    size_t element_byte_size = FlatArrayKlass::cast(obj->klass())->element_byte_size();
    size_t nof_elements = claim._end - claim._start;
    return heap_word_size(nof_elements * element_byte_size);
  }
}
2369
// Moves entries from the global mark stack into the local queue and
// processes them. Partial draining stops near a target size so other
// tasks can still find work; total draining runs until the global stack
// is empty.
void G1CMTask::drain_global_stack(bool partially) {
  if (has_aborted()) {
    return;
  }

  // We have a policy to drain the local queue before we attempt to
  // drain the global stack.
  assert(partially || _task_queue->size() == 0, "invariant");

  // Decide what the target size is, depending whether we're going to
  // drain it partially (so that other tasks can steal if they run out
  // of things to do) or totally (at the very end).
  // Notice that when draining the global mark stack partially, due to the racyness
  // of the mark stack size update we might in fact drop below the target. But,
  // this is not a problem.
  // In case of total draining, we simply process until the global mark stack is
  // totally empty, disregarding the size counter.
  if (partially) {
    size_t const target_size = _cm->partial_mark_stack_size_target();
    while (!has_aborted() && _cm->mark_stack_size() > target_size) {
      if (get_entries_from_global_stack()) {
        drain_local_queue(partially);
      }
    }
  } else {
    while (!has_aborted() && get_entries_from_global_stack()) {
      drain_local_queue(partially);
    }
  }
}
2400
// SATB Queue has several assumptions on whether to call the par or
// non-par versions of the methods. this is why some of the code is
// replicated. We should really get rid of the single-threaded version
// of the code to simplify things.
// Claims completed SATB buffers from the global qset and applies the
// marking closure to their entries until none are left or the task aborts.
void G1CMTask::drain_satb_buffers() {
  if (has_aborted()) {
    return;
  }

  // We set this so that the regular clock knows that we're in the
  // middle of draining buffers and doesn't set the abort flag when it
  // notices that SATB buffers are available for draining. It'd be
  // very counter productive if it did that. :-)
  _draining_satb_buffers = true;

  G1CMSATBBufferClosure satb_cl(this, _g1h);
  SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();

  // This keeps claiming and applying the closure to completed buffers
  // until we run out of buffers or we need to abort.
  while (!has_aborted() &&
         satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
    abort_marking_if_regular_check_fail();
  }

  // Can't assert qset is empty here, even if not aborted. If concurrent,
  // some other thread might be adding to the queue. If not concurrent,
  // some other thread might have won the race for the last buffer, but
  // has not yet decremented the count.

  _draining_satb_buffers = false;

  // again, this was a potentially expensive operation, decrease the
  // limits to get the regular clock call early
  decrease_limits();
}
2437
// Clears the cached mark stats entry for the given region.
void G1CMTask::clear_mark_stats_cache(uint region_idx) {
  _mark_stats_cache.reset(region_idx);
}
2441
// Evicts all cached mark stats entries; returns (hits, misses) counters.
Pair<size_t, size_t> G1CMTask::flush_mark_stats_cache() {
  return _mark_stats_cache.evict_all();
}
2445
2446 void G1CMTask::print_stats() {
2447 log_debug(gc, stats)("Marking Stats, task = %u, calls = %u", _worker_id, _calls);
2448 log_debug(gc, stats)(" Elapsed time = %1.2lfms, Termination time = %1.2lfms",
2449 _elapsed_time_ms, _termination_time_ms);
2450 log_debug(gc, stats)(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms max = %1.2lfms, total = %1.2lfms",
2451 _step_times_ms.num(),
2452 _step_times_ms.avg(),
2453 _step_times_ms.sd(),
2454 _step_times_ms.maximum(),
2455 _step_times_ms.sum());
2456 size_t const hits = _mark_stats_cache.hits();
2457 size_t const misses = _mark_stats_cache.misses();
2458 log_debug(gc, stats)(" Mark Stats Cache: hits %zu misses %zu ratio %.3f",
2459 hits, misses, percent_of(hits, hits + misses));
2460 }
2461
// Attempts to steal one entry from some other task's queue into task_entry.
// Returns true on success.
bool G1ConcurrentMark::try_stealing(uint worker_id, G1TaskQueueEntry& task_entry) {
  return _task_queues->steal(worker_id, task_entry);
}
2465
// Scans (part of) the task's current region by iterating the mark bitmap
// from the local finger to the region limit. On completion, or for the
// special cases (empty range, humongous region), the region is given up;
// if the iteration aborts mid-way the finger is left (advanced past the
// current object) so a restart continues where we stopped.
void G1CMTask::process_current_region(G1CMBitMapClosure& bitmap_closure) {
  if (has_aborted() || _curr_region == nullptr) {
    return;
  }

  // This means that we're already holding on to a region.
  assert(_finger != nullptr, "if region is not null, then the finger "
         "should not be null either");

  // We might have restarted this task after an evacuation pause
  // which might have evacuated the region we're holding on to
  // underneath our feet. Let's read its limit again to make sure
  // that we do not iterate over a region of the heap that
  // contains garbage (update_region_limit() will also move
  // _finger to the start of the region if it is found empty).
  update_region_limit();
  // We will start from _finger not from the start of the region,
  // as we might be restarting this task after aborting half-way
  // through scanning this region. In this case, _finger points to
  // the address where we last found a marked object. If this is a
  // fresh region, _finger points to start().
  MemRegion mr = MemRegion(_finger, _region_limit);

  assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
         "humongous regions should go around loop once only");

  // Some special cases:
  // If the memory region is empty, we can just give up the region.
  // If the current region is humongous then we only need to check
  // the bitmap for the bit associated with the start of the object,
  // scan the object if it's live, and give up the region.
  // Otherwise, let's iterate over the bitmap of the part of the region
  // that is left.
  // If the iteration is successful, give up the region.
  if (mr.is_empty()) {
    giveup_current_region();
    abort_marking_if_regular_check_fail();
  } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
    if (_mark_bitmap->is_marked(mr.start())) {
      // The object is marked - apply the closure
      bitmap_closure.do_addr(mr.start());
    }
    // Even if this task aborted while scanning the humongous object
    // we can (and should) give up the current region.
    giveup_current_region();
    abort_marking_if_regular_check_fail();
  } else if (_mark_bitmap->iterate(&bitmap_closure, mr)) {
    giveup_current_region();
    abort_marking_if_regular_check_fail();
  } else {
    assert(has_aborted(), "currently the only way to do so");
    // The only way to abort the bitmap iteration is to return
    // false from the do_bit() method. However, inside the
    // do_bit() method we move the _finger to point to the
    // object currently being looked at. So, if we bail out, we
    // have definitely set _finger to something non-null.
    assert(_finger != nullptr, "invariant");

    // Region iteration was actually aborted. So now _finger
    // points to the address of the object we last scanned. If we
    // leave it there, when we restart this task, we will rescan
    // the object. It is easy to avoid this. We move the finger by
    // enough to point to the next possible object header.
    assert(_finger < _region_limit, "invariant");
    HeapWord* const new_finger = _finger + cast_to_oop(_finger)->size();
    if (new_finger >= _region_limit) {
      // Stepping past the last object finished the region after all.
      giveup_current_region();
    } else {
      move_finger_to(new_finger);
    }
  }
}
2538
// Repeatedly tries to claim a new region for this task until one is
// claimed, the task aborts, or the concurrent mark runs out of regions.
void G1CMTask::claim_new_region() {
  // Read the note on the claim_region() method on why it might
  // return null with potentially more regions available for
  // claiming and why we have to check out_of_regions() to determine
  // whether we're done or not.
  while (!has_aborted() && _curr_region == nullptr && !_cm->out_of_regions()) {
    // We are going to try to claim a new region. We should have
    // given up on the previous one.
    // Separated the asserts so that we know which one fires.
    assert(_curr_region == nullptr, "invariant");
    assert(_finger == nullptr, "invariant");
    assert(_region_limit == nullptr, "invariant");
    G1HeapRegion* claimed_region = _cm->claim_region(_worker_id);
    if (claimed_region != nullptr) {
      // Yes, we managed to claim one
      setup_for_region(claimed_region);
      assert(_curr_region == claimed_region, "invariant");
    }
    // It is important to call the regular clock here. It might take
    // a while to claim a region if, for example, we hit a large
    // block of empty regions. So we need to call the regular clock
    // method once round the loop to make sure it's called
    // frequently enough.
    abort_marking_if_regular_check_fail();
  }
}
2565
2566 void G1CMTask::attempt_stealing() {
2567 // We cannot check whether the global stack is empty, since other
2568 // tasks might be pushing objects to it concurrently.
2569 assert(_cm->out_of_regions() && _task_queue->size() == 0,
2570 "only way to reach here");
2571 while (!has_aborted()) {
2572 G1TaskQueueEntry entry;
2573 if (_cm->try_stealing(_worker_id, entry)) {
2574 process_entry(entry, true /* stolen */);
2575
2576 // And since we're towards the end, let's totally drain the
2577 // local queue and global stack.
2578 drain_local_queue(false);
2579 drain_global_stack(false);
2580 } else {
2581 break;
2582 }
2583 }
2584 }
2585
// Enters the termination protocol. If termination succeeds, all marking
// work is globally done (checked via guarantees); otherwise the task is
// aborted so do_marking_step() restarts it to look for more work.
void G1CMTask::attempt_termination(bool is_serial) {
  // We cannot check whether the global stack is empty, since other
  // tasks might be concurrently pushing objects on it.
  // Separated the asserts so that we know which one fires.
  assert(_cm->out_of_regions(), "only way to reach here");
  assert(_task_queue->size() == 0, "only way to reach here");
  double termination_start_time_ms = os::elapsedTime() * 1000.0;

  // The G1CMTask class also extends the TerminatorTerminator class,
  // hence its should_exit_termination() method will also decide
  // whether to exit the termination protocol or not.
  bool finished = (is_serial ||
                   _cm->terminator()->offer_termination(this));
  _termination_time_ms += (os::elapsedTime() * 1000.0 - termination_start_time_ms);

  if (finished) {
    // We're all done.

    // We can now guarantee that the global stack is empty, since
    // all other tasks have finished. We separated the guarantees so
    // that, if a condition is false, we can immediately find out
    // which one.
    guarantee(_cm->out_of_regions(), "only way to reach here");
    guarantee(_cm->mark_stack_empty(), "only way to reach here");
    guarantee(_task_queue->size() == 0, "only way to reach here");
    guarantee(!_cm->has_overflown(), "only way to reach here");
    guarantee(!has_aborted(), "should never happen if termination has completed");
  } else {
    // Apparently there's more work to do. Let's abort this task. We
    // will restart it and hopefully we can find more things to do.
    set_has_aborted();
  }
}
2619
// Post-abort processing for do_marking_step(): record timing statistics
// if we aborted due to a timeout, and if a global mark-stack overflow
// was raised, synchronize with the other tasks (via two barriers in the
// parallel case) so all marking state can be safely re-initialized
// before marking restarts.
void G1CMTask::handle_abort(bool is_serial, double elapsed_time_ms) {
  if (_has_timed_out) {
    double diff_ms = elapsed_time_ms - _time_target_ms;
    // Keep statistics of how well we did with respect to hitting
    // our target only if we actually timed out (if we aborted for
    // other reasons, then the results might get skewed).
    _marking_step_diff_ms.add(diff_ms);
  }

  // Aborts not caused by an overflow need no global coordination.
  if (!_cm->has_overflown()) {
    return;
  }

  // This is the interesting one. We aborted because a global
  // overflow was raised. This means we have to restart the
  // marking phase and start iterating over regions. However, in
  // order to do this we have to make sure that all tasks stop
  // what they are doing and re-initialize in a safe manner. We
  // will achieve this with the use of two barrier sync points.
  if (!is_serial) {
    // We only need to enter the sync barrier if being called
    // from a parallel context
    _cm->enter_first_sync_barrier(_worker_id);

    // When we exit this sync barrier we know that all tasks have
    // stopped doing marking work. So, it's now safe to
    // re-initialize our data structures.
  }

  clear_region_fields();
  flush_mark_stats_cache();

  if (!is_serial) {
    // If we're executing the concurrent phase of marking, reset the marking
    // state; otherwise the marking state is reset after reference processing,
    // during the remark pause.
    // If we reset here as a result of an overflow during the remark we will
    // see assertion failures from any subsequent set_concurrency_and_phase()
    // calls.
    if (_cm->concurrent() && _worker_id == 0) {
      // Worker 0 is responsible for clearing the global data structures because
      // of an overflow. During STW we should not clear the overflow flag (in
      // G1ConcurrentMark::reset_marking_state()) since we rely on it being true
      // when we exit this method to abort the pause and restart concurrent marking.
      _cm->reset_marking_for_restart();

      log_info(gc, marking)("Concurrent Mark reset for overflow");
    }

    // ...and enter the second barrier.
    _cm->enter_second_sync_barrier(_worker_id);
  }
}
2673
2674 /*****************************************************************************
2675
2676 The do_marking_step(time_target_ms, ...) method is the building
2677 block of the parallel marking framework. It can be called in parallel
2678 with other invocations of do_marking_step() on different tasks
2679 (but only one per task, obviously) and concurrently with the
2680 mutator threads, or during remark, hence it eliminates the need
2681 for two versions of the code. When called during remark, it will
2682 pick up from where the task left off during the concurrent marking
2683 phase. Interestingly, tasks are also claimable during evacuation
2684 pauses too, since do_marking_step() ensures that it aborts before
2685 it needs to yield.
2686
2687 The data structures that it uses to do marking work are the
2688 following:
2689
2690 (1) Marking Bitmap. If there are grey objects that appear only
2691 on the bitmap (this happens either when dealing with an overflow
2692 or when the concurrent start pause has simply marked the roots
2693 and didn't push them on the stack), then tasks claim heap
2694 regions whose bitmap they then scan to find grey objects. A
2695 global finger indicates where the end of the last claimed region
2696 is. A local finger indicates how far into the region a task has
2697 scanned. The two fingers are used to determine how to grey an
2698 object (i.e. whether simply marking it is OK, as it will be
2699 visited by a task in the future, or whether it needs to be also
2700 pushed on a stack).
2701
2702 (2) Local Queue. The local queue of the task which is accessed
2703 reasonably efficiently by the task. Other tasks can steal from
2704 it when they run out of work. Throughout the marking phase, a
2705 task attempts to keep its local queue short but not totally
2706 empty, so that entries are available for stealing by other
2707 tasks. Only when there is no more work, a task will totally
2708 drain its local queue.
2709
2710 (3) Global Mark Stack. This handles local queue overflow. During
2711 marking only sets of entries are moved between it and the local
2712 queues, as access to it requires a mutex and more fine-grain
2713 interaction with it which might cause contention. If it
2714 overflows, then the marking phase should restart and iterate
2715 over the bitmap to identify grey objects. Throughout the marking
2716 phase, tasks attempt to keep the global mark stack at a small
2717 length but not totally empty, so that entries are available for
2718 popping by other tasks. Only when there is no more work, tasks
2719 will totally drain the global mark stack.
2720
2721 (4) SATB Buffer Queue. This is where completed SATB buffers are
2722 made available. Buffers are regularly removed from this queue
2723 and scanned for roots, so that the queue doesn't get too
2724 long. During remark, all completed buffers are processed, as
2725 well as the filled in parts of any uncompleted buffers.
2726
2727 The do_marking_step() method tries to abort when the time target
2728 has been reached. There are a few other cases when the
2729 do_marking_step() method also aborts:
2730
2731 (1) When the marking phase has been aborted (after a Full GC).
2732
2733 (2) When a global overflow (on the global stack) has been
2734 triggered. Before the task aborts, it will actually sync up with
2735 the other tasks to ensure that all the marking data structures
2736 (local queues, stacks, fingers etc.) are re-initialized so that
2737 when do_marking_step() completes, the marking phase can
2738 immediately restart.
2739
2740 (3) When enough completed SATB buffers are available. The
2741 do_marking_step() method only tries to drain SATB buffers right
2742 at the beginning. So, if enough buffers are available, the
2743 marking step aborts and the SATB buffers are processed at
2744 the beginning of the next invocation.
2745
    (4) To yield. When we have to yield, we abort and yield
2747 right at the end of do_marking_step(). This saves us from a lot
2748 of hassle as, by yielding we might allow a Full GC. If this
2749 happens then objects will be compacted underneath our feet, the
2750 heap might shrink, etc. We save checking for this by just
2751 aborting and doing the yield right at the end.
2752
2753 From the above it follows that the do_marking_step() method should
2754 be called in a loop (or, otherwise, regularly) until it completes.
2755
2756 If a marking step completes without its has_aborted() flag being
2757 true, it means it has completed the current marking phase (and
2758 also all other marking tasks have done so and have all synced up).
2759
2760 A method called regular_clock_call() is invoked "regularly" (in
2761 sub ms intervals) throughout marking. It is this clock method that
2762 checks all the abort conditions which were mentioned above and
2763 decides when the task should abort. A work-based scheme is used to
2764 trigger this clock method: when the number of object words the
2765 marking phase has scanned or the number of references the marking
2766 phase has visited reach a given limit. Additional invocations to
    the clock method have been planted in a few other strategic places
2768 too. The initial reason for the clock method was to avoid calling
2769 cpu time gathering too regularly, as it is quite expensive. So,
2770 once it was in place, it was natural to piggy-back all the other
2771 conditions on it too and not constantly check them throughout the code.
2772
2773 If do_termination is true then do_marking_step will enter its
2774 termination protocol.
2775
2776 The value of is_serial must be true when do_marking_step is being
2777 called serially (i.e. by the VMThread) and do_marking_step should
2778 skip any synchronization in the termination and overflow code.
2779 Examples include the serial remark code and the serial reference
2780 processing closures.
2781
2782 The value of is_serial must be false when do_marking_step is
2783 being called by any of the worker threads.
2784 Examples include the concurrent marking code (CMMarkingTask),
2785 the MT remark code, and the MT reference processing closures.
2786
2787 *****************************************************************************/
2788
// The core marking step; see the large comment block above for the full
// protocol description. Drains SATB buffers, scans claimed regions,
// drains the local queue and global stack, optionally steals from other
// tasks and enters the termination protocol. Aborts (setting
// has_aborted()) on timeout, overflow, yield requests or marking abort;
// callers invoke it in a loop until it completes without aborting.
void G1CMTask::do_marking_step(double time_target_ms,
                               bool do_termination,
                               bool is_serial) {
  assert(time_target_ms >= 1.0, "minimum granularity is 1ms");

  _start_cpu_time_ns = os::current_thread_cpu_time();

  // If do_stealing is true then do_marking_step will attempt to
  // steal work from the other G1CMTasks. It only makes sense to
  // enable stealing when the termination protocol is enabled
  // and do_marking_step() is not being called serially.
  bool do_stealing = do_termination && !is_serial;

  // Shrink the time target by the predicted overshoot of previous
  // steps so that on average we hit the requested target.
  G1Predictions const& predictor = _g1h->policy()->predictor();
  double diff_prediction_ms = predictor.predict_zero_bounded(&_marking_step_diff_ms);
  _time_target_ms = time_target_ms - diff_prediction_ms;

  // set up the variables that are used in the work-based scheme to
  // call the regular clock method
  _words_scanned = 0;
  _refs_reached = 0;
  recalculate_limits();

  // clear all flags
  clear_has_aborted();
  _has_timed_out = false;
  _draining_satb_buffers = false;

  ++_calls;

  // Set up the bitmap and oop closures. Anything that uses them is
  // eventually called from this method, so it is OK to allocate these
  // statically.
  G1CMBitMapClosure bitmap_closure(this, _cm);
  G1CMOopClosure cm_oop_closure(_g1h, this);
  set_cm_oop_closure(&cm_oop_closure);

  if (_cm->has_overflown()) {
    // This can happen if the mark stack overflows during a GC pause
    // and this task, after a yield point, restarts. We have to abort
    // as we need to get into the overflow protocol which happens
    // right at the end of this task.
    set_has_aborted();
  }

  // First drain any available SATB buffers. After this, we will not
  // look at SATB buffers before the next invocation of this method.
  // If enough completed SATB buffers are queued up, the regular clock
  // will abort this task so that it restarts.
  drain_satb_buffers();
  // ...then partially drain the local queue and the global stack
  drain_local_queue(true);
  drain_global_stack(true);

  // Main work loop: scan the current region, keep the queues short,
  // then claim the next region, until none remain or we abort.
  do {
    process_current_region(bitmap_closure);
    // At this point we have either completed iterating over the
    // region we were holding on to, or we have aborted.

    // We then partially drain the local queue and the global stack.
    drain_local_queue(true);
    drain_global_stack(true);

    claim_new_region();

    assert(has_aborted() || _curr_region != nullptr || _cm->out_of_regions(),
           "at this point we should be out of regions");
  } while ( _curr_region != nullptr && !has_aborted());

  // We cannot check whether the global stack is empty, since other
  // tasks might be pushing objects to it concurrently.
  assert(has_aborted() || _cm->out_of_regions(),
         "at this point we should be out of regions");
  // Try to reduce the number of available SATB buffers so that
  // remark has less work to do.
  drain_satb_buffers();

  // Since we've done everything else, we can now totally drain the
  // local queue and global stack.
  drain_local_queue(false);
  drain_global_stack(false);

  // Attempt at work stealing from other task's queues.
  if (do_stealing && !has_aborted()) {
    // We have not aborted. This means that we have finished all that
    // we could. Let's try to do some stealing...
    attempt_stealing();
  }

  // We still haven't aborted. Now, let's try to get into the
  // termination protocol.
  if (do_termination && !has_aborted()) {
    attempt_termination(is_serial);
  }

  // Mainly for debugging purposes to make sure that a pointer to the
  // closure which was statically allocated in this frame doesn't
  // escape it by accident.
  set_cm_oop_closure(nullptr);
  jlong end_cpu_time_ns = os::current_thread_cpu_time();
  double elapsed_time_ms = (double)(end_cpu_time_ns - _start_cpu_time_ns) / NANOSECS_PER_MILLISEC;
  // Update the step history.
  _step_times_ms.add(elapsed_time_ms);

  if (has_aborted()) {
    // The task was aborted for some reason.
    handle_abort(is_serial, elapsed_time_ms);
  }
}
2898
// Constructs a marking task for the given worker. All per-step state
// (current region, fingers, scan/ref counters, flags) starts cleared;
// it is (re)initialized by do_marking_step().
G1CMTask::G1CMTask(uint worker_id,
                   G1ConcurrentMark* cm,
                   G1CMTaskQueue* task_queue,
                   G1RegionMarkStats* mark_stats) :
  _worker_id(worker_id),
  _g1h(G1CollectedHeap::heap()),
  _cm(cm),
  _mark_bitmap(nullptr),
  _task_queue(task_queue),
  _partial_array_splitter(_cm->partial_array_state_manager(), _cm->max_num_tasks(), ObjArrayMarkingStride),
  _mark_stats_cache(mark_stats, G1RegionMarkStatsCache::RegionMarkStatsCacheSize),
  _calls(0),
  _time_target_ms(0.0),
  _start_cpu_time_ns(0),
  _cm_oop_closure(nullptr),
  _curr_region(nullptr),
  _finger(nullptr),
  _region_limit(nullptr),
  _words_scanned(0),
  _words_scanned_limit(0),
  _real_words_scanned_limit(0),
  _refs_reached(0),
  _refs_reached_limit(0),
  _real_refs_reached_limit(0),
  _has_aborted(false),
  _has_timed_out(false),
  _draining_satb_buffers(false),
  _step_times_ms(),
  _elapsed_time_ms(0.0),
  _termination_time_ms(0.0),
  _marking_step_diff_ms()
{
  guarantee(task_queue != nullptr, "invariant");

  // Seed the step-overshoot statistics so the very first prediction in
  // do_marking_step() is non-zero (0.5 ms of slack).
  _marking_step_diff_ms.add(0.5);
}
2935
// These are formatting macros that are used below to ensure
// consistent formatting. The *_H_* versions are used to format the
// header for a particular value and they should be kept consistent
// with the corresponding macro. Also note that most of the macros add
// the necessary white space (as a prefix) which makes them a bit
// easier to compose.

// All the output lines are prefixed with this string to be able to
// identify them easily in a large log file.
#define G1PPRL_LINE_PREFIX            "###"

// An address range, e.g. "0x...-0x...".
#define G1PPRL_ADDR_BASE_FORMAT    " " PTR_FORMAT "-" PTR_FORMAT
// Header width must match the width of the rendered address range,
// which depends on pointer size.
#ifdef _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
#else // _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
#endif // _LP64

// For per-region info
#define G1PPRL_TYPE_FORMAT             " %-4s"
#define G1PPRL_TYPE_H_FORMAT           " %4s"
#define G1PPRL_STATE_FORMAT            " %-5s"
#define G1PPRL_STATE_H_FORMAT          " %5s"
#define G1PPRL_BYTE_FORMAT             " %9zu"
#define G1PPRL_BYTE_H_FORMAT           " %9s"
#define G1PPRL_DOUBLE_FORMAT           "%14.1f"
#define G1PPRL_GCEFF_H_FORMAT          " %14s"
#define G1PPRL_GID_H_FORMAT            " %9s"
#define G1PPRL_GID_FORMAT              " " UINT32_FORMAT_W(9)
#define G1PPRL_LEN_FORMAT              " " UINT32_FORMAT_W(14)
#define G1PPRL_LEN_H_FORMAT            " %14s"
#define G1PPRL_GID_GCEFF_FORMAT        " %14.1f"
#define G1PPRL_GID_LIVENESS_FORMAT     " %9.2f"

// For summary info
#define G1PPRL_SUM_ADDR_FORMAT(tag)    "  " tag ":" G1PPRL_ADDR_BASE_FORMAT
#define G1PPRL_SUM_BYTE_FORMAT(tag)    "  " tag ": %zu"
#define G1PPRL_SUM_MB_FORMAT(tag)      "  " tag ": %1.2f MB"
#define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"
2975
// Prints the header of the per-region liveness report (phase name, heap
// address range, region size, and the per-region column headers). Does
// nothing unless gc+liveness trace logging is enabled; the totals are
// accumulated by do_heap_region() and printed by the destructor.
G1PrintRegionLivenessInfoClosure::G1PrintRegionLivenessInfoClosure(const char* phase_name) :
  _total_used_bytes(0),
  _total_capacity_bytes(0),
  _total_live_bytes(0),
  _total_remset_bytes(0),
  _total_code_roots_bytes(0)
{
  if (!log_is_enabled(Trace, gc, liveness)) {
    return;
  }

  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  MemRegion reserved = g1h->reserved();
  double now = os::elapsedTime();

  // Print the header of the output.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP"
                          G1PPRL_SUM_ADDR_FORMAT("reserved")
                          G1PPRL_SUM_BYTE_FORMAT("region-size"),
                          p2i(reserved.start()), p2i(reserved.end()),
                          G1HeapRegion::GrainBytes);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
  // Column titles...
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          G1PPRL_TYPE_H_FORMAT
                          G1PPRL_ADDR_BASE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_STATE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_GID_H_FORMAT,
                          "type", "address-range",
                          "used", "live",
                          "state", "code-roots",
                          "group-id");
  // ...and their units on a second line.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          G1PPRL_TYPE_H_FORMAT
                          G1PPRL_ADDR_BASE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_STATE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_GID_H_FORMAT,
                          "", "",
                          "(bytes)", "(bytes)",
                          "", "(bytes)", "");
}
3023
3024 bool G1PrintRegionLivenessInfoClosure::do_heap_region(G1HeapRegion* r) {
3025 if (!log_is_enabled(Trace, gc, liveness)) {
3026 return false;
3027 }
3028
3029 const char* type = r->get_type_str();
3030 HeapWord* bottom = r->bottom();
3031 HeapWord* end = r->end();
3032 size_t capacity_bytes = r->capacity();
3033 size_t used_bytes = r->used();
3034 size_t live_bytes = r->live_bytes();
3035 size_t remset_bytes = r->rem_set()->mem_size();
3036 size_t code_roots_bytes = r->rem_set()->code_roots_mem_size();
3037 const char* remset_type = r->rem_set()->get_short_state_str();
3038 uint cset_group_id = r->rem_set()->has_cset_group()
3039 ? r->rem_set()->cset_group_id()
3040 : G1CSetCandidateGroup::NoRemSetId;
3041
3042 _total_used_bytes += used_bytes;
3043 _total_capacity_bytes += capacity_bytes;
3044 _total_live_bytes += live_bytes;
3045 _total_remset_bytes += remset_bytes;
3046 _total_code_roots_bytes += code_roots_bytes;
3047
3048 // Print a line for this particular region.
3049 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3050 G1PPRL_TYPE_FORMAT
3051 G1PPRL_ADDR_BASE_FORMAT
3052 G1PPRL_BYTE_FORMAT
3053 G1PPRL_BYTE_FORMAT
3054 G1PPRL_STATE_FORMAT
3055 G1PPRL_BYTE_FORMAT
3056 G1PPRL_GID_FORMAT,
3057 type, p2i(bottom), p2i(end),
3058 used_bytes, live_bytes,
3059 remset_type, code_roots_bytes,
3060 cset_group_id);
3061
3062 return false;
3063 }
3064
// Prints the candidate group table and the summary footer (capacity,
// used, live, remset and code-root totals accumulated during the heap
// iteration). Does nothing unless gc+liveness trace logging is enabled.
G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
  if (!log_is_enabled(Trace, gc, liveness)) {
    return;
  }

  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  _total_remset_bytes += g1h->card_set_freelist_pool()->mem_size();
  // add static memory usages to remembered set sizes
  _total_remset_bytes += G1HeapRegionRemSet::static_mem_size();

  // Note: this also adds the groups' card set sizes to _total_remset_bytes,
  // so it must run before the summary below is printed.
  log_cset_candidate_groups();

  // Print the footer of the output.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          " SUMMARY"
                          G1PPRL_SUM_MB_FORMAT("capacity")
                          G1PPRL_SUM_MB_PERC_FORMAT("used")
                          G1PPRL_SUM_MB_PERC_FORMAT("live")
                          G1PPRL_SUM_MB_FORMAT("remset")
                          G1PPRL_SUM_MB_FORMAT("code-roots"),
                          bytes_to_mb(_total_capacity_bytes),
                          bytes_to_mb(_total_used_bytes),
                          percent_of(_total_used_bytes, _total_capacity_bytes),
                          bytes_to_mb(_total_live_bytes),
                          percent_of(_total_live_bytes, _total_capacity_bytes),
                          bytes_to_mb(_total_remset_bytes),
                          bytes_to_mb(_total_code_roots_bytes));
}
3094
3095 void G1PrintRegionLivenessInfoClosure::log_cset_candidate_group_add_total(G1CSetCandidateGroup* group, const char* type) {
3096 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3097 G1PPRL_GID_FORMAT
3098 G1PPRL_LEN_FORMAT
3099 G1PPRL_GID_GCEFF_FORMAT
3100 G1PPRL_GID_LIVENESS_FORMAT
3101 G1PPRL_BYTE_FORMAT
3102 G1PPRL_TYPE_H_FORMAT,
3103 group->group_id(),
3104 group->length(),
3105 group->length() > 0 ? group->gc_efficiency() : 0.0,
3106 group->length() > 0 ? group->liveness_percent() : 0.0,
3107 group->card_set()->mem_size(),
3108 type);
3109 _total_remset_bytes += group->card_set()->mem_size();
3110 }
3111
3112 void G1PrintRegionLivenessInfoClosure::log_cset_candidate_grouplist(G1CSetCandidateGroupList& gl, const char* type) {
3113 for (G1CSetCandidateGroup* group : gl) {
3114 log_cset_candidate_group_add_total(group, type);
3115 }
3116 }
3117
3118 void G1PrintRegionLivenessInfoClosure::log_cset_candidate_groups() {
3119 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3120 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" Collection Set Candidate Groups");
3121 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX " Types: Y=Young, M=From Marking Regions, R=Retained Regions");
3122 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3123 G1PPRL_GID_H_FORMAT
3124 G1PPRL_LEN_H_FORMAT
3125 G1PPRL_GCEFF_H_FORMAT
3126 G1PPRL_BYTE_H_FORMAT
3127 G1PPRL_BYTE_H_FORMAT
3128 G1PPRL_TYPE_H_FORMAT,
3129 "groud-id", "num-regions",
3130 "gc-eff", "liveness",
3131 "remset", "type");
3132
3133 log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3134 G1PPRL_GID_H_FORMAT
3135 G1PPRL_LEN_H_FORMAT
3136 G1PPRL_GCEFF_H_FORMAT
3137 G1PPRL_BYTE_H_FORMAT
3138 G1PPRL_BYTE_H_FORMAT
3139 G1PPRL_TYPE_H_FORMAT,
3140 "", "",
3141 "(bytes/ms)", "%",
3142 "(bytes)", "");
3143
3144 G1CollectedHeap* g1h = G1CollectedHeap::heap();
3145
3146 log_cset_candidate_group_add_total(g1h->young_regions_cset_group(), "Y");
3147
3148 G1CollectionSetCandidates* candidates = g1h->policy()->candidates();
3149 log_cset_candidate_grouplist(candidates->from_marking_groups(), "M");
3150 log_cset_candidate_grouplist(candidates->retained_groups(), "R");
3151 }