1 /*
   2  * Copyright (c) 2001, 2026, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "classfile/classLoaderData.hpp"
  26 #include "classfile/classLoaderDataGraph.hpp"
  27 #include "cppstdlib/new.hpp"
  28 #include "gc/g1/g1BarrierSet.hpp"
  29 #include "gc/g1/g1BatchedTask.hpp"
  30 #include "gc/g1/g1CardSetMemory.hpp"
  31 #include "gc/g1/g1CardTableClaimTable.inline.hpp"
  32 #include "gc/g1/g1CollectedHeap.inline.hpp"
  33 #include "gc/g1/g1CollectorState.inline.hpp"
  34 #include "gc/g1/g1ConcurrentMark.inline.hpp"
  35 #include "gc/g1/g1ConcurrentMarkRemarkTasks.hpp"
  36 #include "gc/g1/g1ConcurrentMarkThread.inline.hpp"
  37 #include "gc/g1/g1ConcurrentRebuildAndScrub.hpp"
  38 #include "gc/g1/g1ConcurrentRefine.hpp"
  39 #include "gc/g1/g1HeapRegion.inline.hpp"
  40 #include "gc/g1/g1HeapRegionManager.hpp"
  41 #include "gc/g1/g1HeapRegionPrinter.hpp"
  42 #include "gc/g1/g1HeapRegionRemSet.inline.hpp"
  43 #include "gc/g1/g1HeapRegionSet.inline.hpp"
  44 #include "gc/g1/g1HeapVerifier.hpp"
  45 #include "gc/g1/g1OopClosures.inline.hpp"
  46 #include "gc/g1/g1Policy.hpp"
  47 #include "gc/g1/g1RegionMarkStatsCache.inline.hpp"
  48 #include "gc/g1/g1ThreadLocalData.hpp"
  49 #include "gc/g1/g1Trace.hpp"
  50 #include "gc/shared/gcId.hpp"
  51 #include "gc/shared/gcTimer.hpp"
  52 #include "gc/shared/gcTraceTime.inline.hpp"
  53 #include "gc/shared/gcVMOperations.hpp"
  54 #include "gc/shared/partialArraySplitter.inline.hpp"
  55 #include "gc/shared/partialArrayState.hpp"
  56 #include "gc/shared/partialArrayTaskStats.hpp"
  57 #include "gc/shared/referencePolicy.hpp"
  58 #include "gc/shared/suspendibleThreadSet.hpp"
  59 #include "gc/shared/taskqueue.inline.hpp"
  60 #include "gc/shared/taskTerminator.hpp"
  61 #include "gc/shared/weakProcessor.inline.hpp"
  62 #include "gc/shared/workerPolicy.hpp"
  63 #include "jvm.h"
  64 #include "logging/log.hpp"
  65 #include "memory/allocation.hpp"
  66 #include "memory/iterator.hpp"
  67 #include "memory/metaspaceUtils.hpp"
  68 #include "memory/resourceArea.hpp"
  69 #include "memory/universe.hpp"
  70 #include "nmt/memTracker.hpp"
  71 #include "oops/access.inline.hpp"
  72 #include "oops/oop.inline.hpp"
  73 #include "oops/oopCast.inline.hpp"
  74 #include "runtime/globals_extension.hpp"
  75 #include "runtime/handles.inline.hpp"
  76 #include "runtime/java.hpp"
  77 #include "runtime/orderAccess.hpp"
  78 #include "runtime/os.hpp"
  79 #include "runtime/prefetch.inline.hpp"
  80 #include "runtime/threads.hpp"
  81 #include "utilities/align.hpp"
  82 #include "utilities/checkedCast.hpp"
  83 #include "utilities/formatBuffer.hpp"
  84 #include "utilities/growableArray.hpp"
  85 #include "utilities/powerOfTwo.hpp"
  86 
  87 G1CMIsAliveClosure::G1CMIsAliveClosure() : _cm(nullptr) { }
  88 
  89 G1CMIsAliveClosure::G1CMIsAliveClosure(G1ConcurrentMark* cm) : _cm(cm) {
  90   assert(cm != nullptr, "must be");
  91 }
  92 
  93 void G1CMIsAliveClosure::initialize(G1ConcurrentMark* cm) {
  94   assert(cm != nullptr, "must be");
  95   assert(_cm == nullptr, "double initialize");
  96   _cm = cm;
  97 }
  98 
  99 bool G1CMBitMapClosure::do_addr(HeapWord* const addr) {
 100   assert(addr < _cm->finger(), "invariant");
 101   assert(addr >= _task->finger(), "invariant");
 102 
 103   // We move that task's local finger along.
 104   _task->move_finger_to(addr);
 105 
 106   _task->process_entry(G1TaskQueueEntry(cast_to_oop(addr)), false /* stolen */);
 107   // we only partially drain the local queue and global stack
 108   _task->drain_local_queue(true);
 109   _task->drain_global_stack(true);
 110 
 111   // if the has_aborted flag has been raised, we need to bail out of
 112   // the iteration
 113   return !_task->has_aborted();
 114 }
 115 
 116 G1CMMarkStack::G1CMMarkStack() :
 117   _chunk_allocator() {
 118   set_empty();
 119 }
 120 
 121 size_t G1CMMarkStack::capacity_alignment() {
 122   return (size_t)lcm(os::vm_allocation_granularity(), sizeof(TaskQueueEntryChunk)) / sizeof(G1TaskQueueEntry);
 123 }
 124 
 125 bool G1CMMarkStack::initialize() {
 126   guarantee(_chunk_allocator.capacity() == 0, "G1CMMarkStack already initialized.");
 127 
 128   size_t initial_capacity = MarkStackSize;
 129   size_t max_capacity = MarkStackSizeMax;
 130 
 131   size_t const TaskEntryChunkSizeInVoidStar = sizeof(TaskQueueEntryChunk) / sizeof(G1TaskQueueEntry);
 132 
 133   size_t max_num_chunks = align_up(max_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
 134   size_t initial_num_chunks = align_up(initial_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
 135 
 136   initial_num_chunks = round_up_power_of_2(initial_num_chunks);
 137   max_num_chunks = MAX2(initial_num_chunks, max_num_chunks);
 138 
 139   size_t limit = (INT_MAX - 1);
 140   max_capacity = MIN2((max_num_chunks * TaskEntryChunkSizeInVoidStar), limit);
 141   initial_capacity = MIN2((initial_num_chunks * TaskEntryChunkSizeInVoidStar), limit);
 142 
 143   FLAG_SET_ERGO(MarkStackSizeMax, max_capacity);
 144   FLAG_SET_ERGO(MarkStackSize, initial_capacity);
 145 
 146   log_trace(gc)("MarkStackSize: %uk  MarkStackSizeMax: %uk", (uint)(MarkStackSize / K), (uint)(MarkStackSizeMax / K));
 147 
 148   log_debug(gc)("Initialize mark stack with %zu chunks, maximum %zu",
 149                 initial_num_chunks, max_capacity);
 150 
 151   return _chunk_allocator.initialize(initial_num_chunks, max_num_chunks);
 152 }
 153 
 154 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::ChunkAllocator::allocate_new_chunk() {
 155   if (_size.load_relaxed() >= _max_capacity) {
 156     return nullptr;
 157   }
 158 
 159   size_t cur_idx = _size.fetch_then_add(1u);
 160 
 161   if (cur_idx >= _max_capacity) {
 162     return nullptr;
 163   }
 164 
 165   size_t bucket = get_bucket(cur_idx);
 166   if (_buckets[bucket].load_acquire() == nullptr) {
 167     if (!_should_grow) {
 168       // Prefer to restart the CM.
 169       return nullptr;
 170     }
 171 
 172     MutexLocker x(G1MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
 173     if (_buckets[bucket].load_acquire() == nullptr) {
 174       size_t desired_capacity = bucket_size(bucket) * 2;
 175       if (!try_expand_to(desired_capacity)) {
 176         return nullptr;
 177       }
 178     }
 179   }
 180 
 181   size_t bucket_idx = get_bucket_index(cur_idx);
 182   TaskQueueEntryChunk* result = ::new (&_buckets[bucket].load_relaxed()[bucket_idx]) TaskQueueEntryChunk;
 183   result->next = nullptr;
 184   return result;
 185 }
 186 
 187 G1CMMarkStack::ChunkAllocator::ChunkAllocator() :
 188   _min_capacity(0),
 189   _max_capacity(0),
 190   _capacity(0),
 191   _num_buckets(0),
 192   _should_grow(false),
 193   _buckets(nullptr),
 194   _size(0)
 195 { }
 196 
 197 bool G1CMMarkStack::ChunkAllocator::initialize(size_t initial_capacity, size_t max_capacity) {
 198   guarantee(is_power_of_2(initial_capacity), "Invalid initial_capacity");
 199 
 200   _min_capacity = initial_capacity;
 201   _max_capacity = max_capacity;
 202   _num_buckets  = get_bucket(_max_capacity) + 1;
 203 
 204   _buckets = NEW_C_HEAP_ARRAY(Atomic<TaskQueueEntryChunk*>, _num_buckets, mtGC);
 205 
 206   for (size_t i = 0; i < _num_buckets; i++) {
 207     _buckets[i].store_relaxed(nullptr);
 208   }
 209 
 210   size_t new_capacity = bucket_size(0);
 211 
 212   if (!reserve(new_capacity)) {
 213     log_warning(gc)("Failed to reserve memory for new overflow mark stack with %zu chunks and size %zuB.", new_capacity, new_capacity * sizeof(TaskQueueEntryChunk));
 214     return false;
 215   }
 216   return true;
 217 }
 218 
 219 bool G1CMMarkStack::ChunkAllocator::try_expand_to(size_t desired_capacity) {
 220   if (_capacity == _max_capacity) {
 221     log_debug(gc)("Can not expand overflow mark stack further, already at maximum capacity of %zu chunks.", _capacity);
 222     return false;
 223   }
 224 
 225   size_t old_capacity = _capacity;
 226   desired_capacity = MIN2(desired_capacity, _max_capacity);
 227 
 228   if (reserve(desired_capacity)) {
 229     log_debug(gc)("Expanded the mark stack capacity from %zu to %zu chunks",
 230                   old_capacity, desired_capacity);
 231     return true;
 232   }
 233   return false;
 234 }
 235 
 236 bool G1CMMarkStack::ChunkAllocator::try_expand() {
 237   size_t new_capacity = _capacity * 2;
 238   return try_expand_to(new_capacity);
 239 }
 240 
 241 G1CMMarkStack::ChunkAllocator::~ChunkAllocator() {
 242   if (_buckets == nullptr) {
 243     return;
 244   }
 245 
 246   for (size_t i = 0; i < _num_buckets; i++) {
 247     if (_buckets[i].load_relaxed() != nullptr) {
 248       MmapArrayAllocator<TaskQueueEntryChunk>::free(_buckets[i].load_relaxed(),  bucket_size(i));
 249       _buckets[i].store_relaxed(nullptr);
 250     }
 251   }
 252 
 253   FREE_C_HEAP_ARRAY(_buckets);
 254 }
 255 
 256 bool G1CMMarkStack::ChunkAllocator::reserve(size_t new_capacity) {
 257   assert(new_capacity <= _max_capacity, "Cannot expand overflow mark stack beyond the max_capacity of %zu chunks.", _max_capacity);
 258 
 259   size_t highest_bucket = get_bucket(new_capacity - 1);
 260   size_t i = get_bucket(_capacity);
 261 
 262   // Allocate all buckets associated with indexes between the current capacity (_capacity)
 263   // and the new capacity (new_capacity). This step ensures that there are no gaps in the
 264   // array and that the capacity accurately reflects the reserved memory.
 265   for (; i <= highest_bucket; i++) {
 266     if (_buckets[i].load_acquire() != nullptr) {
 267       continue; // Skip over already allocated buckets.
 268     }
 269 
 270     size_t bucket_capacity = bucket_size(i);
 271 
 272     // Trim bucket size so that we do not exceed the _max_capacity.
 273     bucket_capacity = (_capacity + bucket_capacity) <= _max_capacity ?
 274                       bucket_capacity :
 275                       _max_capacity - _capacity;
 276 
 277 
 278     TaskQueueEntryChunk* bucket_base = MmapArrayAllocator<TaskQueueEntryChunk>::allocate_or_null(bucket_capacity, mtGC);
 279 
 280     if (bucket_base == nullptr) {
 281       log_warning(gc)("Failed to reserve memory for increasing the overflow mark stack capacity with %zu chunks and size %zuB.",
 282                       bucket_capacity, bucket_capacity * sizeof(TaskQueueEntryChunk));
 283       return false;
 284     }
 285     _capacity += bucket_capacity;
 286     _buckets[i].release_store(bucket_base);
 287   }
 288   return true;
 289 }
 290 
 291 void G1CMMarkStack::expand() {
 292   _chunk_allocator.try_expand();
 293 }
 294 
 295 void G1CMMarkStack::add_chunk_to_list(Atomic<TaskQueueEntryChunk*>* list, TaskQueueEntryChunk* elem) {
 296   elem->next = list->load_relaxed();
 297   list->store_relaxed(elem);
 298 }
 299 
 300 void G1CMMarkStack::add_chunk_to_chunk_list(TaskQueueEntryChunk* elem) {
 301   MutexLocker x(G1MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
 302   add_chunk_to_list(&_chunk_list, elem);
 303   _chunks_in_chunk_list++;
 304 }
 305 
 306 void G1CMMarkStack::add_chunk_to_free_list(TaskQueueEntryChunk* elem) {
 307   MutexLocker x(G1MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
 308   add_chunk_to_list(&_free_list, elem);
 309 }
 310 
 311 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_list(Atomic<TaskQueueEntryChunk*>* list) {
 312   TaskQueueEntryChunk* result = list->load_relaxed();
 313   if (result != nullptr) {
 314     list->store_relaxed(list->load_relaxed()->next);
 315   }
 316   return result;
 317 }
 318 
 319 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_chunk_list() {
 320   MutexLocker x(G1MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
 321   TaskQueueEntryChunk* result = remove_chunk_from_list(&_chunk_list);
 322   if (result != nullptr) {
 323     _chunks_in_chunk_list--;
 324   }
 325   return result;
 326 }
 327 
 328 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_free_list() {
 329   MutexLocker x(G1MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
 330   return remove_chunk_from_list(&_free_list);
 331 }
 332 
 333 bool G1CMMarkStack::par_push_chunk(G1TaskQueueEntry* ptr_arr) {
 334   // Get a new chunk.
 335   TaskQueueEntryChunk* new_chunk = remove_chunk_from_free_list();
 336 
 337   if (new_chunk == nullptr) {
 338     // Did not get a chunk from the free list. Allocate from backing memory.
 339     new_chunk = _chunk_allocator.allocate_new_chunk();
 340 
 341     if (new_chunk == nullptr) {
 342       return false;
 343     }
 344   }
 345 
 346   Copy::conjoint_memory_atomic(ptr_arr, new_chunk->data, EntriesPerChunk * sizeof(G1TaskQueueEntry));
 347 
 348   add_chunk_to_chunk_list(new_chunk);
 349 
 350   return true;
 351 }
 352 
 353 bool G1CMMarkStack::par_pop_chunk(G1TaskQueueEntry* ptr_arr) {
 354   TaskQueueEntryChunk* cur = remove_chunk_from_chunk_list();
 355 
 356   if (cur == nullptr) {
 357     return false;
 358   }
 359 
 360   Copy::conjoint_memory_atomic(cur->data, ptr_arr, EntriesPerChunk * sizeof(G1TaskQueueEntry));
 361 
 362   add_chunk_to_free_list(cur);
 363   return true;
 364 }
 365 
 366 void G1CMMarkStack::set_empty() {
 367   _chunks_in_chunk_list = 0;
 368   _chunk_list.store_relaxed(nullptr);
 369   _free_list.store_relaxed(nullptr);
 370   _chunk_allocator.reset();
 371 }
 372 
 373 G1CMRootMemRegions::G1CMRootMemRegions(uint const max_regions) :
 374     _root_regions(MemRegion::create_array(max_regions, mtGC)),
 375     _max_regions(max_regions),
 376     _num_regions(0),
 377     _num_claimed_regions(0) { }
 378 
 379 G1CMRootMemRegions::~G1CMRootMemRegions() {
 380   MemRegion::destroy_array(_root_regions, _max_regions);
 381 }
 382 
 383 void G1CMRootMemRegions::reset() {
 384   assert_at_safepoint();
 385   assert(G1CollectedHeap::heap()->collector_state()->is_in_concurrent_start_gc(), "must be");
 386 
 387   _num_regions.store_relaxed(0);
 388   _num_claimed_regions.store_relaxed(0);
 389 }
 390 
 391 void G1CMRootMemRegions::add(HeapWord* start, HeapWord* end) {
 392   assert_at_safepoint();
 393   uint idx = _num_regions.fetch_then_add(1u);
 394   assert(idx < _max_regions, "Trying to add more root MemRegions than there is space %u", _max_regions);
 395   assert(start != nullptr && end != nullptr && start <= end, "Start (" PTR_FORMAT ") should be less or equal to "
 396          "end (" PTR_FORMAT ")", p2i(start), p2i(end));
 397   _root_regions[idx].set_start(start);
 398   _root_regions[idx].set_end(end);
 399 }
 400 
 401 const MemRegion* G1CMRootMemRegions::claim_next() {
 402   uint local_num_regions = num_regions();
 403   if (num_claimed_regions() >= local_num_regions) {
 404     return nullptr;
 405   }
 406 
 407   uint claimed_index = _num_claimed_regions.fetch_then_add(1u);
 408   if (claimed_index < local_num_regions) {
 409     return &_root_regions[claimed_index];
 410   }
 411   return nullptr;
 412 }
 413 
 414 bool G1CMRootMemRegions::work_completed() const {
 415   return num_remaining_regions() == 0;
 416 }
 417 
 418 uint G1CMRootMemRegions::num_remaining_regions() const {
 419   uint total = num_regions();
 420   uint claimed = num_claimed_regions();
 421   return (total > claimed) ? total - claimed : 0;
 422 }
 423 
 424 bool G1CMRootMemRegions::contains(const MemRegion mr) const {
 425   uint local_num_root_regions = num_regions();
 426   for (uint i = 0; i < local_num_root_regions; i++) {
 427     if (_root_regions[i].equals(mr)) {
 428       return true;
 429     }
 430   }
 431   return false;
 432 }
 433 
 434 G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h,
 435                                    G1RegionToSpaceMapper* bitmap_storage) :
 436   _cm_thread(nullptr),
 437   _g1h(g1h),
 438 
 439   _mark_bitmap(),
 440 
 441   _heap(_g1h->reserved()),
 442 
 443   _root_regions(_g1h->max_num_regions()),
 444   _root_region_scan_aborted(false),
 445 
 446   _global_mark_stack(),
 447 
 448   _finger(nullptr), // _finger set in set_non_marking_state
 449 
 450   _worker_id_offset(G1ConcRefinementThreads), // The refinement control thread does not refine cards, so it's just the worker threads.
 451   _max_num_tasks(MAX2(ConcGCThreads, ParallelGCThreads)),
 452   _num_active_tasks(0), // _num_active_tasks set in set_non_marking_state()
 453   _tasks(nullptr),
 454   _task_queues(new G1CMTaskQueueSet(_max_num_tasks)),
 455   _terminator(_max_num_tasks, _task_queues),
 456   _partial_array_state_manager(new PartialArrayStateManager(_max_num_tasks)),
 457 
 458   _first_overflow_barrier_sync(),
 459   _second_overflow_barrier_sync(),
 460 
 461   _completed_mark_cycles(0),
 462   _has_overflown(false),
 463   _concurrent(false),
 464   _has_aborted(false),
 465   _restart_for_overflow(false),
 466   _gc_timer_cm(new ConcurrentGCTimer()),
 467   _gc_tracer_cm(new G1OldTracer()),
 468 
 469   // _verbose_level set below
 470 
 471   _remark_times(),
 472   _remark_mark_times(),
 473   _remark_weak_ref_times(),
 474   _cleanup_times(),
 475 
 476   _concurrent_workers(nullptr),
 477   _num_concurrent_workers(0),
 478   _max_concurrent_workers(0),
 479 
 480   _is_region_mark_stats_cache_in_use(false),
 481   _region_mark_stats(nullptr),
 482   _top_at_mark_starts(nullptr),
 483   _top_at_rebuild_starts(nullptr),
 484   _needs_remembered_set_rebuild(false)
 485 {
 486   assert(G1CGC_lock != nullptr, "CGC_lock must be initialized");
 487 
 488   _mark_bitmap.initialize(g1h->reserved(), bitmap_storage);
 489 }
 490 
 491 void G1ConcurrentMark::fully_initialize() {
 492   assert_at_safepoint();
 493 
 494   if (is_fully_initialized()) {
 495     return;
 496   }
 497 
 498   // Create & start ConcurrentMark thread.
 499   _cm_thread = new G1ConcurrentMarkThread(this);
 500   if (_cm_thread->osthread() == nullptr) {
 501     vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
 502   }
 503 
 504   log_debug(gc)("ConcGCThreads: %u offset %u", ConcGCThreads, _worker_id_offset);
 505   log_debug(gc)("ParallelGCThreads: %u", ParallelGCThreads);
 506 
 507   _max_concurrent_workers = ConcGCThreads;
 508 
 509   _concurrent_workers = new WorkerThreads("G1 Conc", _max_concurrent_workers);
 510   _concurrent_workers->initialize_workers();
 511   _num_concurrent_workers = _concurrent_workers->active_workers();
 512 
 513   if (!_global_mark_stack.initialize()) {
 514     vm_exit_during_initialization("Failed to allocate initial concurrent mark overflow mark stack.");
 515   }
 516 
 517   _region_mark_stats = NEW_C_HEAP_ARRAY(G1RegionMarkStats, _g1h->max_num_regions(), mtGC);
 518   _top_at_mark_starts = NEW_C_HEAP_ARRAY(Atomic<HeapWord*>, _g1h->max_num_regions(), mtGC);
 519   _top_at_rebuild_starts = NEW_C_HEAP_ARRAY(Atomic<HeapWord*>, _g1h->max_num_regions(), mtGC);
 520 
 521   _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_num_tasks, mtGC);
 522 
 523   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 524   _num_active_tasks = _max_num_tasks;
 525 
 526   for (uint i = 0; i < _max_num_tasks; ++i) {
 527     G1CMTaskQueue* task_queue = new G1CMTaskQueue();
 528     _task_queues->register_queue(i, task_queue);
 529 
 530     _tasks[i] = new G1CMTask(i, this, task_queue, _region_mark_stats);
 531   }
 532 
 533   uint max_num_regions = _g1h->max_num_regions();
 534   ::new (_region_mark_stats) G1RegionMarkStats[max_num_regions]{};
 535   for (uint i = 0; i < max_num_regions; i++) {
 536     ::new (&_top_at_mark_starts[i]) Atomic<HeapWord*>(_g1h->bottom_addr_for_region(i));
 537   }
 538   ::new (_top_at_rebuild_starts) Atomic<HeapWord*>[max_num_regions]{};
 539 
 540   reset_at_marking_complete();
 541 }
 542 
 543 bool G1ConcurrentMark::is_in_concurrent_cycle() const {
 544   return _cm_thread->is_in_progress();
 545 }
 546 
 547 bool G1ConcurrentMark::is_in_marking() const {
 548   return _cm_thread->is_in_marking();
 549 }
 550 
 551 bool G1ConcurrentMark::is_in_marking_or_rebuild() const {
 552   return _cm_thread->is_in_marking_or_rebuild();
 553 }
 554 
 555 bool G1ConcurrentMark::is_in_reset_for_next_cycle() const {
 556   return cm_thread()->is_in_reset_for_next_cycle();
 557 }
 558 
 559 PartialArrayStateManager* G1ConcurrentMark::partial_array_state_manager() const {
 560   return _partial_array_state_manager;
 561 }
 562 
 563 G1ConcurrentMarkThread* G1ConcurrentMark::cm_thread() const {
 564   assert(is_fully_initialized(), "must be");
 565   return _cm_thread;
 566 }
 567 
 568 void G1ConcurrentMark::reset() {
 569   assert_fully_initialized();
 570 
 571   _has_aborted.store_relaxed(false);
 572 
 573   _is_region_mark_stats_cache_in_use = true;
 574   reset_marking_for_restart();
 575 
 576   // Reset all tasks, since different phases will use different number of active
 577   // threads. So, it's easiest to have all of them ready.
 578   for (uint i = 0; i < _max_num_tasks; ++i) {
 579     _tasks[i]->reset(mark_bitmap());
 580   }
 581 
 582   uint max_num_regions = _g1h->max_num_regions();
 583   ::new (_top_at_rebuild_starts) Atomic<HeapWord*>[max_num_regions]{};
 584   for (uint i = 0; i < max_num_regions; i++) {
 585     // Do not update TAMS here. NoteStartOfMarkTask updates this in parallel in
 586     // the pre-concurrent-start WorkerTask.
 587     _top_at_rebuild_starts[i].store_relaxed(nullptr);
 588     _region_mark_stats[i].clear();
 589   }
 590 
 591   _root_region_scan_aborted.store_relaxed(false);
 592   _root_regions.reset();
 593 }
 594 
 595 void G1ConcurrentMark::assert_statistics_clear(G1HeapRegion* r) {
 596   assert_fully_initialized();
 597 #ifdef ASSERT
 598   uint region_idx = r->hrm_index();
 599   for (uint j = 0; j < _max_num_tasks; ++j) {
 600     _tasks[j]->verify_no_mark_stats_for(r->hrm_index());
 601   }
 602 
 603   assert(_top_at_rebuild_starts[region_idx].load_relaxed() == nullptr, "must be");
 604 
 605   G1RegionMarkStats* s = &_region_mark_stats[region_idx];
 606   assert(s->incoming_refs() == 0, "must be");
 607   assert(s->live_words() == 0, "must be");
 608 #endif
 609 }
 610 
 611 void G1ConcurrentMark::note_start_of_mark_for_region(G1HeapRegion* r) {
 612   assert_at_safepoint();
 613   assert_fully_initialized();
 614   if (r->is_old_or_humongous() && !r->is_collection_set_candidate() && !r->in_collection_set()) {
 615     update_top_at_mark_start(r);
 616   } else {
 617     set_top_at_mark_start_to_bottom(r);
 618   }
 619 }
 620 
 621 void G1ConcurrentMark::notify_new_region(G1HeapRegion* r, size_t marked_live_bytes_below_tams) {
 622   assert_at_safepoint();
 623   if (!is_fully_initialized()) {
 624     return;
 625   }
 626   G1CollectorState* state = _g1h->collector_state();
 627   if (state->is_in_concurrent_start_gc()) {
 628     update_top_at_mark_start(r);
 629     set_live_bytes(r->hrm_index(), marked_live_bytes_below_tams);
 630   }
 631 }
 632 
 633 void G1ConcurrentMark::reset_region_marking_state(G1HeapRegion* r) {
 634   assert_at_safepoint();
 635   if (!is_fully_initialized()) {
 636     return;
 637   }
 638   uint region_idx = r->hrm_index();
 639   // Only need to clear the stats cache for the given region if we are using the cache.
 640   if (_is_region_mark_stats_cache_in_use) {
 641     for (uint j = 0; j < _max_num_tasks; ++j) {
 642       _tasks[j]->clear_mark_stats_cache(region_idx);
 643     }
 644   } else {
 645     for (uint j = 0; j < _max_num_tasks; ++j) {
 646       _tasks[j]->verify_no_mark_stats_for(region_idx);
 647     }
 648   }
 649   set_top_at_mark_start_to_bottom(r);
 650   _top_at_rebuild_starts[region_idx].store_relaxed(nullptr);
 651   _region_mark_stats[region_idx].clear();
 652 }
 653 
 654 void G1ConcurrentMark::humongous_object_eagerly_reclaimed(G1HeapRegion* r) {
 655   assert_at_safepoint();
 656   assert(r->is_starts_humongous(), "Got humongous continues region here");
 657 
 658   // Need to clear mark bit of the humongous object. Doing this unconditionally is fine.
 659   mark_bitmap()->clear(r->bottom());
 660 }
 661 
 662 void G1ConcurrentMark::reset_marking_for_restart() {
 663   assert_fully_initialized();
 664 
 665   _global_mark_stack.set_empty();
 666 
 667   // Expand the marking stack, if we have to and if we can.
 668   if (has_overflown()) {
 669     _global_mark_stack.expand();
 670 
 671     uint max_num_regions = _g1h->max_num_regions();
 672     for (uint i = 0; i < max_num_regions; i++) {
 673       _region_mark_stats[i].clear_during_overflow();
 674     }
 675   }
 676 
 677   clear_has_overflown();
 678   _finger.store_relaxed(_heap.start());
 679 
 680   for (uint i = 0; i < _max_num_tasks; ++i) {
 681     _tasks[i]->reset_for_restart();
 682   }
 683 }
 684 
 685 void G1ConcurrentMark::set_concurrency(uint active_tasks) {
 686   assert(active_tasks <= _max_num_tasks, "we should not have more");
 687 
 688   _num_active_tasks = active_tasks;
 689   // Need to update the three data structures below according to the
 690   // number of active threads for this phase.
 691   _terminator.reset_for_reuse(active_tasks);
 692   _first_overflow_barrier_sync.set_n_workers(active_tasks);
 693   _second_overflow_barrier_sync.set_n_workers(active_tasks);
 694 }
 695 
 696 void G1ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
 697   set_concurrency(active_tasks);
 698 
 699   _concurrent.store_relaxed(concurrent);
 700 
 701   if (!concurrent) {
 702     // At this point we should be in a STW phase, and completed marking.
 703     assert_at_safepoint_on_vm_thread();
 704     assert(out_of_regions(),
 705            "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
 706            p2i(finger()), p2i(_heap.end()));
 707   }
 708 }
 709 
 710 #if TASKQUEUE_STATS
 711 void G1ConcurrentMark::print_and_reset_taskqueue_stats() {
 712 
 713   _task_queues->print_and_reset_taskqueue_stats("Concurrent Mark");
 714 
 715   auto get_pa_stats = [&](uint i) {
 716     return _tasks[i]->partial_array_task_stats();
 717   };
 718 
 719   PartialArrayTaskStats::log_set(_max_num_tasks, get_pa_stats,
 720                                  "Concurrent Mark Partial Array");
 721 
 722   for (uint i = 0; i < _max_num_tasks; ++i) {
 723     get_pa_stats(i)->reset();
 724   }
 725 }
 726 #endif
 727 
 728 void G1ConcurrentMark::reset_at_marking_complete() {
 729   TASKQUEUE_STATS_ONLY(print_and_reset_taskqueue_stats());
 730   // We set the global marking state to some default values when we're
 731   // not doing marking.
 732   reset_marking_for_restart();
 733   _num_active_tasks = 0;
 734 }
 735 
 736 G1ConcurrentMark::~G1ConcurrentMark() {
 737   FREE_C_HEAP_ARRAY(_top_at_mark_starts);
 738   FREE_C_HEAP_ARRAY(_top_at_rebuild_starts);
 739   FREE_C_HEAP_ARRAY(_region_mark_stats);
 740   // The G1ConcurrentMark instance is never freed.
 741   ShouldNotReachHere();
 742 }
 743 
 744 class G1ClearBitMapTask : public WorkerTask {
 745 public:
 746   static size_t chunk_size() { return M; }
 747 
 748 private:
 749   // Heap region closure used for clearing the _mark_bitmap.
 750   class G1ClearBitmapHRClosure : public G1HeapRegionClosure {
 751     G1ConcurrentMark* _cm;
 752     G1CMBitMap* _bitmap;
 753     bool _suspendible; // If suspendible, do yield checks.
 754 
 755     bool suspendible() {
 756       return _suspendible;
 757     }
 758 
 759     bool is_clear_concurrent_undo() {
 760       return suspendible() && _cm->cm_thread()->is_in_undo_cycle();
 761     }
 762 
 763     bool has_aborted() {
 764       if (suspendible()) {
 765         _cm->do_yield_check();
 766         return _cm->has_aborted();
 767       }
 768       return false;
 769     }
 770 
 771     HeapWord* region_clear_limit(G1HeapRegion* r) {
 772       // A garbage collection might have made the region unavailable after a yield during
 773       // clearing. Just return bottom as the limit, causing the clearing for this region to end.
 774       if (G1CollectedHeap::heap()->region_at_or_null(r->hrm_index()) == nullptr) {
 775         return r->bottom();
 776       }
 777       // During a Concurrent Undo Mark cycle, the per region top_at_mark_start and
 778       // live_words data are current wrt to the _mark_bitmap. We use this information
 779       // to only clear ranges of the bitmap that require clearing.
 780       if (is_clear_concurrent_undo()) {
 781         // No need to clear bitmaps for empty regions (which includes regions we
 782         // did not mark through).
 783         if (!_cm->contains_live_object(r->hrm_index())) {
 784           assert(_bitmap->get_next_marked_addr(r->bottom(), r->end()) == r->end(), "Should not have marked bits");
 785           return r->bottom();
 786         }
 787       }
 788       return r->end();
 789     }
 790 
 791   public:
 792     G1ClearBitmapHRClosure(G1ConcurrentMark* cm, bool suspendible) :
 793       G1HeapRegionClosure(),
 794       _cm(cm),
 795       _bitmap(cm->mark_bitmap()),
 796       _suspendible(suspendible)
 797     { }
 798 
 799     virtual bool do_heap_region(G1HeapRegion* r) {
 800       if (has_aborted()) {
 801         return true;
 802       }
 803 
 804       HeapWord* cur = r->bottom();
 805       HeapWord* end = region_clear_limit(r);
 806 
 807       size_t const chunk_size_in_words = G1ClearBitMapTask::chunk_size() / HeapWordSize;
 808 
 809       while (cur < end) {
 810 
 811         MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
 812         _bitmap->clear_range(mr);
 813 
 814         cur += chunk_size_in_words;
 815 
 816         // Repeat the asserts from before the start of the closure. We will do them
 817         // as asserts here to minimize their overhead on the product. However, we
 818         // will have them as guarantees at the beginning / end of the bitmap
 819         // clearing to get some checking in the product.
 820         assert(!suspendible() || _cm->is_in_reset_for_next_cycle(), "invariant");
 821 
 822         // Abort iteration if necessary.
 823         if (suspendible() && _cm->do_yield_check()) {
 824           if (_cm->has_aborted()) {
 825             return true;
 826           }
 827           // Re-read end. The region might have been uncommitted.
 828           end = region_clear_limit(r);
 829         }
 830       }
 831       assert(cur >= end, "Must have completed iteration over the bitmap for region %u.", r->hrm_index());
 832 
 833       return false;
 834     }
 835   };
 836 
 837   G1ClearBitmapHRClosure _cl;
 838   G1HeapRegionClaimer _hr_claimer;
 839   bool _suspendible; // If the task is suspendible, workers must join the STS.
 840 
 841 public:
 842   G1ClearBitMapTask(G1ConcurrentMark* cm, uint n_workers, bool suspendible) :
 843     WorkerTask("G1 Clear Bitmap"),
 844     _cl(cm, suspendible),
 845     _hr_claimer(n_workers),
 846     _suspendible(suspendible)
 847   { }
 848 
 849   void work(uint worker_id) {
 850     SuspendibleThreadSetJoiner sts_join(_suspendible);
 851     G1CollectedHeap::heap()->heap_region_par_iterate_from_worker_offset(&_cl, &_hr_claimer, worker_id);
 852   }
 853 };
 854 
 855 void G1ConcurrentMark::clear_bitmap(WorkerThreads* workers, bool may_yield) {
 856   assert(may_yield || SafepointSynchronize::is_at_safepoint(), "Non-yielding bitmap clear only allowed at safepoint.");
 857 
 858   size_t const num_bytes_to_clear = (G1HeapRegion::GrainBytes * _g1h->num_committed_regions()) / G1CMBitMap::heap_map_factor();
 859   size_t const num_chunks = align_up(num_bytes_to_clear, G1ClearBitMapTask::chunk_size()) / G1ClearBitMapTask::chunk_size();
 860 
 861   uint const num_workers = (uint)MIN2(num_chunks, (size_t)workers->active_workers());
 862 
 863   G1ClearBitMapTask cl(this, num_workers, may_yield);
 864 
 865   log_debug(gc, ergo)("Running %s with %u workers for %zu work units.", cl.name(), num_workers, num_chunks);
 866   workers->run_task(&cl, num_workers);
 867 }
 868 
 869 void G1ConcurrentMark::cleanup_for_next_mark() {
 870   // Make sure that the concurrent mark thread looks to still be in
 871   // the current cycle.
 872   guarantee(is_in_reset_for_next_cycle(), "invariant");
 873 
 874   clear_bitmap(_concurrent_workers, true);
 875 
 876   reset_partial_array_state_manager();
 877 
 878   // Should not have changed state yet (even if a Full GC interrupted us).
 879   guarantee(is_in_reset_for_next_cycle(), "invariant");
 880 }
 881 
 882 void G1ConcurrentMark::reset_partial_array_state_manager() {
 883   for (uint i = 0; i < _max_num_tasks; ++i) {
 884     _tasks[i]->unregister_partial_array_splitter();
 885   }
 886 
 887   partial_array_state_manager()->reset();
 888 
 889   for (uint i = 0; i < _max_num_tasks; ++i) {
 890     _tasks[i]->register_partial_array_splitter();
 891   }
 892 }
 893 
 894 void G1ConcurrentMark::clear_bitmap(WorkerThreads* workers) {
 895   assert_at_safepoint_on_vm_thread();
 896   // To avoid fragmentation the full collection requesting to clear the bitmap
 897   // might use fewer workers than available. To ensure the bitmap is cleared
 898   // as efficiently as possible the number of active workers are temporarily
 899   // increased to include all currently created workers.
 900   WithActiveWorkers update(workers, workers->created_workers());
 901   clear_bitmap(workers, false);
 902 }
 903 
 904 class G1PreConcurrentStartTask : public G1BatchedTask {
 905   // Reset marking state.
 906   class ResetMarkingStateTask;
 907   // For each region note start of marking.
 908   class NoteStartOfMarkTask;
 909 
 910 public:
 911   G1PreConcurrentStartTask(GCCause::Cause cause, G1ConcurrentMark* cm);
 912 };
 913 
 914 class G1PreConcurrentStartTask::ResetMarkingStateTask : public G1AbstractSubTask {
 915   G1ConcurrentMark* _cm;
 916 public:
 917   ResetMarkingStateTask(G1ConcurrentMark* cm) : G1AbstractSubTask(G1GCPhaseTimes::ResetMarkingState), _cm(cm) { }
 918 
 919   double worker_cost() const override { return 1.0; }
 920   void do_work(uint worker_id) override;
 921 };
 922 
 923 class G1PreConcurrentStartTask::NoteStartOfMarkTask : public G1AbstractSubTask {
 924 
 925   class NoteStartOfMarkHRClosure : public G1HeapRegionClosure {
 926     G1ConcurrentMark* _cm;
 927 
 928   public:
 929     NoteStartOfMarkHRClosure() : G1HeapRegionClosure(), _cm(G1CollectedHeap::heap()->concurrent_mark()) { }
 930 
 931     bool do_heap_region(G1HeapRegion* r) override {
 932       _cm->note_start_of_mark_for_region(r);
 933       return false;
 934     }
 935   } _region_cl;
 936 
 937   G1HeapRegionClaimer _claimer;
 938 public:
 939   NoteStartOfMarkTask() : G1AbstractSubTask(G1GCPhaseTimes::NoteStartOfMark), _region_cl(), _claimer(0) { }
 940 
 941   double worker_cost() const override {
 942     // The work done per region is very small, therefore we choose this magic number to cap the number
 943     // of threads used when there are few regions.
 944     const double regions_per_thread = 1000;
 945     return _claimer.n_regions() / regions_per_thread;
 946   }
 947 
 948   void set_max_workers(uint max_workers) override {
 949     _claimer.set_n_workers(max_workers);
 950   }
 951 
 952   void do_work(uint worker_id) override {
 953     G1CollectedHeap::heap()->heap_region_par_iterate_from_worker_offset(&_region_cl, &_claimer, worker_id);
 954   }
 955 };
 956 
 957 void G1PreConcurrentStartTask::ResetMarkingStateTask::do_work(uint worker_id) {
 958   // Reset marking state.
 959   _cm->reset();
 960 }
 961 
 962 G1PreConcurrentStartTask::G1PreConcurrentStartTask(GCCause::Cause cause, G1ConcurrentMark* cm) :
 963   G1BatchedTask("Pre Concurrent Start", G1CollectedHeap::heap()->phase_times()) {
 964   add_serial_task(new ResetMarkingStateTask(cm));
 965   add_parallel_task(new NoteStartOfMarkTask());
 966 };
 967 
 968 void G1ConcurrentMark::pre_concurrent_start(GCCause::Cause cause) {
 969   assert_at_safepoint_on_vm_thread();
 970 
 971   G1CollectedHeap::start_codecache_marking_cycle_if_inactive(true /* concurrent_mark_start */);
 972 
 973   ClassLoaderDataGraph::verify_claimed_marks_cleared(ClassLoaderData::_claim_strong);
 974 
 975   G1PreConcurrentStartTask cl(cause, this);
 976   G1CollectedHeap::heap()->run_batch_task(&cl);
 977 
 978   _gc_tracer_cm->set_gc_cause(cause);
 979 }
 980 
 981 void G1ConcurrentMark::start_full_concurrent_cycle() {
 982   // Start Concurrent Marking weak-reference discovery.
 983   ReferenceProcessor* rp = _g1h->ref_processor_cm();
 984   rp->start_discovery(false /* always_clear */);
 985 
 986   SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
 987   // This is the start of  the marking cycle, we're expected all
 988   // threads to have SATB queues with active set to false.
 989   satb_mq_set.set_active_all_threads(true, /* new active value */
 990                                      false /* expected_active */);
 991 
 992   // update_g1_committed() will be called at the end of an evac pause
 993   // when marking is on. So, it's also called at the end of the
 994   // concurrent start pause to update the heap end, if the heap expands
 995   // during it. No need to call it here.
 996 
 997   // Signal the thread to start work.
 998   cm_thread()->start_full_cycle();
 999 }
1000 
1001 void G1ConcurrentMark::start_undo_concurrent_cycle() {
1002   assert_at_safepoint_on_vm_thread();
1003   // At this time this GC is not a concurrent start gc any more, can only check for young only gc/phase.
1004   assert(_g1h->collector_state()->is_in_young_only_phase(), "must be");
1005 
1006   abort_root_region_scan_at_safepoint();
1007 
1008   // Signal the thread to start work.
1009   cm_thread()->start_undo_cycle();
1010 }
1011 
1012 void G1ConcurrentMark::notify_concurrent_cycle_completed() {
1013   cm_thread()->set_idle();
1014 }
1015 
1016 void G1ConcurrentMark::stop() {
1017   if (is_fully_initialized()) {
1018     cm_thread()->stop();
1019   }
1020 }
1021 
1022 /*
1023  * Notice that in the next two methods, we actually leave the STS
1024  * during the barrier sync and join it immediately afterwards. If we
1025  * do not do this, the following deadlock can occur: one thread could
1026  * be in the barrier sync code, waiting for the other thread to also
1027  * sync up, whereas another one could be trying to yield, while also
1028  * waiting for the other threads to sync up too.
1029  *
1030  * Note, however, that this code is also used during remark and in
1031  * this case we should not attempt to leave / enter the STS, otherwise
1032  * we'll either hit an assert (debug / fastdebug) or deadlock
1033  * (product). So we should only leave / enter the STS if we are
1034  * operating concurrently.
1035  *
1036  * Because the thread that does the sync barrier has left the STS, it
1037  * is possible to be suspended for a Full GC or an evacuation pause
1038  * could occur. This is actually safe, since the entering the sync
1039  * barrier is one of the last things do_marking_step() does, and it
1040  * doesn't manipulate any data structures afterwards.
1041  */
1042 
1043 void G1ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
1044   bool barrier_aborted;
1045   {
1046     SuspendibleThreadSetLeaver sts_leave(concurrent());
1047     barrier_aborted = !_first_overflow_barrier_sync.enter();
1048   }
1049 
1050   // at this point everyone should have synced up and not be doing any
1051   // more work
1052 
1053   if (barrier_aborted) {
1054     // If the barrier aborted we ignore the overflow condition and
1055     // just abort the whole marking phase as quickly as possible.
1056     return;
1057   }
1058 }
1059 
1060 void G1ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
1061   SuspendibleThreadSetLeaver sts_leave(concurrent());
1062   _second_overflow_barrier_sync.enter();
1063 
1064   // at this point everything should be re-initialized and ready to go
1065 }
1066 
1067 class G1CMConcurrentMarkingTask : public WorkerTask {
1068   G1ConcurrentMark*     _cm;
1069 
1070 public:
1071   void work(uint worker_id) {
1072     ResourceMark rm;
1073 
1074     SuspendibleThreadSetJoiner sts_join;
1075 
1076     assert(worker_id < _cm->active_tasks(), "invariant");
1077 
1078     G1CMTask* task = _cm->task(worker_id);
1079     task->record_start_time();
1080     if (!_cm->has_aborted()) {
1081       do {
1082         task->do_marking_step(G1ConcMarkStepDurationMillis,
1083                               true  /* do_termination */,
1084                               false /* is_serial*/);
1085 
1086         _cm->do_yield_check();
1087       } while (!_cm->has_aborted() && task->has_aborted());
1088     }
1089     task->record_end_time();
1090     guarantee(!task->has_aborted() || _cm->has_aborted(), "invariant");
1091   }
1092 
1093   G1CMConcurrentMarkingTask(G1ConcurrentMark* cm) :
1094       WorkerTask("Concurrent Mark"), _cm(cm) { }
1095 
1096   ~G1CMConcurrentMarkingTask() { }
1097 };
1098 
1099 uint G1ConcurrentMark::calc_active_marking_workers() {
1100   uint result = 0;
1101   if (!UseDynamicNumberOfGCThreads || !FLAG_IS_DEFAULT(ConcGCThreads)) {
1102     result = _max_concurrent_workers;
1103   } else {
1104     result =
1105       WorkerPolicy::calc_default_active_workers(_max_concurrent_workers,
1106                                                 1, /* Minimum workers */
1107                                                 _num_concurrent_workers,
1108                                                 Threads::number_of_non_daemon_threads());
1109     // Don't scale the result down by scale_concurrent_workers() because
1110     // that scaling has already gone into "_max_concurrent_workers".
1111   }
1112   assert(result > 0 && result <= _max_concurrent_workers,
1113          "Calculated number of marking workers must be larger than zero and at most the maximum %u, but is %u",
1114          _max_concurrent_workers, result);
1115   return result;
1116 }
1117 
1118 bool G1ConcurrentMark::has_root_region_scan_aborted() const {
1119   return _root_region_scan_aborted.load_relaxed();
1120 }
1121 
1122 #ifndef PRODUCT
1123 void G1ConcurrentMark::assert_root_region_scan_completed_or_aborted() {
1124   assert(root_regions()->work_completed() || has_root_region_scan_aborted(), "must be");
1125 }
1126 #endif
1127 
1128 void G1ConcurrentMark::scan_root_region(const MemRegion* region, uint worker_id) {
1129 #ifdef ASSERT
1130   HeapWord* last = region->last();
1131   G1HeapRegion* hr = _g1h->heap_region_containing(last);
1132   assert(hr->is_old() || top_at_mark_start(hr) == hr->bottom(),
1133          "Root regions must be old or survivor/eden but region %u is %s", hr->hrm_index(), hr->get_type_str());
1134   assert(top_at_mark_start(hr) == region->start(),
1135          "MemRegion start should be equal to TAMS");
1136 #endif
1137 
1138   G1RootRegionScanClosure cl(_g1h, this, worker_id);
1139 
1140   const uintx interval = PrefetchScanIntervalInBytes;
1141   HeapWord* curr = region->start();
1142   const HeapWord* end = region->end();
1143   while (curr < end) {
1144     Prefetch::read(curr, interval);
1145     oop obj = cast_to_oop(curr);
1146     size_t size = obj->oop_iterate_size(&cl);
1147     assert(size == obj->size(), "sanity");
1148     curr += size;
1149   }
1150 }
1151 
1152 class G1CMRootRegionScanTask : public WorkerTask {
1153   G1ConcurrentMark* _cm;
1154   bool _should_yield;
1155 
1156 public:
1157   G1CMRootRegionScanTask(G1ConcurrentMark* cm, bool should_yield) :
1158     WorkerTask("G1 Root Region Scan"), _cm(cm), _should_yield(should_yield) { }
1159 
1160   void work(uint worker_id) {
1161     SuspendibleThreadSetJoiner sts_join(_should_yield);
1162 
1163     while (true) {
1164       if (_cm->has_root_region_scan_aborted()) {
1165         return;
1166       }
1167       G1CMRootMemRegions* root_regions = _cm->root_regions();
1168       const MemRegion* region = root_regions->claim_next();
1169       if (region == nullptr) {
1170         return;
1171       }
1172       _cm->scan_root_region(region, worker_id);
1173       if (_should_yield) {
1174         SuspendibleThreadSet::yield();
1175         // If we yielded, a GC may have processed all root regions,
1176         // so this loop will naturally exit on the next claim_next() call.
1177         // Same if a Full GC signalled abort of the concurrent mark.
1178       }
1179     }
1180   }
1181 };
1182 
1183 bool G1ConcurrentMark::scan_root_regions(WorkerThreads* workers, bool concurrent) {
1184   // We first check whether there is any work to do as we might have already aborted
1185   // the concurrent cycle, or ran into a GC that did the actual work when we reach here.
1186   // We want to avoid spinning up the worker threads if that happened.
1187   // (Note that due to races reading the abort-flag, we might spin up the threads anyway).
1188   //
1189   // Abort happens if a Full GC occurs right after starting the concurrent cycle or
1190   // a young gc doing the work.
1191   //
1192   // Concurrent gc threads enter an STS when starting the task, so they stop, then
1193   // continue after that safepoint.
1194   //
1195   // Must not use G1CMRootMemRegions::work_completed() here because we need to get a
1196   // consistent view of the value containing the number of remaining regions across the
1197   // usages below. The safepoint/gc may already be running and modifying it
1198   // while this code is still executing.
1199   uint num_remaining = root_regions()->num_remaining_regions();
1200   bool do_scan = num_remaining > 0 && !has_root_region_scan_aborted();
1201   if (do_scan) {
1202     // Assign one worker to each root-region but subject to the max constraint.
1203     // The constraint is also important to avoid accesses beyond the allocated per-worker
1204     // marking helper data structures. We might get passed different WorkerThreads with
1205     // different number of threads (potential worker ids) than helper data structures when
1206     // completing this work during GC.
1207     const uint num_workers = MIN2(num_remaining,
1208                                   _max_concurrent_workers);
1209 
1210     G1CMRootRegionScanTask task(this, concurrent);
1211     log_debug(gc, ergo)("Running %s using %u workers for %u work units.",
1212                         task.name(), num_workers, num_remaining);
1213     workers->run_task(&task, num_workers);
1214   }
1215 
1216   // At the end of this method, we can re-read num_remaining() in the assert: either
1217   // we got non-zero above and we processed all root regions (and it must be zero
1218   // after the worker task synchronization) or it had already been zero. We also
1219   // can't have started another concurrent cycle that could have set it to something else
1220   // while still in the concurrent cycle (if called concurrently).
1221   assert_root_region_scan_completed_or_aborted();
1222 
1223   return do_scan;
1224 }
1225 
1226 void G1ConcurrentMark::scan_root_regions_concurrently() {
1227   assert(Thread::current() == cm_thread(), "must be on Concurrent Mark Thread");
1228   scan_root_regions(_concurrent_workers, true /* concurrent */);
1229 }
1230 
1231 bool G1ConcurrentMark::complete_root_regions_scan_in_safepoint() {
1232   assert_at_safepoint_on_vm_thread();
1233   return scan_root_regions(_g1h->workers(), false /* concurrent */);
1234 }
1235 
1236 void G1ConcurrentMark::add_root_region(G1HeapRegion* r) {
1237   root_regions()->add(top_at_mark_start(r), r->top());
1238 }
1239 
1240 void G1ConcurrentMark::add_root_region_set_bottom(G1HeapRegion* r) {
1241   set_top_at_mark_start_to_bottom(r);
1242   root_regions()->add(r->bottom(), r->top());
1243 }
1244 
1245 bool G1ConcurrentMark::is_root_region(G1HeapRegion* r) {
1246   return root_regions()->contains(MemRegion(top_at_mark_start(r), r->top()));
1247 }
1248 
1249 void G1ConcurrentMark::abort_root_region_scan() {
1250   assert_not_at_safepoint();
1251 
1252   _root_region_scan_aborted.store_relaxed(true);
1253 }
1254 
1255 void G1ConcurrentMark::abort_root_region_scan_at_safepoint() {
1256   assert_at_safepoint_on_vm_thread();
1257 
1258   _root_region_scan_aborted.store_relaxed(true);
1259 }
1260 
1261 void G1ConcurrentMark::concurrent_cycle_start() {
1262   _gc_timer_cm->register_gc_start();
1263 
1264   _gc_tracer_cm->report_gc_start(GCCause::_no_gc /* first parameter is not used */, _gc_timer_cm->gc_start());
1265 
1266   _g1h->trace_heap_before_gc(_gc_tracer_cm);
1267 }
1268 
1269 uint G1ConcurrentMark::completed_mark_cycles() const {
1270   return _completed_mark_cycles.load_relaxed();
1271 }
1272 
1273 void G1ConcurrentMark::concurrent_cycle_end(bool mark_cycle_completed) {
1274   _g1h->trace_heap_after_gc(_gc_tracer_cm);
1275 
1276   if (mark_cycle_completed) {
1277     _completed_mark_cycles.add_then_fetch(1u, memory_order_relaxed);
1278   }
1279 
1280   if (has_aborted()) {
1281     log_info(gc, marking)("Concurrent Mark Abort");
1282     _gc_tracer_cm->report_concurrent_mode_failure();
1283   }
1284 
1285   _gc_timer_cm->register_gc_end();
1286 
1287   _gc_tracer_cm->report_gc_end(_gc_timer_cm->gc_end(), _gc_timer_cm->time_partitions());
1288 }
1289 
1290 void G1ConcurrentMark::mark_from_roots() {
1291   _restart_for_overflow.store_relaxed(false);
1292 
1293   uint active_workers = calc_active_marking_workers();
1294 
1295   // Setting active workers is not guaranteed since fewer
1296   // worker threads may currently exist and more may not be
1297   // available.
1298   active_workers = _concurrent_workers->set_active_workers(active_workers);
1299   log_info(gc, task)("Concurrent Mark Using %u of %u Workers", active_workers, _concurrent_workers->max_workers());
1300 
1301   _num_concurrent_workers = active_workers;
1302 
1303   // Parallel task terminator is set in "set_concurrency_and_phase()"
1304   set_concurrency_and_phase(active_workers, true /* concurrent */);
1305 
1306   G1CMConcurrentMarkingTask marking_task(this);
1307   _concurrent_workers->run_task(&marking_task);
1308   print_stats();
1309 }
1310 
1311 const char* G1ConcurrentMark::verify_location_string(VerifyLocation location) {
1312   static const char* location_strings[] = { "Remark Before",
1313                                             "Remark After",
1314                                             "Remark Overflow",
1315                                             "Cleanup Before",
1316                                             "Cleanup After" };
1317   return location_strings[static_cast<std::underlying_type_t<VerifyLocation>>(location)];
1318 }
1319 
1320 void G1ConcurrentMark::verify_during_pause(G1HeapVerifier::G1VerifyType type,
1321                                            VerifyLocation location) {
1322   G1HeapVerifier* verifier = _g1h->verifier();
1323 
1324   verifier->verify_region_sets_optional();
1325 
1326   const char* caller = verify_location_string(location);
1327 
1328   if (VerifyDuringGC && G1HeapVerifier::should_verify(type)) {
1329     GCTraceTime(Debug, gc, phases) debug(caller, _gc_timer_cm);
1330 
1331     size_t const BufLen = 512;
1332     char buffer[BufLen];
1333 
1334     jio_snprintf(buffer, BufLen, "During GC (%s)", caller);
1335     verifier->verify(VerifyOption::G1UseConcMarking, buffer);
1336 
1337     // Only check bitmap in Remark, and not at After-Verification because the regions
1338     // already have their TAMS'es reset.
1339     if (location != VerifyLocation::RemarkAfter) {
1340       verifier->verify_bitmap_clear(true /* above_tams_only */);
1341     }
1342   }
1343 }
1344 
1345 class G1ObjectCountIsAliveClosure: public BoolObjectClosure {
1346   G1CollectedHeap* _g1h;
1347 public:
1348   G1ObjectCountIsAliveClosure(G1CollectedHeap* g1h) : _g1h(g1h) {}
1349 
1350   bool do_object_b(oop obj) {
1351     return !_g1h->is_obj_dead(obj);
1352   }
1353 };
1354 
1355 void G1ConcurrentMark::remark() {
1356   assert_at_safepoint_on_vm_thread();
1357 
1358   // If a full collection has happened, we should not continue. However we might
1359   // have ended up here as the Remark VM operation has been scheduled already.
1360   if (has_aborted()) {
1361     return;
1362   }
1363 
1364   G1Policy* policy = _g1h->policy();
1365   policy->record_pause_start_time();
1366 
1367   double start = os::elapsedTime();
1368 
1369   verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyLocation::RemarkBefore);
1370 
1371   {
1372     GCTraceTime(Debug, gc, phases) debug("Finalize Marking", _gc_timer_cm);
1373     finalize_marking();
1374   }
1375 
1376   double mark_work_end = os::elapsedTime();
1377 
1378   bool const mark_finished = !has_overflown();
1379   if (mark_finished) {
1380     weak_refs_work();
1381 
1382     // Unload Klasses, String, Code Cache, etc.
1383     if (ClassUnloadingWithConcurrentMark) {
1384       G1CMIsAliveClosure is_alive(this);
1385       _g1h->unload_classes_and_code("Class Unloading", &is_alive, _gc_timer_cm);
1386     }
1387 
1388     SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
1389     // We're done with marking.
1390     // This is the end of the marking cycle, we're expected all
1391     // threads to have SATB queues with active set to true.
1392     satb_mq_set.set_active_all_threads(false, /* new active value */
1393                                        true /* expected_active */);
1394 
1395     {
1396       GCTraceTime(Debug, gc, phases) debug("Flush Task Caches", _gc_timer_cm);
1397       flush_all_task_caches();
1398     }
1399 
1400     // All marking completed. Check bitmap now as we will start to reset TAMSes
1401     // in parallel below so that we can not do this in the After-Remark verification.
1402     _g1h->verifier()->verify_bitmap_clear(true /* above_tams_only */);
1403 
1404     {
1405       GCTraceTime(Debug, gc, phases) debug("Select For Rebuild and Reclaim Empty Regions", _gc_timer_cm);
1406 
1407       G1UpdateRegionLivenessAndSelectForRebuildTask cl(_g1h, this, _g1h->workers()->active_workers());
1408       uint const num_workers = MIN2(G1UpdateRegionLivenessAndSelectForRebuildTask::desired_num_workers(_g1h->num_committed_regions()),
1409                                     _g1h->workers()->active_workers());
1410       log_debug(gc,ergo)("Running %s using %u workers for %u regions in heap", cl.name(), num_workers, _g1h->num_committed_regions());
1411       _g1h->workers()->run_task(&cl, num_workers);
1412 
1413       log_debug(gc, remset, tracking)("Remembered Set Tracking update regions total %u, selected %u",
1414                                       _g1h->num_committed_regions(), cl.total_selected_for_rebuild());
1415 
1416       _needs_remembered_set_rebuild = (cl.total_selected_for_rebuild() > 0);
1417 
1418       if (_needs_remembered_set_rebuild) {
1419         GrowableArrayCHeap<G1HeapRegion*, mtGC>* selected = cl.sort_and_prune_old_selected();
1420         _g1h->policy()->candidates()->set_candidates_from_marking(selected);
1421       }
1422     }
1423 
1424     if (log_is_enabled(Trace, gc, liveness)) {
1425       G1PrintRegionLivenessInfoClosure cl("Post-Marking");
1426       _g1h->heap_region_iterate(&cl);
1427     }
1428 
1429     // Potentially, some empty-regions have been reclaimed; make this a
1430     // "collection" so that pending allocation can retry before attempting a
1431     // GC pause.
1432     _g1h->increment_total_collections();
1433 
1434     // For Remark Pauses that may have been triggered by PeriodicGCs, we maintain
1435     // resizing based on MinHeapFreeRatio or MaxHeapFreeRatio. If a PeriodicGC is
1436     // triggered, it likely means there are very few regular GCs, making resizing
1437     // based on gc heuristics less effective.
1438     if (_g1h->last_gc_was_periodic()) {
1439       _g1h->resize_heap_after_full_collection(0 /* allocation_word_size */);
1440     }
1441 
1442     compute_new_sizes();
1443 
1444     verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyLocation::RemarkAfter);
1445 
1446     assert(!restart_for_overflow(), "sanity");
1447     // Completely reset the marking state (except bitmaps) since marking completed.
1448     reset_at_marking_complete();
1449 
1450     G1CollectedHeap::finish_codecache_marking_cycle();
1451 
1452     {
1453       GCTraceTime(Debug, gc, phases) debug("Report Object Count", _gc_timer_cm);
1454       G1ObjectCountIsAliveClosure is_alive(_g1h);
1455       _gc_tracer_cm->report_object_count_after_gc(&is_alive, _g1h->workers());
1456     }
1457 
1458     // Successfully completed marking, advance state.
1459     cm_thread()->set_full_cycle_rebuild_and_scrub();
1460   } else {
1461     // We overflowed.  Restart concurrent marking.
1462     _restart_for_overflow.store_relaxed(true);
1463 
1464     verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyLocation::RemarkOverflow);
1465 
1466     // Clear the marking state because we will be restarting
1467     // marking due to overflowing the global mark stack.
1468     reset_marking_for_restart();
1469   }
1470 
1471   // Statistics
1472   double now = os::elapsedTime();
1473   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1474   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1475   _remark_times.add((now - start) * 1000.0);
1476 
1477   _g1h->update_perf_counter_cpu_time();
1478 
1479   policy->record_concurrent_mark_remark_end();
1480 
1481   return;
1482 }
1483 
1484 void G1ConcurrentMark::compute_new_sizes() {
1485   MetaspaceGC::compute_new_size();
1486 
1487   // Cleanup will have freed any regions completely full of garbage.
1488   // Update the soft reference policy with the new heap occupancy.
1489   Universe::heap()->update_capacity_and_used_at_gc();
1490 
1491   // We reclaimed old regions so we should calculate the sizes to make
1492   // sure we update the old gen/space data.
1493   _g1h->monitoring_support()->update_sizes();
1494 }
1495 
1496 class G1UpdateRegionsAfterRebuild : public G1HeapRegionClosure {
1497   G1CollectedHeap* _g1h;
1498 
1499 public:
1500   G1UpdateRegionsAfterRebuild(G1CollectedHeap* g1h) : _g1h(g1h) { }
1501 
1502   bool do_heap_region(G1HeapRegion* r) override {
1503     // Update the remset tracking state from updating to complete
1504     // if remembered sets have been rebuilt.
1505     _g1h->policy()->remset_tracker()->update_after_rebuild(r);
1506     return false;
1507   }
1508 };
1509 
1510 void G1ConcurrentMark::cleanup() {
1511   assert_at_safepoint_on_vm_thread();
1512 
1513   // If a full collection has happened, we shouldn't do this.
1514   if (has_aborted()) {
1515     return;
1516   }
1517 
1518   G1Policy* policy = _g1h->policy();
1519   policy->record_pause_start_time();
1520 
1521   double start = os::elapsedTime();
1522 
1523   verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyLocation::CleanupBefore);
1524 
1525   if (needs_remembered_set_rebuild()) {
1526     // Update the remset tracking information as well as marking all regions
1527     // as fully parsable.
1528     GCTraceTime(Debug, gc, phases) debug("Update Remembered Set Tracking After Rebuild", _gc_timer_cm);
1529     G1UpdateRegionsAfterRebuild cl(_g1h);
1530     _g1h->heap_region_iterate(&cl);
1531   } else {
1532     log_debug(gc, phases)("No Remembered Sets to update after rebuild");
1533   }
1534 
1535   verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyLocation::CleanupAfter);
1536 
1537   // Local statistics
1538   _cleanup_times.add((os::elapsedTime() - start) * 1000.0);
1539 
1540   {
1541     GCTraceTime(Debug, gc, phases) debug("Finalize Concurrent Mark Cleanup", _gc_timer_cm);
1542     policy->record_concurrent_mark_cleanup_end(needs_remembered_set_rebuild());
1543   }
1544 
1545   // Advance state.
1546   cm_thread()->set_full_cycle_reset_for_next_cycle();
1547   return;
1548 }
1549 
1550 // 'Keep Alive' oop closure used by both serial parallel reference processing.
1551 // Uses the G1CMTask associated with a worker thread (for serial reference
1552 // processing the G1CMTask for worker 0 is used) to preserve (mark) and
1553 // trace referent objects.
1554 //
1555 // Using the G1CMTask and embedded local queues avoids having the worker
1556 // threads operating on the global mark stack. This reduces the risk
1557 // of overflowing the stack - which we would rather avoid at this late
1558 // state. Also using the tasks' local queues removes the potential
1559 // of the workers interfering with each other that could occur if
1560 // operating on the global stack.
1561 
1562 class G1CMKeepAliveAndDrainClosure : public OopClosure {
1563   G1ConcurrentMark* _cm;
1564   G1CMTask*         _task;
1565   uint              _ref_counter_limit;
1566   uint              _ref_counter;
1567   bool              _is_serial;
1568 public:
1569   G1CMKeepAliveAndDrainClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
1570     _cm(cm), _task(task), _ref_counter_limit(G1RefProcDrainInterval),
1571     _ref_counter(_ref_counter_limit), _is_serial(is_serial) {
1572     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1573   }
1574 
1575   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1576   virtual void do_oop(      oop* p) { do_oop_work(p); }
1577 
1578   template <class T> void do_oop_work(T* p) {
1579     if (_cm->has_overflown()) {
1580       return;
1581     }
1582     if (!_task->deal_with_reference(p)) {
1583       // We did not add anything to the mark bitmap (or mark stack), so there is
1584       // no point trying to drain it.
1585       return;
1586     }
1587     _ref_counter--;
1588 
1589     if (_ref_counter == 0) {
1590       // We have dealt with _ref_counter_limit references, pushing them
1591       // and objects reachable from them on to the local stack (and
1592       // possibly the global stack). Call G1CMTask::do_marking_step() to
1593       // process these entries.
1594       //
1595       // We call G1CMTask::do_marking_step() in a loop, which we'll exit if
1596       // there's nothing more to do (i.e. we're done with the entries that
1597       // were pushed as a result of the G1CMTask::deal_with_reference() calls
1598       // above) or we overflow.
1599       //
1600       // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
1601       // flag while there may still be some work to do. (See the comment at
1602       // the beginning of G1CMTask::do_marking_step() for those conditions -
1603       // one of which is reaching the specified time target.) It is only
1604       // when G1CMTask::do_marking_step() returns without setting the
1605       // has_aborted() flag that the marking step has completed.
1606       do {
1607         double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1608         _task->do_marking_step(mark_step_duration_ms,
1609                                false      /* do_termination */,
1610                                _is_serial);
1611       } while (_task->has_aborted() && !_cm->has_overflown());
1612       _ref_counter = _ref_counter_limit;
1613     }
1614   }
1615 };
1616 
1617 // 'Drain' oop closure used by both serial and parallel reference processing.
1618 // Uses the G1CMTask associated with a given worker thread (for serial
1619 // reference processing the G1CMtask for worker 0 is used). Calls the
1620 // do_marking_step routine, with an unbelievably large timeout value,
1621 // to drain the marking data structures of the remaining entries
1622 // added by the 'keep alive' oop closure above.
1623 
1624 class G1CMDrainMarkingStackClosure : public VoidClosure {
1625   G1ConcurrentMark* _cm;
1626   G1CMTask*         _task;
1627   bool              _is_serial;
1628  public:
1629   G1CMDrainMarkingStackClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
1630     _cm(cm), _task(task), _is_serial(is_serial) {
1631     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1632   }
1633 
1634   void do_void() {
1635     do {
1636       // We call G1CMTask::do_marking_step() to completely drain the local
1637       // and global marking stacks of entries pushed by the 'keep alive'
1638       // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
1639       //
1640       // G1CMTask::do_marking_step() is called in a loop, which we'll exit
1641       // if there's nothing more to do (i.e. we've completely drained the
1642       // entries that were pushed as a result of applying the 'keep alive'
1643       // closure to the entries on the discovered ref lists) or we overflow
1644       // the global marking stack.
1645       //
1646       // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
1647       // flag while there may still be some work to do. (See the comment at
1648       // the beginning of G1CMTask::do_marking_step() for those conditions -
1649       // one of which is reaching the specified time target.) It is only
1650       // when G1CMTask::do_marking_step() returns without setting the
1651       // has_aborted() flag that the marking step has completed.
1652 
1653       _task->do_marking_step(1000000000.0 /* something very large */,
1654                              true         /* do_termination */,
1655                              _is_serial);
1656     } while (_task->has_aborted() && !_cm->has_overflown());
1657   }
1658 };
1659 
1660 class G1CMRefProcProxyTask : public RefProcProxyTask {
1661   G1CollectedHeap& _g1h;
1662   G1ConcurrentMark& _cm;
1663 
1664 public:
1665   G1CMRefProcProxyTask(uint max_workers, G1CollectedHeap& g1h, G1ConcurrentMark &cm)
1666     : RefProcProxyTask("G1CMRefProcProxyTask", max_workers),
1667       _g1h(g1h),
1668       _cm(cm) {}
1669 
1670   void work(uint worker_id) override {
1671     assert(worker_id < _max_workers, "sanity");
1672     G1CMIsAliveClosure is_alive(&_cm);
1673     uint index = (_tm == RefProcThreadModel::Single) ? 0 : worker_id;
1674     G1CMKeepAliveAndDrainClosure keep_alive(&_cm, _cm.task(index), _tm == RefProcThreadModel::Single);
1675     BarrierEnqueueDiscoveredFieldClosure enqueue;
1676     G1CMDrainMarkingStackClosure complete_gc(&_cm, _cm.task(index), _tm == RefProcThreadModel::Single);
1677     _rp_task->rp_work(worker_id, &is_alive, &keep_alive, &enqueue, &complete_gc);
1678   }
1679 
1680   void prepare_run_task_hook() override {
1681     // We need to reset the concurrency level before each
1682     // proxy task execution, so that the termination protocol
1683     // and overflow handling in G1CMTask::do_marking_step() knows
1684     // how many workers to wait for.
1685     _cm.set_concurrency(_queue_count);
1686   }
1687 };
1688 
1689 void G1ConcurrentMark::weak_refs_work() {
1690   ResourceMark rm;
1691 
1692   {
1693     GCTraceTime(Debug, gc, phases) debug("Reference Processing", _gc_timer_cm);
1694 
1695     ReferenceProcessor* rp = _g1h->ref_processor_cm();
1696 
1697     // See the comment in G1CollectedHeap::ref_processing_init()
1698     // about how reference processing currently works in G1.
1699 
1700     assert(_global_mark_stack.is_empty(), "mark stack should be empty");
1701 
1702     // Prefer to grow the stack until the max capacity.
1703     _global_mark_stack.set_should_grow();
1704 
1705     // Parallel processing task executor.
1706     G1CMRefProcProxyTask task(rp->max_num_queues(), *_g1h, *this);
1707     ReferenceProcessorPhaseTimes pt(_gc_timer_cm, rp->max_num_queues());
1708 
1709     // Process the weak references.
1710     const ReferenceProcessorStats& stats = rp->process_discovered_references(task, _g1h->workers(), pt);
1711     _gc_tracer_cm->report_gc_reference_stats(stats);
1712     pt.print_all_references();
1713 
1714     // The do_oop work routines of the keep_alive and drain_marking_stack
1715     // oop closures will set the has_overflown flag if we overflow the
1716     // global marking stack.
1717 
1718     assert(has_overflown() || _global_mark_stack.is_empty(),
1719            "Mark stack should be empty (unless it has overflown)");
1720   }
1721 
1722   if (has_overflown()) {
1723     // We can not trust g1_is_alive and the contents of the heap if the marking stack
1724     // overflowed while processing references. Exit the VM.
1725     fatal("Overflow during reference processing, can not continue. Current mark stack depth: "
1726           "%zu, MarkStackSize: %zu, MarkStackSizeMax: %zu. "
1727           "Please increase MarkStackSize and/or MarkStackSizeMax and restart.",
1728           _global_mark_stack.size(), MarkStackSize, MarkStackSizeMax);
1729     return;
1730   }
1731 
1732   assert(_global_mark_stack.is_empty(), "Marking should have completed");
1733 
1734   {
1735     GCTraceTime(Debug, gc, phases) debug("Weak Processing", _gc_timer_cm);
1736     G1CMIsAliveClosure is_alive(this);
1737     WeakProcessor::weak_oops_do(_g1h->workers(), &is_alive, &do_nothing_cl, 1);
1738   }
1739 }
1740 
1741 class G1PrecleanYieldClosure : public YieldClosure {
1742   G1ConcurrentMark* _cm;
1743 
1744 public:
1745   G1PrecleanYieldClosure(G1ConcurrentMark* cm) : _cm(cm) { }
1746 
1747   virtual bool should_return() {
1748     return _cm->has_aborted();
1749   }
1750 
1751   virtual bool should_return_fine_grain() {
1752     _cm->do_yield_check();
1753     return _cm->has_aborted();
1754   }
1755 };
1756 
1757 void G1ConcurrentMark::preclean() {
1758   assert(G1UseReferencePrecleaning, "Precleaning must be enabled.");
1759 
1760   SuspendibleThreadSetJoiner joiner;
1761 
1762   BarrierEnqueueDiscoveredFieldClosure enqueue;
1763 
1764   set_concurrency_and_phase(1, true);
1765 
1766   G1PrecleanYieldClosure yield_cl(this);
1767 
1768   ReferenceProcessor* rp = _g1h->ref_processor_cm();
1769   // Precleaning is single threaded. Temporarily disable MT discovery.
1770   ReferenceProcessorMTDiscoveryMutator rp_mut_discovery(rp, false);
1771   rp->preclean_discovered_references(rp->is_alive_non_header(),
1772                                      &enqueue,
1773                                      &yield_cl,
1774                                      _gc_timer_cm);
1775 }
1776 
1777 // Closure for marking entries in SATB buffers.
1778 class G1CMSATBBufferClosure : public SATBBufferClosure {
1779 private:
1780   G1CMTask* _task;
1781   G1CollectedHeap* _g1h;
1782 
1783   // This is very similar to G1CMTask::deal_with_reference, but with
1784   // more relaxed requirements for the argument, so this must be more
1785   // circumspect about treating the argument as an object.
1786   void do_entry(void* entry) const {
1787     _task->increment_refs_reached();
1788     oop const obj = cast_to_oop(entry);
1789     _task->make_reference_grey(obj);
1790   }
1791 
1792 public:
1793   G1CMSATBBufferClosure(G1CMTask* task, G1CollectedHeap* g1h)
1794     : _task(task), _g1h(g1h) { }
1795 
1796   virtual void do_buffer(void** buffer, size_t size) {
1797     for (size_t i = 0; i < size; ++i) {
1798       do_entry(buffer[i]);
1799     }
1800   }
1801 };
1802 
1803 class G1RemarkThreadsClosure : public ThreadClosure {
1804   G1SATBMarkQueueSet& _qset;
1805 
1806  public:
1807   G1RemarkThreadsClosure(G1CollectedHeap* g1h, G1CMTask* task) :
1808     _qset(G1BarrierSet::satb_mark_queue_set()) {}
1809 
1810   void do_thread(Thread* thread) {
1811     // Transfer any partial buffer to the qset for completed buffer processing.
1812     _qset.flush_queue(G1ThreadLocalData::satb_mark_queue(thread));
1813   }
1814 };
1815 
1816 class G1CMRemarkTask : public WorkerTask {
1817   // For Threads::possibly_parallel_threads_do
1818   ThreadsClaimTokenScope _threads_claim_token_scope;
1819   G1ConcurrentMark* _cm;
1820 public:
1821   void work(uint worker_id) {
1822     G1CMTask* task = _cm->task(worker_id);
1823     task->record_start_time();
1824     {
1825       ResourceMark rm;
1826 
1827       G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
1828       Threads::possibly_parallel_threads_do(true /* is_par */, &threads_f);
1829     }
1830 
1831     do {
1832       task->do_marking_step(1000000000.0 /* something very large */,
1833                             true         /* do_termination       */,
1834                             false        /* is_serial            */);
1835     } while (task->has_aborted() && !_cm->has_overflown());
1836     // If we overflow, then we do not want to restart. We instead
1837     // want to abort remark and do concurrent marking again.
1838     task->record_end_time();
1839   }
1840 
1841   G1CMRemarkTask(G1ConcurrentMark* cm, uint active_workers) :
1842     WorkerTask("Par Remark"), _threads_claim_token_scope(), _cm(cm) {
1843     _cm->terminator()->reset_for_reuse(active_workers);
1844   }
1845 };
1846 
1847 void G1ConcurrentMark::finalize_marking() {
1848   ResourceMark rm;
1849 
1850   _g1h->ensure_parsability(false);
1851 
1852   // this is remark, so we'll use up all active threads
1853   uint active_workers = _g1h->workers()->active_workers();
1854   set_concurrency_and_phase(active_workers, false /* concurrent */);
1855   // Leave _parallel_marking_threads at it's
1856   // value originally calculated in the G1ConcurrentMark
1857   // constructor and pass values of the active workers
1858   // through the task.
1859 
1860   {
1861     G1CMRemarkTask remarkTask(this, active_workers);
1862     // We will start all available threads, even if we decide that the
1863     // active_workers will be fewer. The extra ones will just bail out
1864     // immediately.
1865     _g1h->workers()->run_task(&remarkTask);
1866   }
1867 
1868   SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
1869   guarantee(has_overflown() ||
1870             satb_mq_set.completed_buffers_num() == 0,
1871             "Invariant: has_overflown = %s, num buffers = %zu",
1872             BOOL_TO_STR(has_overflown()),
1873             satb_mq_set.completed_buffers_num());
1874 
1875   print_stats();
1876 }
1877 
1878 void G1ConcurrentMark::flush_all_task_caches(bool ends_use_of_mark_cache) {
1879   size_t hits = 0;
1880   size_t misses = 0;
1881   for (uint i = 0; i < _max_num_tasks; i++) {
1882     Pair<size_t, size_t> stats = _tasks[i]->flush_mark_stats_cache();
1883     hits += stats.first;
1884     misses += stats.second;
1885   }
1886   size_t sum = hits + misses;
1887   log_debug(gc, stats)("Mark stats cache hits %zu misses %zu ratio %1.3lf",
1888                        hits, misses, percent_of(hits, sum));
1889   if (ends_use_of_mark_cache) {
1890     _is_region_mark_stats_cache_in_use = false;
1891   }
1892 }
1893 
1894 void G1ConcurrentMark::clear_bitmap_for_region(G1HeapRegion* hr) {
1895   assert_at_safepoint();
1896   _mark_bitmap.clear_range(MemRegion(hr->bottom(), hr->end()));
1897 }
1898 
1899 G1HeapRegion* G1ConcurrentMark::claim_region(uint worker_id) {
1900   // "Checkpoint" the finger.
1901   HeapWord* local_finger = finger();
1902 
1903   while (local_finger < _heap.end()) {
1904     assert(_g1h->is_in_reserved(local_finger), "invariant");
1905 
1906     G1HeapRegion* curr_region = _g1h->heap_region_containing_or_null(local_finger);
1907     // Make sure that the reads below do not float before loading curr_region.
1908     OrderAccess::loadload();
1909     // Above heap_region_containing may return null as we always scan claim
1910     // until the end of the heap. In this case, just jump to the next region.
1911     HeapWord* end = curr_region != nullptr ? curr_region->end() : local_finger + G1HeapRegion::GrainWords;
1912 
1913     // Is the gap between reading the finger and doing the CAS too long?
1914     HeapWord* res = _finger.compare_exchange(local_finger, end);
1915     if (res == local_finger && curr_region != nullptr) {
1916       // We succeeded.
1917       HeapWord* bottom = curr_region->bottom();
1918       HeapWord* limit = top_at_mark_start(curr_region);
1919 
1920       log_trace(gc, marking)("Claim region %u bottom " PTR_FORMAT " tams " PTR_FORMAT, curr_region->hrm_index(), p2i(curr_region->bottom()), p2i(top_at_mark_start(curr_region)));
1921       // Notice that _finger == end cannot be guaranteed here since,
1922       // someone else might have moved the finger even further.
1923       assert(finger() >= end, "The finger should have moved forward");
1924 
1925       if (limit > bottom) {
1926         return curr_region;
1927       } else {
1928         assert(limit == bottom,
1929                "The scan limit for region %u (%s) should be bottom but is " PTR_FORMAT,
1930                curr_region->hrm_index(), curr_region->get_short_type_str(), p2i(limit));
1931         // We return null and the caller should try calling
1932         // claim_region() again.
1933         return nullptr;
1934       }
1935     } else {
1936       // Read the finger again.
1937       HeapWord* next_finger = finger();
1938       assert(next_finger > local_finger,
1939              "The finger should have moved forward " PTR_FORMAT " " PTR_FORMAT,
1940              p2i(local_finger), p2i(next_finger));
1941       local_finger = next_finger;
1942     }
1943   }
1944 
1945   return nullptr;
1946 }
1947 
1948 #ifndef PRODUCT
1949 class VerifyNoCSetOops {
1950   G1CollectedHeap* _g1h;
1951   const char* _phase;
1952   int _info;
1953 
1954 public:
1955   VerifyNoCSetOops(const char* phase, int info = -1) :
1956     _g1h(G1CollectedHeap::heap()),
1957     _phase(phase),
1958     _info(info)
1959   { }
1960 
1961   void operator()(G1TaskQueueEntry task_entry) const {
1962     if (task_entry.is_partial_array_state()) {
1963       oop obj = task_entry.to_partial_array_state()->source();
1964       guarantee(_g1h->is_in_reserved(obj), "Partial Array " PTR_FORMAT " must be in heap.", p2i(obj));
1965       return;
1966     }
1967     guarantee(oopDesc::is_oop(task_entry.to_oop()),
1968               "Non-oop " PTR_FORMAT ", phase: %s, info: %d",
1969               p2i(task_entry.to_oop()), _phase, _info);
1970     G1HeapRegion* r = _g1h->heap_region_containing(task_entry.to_oop());
1971     guarantee(!(r->in_collection_set() || r->has_index_in_opt_cset()),
1972               "obj " PTR_FORMAT " from %s (%d) in region %u in (optional) collection set",
1973               p2i(task_entry.to_oop()), _phase, _info, r->hrm_index());
1974   }
1975 };
1976 
1977 void G1ConcurrentMark::verify_no_collection_set_oops() {
1978   assert(SafepointSynchronize::is_at_safepoint() || !is_init_completed(),
1979          "should be at a safepoint or initializing");
1980   if (!is_fully_initialized() || !_g1h->collector_state()->is_in_mark_or_rebuild()) {
1981     return;
1982   }
1983 
1984   // Verify entries on the global mark stack
1985   _global_mark_stack.iterate(VerifyNoCSetOops("Stack"));
1986 
1987   // Verify entries on the task queues
1988   for (uint i = 0; i < _max_num_tasks; ++i) {
1989     G1CMTaskQueue* queue = _task_queues->queue(i);
1990     queue->iterate(VerifyNoCSetOops("Queue", i));
1991   }
1992 
1993   // Verify the global finger
1994   HeapWord* global_finger = finger();
1995   if (global_finger != nullptr && global_finger < _heap.end()) {
1996     // Since we always iterate over all regions, we might get a null G1HeapRegion
1997     // here.
1998     G1HeapRegion* global_hr = _g1h->heap_region_containing_or_null(global_finger);
1999     guarantee(global_hr == nullptr || global_finger == global_hr->bottom(),
2000               "global finger: " PTR_FORMAT " region: " HR_FORMAT,
2001               p2i(global_finger), HR_FORMAT_PARAMS(global_hr));
2002   }
2003 
2004   // Verify the task fingers
2005   assert(_num_concurrent_workers <= _max_num_tasks, "sanity");
2006   for (uint i = 0; i < _num_concurrent_workers; ++i) {
2007     G1CMTask* task = _tasks[i];
2008     HeapWord* task_finger = task->finger();
2009     if (task_finger != nullptr && task_finger < _heap.end()) {
2010       // See above note on the global finger verification.
2011       G1HeapRegion* r = _g1h->heap_region_containing_or_null(task_finger);
2012       guarantee(r == nullptr || task_finger == r->bottom() ||
2013                 !r->in_collection_set() || !r->has_index_in_opt_cset(),
2014                 "task finger: " PTR_FORMAT " region: " HR_FORMAT,
2015                 p2i(task_finger), HR_FORMAT_PARAMS(r));
2016     }
2017   }
2018 }
2019 #endif // PRODUCT
2020 
2021 void G1ConcurrentMark::rebuild_and_scrub() {
2022   if (!needs_remembered_set_rebuild()) {
2023     log_debug(gc, marking)("Skipping Remembered Set Rebuild. No regions selected for rebuild, will only scrub");
2024   }
2025 
2026   G1ConcurrentRebuildAndScrub::rebuild_and_scrub(this, needs_remembered_set_rebuild(), _concurrent_workers);
2027 }
2028 
2029 void G1ConcurrentMark::print_stats() {
2030   if (!log_is_enabled(Debug, gc, stats)) {
2031     return;
2032   }
2033   log_debug(gc, stats)("---------------------------------------------------------------------");
2034   for (size_t i = 0; i < _num_active_tasks; ++i) {
2035     _tasks[i]->print_stats();
2036     log_debug(gc, stats)("---------------------------------------------------------------------");
2037   }
2038 }
2039 
2040 bool G1ConcurrentMark::shutdown_cleanup_needed() const {
2041   // Cleanup (aborting threads, setting abort flags) is needed throughout the whole cycle before
2042   // stopping the CM thread.
2043   return is_fully_initialized() && is_in_concurrent_cycle();
2044 }
2045 
2046 void G1ConcurrentMark::shutdown_concurrent_cycle() {
2047   assert_at_safepoint_on_vm_thread();
2048 
2049   abort_root_region_scan_at_safepoint();
2050   abort_marking_threads();
2051 
2052   SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
2053   satb_mq_set.abandon_partial_marking();
2054   // This can be called either during or outside marking, we'll read
2055   // the expected_active value from the SATB queue set.
2056   satb_mq_set.set_active_all_threads(false, /* new active value */
2057                                      satb_mq_set.is_active() /* expected_active */);
2058 }
2059 
2060 bool G1ConcurrentMark::concurrent_cycle_abort() {
2061   assert_at_safepoint_on_vm_thread();
2062   assert(_g1h->collector_state()->is_in_full_gc(), "must be");
2063 
2064   // If we start the compaction before the CM threads finish
2065   // scanning the root regions we might trip them over as we'll
2066   // be moving objects / updating references. Since the root region
2067   // scan synchronized with the safepoint, just tell it to abort.
2068   // It will notice when the threads start up again later.
2069   abort_root_region_scan_at_safepoint();
2070 
2071   // We haven't started a concurrent cycle no need to do anything; we might have
2072   // aborted the marking because of shutting down though. In this case the marking
2073   // might have already completed the abort (leading to in_progress() below to
2074   // return false), however this still left marking state particularly in the
2075   // shared marking bitmap that must be cleaned up.
2076   // If there are multiple full gcs during shutdown we do this work repeatedly for
2077   // nothing, but this situation should be extremely rare (a full gc after shutdown
2078   // has been signalled is already rare), and this work should be negligible compared
2079   // to actual full gc work.
2080 
2081   if (!is_fully_initialized() || (!cm_thread()->is_in_progress() && !cm_thread()->should_terminate())) {
2082     return false;
2083   }
2084 
2085   flush_all_task_caches();
2086   reset_marking_for_restart();
2087 
2088   abort_marking_threads();
2089 
2090   SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
2091   satb_mq_set.abandon_partial_marking();
2092   // This can be called either during or outside marking, we'll read
2093   // the expected_active value from the SATB queue set.
2094   satb_mq_set.set_active_all_threads(false, /* new active value */
2095                                      satb_mq_set.is_active() /* expected_active */);
2096   return true;
2097 }
2098 
2099 void G1ConcurrentMark::abort_marking_threads() {
2100   assert_root_region_scan_completed_or_aborted();
2101   _has_aborted.store_relaxed(true);
2102   _first_overflow_barrier_sync.abort();
2103   _second_overflow_barrier_sync.abort();
2104 }
2105 
2106 double G1ConcurrentMark::worker_threads_cpu_time_s() {
2107   class CountCpuTimeThreadClosure : public ThreadClosure {
2108   public:
2109     jlong _total_cpu_time;
2110 
2111     CountCpuTimeThreadClosure() : ThreadClosure(), _total_cpu_time(0) { }
2112 
2113     void do_thread(Thread* t) {
2114       _total_cpu_time += os::thread_cpu_time(t);
2115     }
2116   } cl;
2117 
2118   threads_do(&cl);
2119 
2120   return (double)cl._total_cpu_time / NANOSECS_PER_SEC;
2121 }
2122 
2123 static void print_ms_time_info(const char* prefix, const char* name,
2124                                NumberSeq& ns) {
2125   log_trace(gc, marking)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
2126                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
2127   if (ns.num() > 0) {
2128     log_trace(gc, marking)("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
2129                            prefix, ns.sd(), ns.maximum());
2130   }
2131 }
2132 
2133 void G1ConcurrentMark::print_summary_info() {
2134   Log(gc, marking) log;
2135   if (!log.is_trace()) {
2136     return;
2137   }
2138 
2139   log.trace(" Concurrent marking:");
2140   if (!is_fully_initialized()) {
2141     log.trace("    has not been initialized yet");
2142     return;
2143   }
2144   print_ms_time_info("  ", "remarks", _remark_times);
2145   {
2146     print_ms_time_info("     ", "final marks", _remark_mark_times);
2147     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
2148 
2149   }
2150   print_ms_time_info("  ", "cleanups", _cleanup_times);
2151   log.trace("    Finalize live data total time = %8.2f s (avg = %8.2f ms).",
2152             _cleanup_times.sum() / 1000.0, _cleanup_times.avg());
2153   log.trace("  Total stop_world time = %8.2f s.",
2154             (_remark_times.sum() + _cleanup_times.sum())/1000.0);
2155   log.trace("  Total concurrent time = %8.2f s (%8.2f s marking).",
2156             cm_thread()->total_mark_cpu_time_s(), cm_thread()->worker_threads_cpu_time_s());
2157 }
2158 
2159 void G1ConcurrentMark::threads_do(ThreadClosure* tc) const {
2160   if (is_fully_initialized()) { // they are initialized late
2161     tc->do_thread(_cm_thread);
2162     _concurrent_workers->threads_do(tc);
2163   }
2164 }
2165 
2166 void G1ConcurrentMark::print_on(outputStream* st) const {
2167   st->print_cr("Marking Bits: (CMBitMap*) " PTR_FORMAT, p2i(mark_bitmap()));
2168   _mark_bitmap.print_on(st, " Bits: ");
2169 }
2170 
2171 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) {
2172   ReferenceProcessor* result = g1h->ref_processor_cm();
2173   assert(result != nullptr, "CM reference processor should not be null");
2174   return result;
2175 }
2176 
2177 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
2178                                G1CMTask* task)
2179   : ClaimMetadataVisitingOopIterateClosure(ClassLoaderData::_claim_strong, get_cm_oop_closure_ref_processor(g1h)),
2180     _g1h(g1h), _task(task)
2181 { }
2182 
2183 void G1CMTask::setup_for_region(G1HeapRegion* hr) {
2184   assert(hr != nullptr,
2185         "claim_region() should have filtered out null regions");
2186   _curr_region  = hr;
2187   _finger       = hr->bottom();
2188   update_region_limit();
2189 }
2190 
2191 void G1CMTask::update_region_limit() {
2192   G1HeapRegion* hr = _curr_region;
2193   HeapWord* bottom = hr->bottom();
2194   HeapWord* limit = _cm->top_at_mark_start(hr);
2195 
2196   if (limit == bottom) {
2197     // The region was collected underneath our feet.
2198     // We set the finger to bottom to ensure that the bitmap
2199     // iteration that will follow this will not do anything.
2200     // (this is not a condition that holds when we set the region up,
2201     // as the region is not supposed to be empty in the first place)
2202     _finger = bottom;
2203   } else if (limit >= _region_limit) {
2204     assert(limit >= _finger, "peace of mind");
2205   } else {
2206     assert(limit < _region_limit, "only way to get here");
2207     // This can happen under some pretty unusual circumstances.  An
2208     // evacuation pause empties the region underneath our feet (TAMS
2209     // at bottom). We then do some allocation in the region (TAMS
2210     // stays at bottom), followed by the region being used as a GC
2211     // alloc region (TAMS will move to top() and the objects
2212     // originally below it will be greyed). All objects now marked in
2213     // the region are explicitly greyed, if below the global finger,
2214     // and we do not need in fact to scan anything else. So, we simply
2215     // set _finger to be limit to ensure that the bitmap iteration
2216     // doesn't do anything.
2217     _finger = limit;
2218   }
2219 
2220   _region_limit = limit;
2221 }
2222 
2223 void G1CMTask::giveup_current_region() {
2224   assert(_curr_region != nullptr, "invariant");
2225   clear_region_fields();
2226 }
2227 
2228 void G1CMTask::clear_region_fields() {
2229   // Values for these three fields that indicate that we're not
2230   // holding on to a region.
2231   _curr_region   = nullptr;
2232   _finger        = nullptr;
2233   _region_limit  = nullptr;
2234 }
2235 
2236 void G1CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
2237   if (cm_oop_closure == nullptr) {
2238     assert(_cm_oop_closure != nullptr, "invariant");
2239   } else {
2240     assert(_cm_oop_closure == nullptr, "invariant");
2241   }
2242   _cm_oop_closure = cm_oop_closure;
2243 }
2244 
2245 void G1CMTask::reset(G1CMBitMap* mark_bitmap) {
2246   guarantee(mark_bitmap != nullptr, "invariant");
2247   _mark_bitmap              = mark_bitmap;
2248   clear_region_fields();
2249 
2250   _calls                         = 0;
2251   _elapsed_time_ms               = 0.0;
2252   _termination_time_ms           = 0.0;
2253 
2254   _mark_stats_cache.reset();
2255 }
2256 
2257 void G1CMTask::reset_for_restart() {
2258   clear_region_fields();
2259   _task_queue->set_empty();
2260   TASKQUEUE_STATS_ONLY(_partial_array_splitter.stats()->reset());
2261   TASKQUEUE_STATS_ONLY(_task_queue->stats.reset());
2262 }
2263 
2264 void G1CMTask::register_partial_array_splitter() {
2265 
2266   ::new (&_partial_array_splitter) PartialArraySplitter(_cm->partial_array_state_manager(),
2267                                                         _cm->max_num_tasks());
2268 }
2269 
2270 void G1CMTask::unregister_partial_array_splitter() {
2271   _partial_array_splitter.~PartialArraySplitter();
2272 }
2273 
2274 bool G1CMTask::should_exit_termination() {
2275   if (!regular_clock_call()) {
2276     return true;
2277   }
2278 
2279   // This is called when we are in the termination protocol. We should
2280   // quit if, for some reason, this task wants to abort or the global
2281   // stack is not empty (this means that we can get work from it).
2282   return !_cm->mark_stack_empty() || has_aborted();
2283 }
2284 
2285 void G1CMTask::reached_limit() {
2286   assert(_words_scanned >= _words_scanned_limit ||
2287          _refs_reached >= _refs_reached_limit ,
2288          "shouldn't have been called otherwise");
2289   abort_marking_if_regular_check_fail();
2290 }
2291 
2292 bool G1CMTask::regular_clock_call() {
2293   if (has_aborted()) {
2294     return false;
2295   }
2296 
2297   // First, we need to recalculate the words scanned and refs reached
2298   // limits for the next clock call.
2299   recalculate_limits();
2300 
2301   // During the regular clock call we do the following
2302 
2303   // (1) If an overflow has been flagged, then we abort.
2304   if (_cm->has_overflown()) {
2305     return false;
2306   }
2307 
2308   // If we are not concurrent (i.e. we're doing remark) we don't need
2309   // to check anything else. The other steps are only needed during
2310   // the concurrent marking phase.
2311   if (!_cm->concurrent()) {
2312     return true;
2313   }
2314 
2315   // (2) If marking has been aborted for Full GC, then we also abort.
2316   if (_cm->has_aborted()) {
2317     return false;
2318   }
2319 
2320   // (4) We check whether we should yield. If we have to, then we abort.
2321   if (SuspendibleThreadSet::should_yield()) {
2322     // We should yield. To do this we abort the task. The caller is
2323     // responsible for yielding.
2324     return false;
2325   }
2326 
2327   // (5) We check whether we've reached our time quota. If we have,
2328   // then we abort.
2329   double elapsed_time_ms = (double)(os::current_thread_cpu_time() - _start_cpu_time_ns) / NANOSECS_PER_MILLISEC;
2330   if (elapsed_time_ms > _time_target_ms) {
2331     _has_timed_out = true;
2332     return false;
2333   }
2334 
2335   // (6) Finally, we check whether there are enough completed STAB
2336   // buffers available for processing. If there are, we abort.
2337   SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
2338   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
2339     // we do need to process SATB buffers, we'll abort and restart
2340     // the marking task to do so
2341     return false;
2342   }
2343   return true;
2344 }
2345 
2346 void G1CMTask::recalculate_limits() {
2347   _real_words_scanned_limit = _words_scanned + words_scanned_period;
2348   _words_scanned_limit      = _real_words_scanned_limit;
2349 
2350   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
2351   _refs_reached_limit       = _real_refs_reached_limit;
2352 }
2353 
2354 void G1CMTask::decrease_limits() {
2355   // This is called when we believe that we're going to do an infrequent
2356   // operation which will increase the per byte scanned cost (i.e. move
2357   // entries to/from the global stack). It basically tries to decrease the
2358   // scanning limit so that the clock is called earlier.
2359 
2360   _words_scanned_limit = _real_words_scanned_limit - 3 * words_scanned_period / 4;
2361   _refs_reached_limit  = _real_refs_reached_limit - 3 * refs_reached_period / 4;
2362 }
2363 
2364 void G1CMTask::move_entries_to_global_stack() {
2365   // Local array where we'll store the entries that will be popped
2366   // from the local queue.
2367   G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2368 
2369   size_t n = 0;
2370   G1TaskQueueEntry task_entry;
2371   while (n < G1CMMarkStack::EntriesPerChunk && _task_queue->pop_local(task_entry)) {
2372     buffer[n] = task_entry;
2373     ++n;
2374   }
2375   if (n < G1CMMarkStack::EntriesPerChunk) {
2376     buffer[n] = G1TaskQueueEntry();
2377   }
2378 
2379   if (n > 0) {
2380     if (!_cm->mark_stack_push(buffer)) {
2381       set_has_aborted();
2382     }
2383   }
2384 
2385   // This operation was quite expensive, so decrease the limits.
2386   decrease_limits();
2387 }
2388 
2389 bool G1CMTask::get_entries_from_global_stack() {
2390   // Local array where we'll store the entries that will be popped
2391   // from the global stack.
2392   G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2393 
2394   if (!_cm->mark_stack_pop(buffer)) {
2395     return false;
2396   }
2397 
2398   // We did actually pop at least one entry.
2399   for (size_t i = 0; i < G1CMMarkStack::EntriesPerChunk; ++i) {
2400     G1TaskQueueEntry task_entry = buffer[i];
2401     if (task_entry.is_null()) {
2402       break;
2403     }
2404     assert(task_entry.is_partial_array_state() || oopDesc::is_oop(task_entry.to_oop()), "Element " PTR_FORMAT " must be an array slice or oop", p2i(task_entry.to_oop()));
2405     bool success = _task_queue->push(task_entry);
2406     // We only call this when the local queue is empty or under a
2407     // given target limit. So, we do not expect this push to fail.
2408     assert(success, "invariant");
2409   }
2410 
2411   // This operation was quite expensive, so decrease the limits
2412   decrease_limits();
2413   return true;
2414 }
2415 
2416 void G1CMTask::drain_local_queue(bool partially) {
2417   if (has_aborted()) {
2418     return;
2419   }
2420 
2421   // Decide what the target size is, depending whether we're going to
2422   // drain it partially (so that other tasks can steal if they run out
2423   // of things to do) or totally (at the very end).
2424   uint target_size;
2425   if (partially) {
2426     target_size = GCDrainStackTargetSize;
2427   } else {
2428     target_size = 0;
2429   }
2430 
2431   if (_task_queue->size() > target_size) {
2432     G1TaskQueueEntry entry;
2433     bool ret = _task_queue->pop_local(entry);
2434     while (ret) {
2435       process_entry(entry, false /* stolen */);
2436       if (_task_queue->size() <= target_size || has_aborted()) {
2437         ret = false;
2438       } else {
2439         ret = _task_queue->pop_local(entry);
2440       }
2441     }
2442   }
2443 }
2444 
2445 size_t G1CMTask::start_partial_array_processing(objArrayOop obj) {
2446   assert(obj->length() >= (int)ObjArrayMarkingStride, "Must be a large array object %d", obj->length());
2447 
2448   // Mark klass metadata
2449   process_klass(obj->klass());
2450 
2451   size_t array_length = obj->length();
2452   size_t initial_chunk_size = _partial_array_splitter.start(_task_queue, obj, nullptr, array_length, ObjArrayMarkingStride);
2453 
2454   process_array_chunk(obj, 0, initial_chunk_size);
2455 
2456   // Include object header size
2457   if (obj->is_refArray()) {
2458     return refArrayOopDesc::object_size(checked_cast<int>(initial_chunk_size));
2459   } else {
2460     FlatArrayKlass* fak = FlatArrayKlass::cast(obj->klass());
2461     return flatArrayOopDesc::object_size(fak->layout_helper(), checked_cast<int>(initial_chunk_size));
2462   }
2463 }
2464 
2465 size_t G1CMTask::process_partial_array(const G1TaskQueueEntry& task, bool stolen) {
2466   PartialArrayState* state = task.to_partial_array_state();
2467   // Access state before release by claim().
2468   objArrayOop obj = oop_cast<objArrayOop>(state->source());
2469 
2470   PartialArraySplitter::Claim claim =
2471     _partial_array_splitter.claim(state, _task_queue, stolen);
2472 
2473   process_array_chunk(obj, claim._start, claim._end);
2474 
2475   if (obj->is_refArray()) {
2476     return heap_word_size((claim._end - claim._start) * heapOopSize);
2477   } else {
2478     assert(obj->is_flatArray(), "Must be!");
2479     size_t element_byte_size = FlatArrayKlass::cast(obj->klass())->element_byte_size();
2480     size_t nof_elements = claim._end - claim._start;
2481     return heap_word_size(nof_elements * element_byte_size);
2482   }
2483 }
2484 
2485 void G1CMTask::drain_global_stack(bool partially) {
2486   if (has_aborted()) {
2487     return;
2488   }
2489 
2490   // We have a policy to drain the local queue before we attempt to
2491   // drain the global stack.
2492   assert(partially || _task_queue->size() == 0, "invariant");
2493 
2494   // Decide what the target size is, depending whether we're going to
2495   // drain it partially (so that other tasks can steal if they run out
2496   // of things to do) or totally (at the very end).
2497   // Notice that when draining the global mark stack partially, due to the racyness
2498   // of the mark stack size update we might in fact drop below the target. But,
2499   // this is not a problem.
2500   // In case of total draining, we simply process until the global mark stack is
2501   // totally empty, disregarding the size counter.
2502   if (partially) {
2503     size_t const target_size = _cm->partial_mark_stack_size_target();
2504     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
2505       if (get_entries_from_global_stack()) {
2506         drain_local_queue(partially);
2507       }
2508     }
2509   } else {
2510     while (!has_aborted() && get_entries_from_global_stack()) {
2511       drain_local_queue(partially);
2512     }
2513   }
2514 }
2515 
2516 // SATB Queue has several assumptions on whether to call the par or
2517 // non-par versions of the methods. this is why some of the code is
2518 // replicated. We should really get rid of the single-threaded version
2519 // of the code to simplify things.
2520 void G1CMTask::drain_satb_buffers() {
2521   if (has_aborted()) {
2522     return;
2523   }
2524 
2525   // We set this so that the regular clock knows that we're in the
2526   // middle of draining buffers and doesn't set the abort flag when it
2527   // notices that SATB buffers are available for draining. It'd be
2528   // very counter productive if it did that. :-)
2529   _draining_satb_buffers = true;
2530 
2531   G1CMSATBBufferClosure satb_cl(this, _g1h);
2532   SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
2533 
2534   // This keeps claiming and applying the closure to completed buffers
2535   // until we run out of buffers or we need to abort.
2536   while (!has_aborted() &&
2537          satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
2538     abort_marking_if_regular_check_fail();
2539   }
2540 
2541   // Can't assert qset is empty here, even if not aborted.  If concurrent,
2542   // some other thread might be adding to the queue.  If not concurrent,
2543   // some other thread might have won the race for the last buffer, but
2544   // has not yet decremented the count.
2545 
2546   _draining_satb_buffers = false;
2547 
2548   // again, this was a potentially expensive operation, decrease the
2549   // limits to get the regular clock call early
2550   decrease_limits();
2551 }
2552 
2553 #ifndef PRODUCT
2554 void G1CMTask::verify_no_mark_stats_for(uint region_idx) {
2555   _mark_stats_cache.verify_no_mark_stats_for(region_idx);
2556 }
2557 #endif
2558 
2559 void G1CMTask::clear_mark_stats_cache(uint region_idx) {
2560   _mark_stats_cache.reset(region_idx);
2561 }
2562 
2563 Pair<size_t, size_t> G1CMTask::flush_mark_stats_cache() {
2564   return _mark_stats_cache.evict_all();
2565 }
2566 
2567 void G1CMTask::print_stats() {
2568   log_debug(gc, stats)("Marking Stats, task = %u, calls = %u", _worker_id, _calls);
2569   log_debug(gc, stats)("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
2570                        _elapsed_time_ms, _termination_time_ms);
2571   log_debug(gc, stats)("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms max = %1.2lfms, total = %1.2lfms",
2572                        _step_times_ms.num(),
2573                        _step_times_ms.avg(),
2574                        _step_times_ms.sd(),
2575                        _step_times_ms.maximum(),
2576                        _step_times_ms.sum());
2577   size_t const hits = _mark_stats_cache.hits();
2578   size_t const misses = _mark_stats_cache.misses();
2579   log_debug(gc, stats)("  Mark Stats Cache: hits %zu misses %zu ratio %.3f",
2580                        hits, misses, percent_of(hits, hits + misses));
2581 }
2582 
2583 bool G1ConcurrentMark::try_stealing(uint worker_id, G1TaskQueueEntry& task_entry) {
2584   return _task_queues->steal(worker_id, task_entry);
2585 }
2586 
2587 void G1CMTask::process_current_region(G1CMBitMapClosure& bitmap_closure) {
2588   if (has_aborted() || _curr_region == nullptr) {
2589     return;
2590   }
2591 
2592   // This means that we're already holding on to a region.
2593   assert(_finger != nullptr, "if region is not null, then the finger "
2594          "should not be null either");
2595 
2596   // We might have restarted this task after an evacuation pause
2597   // which might have evacuated the region we're holding on to
2598   // underneath our feet. Let's read its limit again to make sure
2599   // that we do not iterate over a region of the heap that
2600   // contains garbage (update_region_limit() will also move
2601   // _finger to the start of the region if it is found empty).
2602   update_region_limit();
2603   // We will start from _finger not from the start of the region,
2604   // as we might be restarting this task after aborting half-way
2605   // through scanning this region. In this case, _finger points to
2606   // the address where we last found a marked object. If this is a
2607   // fresh region, _finger points to start().
2608   MemRegion mr = MemRegion(_finger, _region_limit);
2609 
2610   assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
2611          "humongous regions should go around loop once only");
2612 
2613   // Some special cases:
2614   // If the memory region is empty, we can just give up the region.
2615   // If the current region is humongous then we only need to check
2616   // the bitmap for the bit associated with the start of the object,
2617   // scan the object if it's live, and give up the region.
2618   // Otherwise, let's iterate over the bitmap of the part of the region
2619   // that is left.
2620   // If the iteration is successful, give up the region.
2621   if (mr.is_empty()) {
2622     giveup_current_region();
2623     abort_marking_if_regular_check_fail();
2624   } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
2625     if (_mark_bitmap->is_marked(mr.start())) {
2626       // The object is marked - apply the closure
2627       bitmap_closure.do_addr(mr.start());
2628     }
2629     // Even if this task aborted while scanning the humongous object
2630     // we can (and should) give up the current region.
2631     giveup_current_region();
2632     abort_marking_if_regular_check_fail();
2633   } else if (_mark_bitmap->iterate(&bitmap_closure, mr)) {
2634     giveup_current_region();
2635     abort_marking_if_regular_check_fail();
2636   } else {
2637     assert(has_aborted(), "currently the only way to do so");
2638     // The only way to abort the bitmap iteration is to return
2639     // false from the do_bit() method. However, inside the
2640     // do_bit() method we move the _finger to point to the
2641     // object currently being looked at. So, if we bail out, we
2642     // have definitely set _finger to something non-null.
2643     assert(_finger != nullptr, "invariant");
2644 
2645     // Region iteration was actually aborted. So now _finger
2646     // points to the address of the object we last scanned. If we
2647     // leave it there, when we restart this task, we will rescan
2648     // the object. It is easy to avoid this. We move the finger by
2649     // enough to point to the next possible object header.
2650     assert(_finger < _region_limit, "invariant");
2651     HeapWord* const new_finger = _finger + cast_to_oop(_finger)->size();
2652     if (new_finger >= _region_limit) {
2653       giveup_current_region();
2654     } else {
2655       move_finger_to(new_finger);
2656     }
2657   }
2658 }
2659 
2660 void G1CMTask::claim_new_region() {
2661   // Read the note on the claim_region() method on why it might
2662   // return null with potentially more regions available for
2663   // claiming and why we have to check out_of_regions() to determine
2664   // whether we're done or not.
2665   while (!has_aborted() && _curr_region == nullptr && !_cm->out_of_regions()) {
2666     // We are going to try to claim a new region. We should have
2667     // given up on the previous one.
2668     // Separated the asserts so that we know which one fires.
2669     assert(_curr_region  == nullptr, "invariant");
2670     assert(_finger       == nullptr, "invariant");
2671     assert(_region_limit == nullptr, "invariant");
2672     G1HeapRegion* claimed_region = _cm->claim_region(_worker_id);
2673     if (claimed_region != nullptr) {
2674       // Yes, we managed to claim one
2675       setup_for_region(claimed_region);
2676       assert(_curr_region == claimed_region, "invariant");
2677     }
2678     // It is important to call the regular clock here. It might take
2679     // a while to claim a region if, for example, we hit a large
2680     // block of empty regions. So we need to call the regular clock
2681     // method once round the loop to make sure it's called
2682     // frequently enough.
2683     abort_marking_if_regular_check_fail();
2684   }
2685 }
2686 
2687 void G1CMTask::attempt_stealing() {
2688   // We cannot check whether the global stack is empty, since other
2689   // tasks might be pushing objects to it concurrently.
2690   assert(_cm->out_of_regions() && _task_queue->size() == 0,
2691          "only way to reach here");
2692   while (!has_aborted()) {
2693     G1TaskQueueEntry entry;
2694     if (_cm->try_stealing(_worker_id, entry)) {
2695       process_entry(entry, true /* stolen */);
2696 
2697       // And since we're towards the end, let's totally drain the
2698       // local queue and global stack.
2699       drain_local_queue(false);
2700       drain_global_stack(false);
2701     } else {
2702       break;
2703     }
2704   }
2705 }
2706 
2707 void G1CMTask::attempt_termination(bool is_serial) {
2708   // We cannot check whether the global stack is empty, since other
2709   // tasks might be concurrently pushing objects on it.
2710   // Separated the asserts so that we know which one fires.
2711   assert(_cm->out_of_regions(), "only way to reach here");
2712   assert(_task_queue->size() == 0, "only way to reach here");
2713   double termination_start_time_ms = os::elapsedTime() * 1000.0;
2714 
2715   // The G1CMTask class also extends the TerminatorTerminator class,
2716   // hence its should_exit_termination() method will also decide
2717   // whether to exit the termination protocol or not.
2718   bool finished = (is_serial ||
2719                    _cm->terminator()->offer_termination(this));
2720   _termination_time_ms += (os::elapsedTime() * 1000.0 - termination_start_time_ms);
2721 
2722   if (finished) {
2723     // We're all done.
2724 
2725     // We can now guarantee that the global stack is empty, since
2726     // all other tasks have finished. We separated the guarantees so
2727     // that, if a condition is false, we can immediately find out
2728     // which one.
2729     guarantee(_cm->out_of_regions(), "only way to reach here");
2730     guarantee(_cm->mark_stack_empty(), "only way to reach here");
2731     guarantee(_task_queue->size() == 0, "only way to reach here");
2732     guarantee(!_cm->has_overflown(), "only way to reach here");
2733     guarantee(!has_aborted(), "should never happen if termination has completed");
2734   } else {
2735     // Apparently there's more work to do. Let's abort this task. We
2736     // will restart it and hopefully we can find more things to do.
2737     set_has_aborted();
2738   }
2739 }
2740 
2741 void G1CMTask::handle_abort(bool is_serial, double elapsed_time_ms) {
2742   if (_has_timed_out) {
2743     double diff_ms = elapsed_time_ms - _time_target_ms;
2744     // Keep statistics of how well we did with respect to hitting
2745     // our target only if we actually timed out (if we aborted for
2746     // other reasons, then the results might get skewed).
2747     _marking_step_diff_ms.add(diff_ms);
2748   }
2749 
2750   if (!_cm->has_overflown()) {
2751     return;
2752   }
2753 
2754   // This is the interesting one. We aborted because a global
2755   // overflow was raised. This means we have to restart the
2756   // marking phase and start iterating over regions. However, in
2757   // order to do this we have to make sure that all tasks stop
2758   // what they are doing and re-initialize in a safe manner. We
2759   // will achieve this with the use of two barrier sync points.
2760   if (!is_serial) {
2761     // We only need to enter the sync barrier if being called
2762     // from a parallel context
2763     _cm->enter_first_sync_barrier(_worker_id);
2764 
2765     // When we exit this sync barrier we know that all tasks have
2766     // stopped doing marking work. So, it's now safe to
2767     // re-initialize our data structures.
2768   }
2769 
2770   clear_region_fields();
2771   flush_mark_stats_cache();
2772 
2773   if (!is_serial) {
2774     // If we're executing the concurrent phase of marking, reset the marking
2775     // state; otherwise the marking state is reset after reference processing,
2776     // during the remark pause.
2777     // If we reset here as a result of an overflow during the remark we will
2778     // see assertion failures from any subsequent set_concurrency_and_phase()
2779     // calls.
2780     if (_cm->concurrent() && _worker_id == 0) {
2781       // Worker 0 is responsible for clearing the global data structures because
2782       // of an overflow. During STW we should not clear the overflow flag (in
2783       // G1ConcurrentMark::reset_marking_state()) since we rely on it being true when we exit
2784       // method to abort the pause and restart concurrent marking.
2785       _cm->reset_marking_for_restart();
2786 
2787       log_info(gc, marking)("Concurrent Mark reset for overflow");
2788     }
2789 
2790     // ...and enter the second barrier.
2791     _cm->enter_second_sync_barrier(_worker_id);
2792   }
2793 }
2794 
2795 /*****************************************************************************
2796 
2797     The do_marking_step(time_target_ms, ...) method is the building
2798     block of the parallel marking framework. It can be called in parallel
2799     with other invocations of do_marking_step() on different tasks
2800     (but only one per task, obviously) and concurrently with the
2801     mutator threads, or during remark, hence it eliminates the need
2802     for two versions of the code. When called during remark, it will
2803     pick up from where the task left off during the concurrent marking
2804     phase. Interestingly, tasks are also claimable during evacuation
2805     pauses too, since do_marking_step() ensures that it aborts before
2806     it needs to yield.
2807 
2808     The data structures that it uses to do marking work are the
2809     following:
2810 
2811       (1) Marking Bitmap. If there are grey objects that appear only
2812       on the bitmap (this happens either when dealing with an overflow
2813       or when the concurrent start pause has simply marked the roots
2814       and didn't push them on the stack), then tasks claim heap
2815       regions whose bitmap they then scan to find grey objects. A
2816       global finger indicates where the end of the last claimed region
2817       is. A local finger indicates how far into the region a task has
2818       scanned. The two fingers are used to determine how to grey an
2819       object (i.e. whether simply marking it is OK, as it will be
2820       visited by a task in the future, or whether it needs to be also
2821       pushed on a stack).
2822 
2823       (2) Local Queue. The local queue of the task which is accessed
2824       reasonably efficiently by the task. Other tasks can steal from
2825       it when they run out of work. Throughout the marking phase, a
2826       task attempts to keep its local queue short but not totally
2827       empty, so that entries are available for stealing by other
2828       tasks. Only when there is no more work, a task will totally
2829       drain its local queue.
2830 
2831       (3) Global Mark Stack. This handles local queue overflow. During
2832       marking only sets of entries are moved between it and the local
2833       queues, as access to it requires a mutex and more fine-grain
2834       interaction with it which might cause contention. If it
2835       overflows, then the marking phase should restart and iterate
2836       over the bitmap to identify grey objects. Throughout the marking
2837       phase, tasks attempt to keep the global mark stack at a small
2838       length but not totally empty, so that entries are available for
2839       popping by other tasks. Only when there is no more work, tasks
2840       will totally drain the global mark stack.
2841 
2842       (4) SATB Buffer Queue. This is where completed SATB buffers are
2843       made available. Buffers are regularly removed from this queue
2844       and scanned for roots, so that the queue doesn't get too
2845       long. During remark, all completed buffers are processed, as
2846       well as the filled in parts of any uncompleted buffers.
2847 
2848     The do_marking_step() method tries to abort when the time target
2849     has been reached. There are a few other cases when the
2850     do_marking_step() method also aborts:
2851 
2852       (1) When the marking phase has been aborted (after a Full GC).
2853 
2854       (2) When a global overflow (on the global stack) has been
2855       triggered. Before the task aborts, it will actually sync up with
2856       the other tasks to ensure that all the marking data structures
2857       (local queues, stacks, fingers etc.)  are re-initialized so that
2858       when do_marking_step() completes, the marking phase can
2859       immediately restart.
2860 
2861       (3) When enough completed SATB buffers are available. The
2862       do_marking_step() method only tries to drain SATB buffers right
2863       at the beginning. So, if enough buffers are available, the
2864       marking step aborts and the SATB buffers are processed at
2865       the beginning of the next invocation.
2866 
2867       (4) To yield. when we have to yield then we abort and yield
2868       right at the end of do_marking_step(). This saves us from a lot
2869       of hassle as, by yielding we might allow a Full GC. If this
2870       happens then objects will be compacted underneath our feet, the
2871       heap might shrink, etc. We save checking for this by just
2872       aborting and doing the yield right at the end.
2873 
2874     From the above it follows that the do_marking_step() method should
2875     be called in a loop (or, otherwise, regularly) until it completes.
2876 
2877     If a marking step completes without its has_aborted() flag being
2878     true, it means it has completed the current marking phase (and
2879     also all other marking tasks have done so and have all synced up).
2880 
2881     A method called regular_clock_call() is invoked "regularly" (in
2882     sub ms intervals) throughout marking. It is this clock method that
2883     checks all the abort conditions which were mentioned above and
2884     decides when the task should abort. A work-based scheme is used to
2885     trigger this clock method: when the number of object words the
2886     marking phase has scanned or the number of references the marking
2887     phase has visited reach a given limit. Additional invocations to
2888     the method clock have been planted in a few other strategic places
2889     too. The initial reason for the clock method was to avoid calling
2890     cpu time gathering too regularly, as it is quite expensive. So,
2891     once it was in place, it was natural to piggy-back all the other
2892     conditions on it too and not constantly check them throughout the code.
2893 
2894     If do_termination is true then do_marking_step will enter its
2895     termination protocol.
2896 
2897     The value of is_serial must be true when do_marking_step is being
2898     called serially (i.e. by the VMThread) and do_marking_step should
2899     skip any synchronization in the termination and overflow code.
2900     Examples include the serial remark code and the serial reference
2901     processing closures.
2902 
2903     The value of is_serial must be false when do_marking_step is
2904     being called by any of the worker threads.
2905     Examples include the concurrent marking code (CMMarkingTask),
2906     the MT remark code, and the MT reference processing closures.
2907 
2908  *****************************************************************************/
2909 
2910 void G1CMTask::do_marking_step(double time_target_ms,
2911                                bool do_termination,
2912                                bool is_serial) {
2913   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
2914 
2915   _start_cpu_time_ns = os::current_thread_cpu_time();
2916 
2917   // If do_stealing is true then do_marking_step will attempt to
2918   // steal work from the other G1CMTasks. It only makes sense to
2919   // enable stealing when the termination protocol is enabled
2920   // and do_marking_step() is not being called serially.
2921   bool do_stealing = do_termination && !is_serial;
2922 
2923   G1Predictions const& predictor = _g1h->policy()->predictor();
2924   double diff_prediction_ms = predictor.predict_zero_bounded(&_marking_step_diff_ms);
2925   _time_target_ms = time_target_ms - diff_prediction_ms;
2926 
2927   // set up the variables that are used in the work-based scheme to
2928   // call the regular clock method
2929   _words_scanned = 0;
2930   _refs_reached  = 0;
2931   recalculate_limits();
2932 
2933   // clear all flags
2934   clear_has_aborted();
2935   _has_timed_out = false;
2936   _draining_satb_buffers = false;
2937 
2938   ++_calls;
2939 
2940   // Set up the bitmap and oop closures. Anything that uses them is
2941   // eventually called from this method, so it is OK to allocate these
2942   // statically.
2943   G1CMBitMapClosure bitmap_closure(this, _cm);
2944   G1CMOopClosure cm_oop_closure(_g1h, this);
2945   set_cm_oop_closure(&cm_oop_closure);
2946 
2947   if (_cm->has_overflown()) {
2948     // This can happen if the mark stack overflows during a GC pause
2949     // and this task, after a yield point, restarts. We have to abort
2950     // as we need to get into the overflow protocol which happens
2951     // right at the end of this task.
2952     set_has_aborted();
2953   }
2954 
2955   // First drain any available SATB buffers. After this, we will not
2956   // look at SATB buffers before the next invocation of this method.
2957   // If enough completed SATB buffers are queued up, the regular clock
2958   // will abort this task so that it restarts.
2959   drain_satb_buffers();
2960   // ...then partially drain the local queue and the global stack
2961   drain_local_queue(true);
2962   drain_global_stack(true);
2963 
2964   do {
2965     process_current_region(bitmap_closure);
2966     // At this point we have either completed iterating over the
2967     // region we were holding on to, or we have aborted.
2968 
2969     // We then partially drain the local queue and the global stack.
2970     drain_local_queue(true);
2971     drain_global_stack(true);
2972 
2973     claim_new_region();
2974 
2975     assert(has_aborted() || _curr_region != nullptr || _cm->out_of_regions(),
2976            "at this point we should be out of regions");
2977   } while ( _curr_region != nullptr && !has_aborted());
2978 
2979   // We cannot check whether the global stack is empty, since other
2980   // tasks might be pushing objects to it concurrently.
2981   assert(has_aborted() || _cm->out_of_regions(),
2982          "at this point we should be out of regions");
2983   // Try to reduce the number of available SATB buffers so that
2984   // remark has less work to do.
2985   drain_satb_buffers();
2986 
2987   // Since we've done everything else, we can now totally drain the
2988   // local queue and global stack.
2989   drain_local_queue(false);
2990   drain_global_stack(false);
2991 
2992   // Attempt at work stealing from other task's queues.
2993   if (do_stealing && !has_aborted()) {
2994     // We have not aborted. This means that we have finished all that
2995     // we could. Let's try to do some stealing...
2996     attempt_stealing();
2997   }
2998 
2999   // We still haven't aborted. Now, let's try to get into the
3000   // termination protocol.
3001   if (do_termination && !has_aborted()) {
3002     attempt_termination(is_serial);
3003   }
3004 
3005   // Mainly for debugging purposes to make sure that a pointer to the
3006   // closure which was statically allocated in this frame doesn't
3007   // escape it by accident.
3008   set_cm_oop_closure(nullptr);
3009   jlong end_cpu_time_ns = os::current_thread_cpu_time();
3010   double elapsed_time_ms = (double)(end_cpu_time_ns - _start_cpu_time_ns) / NANOSECS_PER_MILLISEC;
3011   // Update the step history.
3012   _step_times_ms.add(elapsed_time_ms);
3013 
3014   if (has_aborted()) {
3015     // The task was aborted for some reason.
3016     handle_abort(is_serial, elapsed_time_ms);
3017   }
3018 }
3019 
3020 G1CMTask::G1CMTask(uint worker_id,
3021                    G1ConcurrentMark* cm,
3022                    G1CMTaskQueue* task_queue,
3023                    G1RegionMarkStats* mark_stats) :
3024   _worker_id(worker_id),
3025   _g1h(G1CollectedHeap::heap()),
3026   _cm(cm),
3027   _mark_bitmap(nullptr),
3028   _task_queue(task_queue),
3029   _partial_array_splitter(_cm->partial_array_state_manager(), _cm->max_num_tasks()),
3030   _mark_stats_cache(mark_stats, G1RegionMarkStatsCache::RegionMarkStatsCacheSize),
3031   _calls(0),
3032   _time_target_ms(0.0),
3033   _start_cpu_time_ns(0),
3034   _cm_oop_closure(nullptr),
3035   _curr_region(nullptr),
3036   _finger(nullptr),
3037   _region_limit(nullptr),
3038   _words_scanned(0),
3039   _words_scanned_limit(0),
3040   _real_words_scanned_limit(0),
3041   _refs_reached(0),
3042   _refs_reached_limit(0),
3043   _real_refs_reached_limit(0),
3044   _has_aborted(false),
3045   _has_timed_out(false),
3046   _draining_satb_buffers(false),
3047   _step_times_ms(),
3048   _elapsed_time_ms(0.0),
3049   _termination_time_ms(0.0),
3050   _marking_step_diff_ms()
3051 {
3052   guarantee(task_queue != nullptr, "invariant");
3053 
3054   _marking_step_diff_ms.add(0.5);
3055 }
3056 
3057 // These are formatting macros that are used below to ensure
3058 // consistent formatting. The *_H_* versions are used to format the
3059 // header for a particular value and they should be kept consistent
3060 // with the corresponding macro. Also note that most of the macros add
3061 // the necessary white space (as a prefix) which makes them a bit
3062 // easier to compose.
3063 
3064 // All the output lines are prefixed with this string to be able to
3065 // identify them easily in a large log file.
3066 #define G1PPRL_LINE_PREFIX            "###"
3067 
3068 #define G1PPRL_ADDR_BASE_FORMAT    " " PTR_FORMAT "-" PTR_FORMAT
3069 #ifdef _LP64
3070 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
3071 #else // _LP64
3072 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
3073 #endif // _LP64
3074 
3075 // For per-region info
3076 #define G1PPRL_TYPE_FORMAT            "   %-4s"
3077 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
3078 #define G1PPRL_STATE_FORMAT           "   %-5s"
3079 #define G1PPRL_STATE_H_FORMAT         "   %5s"
3080 #define G1PPRL_BYTE_FORMAT            "  %9zu"
3081 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
3082 #define G1PPRL_DOUBLE_FORMAT          "%14.1f"
3083 #define G1PPRL_GCEFF_H_FORMAT         "  %14s"
3084 #define G1PPRL_GID_H_FORMAT           "  %9s"
3085 #define G1PPRL_GID_FORMAT             "  " UINT32_FORMAT_W(9)
3086 #define G1PPRL_LEN_FORMAT             "  " UINT32_FORMAT_W(14)
3087 #define G1PPRL_LEN_H_FORMAT           "  %14s"
3088 #define G1PPRL_GID_GCEFF_FORMAT       "  %14.1f"
3089 #define G1PPRL_GID_LIVENESS_FORMAT    "  %9.2f"
3090 
3091 // For summary info
3092 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  " tag ":" G1PPRL_ADDR_BASE_FORMAT
3093 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  " tag ": %zu"
3094 #define G1PPRL_SUM_MB_FORMAT(tag)      "  " tag ": %1.2f MB"
3095 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"
3096 
3097 G1PrintRegionLivenessInfoClosure::G1PrintRegionLivenessInfoClosure(const char* phase_name) :
3098   _total_used_bytes(0),
3099   _total_capacity_bytes(0),
3100   _total_live_bytes(0),
3101   _total_remset_bytes(0),
3102   _total_code_roots_bytes(0)
3103 {
3104   if (!log_is_enabled(Trace, gc, liveness)) {
3105     return;
3106   }
3107 
3108   G1CollectedHeap* g1h = G1CollectedHeap::heap();
3109   MemRegion reserved = g1h->reserved();
3110   double now = os::elapsedTime();
3111 
3112   // Print the header of the output.
3113   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
3114   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP"
3115                           G1PPRL_SUM_ADDR_FORMAT("reserved")
3116                           G1PPRL_SUM_BYTE_FORMAT("region-size"),
3117                           p2i(reserved.start()), p2i(reserved.end()),
3118                           G1HeapRegion::GrainBytes);
3119   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3120   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3121                           G1PPRL_TYPE_H_FORMAT
3122                           G1PPRL_ADDR_BASE_H_FORMAT
3123                           G1PPRL_BYTE_H_FORMAT
3124                           G1PPRL_BYTE_H_FORMAT
3125                           G1PPRL_STATE_H_FORMAT
3126                           G1PPRL_BYTE_H_FORMAT
3127                           G1PPRL_GID_H_FORMAT,
3128                           "type", "address-range",
3129                           "used", "live",
3130                           "state", "code-roots",
3131                           "group-id");
3132   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3133                           G1PPRL_TYPE_H_FORMAT
3134                           G1PPRL_ADDR_BASE_H_FORMAT
3135                           G1PPRL_BYTE_H_FORMAT
3136                           G1PPRL_BYTE_H_FORMAT
3137                           G1PPRL_STATE_H_FORMAT
3138                           G1PPRL_BYTE_H_FORMAT
3139                           G1PPRL_GID_H_FORMAT,
3140                           "", "",
3141                           "(bytes)", "(bytes)",
3142                           "", "(bytes)", "");
3143 }
3144 
3145 bool G1PrintRegionLivenessInfoClosure::do_heap_region(G1HeapRegion* r) {
3146   if (!log_is_enabled(Trace, gc, liveness)) {
3147     return false;
3148   }
3149 
3150   const char* type       = r->get_type_str();
3151   HeapWord* bottom       = r->bottom();
3152   HeapWord* end          = r->end();
3153   size_t capacity_bytes  = r->capacity();
3154   size_t used_bytes      = r->used();
3155   size_t live_bytes      = r->live_bytes();
3156   size_t remset_bytes    = r->rem_set()->mem_size();
3157   size_t code_roots_bytes = r->rem_set()->code_roots_mem_size();
3158   const char* remset_type = r->rem_set()->get_short_state_str();
3159   uint cset_group_id     = r->rem_set()->has_cset_group()
3160                          ? r->rem_set()->cset_group_id()
3161                          : G1CSetCandidateGroup::NoRemSetId;
3162 
3163   _total_used_bytes      += used_bytes;
3164   _total_capacity_bytes  += capacity_bytes;
3165   _total_live_bytes      += live_bytes;
3166   _total_remset_bytes    += remset_bytes;
3167   _total_code_roots_bytes += code_roots_bytes;
3168 
3169   // Print a line for this particular region.
3170   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3171                         G1PPRL_TYPE_FORMAT
3172                         G1PPRL_ADDR_BASE_FORMAT
3173                         G1PPRL_BYTE_FORMAT
3174                         G1PPRL_BYTE_FORMAT
3175                         G1PPRL_STATE_FORMAT
3176                         G1PPRL_BYTE_FORMAT
3177                         G1PPRL_GID_FORMAT,
3178                         type, p2i(bottom), p2i(end),
3179                         used_bytes, live_bytes,
3180                         remset_type, code_roots_bytes,
3181                         cset_group_id);
3182 
3183   return false;
3184 }
3185 
3186 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
3187   if (!log_is_enabled(Trace, gc, liveness)) {
3188     return;
3189   }
3190 
3191   G1CollectedHeap* g1h = G1CollectedHeap::heap();
3192   _total_remset_bytes += g1h->card_set_freelist_pool()->mem_size();
3193   // add static memory usages to remembered set sizes
3194   _total_remset_bytes += G1HeapRegionRemSet::static_mem_size();
3195 
3196   log_cset_candidate_groups();
3197 
3198   // Print the footer of the output.
3199   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3200   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3201                          " SUMMARY"
3202                          G1PPRL_SUM_MB_FORMAT("capacity")
3203                          G1PPRL_SUM_MB_PERC_FORMAT("used")
3204                          G1PPRL_SUM_MB_PERC_FORMAT("live")
3205                          G1PPRL_SUM_MB_FORMAT("remset")
3206                          G1PPRL_SUM_MB_FORMAT("code-roots"),
3207                          bytes_to_mb(_total_capacity_bytes),
3208                          bytes_to_mb(_total_used_bytes),
3209                          percent_of(_total_used_bytes, _total_capacity_bytes),
3210                          bytes_to_mb(_total_live_bytes),
3211                          percent_of(_total_live_bytes, _total_capacity_bytes),
3212                          bytes_to_mb(_total_remset_bytes),
3213                          bytes_to_mb(_total_code_roots_bytes));
3214 }
3215 
3216 void G1PrintRegionLivenessInfoClosure::log_cset_candidate_group_add_total(G1CSetCandidateGroup* group, const char* type) {
3217   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3218                           G1PPRL_GID_FORMAT
3219                           G1PPRL_LEN_FORMAT
3220                           G1PPRL_GID_GCEFF_FORMAT
3221                           G1PPRL_GID_LIVENESS_FORMAT
3222                           G1PPRL_BYTE_FORMAT
3223                           G1PPRL_TYPE_H_FORMAT,
3224                           group->group_id(),
3225                           group->length(),
3226                           group->length() > 0 ? group->gc_efficiency() : 0.0,
3227                           group->length() > 0 ? group->liveness_percent() : 0.0,
3228                           group->card_set()->mem_size(),
3229                           type);
3230   _total_remset_bytes += group->card_set()->mem_size();
3231 }
3232 
3233 void G1PrintRegionLivenessInfoClosure::log_cset_candidate_grouplist(G1CSetCandidateGroupList& gl, const char* type) {
3234   for (G1CSetCandidateGroup* group : gl) {
3235     log_cset_candidate_group_add_total(group, type);
3236   }
3237 }
3238 
3239 void G1PrintRegionLivenessInfoClosure::log_cset_candidate_groups() {
3240   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3241   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" Collection Set Candidate Groups");
3242   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX " Types: Y=Young, M=From Marking Regions, R=Retained Regions");
3243   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3244                           G1PPRL_GID_H_FORMAT
3245                           G1PPRL_LEN_H_FORMAT
3246                           G1PPRL_GCEFF_H_FORMAT
3247                           G1PPRL_BYTE_H_FORMAT
3248                           G1PPRL_BYTE_H_FORMAT
3249                           G1PPRL_TYPE_H_FORMAT,
3250                           "groud-id", "num-regions",
3251                           "gc-eff", "liveness",
3252                           "remset", "type");
3253 
3254   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3255                           G1PPRL_GID_H_FORMAT
3256                           G1PPRL_LEN_H_FORMAT
3257                           G1PPRL_GCEFF_H_FORMAT
3258                           G1PPRL_BYTE_H_FORMAT
3259                           G1PPRL_BYTE_H_FORMAT
3260                           G1PPRL_TYPE_H_FORMAT,
3261                           "", "",
3262                           "(bytes/ms)", "%",
3263                           "(bytes)", "");
3264 
3265   G1CollectedHeap* g1h = G1CollectedHeap::heap();
3266 
3267   log_cset_candidate_group_add_total(g1h->young_regions_cset_group(), "Y");
3268 
3269   G1CollectionSetCandidates* candidates = g1h->policy()->candidates();
3270   log_cset_candidate_grouplist(candidates->from_marking_groups(), "M");
3271   log_cset_candidate_grouplist(candidates->retained_groups(), "R");
3272 }