1 /*
   2  * Copyright (c) 2001, 2026, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "classfile/classLoaderData.hpp"
  26 #include "classfile/classLoaderDataGraph.hpp"
  27 #include "cppstdlib/new.hpp"
  28 #include "gc/g1/g1BarrierSet.hpp"
  29 #include "gc/g1/g1BatchedTask.hpp"
  30 #include "gc/g1/g1CardSetMemory.hpp"
  31 #include "gc/g1/g1CardTableClaimTable.inline.hpp"
  32 #include "gc/g1/g1CollectedHeap.inline.hpp"
  33 #include "gc/g1/g1CollectorState.inline.hpp"
  34 #include "gc/g1/g1ConcurrentMark.inline.hpp"
  35 #include "gc/g1/g1ConcurrentMarkRemarkTasks.hpp"
  36 #include "gc/g1/g1ConcurrentMarkThread.inline.hpp"
  37 #include "gc/g1/g1ConcurrentRebuildAndScrub.hpp"
  38 #include "gc/g1/g1ConcurrentRefine.hpp"
  39 #include "gc/g1/g1HeapRegion.inline.hpp"
  40 #include "gc/g1/g1HeapRegionManager.hpp"
  41 #include "gc/g1/g1HeapRegionPrinter.hpp"
  42 #include "gc/g1/g1HeapRegionRemSet.inline.hpp"
  43 #include "gc/g1/g1HeapRegionSet.inline.hpp"
  44 #include "gc/g1/g1HeapVerifier.hpp"
  45 #include "gc/g1/g1OopClosures.inline.hpp"
  46 #include "gc/g1/g1Policy.hpp"
  47 #include "gc/g1/g1RegionMarkStatsCache.inline.hpp"
  48 #include "gc/g1/g1ThreadLocalData.hpp"
  49 #include "gc/g1/g1Trace.hpp"
  50 #include "gc/shared/gcId.hpp"
  51 #include "gc/shared/gcTimer.hpp"
  52 #include "gc/shared/gcTraceTime.inline.hpp"
  53 #include "gc/shared/gcVMOperations.hpp"
  54 #include "gc/shared/partialArraySplitter.inline.hpp"
  55 #include "gc/shared/partialArrayState.hpp"
  56 #include "gc/shared/partialArrayTaskStats.hpp"
  57 #include "gc/shared/referencePolicy.hpp"
  58 #include "gc/shared/suspendibleThreadSet.hpp"
  59 #include "gc/shared/taskqueue.inline.hpp"
  60 #include "gc/shared/taskTerminator.hpp"
  61 #include "gc/shared/weakProcessor.inline.hpp"
  62 #include "gc/shared/workerPolicy.hpp"
  63 #include "jvm.h"
  64 #include "logging/log.hpp"
  65 #include "memory/allocation.hpp"
  66 #include "memory/iterator.hpp"
  67 #include "memory/metaspaceUtils.hpp"
  68 #include "memory/resourceArea.hpp"
  69 #include "memory/universe.hpp"
  70 #include "nmt/memTracker.hpp"
  71 #include "oops/access.inline.hpp"
  72 #include "oops/oop.inline.hpp"
  73 #include "oops/oopCast.inline.hpp"
  74 #include "runtime/globals_extension.hpp"
  75 #include "runtime/handles.inline.hpp"
  76 #include "runtime/java.hpp"
  77 #include "runtime/orderAccess.hpp"
  78 #include "runtime/os.hpp"
  79 #include "runtime/prefetch.inline.hpp"
  80 #include "runtime/threads.hpp"
  81 #include "utilities/align.hpp"
  82 #include "utilities/checkedCast.hpp"
  83 #include "utilities/formatBuffer.hpp"
  84 #include "utilities/growableArray.hpp"
  85 #include "utilities/powerOfTwo.hpp"
  86 
  87 G1CMIsAliveClosure::G1CMIsAliveClosure() : _cm(nullptr) { }
  88 
  89 G1CMIsAliveClosure::G1CMIsAliveClosure(G1ConcurrentMark* cm) : _cm(cm) {
  90   assert(cm != nullptr, "must be");
  91 }
  92 
  93 void G1CMIsAliveClosure::initialize(G1ConcurrentMark* cm) {
  94   assert(cm != nullptr, "must be");
  95   assert(_cm == nullptr, "double initialize");
  96   _cm = cm;
  97 }
  98 
  99 bool G1CMBitMapClosure::do_addr(HeapWord* const addr) {
 100   assert(addr < _cm->finger(), "invariant");
 101   assert(addr >= _task->finger(), "invariant");
 102 
 103   // We move that task's local finger along.
 104   _task->move_finger_to(addr);
 105 
 106   _task->process_entry(G1TaskQueueEntry(cast_to_oop(addr)), false /* stolen */);
 107   // we only partially drain the local queue and global stack
 108   _task->drain_local_queue(true);
 109   _task->drain_global_stack(true);
 110 
 111   // if the has_aborted flag has been raised, we need to bail out of
 112   // the iteration
 113   return !_task->has_aborted();
 114 }
 115 
 116 G1CMMarkStack::G1CMMarkStack() :
 117   _chunk_allocator() {
 118   set_empty();
 119 }
 120 
 121 size_t G1CMMarkStack::capacity_alignment() {
 122   return (size_t)lcm(os::vm_allocation_granularity(), sizeof(TaskQueueEntryChunk)) / sizeof(G1TaskQueueEntry);
 123 }
 124 
 125 bool G1CMMarkStack::initialize() {
 126   guarantee(_chunk_allocator.capacity() == 0, "G1CMMarkStack already initialized.");
 127 
 128   size_t initial_capacity = MarkStackSize;
 129   size_t max_capacity = MarkStackSizeMax;
 130 
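  // MarkStackSize and MarkStackSizeMax are specified in entries. Convert them to
  // whole chunks, rounding the initial chunk count up to a power of two (as the
  // ChunkAllocator requires), then convert back to entries to update the flags.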
 131   size_t const TaskEntryChunkSizeInVoidStar = sizeof(TaskQueueEntryChunk) / sizeof(G1TaskQueueEntry);
 132 
 133   size_t max_num_chunks = align_up(max_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
 134   size_t initial_num_chunks = align_up(initial_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
 135 
 136   initial_num_chunks = round_up_power_of_2(initial_num_chunks);
 137   max_num_chunks = MAX2(initial_num_chunks, max_num_chunks);
 138 
 139   size_t limit = (INT_MAX - 1);
 140   max_capacity = MIN2((max_num_chunks * TaskEntryChunkSizeInVoidStar), limit);
 141   initial_capacity = MIN2((initial_num_chunks * TaskEntryChunkSizeInVoidStar), limit);
 142 
 143   FLAG_SET_ERGO(MarkStackSizeMax, max_capacity);
 144   FLAG_SET_ERGO(MarkStackSize, initial_capacity);
 145 
 146   log_trace(gc)("MarkStackSize: %uk  MarkStackSizeMax: %uk", (uint)(MarkStackSize / K), (uint)(MarkStackSizeMax / K));
 147 
 148   log_debug(gc)("Initialize mark stack with %zu chunks, maximum %zu",
 149                 initial_num_chunks, max_capacity);
 150 
 151   return _chunk_allocator.initialize(initial_num_chunks, max_num_chunks);
 152 }
 153 
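// Hands out the next unused TaskQueueEntryChunk from the reserved backing memory.
// The common path is a lock-free bump of _size; if the chunk falls into a bucket
// whose memory has not been mapped yet, the bucket is expanded under
// G1MarkStackChunkList_lock, provided growing is allowed (_should_grow).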
 154 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::ChunkAllocator::allocate_new_chunk() {
 155   if (_size.load_relaxed() >= _max_capacity) {
 156     return nullptr;
 157   }
 158 
 159   size_t cur_idx = _size.fetch_then_add(1u);
 160 
 161   if (cur_idx >= _max_capacity) {
 162     return nullptr;
 163   }
 164 
 165   size_t bucket = get_bucket(cur_idx);
 166   if (_buckets[bucket].load_acquire() == nullptr) {
 167     if (!_should_grow) {
      // Prefer to restart concurrent marking rather than grow the mark stack.
 169       return nullptr;
 170     }
 171 
 172     MutexLocker x(G1MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
 173     if (_buckets[bucket].load_acquire() == nullptr) {
 174       size_t desired_capacity = bucket_size(bucket) * 2;
 175       if (!try_expand_to(desired_capacity)) {
 176         return nullptr;
 177       }
 178     }
 179   }
 180 
 181   size_t bucket_idx = get_bucket_index(cur_idx);
 182   TaskQueueEntryChunk* result = ::new (&_buckets[bucket].load_relaxed()[bucket_idx]) TaskQueueEntryChunk;
 183   result->next = nullptr;
 184   return result;
 185 }
 186 
 187 G1CMMarkStack::ChunkAllocator::ChunkAllocator() :
 188   _min_capacity(0),
 189   _max_capacity(0),
 190   _capacity(0),
 191   _num_buckets(0),
 192   _should_grow(false),
 193   _buckets(nullptr),
 194   _size(0)
 195 { }
 196 
 197 bool G1CMMarkStack::ChunkAllocator::initialize(size_t initial_capacity, size_t max_capacity) {
 198   guarantee(is_power_of_2(initial_capacity), "Invalid initial_capacity");
 199 
 200   _min_capacity = initial_capacity;
 201   _max_capacity = max_capacity;
 202   _num_buckets  = get_bucket(_max_capacity) + 1;
 203 
 204   _buckets = NEW_C_HEAP_ARRAY(Atomic<TaskQueueEntryChunk*>, _num_buckets, mtGC);
 205 
 206   for (size_t i = 0; i < _num_buckets; i++) {
 207     _buckets[i].store_relaxed(nullptr);
 208   }
 209 
 210   size_t new_capacity = bucket_size(0);
 211 
 212   if (!reserve(new_capacity)) {
 213     log_warning(gc)("Failed to reserve memory for new overflow mark stack with %zu chunks and size %zuB.", new_capacity, new_capacity * sizeof(TaskQueueEntryChunk));
 214     return false;
 215   }
 216   return true;
 217 }
 218 
 219 bool G1CMMarkStack::ChunkAllocator::try_expand_to(size_t desired_capacity) {
 220   if (_capacity == _max_capacity) {
 221     log_debug(gc)("Can not expand overflow mark stack further, already at maximum capacity of %zu chunks.", _capacity);
 222     return false;
 223   }
 224 
 225   size_t old_capacity = _capacity;
 226   desired_capacity = MIN2(desired_capacity, _max_capacity);
 227 
 228   if (reserve(desired_capacity)) {
 229     log_debug(gc)("Expanded the mark stack capacity from %zu to %zu chunks",
 230                   old_capacity, desired_capacity);
 231     return true;
 232   }
 233   return false;
 234 }
 235 
 236 bool G1CMMarkStack::ChunkAllocator::try_expand() {
 237   size_t new_capacity = _capacity * 2;
 238   return try_expand_to(new_capacity);
 239 }
 240 
 241 G1CMMarkStack::ChunkAllocator::~ChunkAllocator() {
 242   if (_buckets == nullptr) {
 243     return;
 244   }
 245 
 246   for (size_t i = 0; i < _num_buckets; i++) {
 247     if (_buckets[i].load_relaxed() != nullptr) {
 248       MmapArrayAllocator<TaskQueueEntryChunk>::free(_buckets[i].load_relaxed(),  bucket_size(i));
 249       _buckets[i].store_relaxed(nullptr);
 250     }
 251   }
 252 
 253   FREE_C_HEAP_ARRAY(_buckets);
 254 }
 255 
 256 bool G1CMMarkStack::ChunkAllocator::reserve(size_t new_capacity) {
 257   assert(new_capacity <= _max_capacity, "Cannot expand overflow mark stack beyond the max_capacity of %zu chunks.", _max_capacity);
 258 
 259   size_t highest_bucket = get_bucket(new_capacity - 1);
 260   size_t i = get_bucket(_capacity);
 261 
 262   // Allocate all buckets associated with indexes between the current capacity (_capacity)
 263   // and the new capacity (new_capacity). This step ensures that there are no gaps in the
 264   // array and that the capacity accurately reflects the reserved memory.
 265   for (; i <= highest_bucket; i++) {
 266     if (_buckets[i].load_acquire() != nullptr) {
 267       continue; // Skip over already allocated buckets.
 268     }
 269 
 270     size_t bucket_capacity = bucket_size(i);
 271 
 272     // Trim bucket size so that we do not exceed the _max_capacity.
 273     bucket_capacity = (_capacity + bucket_capacity) <= _max_capacity ?
 274                       bucket_capacity :
 275                       _max_capacity - _capacity;
 276 
 277 
 278     TaskQueueEntryChunk* bucket_base = MmapArrayAllocator<TaskQueueEntryChunk>::allocate_or_null(bucket_capacity, mtGC);
 279 
 280     if (bucket_base == nullptr) {
 281       log_warning(gc)("Failed to reserve memory for increasing the overflow mark stack capacity with %zu chunks and size %zuB.",
 282                       bucket_capacity, bucket_capacity * sizeof(TaskQueueEntryChunk));
 283       return false;
 284     }
 285     _capacity += bucket_capacity;
 286     _buckets[i].release_store(bucket_base);
 287   }
 288   return true;
 289 }
 290 
 291 void G1CMMarkStack::expand() {
 292   _chunk_allocator.try_expand();
 293 }
 294 
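// The list helpers below use relaxed accesses only; callers serialize through
// G1MarkStackChunkList_lock or G1MarkStackFreeList_lock as appropriate.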
 295 void G1CMMarkStack::add_chunk_to_list(Atomic<TaskQueueEntryChunk*>* list, TaskQueueEntryChunk* elem) {
 296   elem->next = list->load_relaxed();
 297   list->store_relaxed(elem);
 298 }
 299 
 300 void G1CMMarkStack::add_chunk_to_chunk_list(TaskQueueEntryChunk* elem) {
 301   MutexLocker x(G1MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
 302   add_chunk_to_list(&_chunk_list, elem);
 303   _chunks_in_chunk_list++;
 304 }
 305 
 306 void G1CMMarkStack::add_chunk_to_free_list(TaskQueueEntryChunk* elem) {
 307   MutexLocker x(G1MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
 308   add_chunk_to_list(&_free_list, elem);
 309 }
 310 
 311 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_list(Atomic<TaskQueueEntryChunk*>* list) {
 312   TaskQueueEntryChunk* result = list->load_relaxed();
 313   if (result != nullptr) {
 314     list->store_relaxed(list->load_relaxed()->next);
 315   }
 316   return result;
 317 }
 318 
 319 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_chunk_list() {
 320   MutexLocker x(G1MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
 321   TaskQueueEntryChunk* result = remove_chunk_from_list(&_chunk_list);
 322   if (result != nullptr) {
 323     _chunks_in_chunk_list--;
 324   }
 325   return result;
 326 }
 327 
 328 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_free_list() {
 329   MutexLocker x(G1MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
 330   return remove_chunk_from_list(&_free_list);
 331 }
 332 
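// Pushes a full chunk's worth of entries onto the global mark stack: reuse a chunk
// from the free list if possible, otherwise allocate from the backing memory.
// Returns false if no chunk could be obtained, i.e. the stack cannot grow further.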
 333 bool G1CMMarkStack::par_push_chunk(G1TaskQueueEntry* ptr_arr) {
 334   // Get a new chunk.
 335   TaskQueueEntryChunk* new_chunk = remove_chunk_from_free_list();
 336 
 337   if (new_chunk == nullptr) {
 338     // Did not get a chunk from the free list. Allocate from backing memory.
 339     new_chunk = _chunk_allocator.allocate_new_chunk();
 340 
 341     if (new_chunk == nullptr) {
 342       return false;
 343     }
 344   }
 345 
 346   Copy::conjoint_memory_atomic(ptr_arr, new_chunk->data, EntriesPerChunk * sizeof(G1TaskQueueEntry));
 347 
 348   add_chunk_to_chunk_list(new_chunk);
 349 
 350   return true;
 351 }
 352 
 353 bool G1CMMarkStack::par_pop_chunk(G1TaskQueueEntry* ptr_arr) {
 354   TaskQueueEntryChunk* cur = remove_chunk_from_chunk_list();
 355 
 356   if (cur == nullptr) {
 357     return false;
 358   }
 359 
 360   Copy::conjoint_memory_atomic(cur->data, ptr_arr, EntriesPerChunk * sizeof(G1TaskQueueEntry));
 361 
 362   add_chunk_to_free_list(cur);
 363   return true;
 364 }
 365 
 366 void G1CMMarkStack::set_empty() {
 367   _chunks_in_chunk_list = 0;
 368   _chunk_list.store_relaxed(nullptr);
 369   _free_list.store_relaxed(nullptr);
 370   _chunk_allocator.reset();
 371 }
 372 
 373 G1CMRootMemRegions::G1CMRootMemRegions(uint const max_regions) :
 374     _root_regions(MemRegion::create_array(max_regions, mtGC)),
 375     _max_regions(max_regions),
 376     _num_regions(0),
 377     _num_claimed_regions(0) { }
 378 
 379 G1CMRootMemRegions::~G1CMRootMemRegions() {
 380   MemRegion::destroy_array(_root_regions, _max_regions);
 381 }
 382 
 383 void G1CMRootMemRegions::reset() {
 384   assert_at_safepoint();
 385   assert(G1CollectedHeap::heap()->collector_state()->is_in_concurrent_start_gc(), "must be");
 386 
 387   _num_regions.store_relaxed(0);
 388   _num_claimed_regions.store_relaxed(0);
 389 }
 390 
 391 void G1CMRootMemRegions::add(HeapWord* start, HeapWord* end) {
 392   assert_at_safepoint();
 393   uint idx = _num_regions.fetch_then_add(1u);
  assert(idx < _max_regions, "Trying to add more root MemRegions than there is space for (%u)", _max_regions);
  assert(start != nullptr && end != nullptr && start <= end, "Start (" PTR_FORMAT ") should be less than or equal to "
         "end (" PTR_FORMAT ")", p2i(start), p2i(end));
 397   _root_regions[idx].set_start(start);
 398   _root_regions[idx].set_end(end);
 399 }
 400 
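// Claim the next unclaimed root MemRegion, if any. Claiming is a simple fetch-and-add
// on _num_claimed_regions; a claim index at or beyond num_regions() means all root
// regions have already been handed out and nullptr is returned.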
 401 const MemRegion* G1CMRootMemRegions::claim_next() {
 402   uint local_num_regions = num_regions();
 403   if (num_claimed_regions() >= local_num_regions) {
 404     return nullptr;
 405   }
 406 
 407   uint claimed_index = _num_claimed_regions.fetch_then_add(1u);
 408   if (claimed_index < local_num_regions) {
 409     return &_root_regions[claimed_index];
 410   }
 411   return nullptr;
 412 }
 413 
 414 bool G1CMRootMemRegions::work_completed() const {
 415   return num_remaining_regions() == 0;
 416 }
 417 
 418 uint G1CMRootMemRegions::num_remaining_regions() const {
 419   uint total = num_regions();
 420   uint claimed = num_claimed_regions();
 421   return (total > claimed) ? total - claimed : 0;
 422 }
 423 
 424 bool G1CMRootMemRegions::contains(const MemRegion mr) const {
 425   uint local_num_root_regions = num_regions();
 426   for (uint i = 0; i < local_num_root_regions; i++) {
 427     if (_root_regions[i].equals(mr)) {
 428       return true;
 429     }
 430   }
 431   return false;
 432 }
 433 
 434 G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h,
 435                                    G1RegionToSpaceMapper* bitmap_storage) :
 436   _cm_thread(nullptr),
 437   _g1h(g1h),
 438 
 439   _mark_bitmap(),
 440 
 441   _heap(_g1h->reserved()),
 442 
 443   _root_regions(_g1h->max_num_regions()),
 444   _root_region_scan_aborted(false),
 445 
 446   _global_mark_stack(),
 447 
 448   _finger(nullptr), // _finger set in set_non_marking_state
 449 
 450   _worker_id_offset(G1ConcRefinementThreads), // The refinement control thread does not refine cards, so it's just the worker threads.
 451   _max_num_tasks(MAX2(ConcGCThreads, ParallelGCThreads)),
 452   _num_active_tasks(0), // _num_active_tasks set in set_non_marking_state()
 453   _tasks(nullptr), // _tasks set inside late_init()
 454   _task_queues(new G1CMTaskQueueSet(_max_num_tasks)),
 455   _terminator(_max_num_tasks, _task_queues),
 456   _partial_array_state_manager(new PartialArrayStateManager(_max_num_tasks)),
 457 
 458   _first_overflow_barrier_sync(),
 459   _second_overflow_barrier_sync(),
 460 
 461   _completed_mark_cycles(0),
 462   _has_overflown(false),
 463   _concurrent(false),
 464   _has_aborted(false),
 465   _restart_for_overflow(false),
 466   _gc_timer_cm(new ConcurrentGCTimer()),
 467   _gc_tracer_cm(new G1OldTracer()),
 468 
 469   // _verbose_level set below
 470 
 471   _remark_times(),
 472   _remark_mark_times(),
 473   _remark_weak_ref_times(),
 474   _cleanup_times(),
 475 
 476   _concurrent_workers(nullptr),
 477   _num_concurrent_workers(0),
 478   _max_concurrent_workers(0),
 479 
 480   _region_mark_stats(NEW_C_HEAP_ARRAY(G1RegionMarkStats, _g1h->max_num_regions(), mtGC)),
 481   _top_at_mark_starts(NEW_C_HEAP_ARRAY(Atomic<HeapWord*>, _g1h->max_num_regions(), mtGC)),
 482   _top_at_rebuild_starts(NEW_C_HEAP_ARRAY(Atomic<HeapWord*>, _g1h->max_num_regions(), mtGC)),
 483   _needs_remembered_set_rebuild(false)
 484 {
  assert(G1CGC_lock != nullptr, "G1CGC_lock must be initialized");
 486 
 487   _mark_bitmap.initialize(g1h->reserved(), bitmap_storage);
 488 }
 489 
 490 void G1ConcurrentMark::fully_initialize() {
 491   if (is_fully_initialized()) {
 492     return;
 493   }
 494 
 495   // Create & start ConcurrentMark thread.
 496   _cm_thread = new G1ConcurrentMarkThread(this);
 497   if (_cm_thread->osthread() == nullptr) {
 498     vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
 499   }
 500 
 501   log_debug(gc)("ConcGCThreads: %u offset %u", ConcGCThreads, _worker_id_offset);
 502   log_debug(gc)("ParallelGCThreads: %u", ParallelGCThreads);
 503 
 504   _max_concurrent_workers = ConcGCThreads;
 505 
 506   _concurrent_workers = new WorkerThreads("G1 Conc", _max_concurrent_workers);
 507   _concurrent_workers->initialize_workers();
 508   _num_concurrent_workers = _concurrent_workers->active_workers();
 509 
 510   if (!_global_mark_stack.initialize()) {
 511     vm_exit_during_initialization("Failed to allocate initial concurrent mark overflow mark stack.");
 512   }
 513 
 514   _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_num_tasks, mtGC);
 515 
 516   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 517   _num_active_tasks = _max_num_tasks;
 518 
 519   for (uint i = 0; i < _max_num_tasks; ++i) {
 520     G1CMTaskQueue* task_queue = new G1CMTaskQueue();
 521     _task_queues->register_queue(i, task_queue);
 522 
 523     _tasks[i] = new G1CMTask(i, this, task_queue, _region_mark_stats);
 524   }
 525 
 526   uint max_num_regions = _g1h->max_num_regions();
 527   ::new (_region_mark_stats) G1RegionMarkStats[max_num_regions]{};
 528   ::new (_top_at_mark_starts) Atomic<HeapWord*>[max_num_regions]{};
 529   ::new (_top_at_rebuild_starts) Atomic<HeapWord*>[max_num_regions]{};
 530 
 531   reset_at_marking_complete();
 532 }
 533 
 534 bool G1ConcurrentMark::is_in_concurrent_cycle() const {
 535   return is_fully_initialized() ? _cm_thread->is_in_progress() : false;
 536 }
 537 
 538 bool G1ConcurrentMark::is_in_marking() const {
 539   return is_fully_initialized() ? cm_thread()->is_in_marking() : false;
 540 }
 541 
 542 bool G1ConcurrentMark::is_in_rebuild_or_scrub() const {
 543   return cm_thread()->is_in_rebuild_or_scrub();
 544 }
 545 
 546 bool G1ConcurrentMark::is_in_reset_for_next_cycle() const {
 547   return cm_thread()->is_in_reset_for_next_cycle();
 548 }
 549 
 550 PartialArrayStateManager* G1ConcurrentMark::partial_array_state_manager() const {
 551   return _partial_array_state_manager;
 552 }
 553 
 554 G1ConcurrentMarkThread* G1ConcurrentMark::cm_thread() const {
 555   assert(is_fully_initialized(), "must be");
 556   return _cm_thread;
 557 }
 558 
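// Resets the global marking state, all per-task state and the per-region mark
// statistics in preparation for a new marking cycle.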
 559 void G1ConcurrentMark::reset() {
 560   _has_aborted.store_relaxed(false);
 561 
 562   reset_marking_for_restart();
 563 
  // Reset all tasks, since different phases will use a different number of active
  // threads. So, it's easiest to have all of them ready.
 566   for (uint i = 0; i < _max_num_tasks; ++i) {
 567     _tasks[i]->reset(mark_bitmap());
 568   }
 569 
 570   uint max_num_regions = _g1h->max_num_regions();
 571   ::new (_top_at_rebuild_starts) Atomic<HeapWord*>[max_num_regions]{};
 572   for (uint i = 0; i < max_num_regions; i++) {
 573     _region_mark_stats[i].clear();
 574   }
 575 
 576   _root_region_scan_aborted.store_relaxed(false);
 577   _root_regions.reset();
 578 }
 579 
 580 void G1ConcurrentMark::clear_statistics(G1HeapRegion* r) {
 581   uint region_idx = r->hrm_index();
 582   for (uint j = 0; j < _max_num_tasks; ++j) {
 583     _tasks[j]->clear_mark_stats_cache(region_idx);
 584   }
 585   _top_at_rebuild_starts[region_idx].store_relaxed(nullptr);
 586   _region_mark_stats[region_idx].clear();
 587 }
 588 
 589 void G1ConcurrentMark::humongous_object_eagerly_reclaimed(G1HeapRegion* r) {
 590   assert_at_safepoint();
  assert(r->is_starts_humongous(), "Got humongous continuation region here");
 592 
 593   // Need to clear mark bit of the humongous object. Doing this unconditionally is fine.
 594   mark_bitmap()->clear(r->bottom());
 595 
 596   if (!_g1h->collector_state()->is_in_mark_or_rebuild()) {
 597     return;
 598   }
 599 
 600   // Clear any statistics about the region gathered so far.
 601   _g1h->humongous_obj_regions_iterate(r,
 602                                       [&] (G1HeapRegion* r) {
 603                                         clear_statistics(r);
 604                                       });
 605 }
 606 
 607 void G1ConcurrentMark::reset_marking_for_restart() {
 608   _global_mark_stack.set_empty();
 609 
 610   // Expand the marking stack, if we have to and if we can.
 611   if (has_overflown()) {
 612     _global_mark_stack.expand();
 613 
 614     uint max_num_regions = _g1h->max_num_regions();
 615     for (uint i = 0; i < max_num_regions; i++) {
 616       _region_mark_stats[i].clear_during_overflow();
 617     }
 618   }
 619 
 620   clear_has_overflown();
 621   _finger.store_relaxed(_heap.start());
 622 
 623   for (uint i = 0; i < _max_num_tasks; ++i) {
 624     _tasks[i]->reset_for_restart();
 625   }
 626 }
 627 
 628 void G1ConcurrentMark::set_concurrency(uint active_tasks) {
 629   assert(active_tasks <= _max_num_tasks, "we should not have more");
 630 
 631   _num_active_tasks = active_tasks;
 632   // Need to update the three data structures below according to the
 633   // number of active threads for this phase.
 634   _terminator.reset_for_reuse(active_tasks);
 635   _first_overflow_barrier_sync.set_n_workers(active_tasks);
 636   _second_overflow_barrier_sync.set_n_workers(active_tasks);
 637 }
 638 
 639 void G1ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
 640   set_concurrency(active_tasks);
 641 
 642   _concurrent.store_relaxed(concurrent);
 643 
 644   if (!concurrent) {
    // At this point we should be in an STW phase, having completed marking.
 646     assert_at_safepoint_on_vm_thread();
 647     assert(out_of_regions(),
 648            "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
 649            p2i(finger()), p2i(_heap.end()));
 650   }
 651 }
 652 
 653 #if TASKQUEUE_STATS
 654 void G1ConcurrentMark::print_and_reset_taskqueue_stats() {
 655 
 656   _task_queues->print_and_reset_taskqueue_stats("Concurrent Mark");
 657 
 658   auto get_pa_stats = [&](uint i) {
 659     return _tasks[i]->partial_array_task_stats();
 660   };
 661 
 662   PartialArrayTaskStats::log_set(_max_num_tasks, get_pa_stats,
 663                                  "Concurrent Mark Partial Array");
 664 
 665   for (uint i = 0; i < _max_num_tasks; ++i) {
 666     get_pa_stats(i)->reset();
 667   }
 668 }
 669 #endif
 670 
 671 void G1ConcurrentMark::reset_at_marking_complete() {
 672   TASKQUEUE_STATS_ONLY(print_and_reset_taskqueue_stats());
 673   // We set the global marking state to some default values when we're
 674   // not doing marking.
 675   reset_marking_for_restart();
 676   _num_active_tasks = 0;
 677 }
 678 
 679 G1ConcurrentMark::~G1ConcurrentMark() {
 680   FREE_C_HEAP_ARRAY(_top_at_mark_starts);
 681   FREE_C_HEAP_ARRAY(_top_at_rebuild_starts);
 682   FREE_C_HEAP_ARRAY(_region_mark_stats);
 683   // The G1ConcurrentMark instance is never freed.
 684   ShouldNotReachHere();
 685 }
 686 
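// Worker task that clears the mark bitmap in chunks of chunk_size() bytes per region.
// When run concurrently (suspendible), workers join the suspendible thread set and
// yield to safepoints between chunks; during an undo cycle only ranges that can
// actually contain marks are cleared.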
 687 class G1ClearBitMapTask : public WorkerTask {
 688 public:
 689   static size_t chunk_size() { return M; }
 690 
 691 private:
 692   // Heap region closure used for clearing the _mark_bitmap.
 693   class G1ClearBitmapHRClosure : public G1HeapRegionClosure {
 694     G1ConcurrentMark* _cm;
 695     G1CMBitMap* _bitmap;
 696     bool _suspendible; // If suspendible, do yield checks.
 697 
 698     bool suspendible() {
 699       return _suspendible;
 700     }
 701 
 702     bool is_clear_concurrent_undo() {
 703       return suspendible() && _cm->cm_thread()->is_in_undo_cycle();
 704     }
 705 
 706     bool has_aborted() {
 707       if (suspendible()) {
 708         _cm->do_yield_check();
 709         return _cm->has_aborted();
 710       }
 711       return false;
 712     }
 713 
 714     HeapWord* region_clear_limit(G1HeapRegion* r) {
      // During a Concurrent Undo Mark cycle, the per-region top_at_mark_start and
      // live_words data are current with respect to the _mark_bitmap. We use this
      // information to only clear ranges of the bitmap that require clearing.
 718       if (is_clear_concurrent_undo()) {
 719         // No need to clear bitmaps for empty regions (which includes regions we
 720         // did not mark through).
 721         if (!_cm->contains_live_object(r->hrm_index())) {
 722           assert(_bitmap->get_next_marked_addr(r->bottom(), r->end()) == r->end(), "Should not have marked bits");
 723           return r->bottom();
 724         }
 725         assert(_bitmap->get_next_marked_addr(_cm->top_at_mark_start(r), r->end()) == r->end(), "Should not have marked bits above tams");
 726       }
 727       return r->end();
 728     }
 729 
 730   public:
 731     G1ClearBitmapHRClosure(G1ConcurrentMark* cm, bool suspendible) :
 732       G1HeapRegionClosure(),
 733       _cm(cm),
 734       _bitmap(cm->mark_bitmap()),
 735       _suspendible(suspendible)
 736     { }
 737 
 738     virtual bool do_heap_region(G1HeapRegion* r) {
 739       if (has_aborted()) {
 740         return true;
 741       }
 742 
 743       HeapWord* cur = r->bottom();
 744       HeapWord* const end = region_clear_limit(r);
 745 
 746       size_t const chunk_size_in_words = G1ClearBitMapTask::chunk_size() / HeapWordSize;
 747 
 748       while (cur < end) {
 749 
 750         MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
 751         _bitmap->clear_range(mr);
 752 
 753         cur += chunk_size_in_words;
 754 
 755         // Repeat the asserts from before the start of the closure. We will do them
 756         // as asserts here to minimize their overhead on the product. However, we
 757         // will have them as guarantees at the beginning / end of the bitmap
 758         // clearing to get some checking in the product.
 759         assert(!suspendible() || _cm->is_in_reset_for_next_cycle(), "invariant");
 760 
 761         // Abort iteration if necessary.
 762         if (has_aborted()) {
 763           return true;
 764         }
 765       }
 766       assert(cur >= end, "Must have completed iteration over the bitmap for region %u.", r->hrm_index());
 767 
 768       _cm->reset_top_at_mark_start(r);
 769 
 770       return false;
 771     }
 772   };
 773 
 774   G1ClearBitmapHRClosure _cl;
 775   G1HeapRegionClaimer _hr_claimer;
 776   bool _suspendible; // If the task is suspendible, workers must join the STS.
 777 
 778 public:
 779   G1ClearBitMapTask(G1ConcurrentMark* cm, uint n_workers, bool suspendible) :
 780     WorkerTask("G1 Clear Bitmap"),
 781     _cl(cm, suspendible),
 782     _hr_claimer(n_workers),
 783     _suspendible(suspendible)
 784   { }
 785 
 786   void work(uint worker_id) {
 787     SuspendibleThreadSetJoiner sts_join(_suspendible);
 788     G1CollectedHeap::heap()->heap_region_par_iterate_from_worker_offset(&_cl, &_hr_claimer, worker_id);
 789   }
 790 };
 791 
 792 void G1ConcurrentMark::clear_bitmap(WorkerThreads* workers, bool may_yield) {
 793   assert(may_yield || SafepointSynchronize::is_at_safepoint(), "Non-yielding bitmap clear only allowed at safepoint.");
 794 
 795   size_t const num_bytes_to_clear = (G1HeapRegion::GrainBytes * _g1h->num_committed_regions()) / G1CMBitMap::heap_map_factor();
 796   size_t const num_chunks = align_up(num_bytes_to_clear, G1ClearBitMapTask::chunk_size()) / G1ClearBitMapTask::chunk_size();
 797 
 798   uint const num_workers = (uint)MIN2(num_chunks, (size_t)workers->active_workers());
 799 
 800   G1ClearBitMapTask cl(this, num_workers, may_yield);
 801 
 802   log_debug(gc, ergo)("Running %s with %u workers for %zu work units.", cl.name(), num_workers, num_chunks);
 803   workers->run_task(&cl, num_workers);
 804 }
 805 
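// Prepares for the next marking cycle: concurrently clears the mark bitmap (yielding
// to safepoints as needed) and recycles the partial array splitting state.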
 806 void G1ConcurrentMark::cleanup_for_next_mark() {
  // Make sure that the concurrent mark thread still appears to be in
  // the current cycle.
 809   guarantee(is_in_reset_for_next_cycle(), "invariant");
 810 
 811   clear_bitmap(_concurrent_workers, true);
 812 
 813   reset_partial_array_state_manager();
 814 
 815   // Should not have changed state yet (even if a Full GC interrupted us).
 816   guarantee(is_in_reset_for_next_cycle(), "invariant");
 817 }
 818 
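// Tasks must unregister their partial array splitters before the shared manager is
// reset, and re-register afterwards so they are ready for the next cycle.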
 819 void G1ConcurrentMark::reset_partial_array_state_manager() {
 820   for (uint i = 0; i < _max_num_tasks; ++i) {
 821     _tasks[i]->unregister_partial_array_splitter();
 822   }
 823 
 824   partial_array_state_manager()->reset();
 825 
 826   for (uint i = 0; i < _max_num_tasks; ++i) {
 827     _tasks[i]->register_partial_array_splitter();
 828   }
 829 }
 830 
 831 void G1ConcurrentMark::clear_bitmap(WorkerThreads* workers) {
 832   assert_at_safepoint_on_vm_thread();
  // To avoid fragmentation, the full collection requesting to clear the bitmap
  // might use fewer workers than available. To ensure the bitmap is cleared
  // as efficiently as possible, the number of active workers is temporarily
  // increased to include all currently created workers.
 837   WithActiveWorkers update(workers, workers->created_workers());
 838   clear_bitmap(workers, false);
 839 }
 840 
 841 class G1PreConcurrentStartTask : public G1BatchedTask {
 842   // Reset marking state.
 843   class ResetMarkingStateTask;
 844   // For each region note start of marking.
 845   class NoteStartOfMarkTask;
 846 
 847 public:
 848   G1PreConcurrentStartTask(GCCause::Cause cause, G1ConcurrentMark* cm);
 849 };
 850 
 851 class G1PreConcurrentStartTask::ResetMarkingStateTask : public G1AbstractSubTask {
 852   G1ConcurrentMark* _cm;
 853 public:
 854   ResetMarkingStateTask(G1ConcurrentMark* cm) : G1AbstractSubTask(G1GCPhaseTimes::ResetMarkingState), _cm(cm) { }
 855 
 856   double worker_cost() const override { return 1.0; }
 857   void do_work(uint worker_id) override;
 858 };
 859 
 860 class G1PreConcurrentStartTask::NoteStartOfMarkTask : public G1AbstractSubTask {
 861 
 862   class NoteStartOfMarkHRClosure : public G1HeapRegionClosure {
 863     G1ConcurrentMark* _cm;
 864 
 865   public:
 866     NoteStartOfMarkHRClosure() : G1HeapRegionClosure(), _cm(G1CollectedHeap::heap()->concurrent_mark()) { }
 867 
 868     bool do_heap_region(G1HeapRegion* r) override {
 869       if (r->is_old_or_humongous() && !r->is_collection_set_candidate() && !r->in_collection_set()) {
 870         _cm->update_top_at_mark_start(r);
 871       } else {
 872         _cm->reset_top_at_mark_start(r);
 873       }
 874       return false;
 875     }
 876   } _region_cl;
 877 
 878   G1HeapRegionClaimer _claimer;
 879 public:
 880   NoteStartOfMarkTask() : G1AbstractSubTask(G1GCPhaseTimes::NoteStartOfMark), _region_cl(), _claimer(0) { }
 881 
 882   double worker_cost() const override {
 883     // The work done per region is very small, therefore we choose this magic number to cap the number
 884     // of threads used when there are few regions.
 885     const double regions_per_thread = 1000;
 886     return _claimer.n_regions() / regions_per_thread;
 887   }
 888 
 889   void set_max_workers(uint max_workers) override {
 890     _claimer.set_n_workers(max_workers);
 891   }
 892 
 893   void do_work(uint worker_id) override {
 894     G1CollectedHeap::heap()->heap_region_par_iterate_from_worker_offset(&_region_cl, &_claimer, worker_id);
 895   }
 896 };
 897 
 898 void G1PreConcurrentStartTask::ResetMarkingStateTask::do_work(uint worker_id) {
 899   // Reset marking state.
 900   _cm->reset();
 901 }
 902 
 903 G1PreConcurrentStartTask::G1PreConcurrentStartTask(GCCause::Cause cause, G1ConcurrentMark* cm) :
 904   G1BatchedTask("Pre Concurrent Start", G1CollectedHeap::heap()->phase_times()) {
 905   add_serial_task(new ResetMarkingStateTask(cm));
 906   add_parallel_task(new NoteStartOfMarkTask());
}
 908 
 909 void G1ConcurrentMark::pre_concurrent_start(GCCause::Cause cause) {
 910   assert_at_safepoint_on_vm_thread();
 911 
 912   G1CollectedHeap::start_codecache_marking_cycle_if_inactive(true /* concurrent_mark_start */);
 913 
 914   ClassLoaderDataGraph::verify_claimed_marks_cleared(ClassLoaderData::_claim_strong);
 915 
 916   G1PreConcurrentStartTask cl(cause, this);
 917   G1CollectedHeap::heap()->run_batch_task(&cl);
 918 
 919   _gc_tracer_cm->set_gc_cause(cause);
 920 }
 921 
 922 void G1ConcurrentMark::start_full_concurrent_cycle() {
 923   // Start Concurrent Marking weak-reference discovery.
 924   ReferenceProcessor* rp = _g1h->ref_processor_cm();
 925   rp->start_discovery(false /* always_clear */);
 926 
 927   SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
  // This is the start of the marking cycle; we expect all
  // threads to have SATB queues with active set to false.
 930   satb_mq_set.set_active_all_threads(true, /* new active value */
 931                                      false /* expected_active */);
 932 
 933   // update_g1_committed() will be called at the end of an evac pause
 934   // when marking is on. So, it's also called at the end of the
 935   // concurrent start pause to update the heap end, if the heap expands
 936   // during it. No need to call it here.
 937 
 938   // Signal the thread to start work.
 939   cm_thread()->start_full_cycle();
 940 }
 941 
 942 void G1ConcurrentMark::start_undo_concurrent_cycle() {
 943   assert_at_safepoint_on_vm_thread();
  // At this point this GC is no longer a concurrent start GC, so we can only check for the young-only phase.
 945   assert(_g1h->collector_state()->is_in_young_only_phase(), "must be");
 946 
 947   abort_root_region_scan_at_safepoint();
 948 
 949   // Signal the thread to start work.
 950   cm_thread()->start_undo_cycle();
 951 }
 952 
 953 void G1ConcurrentMark::notify_concurrent_cycle_completed() {
 954   cm_thread()->set_idle();
 955 }
 956 
 957 void G1ConcurrentMark::stop() {
 958   if (is_fully_initialized()) {
 959     cm_thread()->stop();
 960   }
 961 }
 962 
 963 /*
 964  * Notice that in the next two methods, we actually leave the STS
 965  * during the barrier sync and join it immediately afterwards. If we
 966  * do not do this, the following deadlock can occur: one thread could
 967  * be in the barrier sync code, waiting for the other thread to also
 968  * sync up, whereas another one could be trying to yield, while also
 969  * waiting for the other threads to sync up too.
 970  *
 971  * Note, however, that this code is also used during remark and in
 972  * this case we should not attempt to leave / enter the STS, otherwise
 973  * we'll either hit an assert (debug / fastdebug) or deadlock
 974  * (product). So we should only leave / enter the STS if we are
 975  * operating concurrently.
 976  *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible that while it is suspended a Full GC or an evacuation
 * pause could occur. This is actually safe, since entering the sync
 * barrier is one of the last things do_marking_step() does, and it
 * doesn't manipulate any data structures afterwards.
 982  */
 983 
 984 void G1ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
 985   bool barrier_aborted;
 986   {
 987     SuspendibleThreadSetLeaver sts_leave(concurrent());
 988     barrier_aborted = !_first_overflow_barrier_sync.enter();
 989   }
 990 
 991   // at this point everyone should have synced up and not be doing any
 992   // more work
 993 
 994   if (barrier_aborted) {
 995     // If the barrier aborted we ignore the overflow condition and
 996     // just abort the whole marking phase as quickly as possible.
 997     return;
 998   }
 999 }
1000 
1001 void G1ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
1002   SuspendibleThreadSetLeaver sts_leave(concurrent());
1003   _second_overflow_barrier_sync.enter();
1004 
1005   // at this point everything should be re-initialized and ready to go
1006 }
1007 
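// Worker task driving concurrent marking: each worker joins the suspendible thread
// set and repeatedly runs do_marking_step() in time-bounded steps, yielding in
// between, until marking either completes or is aborted.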
1008 class G1CMConcurrentMarkingTask : public WorkerTask {
1009   G1ConcurrentMark*     _cm;
1010 
1011 public:
1012   void work(uint worker_id) {
1013     ResourceMark rm;
1014 
1015     SuspendibleThreadSetJoiner sts_join;
1016 
1017     assert(worker_id < _cm->active_tasks(), "invariant");
1018 
1019     G1CMTask* task = _cm->task(worker_id);
1020     task->record_start_time();
1021     if (!_cm->has_aborted()) {
1022       do {
1023         task->do_marking_step(G1ConcMarkStepDurationMillis,
1024                               true  /* do_termination */,
1025                               false /* is_serial*/);
1026 
1027         _cm->do_yield_check();
1028       } while (!_cm->has_aborted() && task->has_aborted());
1029     }
1030     task->record_end_time();
1031     guarantee(!task->has_aborted() || _cm->has_aborted(), "invariant");
1032   }
1033 
1034   G1CMConcurrentMarkingTask(G1ConcurrentMark* cm) :
1035       WorkerTask("Concurrent Mark"), _cm(cm) { }
1036 
1037   ~G1CMConcurrentMarkingTask() { }
1038 };
1039 
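// Calculate the number of concurrent marking workers to use for this phase: either
// the full ConcGCThreads count, or a dynamically computed value based on the current
// number of non-daemon Java threads.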
1040 uint G1ConcurrentMark::calc_active_marking_workers() {
1041   uint result = 0;
1042   if (!UseDynamicNumberOfGCThreads || !FLAG_IS_DEFAULT(ConcGCThreads)) {
1043     result = _max_concurrent_workers;
1044   } else {
1045     result =
1046       WorkerPolicy::calc_default_active_workers(_max_concurrent_workers,
1047                                                 1, /* Minimum workers */
1048                                                 _num_concurrent_workers,
1049                                                 Threads::number_of_non_daemon_threads());
1050     // Don't scale the result down by scale_concurrent_workers() because
1051     // that scaling has already gone into "_max_concurrent_workers".
1052   }
1053   assert(result > 0 && result <= _max_concurrent_workers,
1054          "Calculated number of marking workers must be larger than zero and at most the maximum %u, but is %u",
1055          _max_concurrent_workers, result);
1056   return result;
1057 }
1058 
1059 bool G1ConcurrentMark::has_root_region_scan_aborted() const {
1060   return _root_region_scan_aborted.load_relaxed();
1061 }
1062 
1063 #ifndef PRODUCT
1064 void G1ConcurrentMark::assert_root_region_scan_completed_or_aborted() {
1065   assert(root_regions()->work_completed() || has_root_region_scan_aborted(), "must be");
1066 }
1067 #endif
1068 
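// Scans all objects in the given root MemRegion (the range between the region's
// TAMS and its top when it was added as a root region), applying
// G1RootRegionScanClosure to every reference in them.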
1069 void G1ConcurrentMark::scan_root_region(const MemRegion* region, uint worker_id) {
1070 #ifdef ASSERT
1071   HeapWord* last = region->last();
1072   G1HeapRegion* hr = _g1h->heap_region_containing(last);
1073   assert(hr->is_old() || top_at_mark_start(hr) == hr->bottom(),
1074          "Root regions must be old or survivor/eden but region %u is %s", hr->hrm_index(), hr->get_type_str());
1075   assert(top_at_mark_start(hr) == region->start(),
1076          "MemRegion start should be equal to TAMS");
1077 #endif
1078 
1079   G1RootRegionScanClosure cl(_g1h, this, worker_id);
1080 
1081   const uintx interval = PrefetchScanIntervalInBytes;
1082   HeapWord* curr = region->start();
1083   const HeapWord* end = region->end();
1084   while (curr < end) {
1085     Prefetch::read(curr, interval);
1086     oop obj = cast_to_oop(curr);
1087     size_t size = obj->oop_iterate_size(&cl);
1088     assert(size == obj->size(), "sanity");
1089     curr += size;
1090   }
1091 }
1092 
1093 class G1CMRootRegionScanTask : public WorkerTask {
1094   G1ConcurrentMark* _cm;
1095   bool _should_yield;
1096 
1097 public:
1098   G1CMRootRegionScanTask(G1ConcurrentMark* cm, bool should_yield) :
1099     WorkerTask("G1 Root Region Scan"), _cm(cm), _should_yield(should_yield) { }
1100 
1101   void work(uint worker_id) {
1102     SuspendibleThreadSetJoiner sts_join(_should_yield);
1103 
1104     while (true) {
1105       if (_cm->has_root_region_scan_aborted()) {
1106         return;
1107       }
1108       G1CMRootMemRegions* root_regions = _cm->root_regions();
1109       const MemRegion* region = root_regions->claim_next();
1110       if (region == nullptr) {
1111         return;
1112       }
1113       _cm->scan_root_region(region, worker_id);
1114       if (_should_yield) {
1115         SuspendibleThreadSet::yield();
1116         // If we yielded, a GC may have processed all root regions,
1117         // so this loop will naturally exit on the next claim_next() call.
1118         // Same if a Full GC signalled abort of the concurrent mark.
1119       }
1120     }
1121   }
1122 };
1123 
1124 bool G1ConcurrentMark::scan_root_regions(WorkerThreads* workers, bool concurrent) {
  // We first check whether there is any work to do, as we might have already aborted
  // the concurrent cycle, or a GC might have done the actual work before we got here.
  // We want to avoid spinning up the worker threads if that happened.
  // (Note that due to races reading the abort-flag, we might spin up the threads anyway).
  //
  // Abort happens if a Full GC occurs right after starting the concurrent cycle or
  // if a young GC does the work.
  //
  // Concurrent GC threads join the suspendible thread set when starting the task, so
  // they stop at a safepoint and then continue after it.
1135   //
1136   // Must not use G1CMRootMemRegions::work_completed() here because we need to get a
1137   // consistent view of the value containing the number of remaining regions across the
1138   // usages below. The safepoint/gc may already be running and modifying it
1139   // while this code is still executing.
1140   uint num_remaining = root_regions()->num_remaining_regions();
1141   bool do_scan = num_remaining > 0 && !has_root_region_scan_aborted();
1142   if (do_scan) {
1143     // Assign one worker to each root-region but subject to the max constraint.
1144     // The constraint is also important to avoid accesses beyond the allocated per-worker
1145     // marking helper data structures. We might get passed different WorkerThreads with
1146     // different number of threads (potential worker ids) than helper data structures when
1147     // completing this work during GC.
1148     const uint num_workers = MIN2(num_remaining,
1149                                   _max_concurrent_workers);
    assert(num_workers > 0, "must have remaining root regions to process");
1151 
1152     G1CMRootRegionScanTask task(this, concurrent);
1153     log_debug(gc, ergo)("Running %s using %u workers for %u work units.",
1154                         task.name(), num_workers, num_remaining);
1155     workers->run_task(&task, num_workers);
1156   }
1157 
1158   // At the end of this method, we can re-read num_remaining() in the assert: either
1159   // we got non-zero above and we processed all root regions (and it must be zero
1160   // after the worker task synchronization) or it had already been zero. We also
1161   // can't have started another concurrent cycle that could have set it to something else
1162   // while still in the concurrent cycle (if called concurrently).
1163   assert_root_region_scan_completed_or_aborted();
1164 
1165   return do_scan;
1166 }
1167 
1168 void G1ConcurrentMark::scan_root_regions_concurrently() {
1169   assert(Thread::current() == cm_thread(), "must be on Concurrent Mark Thread");
1170   scan_root_regions(_concurrent_workers, true /* concurrent */);
1171 }
1172 
1173 bool G1ConcurrentMark::complete_root_regions_scan_in_safepoint() {
1174   assert_at_safepoint_on_vm_thread();
1175   return scan_root_regions(_g1h->workers(), false /* concurrent */);
1176 }
1177 
1178 void G1ConcurrentMark::add_root_region(G1HeapRegion* r) {
1179   root_regions()->add(top_at_mark_start(r), r->top());
1180 }
1181 
1182 bool G1ConcurrentMark::is_root_region(G1HeapRegion* r) {
1183   return root_regions()->contains(MemRegion(top_at_mark_start(r), r->top()));
1184 }
1185 
1186 void G1ConcurrentMark::abort_root_region_scan() {
1187   assert_not_at_safepoint();
1188 
1189   _root_region_scan_aborted.store_relaxed(true);
1190 }
1191 
1192 void G1ConcurrentMark::abort_root_region_scan_at_safepoint() {
1193   assert_at_safepoint_on_vm_thread();
1194 
1195   _root_region_scan_aborted.store_relaxed(true);
1196 }
1197 
1198 void G1ConcurrentMark::concurrent_cycle_start() {
1199   _gc_timer_cm->register_gc_start();
1200 
1201   _gc_tracer_cm->report_gc_start(GCCause::_no_gc /* first parameter is not used */, _gc_timer_cm->gc_start());
1202 
1203   _g1h->trace_heap_before_gc(_gc_tracer_cm);
1204 }
1205 
1206 uint G1ConcurrentMark::completed_mark_cycles() const {
1207   return _completed_mark_cycles.load_relaxed();
1208 }
1209 
1210 void G1ConcurrentMark::concurrent_cycle_end(bool mark_cycle_completed) {
1211   _g1h->trace_heap_after_gc(_gc_tracer_cm);
1212 
1213   if (mark_cycle_completed) {
1214     _completed_mark_cycles.add_then_fetch(1u, memory_order_relaxed);
1215   }
1216 
1217   if (has_aborted()) {
1218     log_info(gc, marking)("Concurrent Mark Abort");
1219     _gc_tracer_cm->report_concurrent_mode_failure();
1220   }
1221 
1222   _gc_timer_cm->register_gc_end();
1223 
1224   _gc_tracer_cm->report_gc_end(_gc_timer_cm->gc_end(), _gc_timer_cm->time_partitions());
1225 }
1226 
1227 void G1ConcurrentMark::mark_from_roots() {
1228   _restart_for_overflow.store_relaxed(false);
1229 
1230   uint active_workers = calc_active_marking_workers();
1231 
1232   // Setting active workers is not guaranteed since fewer
1233   // worker threads may currently exist and more may not be
1234   // available.
1235   active_workers = _concurrent_workers->set_active_workers(active_workers);
1236   log_info(gc, task)("Concurrent Mark Using %u of %u Workers", active_workers, _concurrent_workers->max_workers());
1237 
1238   _num_concurrent_workers = active_workers;
1239 
1240   // Parallel task terminator is set in "set_concurrency_and_phase()"
1241   set_concurrency_and_phase(active_workers, true /* concurrent */);
1242 
1243   G1CMConcurrentMarkingTask marking_task(this);
1244   _concurrent_workers->run_task(&marking_task);
1245   print_stats();
1246 }
1247 
1248 const char* G1ConcurrentMark::verify_location_string(VerifyLocation location) {
1249   static const char* location_strings[] = { "Remark Before",
1250                                             "Remark After",
1251                                             "Remark Overflow",
1252                                             "Cleanup Before",
1253                                             "Cleanup After" };
1254   return location_strings[static_cast<std::underlying_type_t<VerifyLocation>>(location)];
1255 }
1256 
1257 void G1ConcurrentMark::verify_during_pause(G1HeapVerifier::G1VerifyType type,
1258                                            VerifyLocation location) {
1259   G1HeapVerifier* verifier = _g1h->verifier();
1260 
1261   verifier->verify_region_sets_optional();
1262 
1263   const char* caller = verify_location_string(location);
1264 
1265   if (VerifyDuringGC && G1HeapVerifier::should_verify(type)) {
1266     GCTraceTime(Debug, gc, phases) debug(caller, _gc_timer_cm);
1267 
1268     size_t const BufLen = 512;
1269     char buffer[BufLen];
1270 
1271     jio_snprintf(buffer, BufLen, "During GC (%s)", caller);
1272     verifier->verify(VerifyOption::G1UseConcMarking, buffer);
1273 
    // Only check the bitmap during Remark, and not in the After-Remark verification,
    // because the regions already have their TAMSes reset.
1276     if (location != VerifyLocation::RemarkAfter) {
1277       verifier->verify_bitmap_clear(true /* above_tams_only */);
1278     }
1279   }
1280 }
1281 
1282 class G1ObjectCountIsAliveClosure: public BoolObjectClosure {
1283   G1CollectedHeap* _g1h;
1284 public:
1285   G1ObjectCountIsAliveClosure(G1CollectedHeap* g1h) : _g1h(g1h) {}
1286 
1287   bool do_object_b(oop obj) {
1288     return !_g1h->is_obj_dead(obj);
1289   }
1290 };
1291 
1292 void G1ConcurrentMark::remark() {
1293   assert_at_safepoint_on_vm_thread();
1294 
1295   // If a full collection has happened, we should not continue. However we might
1296   // have ended up here as the Remark VM operation has been scheduled already.
1297   if (has_aborted()) {
1298     return;
1299   }
1300 
1301   G1Policy* policy = _g1h->policy();
1302   policy->record_pause_start_time();
1303 
1304   double start = os::elapsedTime();
1305 
1306   verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyLocation::RemarkBefore);
1307 
1308   {
1309     GCTraceTime(Debug, gc, phases) debug("Finalize Marking", _gc_timer_cm);
1310     finalize_marking();
1311   }
1312 
1313   double mark_work_end = os::elapsedTime();
1314 
1315   bool const mark_finished = !has_overflown();
1316   if (mark_finished) {
1317     weak_refs_work();
1318 
1319     // Unload Klasses, String, Code Cache, etc.
1320     if (ClassUnloadingWithConcurrentMark) {
1321       G1CMIsAliveClosure is_alive(this);
1322       _g1h->unload_classes_and_code("Class Unloading", &is_alive, _gc_timer_cm);
1323     }
1324 
1325     SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
1326     // We're done with marking.
    // This is the end of the marking cycle; we expect all
    // threads to have SATB queues with active set to true.
1329     satb_mq_set.set_active_all_threads(false, /* new active value */
1330                                        true /* expected_active */);
1331 
1332     {
1333       GCTraceTime(Debug, gc, phases) debug("Flush Task Caches", _gc_timer_cm);
1334       flush_all_task_caches();
1335     }
1336 
    // All marking completed. Check the bitmap now, as we will start to reset TAMSes
    // in parallel below, so we cannot do this in the After-Remark verification.
1339     _g1h->verifier()->verify_bitmap_clear(true /* above_tams_only */);
1340 
1341     {
1342       GCTraceTime(Debug, gc, phases) debug("Select For Rebuild and Reclaim Empty Regions", _gc_timer_cm);
1343 
1344       G1UpdateRegionLivenessAndSelectForRebuildTask cl(_g1h, this, _g1h->workers()->active_workers());
1345       uint const num_workers = MIN2(G1UpdateRegionLivenessAndSelectForRebuildTask::desired_num_workers(_g1h->num_committed_regions()),
1346                                     _g1h->workers()->active_workers());
      log_debug(gc, ergo)("Running %s using %u workers for %u regions in heap", cl.name(), num_workers, _g1h->num_committed_regions());
1348       _g1h->workers()->run_task(&cl, num_workers);
1349 
1350       log_debug(gc, remset, tracking)("Remembered Set Tracking update regions total %u, selected %u",
1351                                       _g1h->num_committed_regions(), cl.total_selected_for_rebuild());
1352 
1353       _needs_remembered_set_rebuild = (cl.total_selected_for_rebuild() > 0);
1354 
1355       if (_needs_remembered_set_rebuild) {
1356         GrowableArrayCHeap<G1HeapRegion*, mtGC>* selected = cl.sort_and_prune_old_selected();
1357         _g1h->policy()->candidates()->set_candidates_from_marking(selected);
1358       }
1359     }
1360 
1361     if (log_is_enabled(Trace, gc, liveness)) {
1362       G1PrintRegionLivenessInfoClosure cl("Post-Marking");
1363       _g1h->heap_region_iterate(&cl);
1364     }
1365 
    // Potentially, some empty regions have been reclaimed; make this a
1367     // "collection" so that pending allocation can retry before attempting a
1368     // GC pause.
1369     _g1h->increment_total_collections();
1370 
1371     // For Remark Pauses that may have been triggered by PeriodicGCs, we maintain
1372     // resizing based on MinHeapFreeRatio or MaxHeapFreeRatio. If a PeriodicGC is
1373     // triggered, it likely means there are very few regular GCs, making resizing
1374     // based on gc heuristics less effective.
1375     if (_g1h->last_gc_was_periodic()) {
1376       _g1h->resize_heap_after_full_collection(0 /* allocation_word_size */);
1377     }
1378 
1379     compute_new_sizes();
1380 
1381     verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyLocation::RemarkAfter);
1382 
1383     assert(!restart_for_overflow(), "sanity");
1384     // Completely reset the marking state (except bitmaps) since marking completed.
1385     reset_at_marking_complete();
1386 
1387     G1CollectedHeap::finish_codecache_marking_cycle();
1388 
1389     {
1390       GCTraceTime(Debug, gc, phases) debug("Report Object Count", _gc_timer_cm);
1391       G1ObjectCountIsAliveClosure is_alive(_g1h);
1392       _gc_tracer_cm->report_object_count_after_gc(&is_alive, _g1h->workers());
1393     }
1394 
1395     // Successfully completed marking, advance state.
1396     cm_thread()->set_full_cycle_rebuild_and_scrub();
1397   } else {
1398     // We overflowed.  Restart concurrent marking.
1399     _restart_for_overflow.store_relaxed(true);
1400 
1401     verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyLocation::RemarkOverflow);
1402 
1403     // Clear the marking state because we will be restarting
1404     // marking due to overflowing the global mark stack.
1405     reset_marking_for_restart();
1406   }
1407 
1408   // Statistics
1409   double now = os::elapsedTime();
1410   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1411   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1412   _remark_times.add((now - start) * 1000.0);
1413 
1414   _g1h->update_perf_counter_cpu_time();
1415 
1416   policy->record_concurrent_mark_remark_end();
1417 
1418   return;
1419 }
1420 
1421 void G1ConcurrentMark::compute_new_sizes() {
1422   MetaspaceGC::compute_new_size();
1423 
1424   // Cleanup will have freed any regions completely full of garbage.
1425   // Update the soft reference policy with the new heap occupancy.
1426   Universe::heap()->update_capacity_and_used_at_gc();
1427 
1428   // We reclaimed old regions so we should calculate the sizes to make
1429   // sure we update the old gen/space data.
1430   _g1h->monitoring_support()->update_sizes();
1431 }
1432 
1433 class G1UpdateRegionsAfterRebuild : public G1HeapRegionClosure {
1434   G1CollectedHeap* _g1h;
1435 
1436 public:
1437   G1UpdateRegionsAfterRebuild(G1CollectedHeap* g1h) : _g1h(g1h) { }
1438 
1439   bool do_heap_region(G1HeapRegion* r) override {
1440     // Update the remset tracking state from updating to complete
1441     // if remembered sets have been rebuilt.
1442     _g1h->policy()->remset_tracker()->update_after_rebuild(r);
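         // Returning false continues the iteration over all regions.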
1443     return false;
1444   }
1445 };
1446 
1447 void G1ConcurrentMark::cleanup() {
1448   assert_at_safepoint_on_vm_thread();
1449 
1450   // If a full collection has happened, we shouldn't do this.
1451   if (has_aborted()) {
1452     return;
1453   }
1454 
1455   G1Policy* policy = _g1h->policy();
1456   policy->record_pause_start_time();
1457 
1458   double start = os::elapsedTime();
1459 
1460   verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyLocation::CleanupBefore);
1461 
1462   if (needs_remembered_set_rebuild()) {
1463     // Update the remset tracking information as well as marking all regions
1464     // as fully parsable.
1465     GCTraceTime(Debug, gc, phases) debug("Update Remembered Set Tracking After Rebuild", _gc_timer_cm);
1466     G1UpdateRegionsAfterRebuild cl(_g1h);
1467     _g1h->heap_region_iterate(&cl);
1468   } else {
1469     log_debug(gc, phases)("No Remembered Sets to update after rebuild");
1470   }
1471 
1472   verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyLocation::CleanupAfter);
1473 
1474   // Local statistics
1475   _cleanup_times.add((os::elapsedTime() - start) * 1000.0);
1476 
1477   {
1478     GCTraceTime(Debug, gc, phases) debug("Finalize Concurrent Mark Cleanup", _gc_timer_cm);
1479     policy->record_concurrent_mark_cleanup_end(needs_remembered_set_rebuild());
1480   }
1481 
1482   // Advance state.
1483   cm_thread()->set_full_cycle_reset_for_next_cycle();
1485 }
1486 
1487 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
1488 // Uses the G1CMTask associated with a worker thread (for serial reference
1489 // processing the G1CMTask for worker 0 is used) to preserve (mark) and
1490 // trace referent objects.
1491 //
1492 // Using the G1CMTask and embedded local queues avoids having the worker
1493 // threads operating on the global mark stack. This reduces the risk
1494 // of overflowing the stack - which we would rather avoid at this late
1495 // stage. Also using the tasks' local queues removes the potential
1496 // of the workers interfering with each other that could occur if
1497 // operating on the global stack.
1498 
1499 class G1CMKeepAliveAndDrainClosure : public OopClosure {
1500   G1ConcurrentMark* _cm;
1501   G1CMTask*         _task;
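       // After every G1RefProcDrainInterval references, do_oop_work() below drains the
       // entries pushed so far by calling G1CMTask::do_marking_step().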
1502   uint              _ref_counter_limit;
1503   uint              _ref_counter;
1504   bool              _is_serial;
1505 public:
1506   G1CMKeepAliveAndDrainClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
1507     _cm(cm), _task(task), _ref_counter_limit(G1RefProcDrainInterval),
1508     _ref_counter(_ref_counter_limit), _is_serial(is_serial) {
1509     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1510   }
1511 
1512   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1513   virtual void do_oop(      oop* p) { do_oop_work(p); }
1514 
1515   template <class T> void do_oop_work(T* p) {
1516     if (_cm->has_overflown()) {
1517       return;
1518     }
1519     if (!_task->deal_with_reference(p)) {
1520       // We did not add anything to the mark bitmap (or mark stack), so there is
1521       // no point trying to drain it.
1522       return;
1523     }
1524     _ref_counter--;
1525 
1526     if (_ref_counter == 0) {
1527       // We have dealt with _ref_counter_limit references, pushing them
1528       // and objects reachable from them on to the local stack (and
1529       // possibly the global stack). Call G1CMTask::do_marking_step() to
1530       // process these entries.
1531       //
1532       // We call G1CMTask::do_marking_step() in a loop, which we'll exit if
1533       // there's nothing more to do (i.e. we're done with the entries that
1534       // were pushed as a result of the G1CMTask::deal_with_reference() calls
1535       // above) or we overflow.
1536       //
1537       // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
1538       // flag while there may still be some work to do. (See the comment at
1539       // the beginning of G1CMTask::do_marking_step() for those conditions -
1540       // one of which is reaching the specified time target.) It is only
1541       // when G1CMTask::do_marking_step() returns without setting the
1542       // has_aborted() flag that the marking step has completed.
1543       do {
1544         double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1545         _task->do_marking_step(mark_step_duration_ms,
1546                                false      /* do_termination */,
1547                                _is_serial);
1548       } while (_task->has_aborted() && !_cm->has_overflown());
1549       _ref_counter = _ref_counter_limit;
1550     }
1551   }
1552 };
1553 
1554 // 'Drain' oop closure used by both serial and parallel reference processing.
1555 // Uses the G1CMTask associated with a given worker thread (for serial
1556 // reference processing the G1CMTask for worker 0 is used). Calls the
1557 // do_marking_step routine, with an unbelievably large timeout value,
1558 // to drain the marking data structures of the remaining entries
1559 // added by the 'keep alive' oop closure above.
1560 
1561 class G1CMDrainMarkingStackClosure : public VoidClosure {
1562   G1ConcurrentMark* _cm;
1563   G1CMTask*         _task;
1564   bool              _is_serial;
1565  public:
1566   G1CMDrainMarkingStackClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
1567     _cm(cm), _task(task), _is_serial(is_serial) {
1568     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1569   }
1570 
1571   void do_void() {
1572     do {
1573       // We call G1CMTask::do_marking_step() to completely drain the local
1574       // and global marking stacks of entries pushed by the 'keep alive'
1575       // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
1576       //
1577       // G1CMTask::do_marking_step() is called in a loop, which we'll exit
1578       // if there's nothing more to do (i.e. we've completely drained the
1579       // entries that were pushed as a result of applying the 'keep alive'
1580       // closure to the entries on the discovered ref lists) or we overflow
1581       // the global marking stack.
1582       //
1583       // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
1584       // flag while there may still be some work to do. (See the comment at
1585       // the beginning of G1CMTask::do_marking_step() for those conditions -
1586       // one of which is reaching the specified time target.) It is only
1587       // when G1CMTask::do_marking_step() returns without setting the
1588       // has_aborted() flag that the marking step has completed.
1589 
1590       _task->do_marking_step(1000000000.0 /* something very large */,
1591                              true         /* do_termination */,
1592                              _is_serial);
1593     } while (_task->has_aborted() && !_cm->has_overflown());
1594   }
1595 };
1596 
1597 class G1CMRefProcProxyTask : public RefProcProxyTask {
1598   G1CollectedHeap& _g1h;
1599   G1ConcurrentMark& _cm;
1600 
1601 public:
1602   G1CMRefProcProxyTask(uint max_workers, G1CollectedHeap& g1h, G1ConcurrentMark &cm)
1603     : RefProcProxyTask("G1CMRefProcProxyTask", max_workers),
1604       _g1h(g1h),
1605       _cm(cm) {}
1606 
1607   void work(uint worker_id) override {
1608     assert(worker_id < _max_workers, "sanity");
1609     G1CMIsAliveClosure is_alive(&_cm);
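         // Single-threaded reference processing funnels all work through marking task 0.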
1610     uint index = (_tm == RefProcThreadModel::Single) ? 0 : worker_id;
1611     G1CMKeepAliveAndDrainClosure keep_alive(&_cm, _cm.task(index), _tm == RefProcThreadModel::Single);
1612     BarrierEnqueueDiscoveredFieldClosure enqueue;
1613     G1CMDrainMarkingStackClosure complete_gc(&_cm, _cm.task(index), _tm == RefProcThreadModel::Single);
1614     _rp_task->rp_work(worker_id, &is_alive, &keep_alive, &enqueue, &complete_gc);
1615   }
1616 
1617   void prepare_run_task_hook() override {
1618     // We need to reset the concurrency level before each
1619     // proxy task execution, so that the termination protocol
1620     // and overflow handling in G1CMTask::do_marking_step() knows
1621     // how many workers to wait for.
1622     _cm.set_concurrency(_queue_count);
1623   }
1624 };
1625 
1626 void G1ConcurrentMark::weak_refs_work() {
1627   ResourceMark rm;
1628 
1629   {
1630     GCTraceTime(Debug, gc, phases) debug("Reference Processing", _gc_timer_cm);
1631 
1632     ReferenceProcessor* rp = _g1h->ref_processor_cm();
1633 
1634     // See the comment in G1CollectedHeap::ref_processing_init()
1635     // about how reference processing currently works in G1.
1636 
1637     assert(_global_mark_stack.is_empty(), "mark stack should be empty");
1638 
1639     // Prefer to grow the stack until the max capacity.
1640     _global_mark_stack.set_should_grow();
1641 
1642     // Parallel processing task executor.
1643     G1CMRefProcProxyTask task(rp->max_num_queues(), *_g1h, *this);
1644     ReferenceProcessorPhaseTimes pt(_gc_timer_cm, rp->max_num_queues());
1645 
1646     // Process the weak references.
1647     const ReferenceProcessorStats& stats = rp->process_discovered_references(task, _g1h->workers(), pt);
1648     _gc_tracer_cm->report_gc_reference_stats(stats);
1649     pt.print_all_references();
1650 
1651     // The do_oop work routines of the keep_alive and drain_marking_stack
1652     // oop closures will set the has_overflown flag if we overflow the
1653     // global marking stack.
1654 
1655     assert(has_overflown() || _global_mark_stack.is_empty(),
1656            "Mark stack should be empty (unless it has overflown)");
1657   }
1658 
1659   if (has_overflown()) {
1660     // We cannot trust g1_is_alive and the contents of the heap if the marking stack
1661     // overflowed while processing references. Exit the VM.
1662     fatal("Overflow during reference processing, can not continue. Current mark stack depth: "
1663           "%zu, MarkStackSize: %zu, MarkStackSizeMax: %zu. "
1664           "Please increase MarkStackSize and/or MarkStackSizeMax and restart.",
1665           _global_mark_stack.size(), MarkStackSize, MarkStackSizeMax);
1666     return;
1667   }
1668 
1669   assert(_global_mark_stack.is_empty(), "Marking should have completed");
1670 
1671   {
1672     GCTraceTime(Debug, gc, phases) debug("Weak Processing", _gc_timer_cm);
1673     G1CMIsAliveClosure is_alive(this);
1674     WeakProcessor::weak_oops_do(_g1h->workers(), &is_alive, &do_nothing_cl, 1);
1675   }
1676 }
1677 
1678 class G1PrecleanYieldClosure : public YieldClosure {
1679   G1ConcurrentMark* _cm;
1680 
1681 public:
1682   G1PrecleanYieldClosure(G1ConcurrentMark* cm) : _cm(cm) { }
1683 
1684   virtual bool should_return() {
1685     return _cm->has_aborted();
1686   }
1687 
1688   virtual bool should_return_fine_grain() {
1689     _cm->do_yield_check();
1690     return _cm->has_aborted();
1691   }
1692 };
1693 
1694 void G1ConcurrentMark::preclean() {
1695   assert(G1UseReferencePrecleaning, "Precleaning must be enabled.");
1696 
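       // Join the suspendible thread set so that precleaning yields to safepoints
       // (see the yield closure below).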
1697   SuspendibleThreadSetJoiner joiner;
1698 
1699   BarrierEnqueueDiscoveredFieldClosure enqueue;
1700 
1701   set_concurrency_and_phase(1, true);
1702 
1703   G1PrecleanYieldClosure yield_cl(this);
1704 
1705   ReferenceProcessor* rp = _g1h->ref_processor_cm();
1706   // Precleaning is single threaded. Temporarily disable MT discovery.
1707   ReferenceProcessorMTDiscoveryMutator rp_mut_discovery(rp, false);
1708   rp->preclean_discovered_references(rp->is_alive_non_header(),
1709                                      &enqueue,
1710                                      &yield_cl,
1711                                      _gc_timer_cm);
1712 }
1713 
1714 // Closure for marking entries in SATB buffers.
1715 class G1CMSATBBufferClosure : public SATBBufferClosure {
1716 private:
1717   G1CMTask* _task;
1718   G1CollectedHeap* _g1h;
1719 
1720   // This is very similar to G1CMTask::deal_with_reference, but with
1721   // more relaxed requirements for the argument, so this must be more
1722   // circumspect about treating the argument as an object.
1723   void do_entry(void* entry) const {
1724     _task->increment_refs_reached();
1725     oop const obj = cast_to_oop(entry);
1726     _task->make_reference_grey(obj);
1727   }
1728 
1729 public:
1730   G1CMSATBBufferClosure(G1CMTask* task, G1CollectedHeap* g1h)
1731     : _task(task), _g1h(g1h) { }
1732 
1733   virtual void do_buffer(void** buffer, size_t size) {
1734     for (size_t i = 0; i < size; ++i) {
1735       do_entry(buffer[i]);
1736     }
1737   }
1738 };
1739 
1740 class G1RemarkThreadsClosure : public ThreadClosure {
1741   G1SATBMarkQueueSet& _qset;
1742 
1743  public:
1744   G1RemarkThreadsClosure(G1CollectedHeap* g1h, G1CMTask* task) :
1745     _qset(G1BarrierSet::satb_mark_queue_set()) {}
1746 
1747   void do_thread(Thread* thread) {
1748     // Transfer any partial buffer to the qset for completed buffer processing.
1749     _qset.flush_queue(G1ThreadLocalData::satb_mark_queue(thread));
1750   }
1751 };
1752 
1753 class G1CMRemarkTask : public WorkerTask {
1754   // For Threads::possibly_parallel_threads_do
1755   ThreadsClaimTokenScope _threads_claim_token_scope;
1756   G1ConcurrentMark* _cm;
1757 public:
1758   void work(uint worker_id) {
1759     G1CMTask* task = _cm->task(worker_id);
1760     task->record_start_time();
1761     {
1762       ResourceMark rm;
1763 
1764       G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
1765       Threads::possibly_parallel_threads_do(true /* is_par */, &threads_f);
1766     }
1767 
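         // Finish marking with a practically unlimited time target; loop until
         // do_marking_step() completes without aborting, unless the global mark
         // stack has overflown.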
1768     do {
1769       task->do_marking_step(1000000000.0 /* something very large */,
1770                             true         /* do_termination       */,
1771                             false        /* is_serial            */);
1772     } while (task->has_aborted() && !_cm->has_overflown());
1773     // If we overflow, then we do not want to restart. We instead
1774     // want to abort remark and do concurrent marking again.
1775     task->record_end_time();
1776   }
1777 
1778   G1CMRemarkTask(G1ConcurrentMark* cm, uint active_workers) :
1779     WorkerTask("Par Remark"), _threads_claim_token_scope(), _cm(cm) {
1780     _cm->terminator()->reset_for_reuse(active_workers);
1781   }
1782 };
1783 
1784 void G1ConcurrentMark::finalize_marking() {
1785   ResourceMark rm;
1786 
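       // Make sure the heap is parsable before the final marking pass walks objects in it.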
1787   _g1h->ensure_parsability(false);
1788 
1789   // This is remark, so we use all active worker threads.
1790   uint active_workers = _g1h->workers()->active_workers();
1791   set_concurrency_and_phase(active_workers, false /* concurrent */);
1792   // Leave _parallel_marking_threads at its value originally calculated in the
1793   // G1ConcurrentMark constructor and pass the number of active workers through
1794   // the task.
1796 
1797   {
1798     G1CMRemarkTask remarkTask(this, active_workers);
1799     // We will start all available threads, even if we decide that the
1800     // active_workers will be fewer. The extra ones will just bail out
1801     // immediately.
1802     _g1h->workers()->run_task(&remarkTask);
1803   }
1804 
1805   SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
1806   guarantee(has_overflown() ||
1807             satb_mq_set.completed_buffers_num() == 0,
1808             "Invariant: has_overflown = %s, num buffers = %zu",
1809             BOOL_TO_STR(has_overflown()),
1810             satb_mq_set.completed_buffers_num());
1811 
1812   print_stats();
1813 }
1814 
1815 void G1ConcurrentMark::flush_all_task_caches() {
1816   size_t hits = 0;
1817   size_t misses = 0;
1818   for (uint i = 0; i < _max_num_tasks; i++) {
1819     Pair<size_t, size_t> stats = _tasks[i]->flush_mark_stats_cache();
1820     hits += stats.first;
1821     misses += stats.second;
1822   }
1823   size_t sum = hits + misses;
1824   log_debug(gc, stats)("Mark stats cache hits %zu misses %zu ratio %1.3lf",
1825                        hits, misses, percent_of(hits, sum));
1826 }
1827 
1828 void G1ConcurrentMark::clear_bitmap_for_region(G1HeapRegion* hr) {
1829   assert_at_safepoint();
1830   _mark_bitmap.clear_range(MemRegion(hr->bottom(), hr->end()));
1831 }
1832 
1833 G1HeapRegion* G1ConcurrentMark::claim_region(uint worker_id) {
1834   // "Checkpoint" the finger.
1835   HeapWord* local_finger = finger();
1836 
1837   while (local_finger < _heap.end()) {
1838     assert(_g1h->is_in_reserved(local_finger), "invariant");
1839 
1840     G1HeapRegion* curr_region = _g1h->heap_region_containing_or_null(local_finger);
1841     // Make sure that the reads below do not float before loading curr_region.
1842     OrderAccess::loadload();
1843     // heap_region_containing above may return null because we always scan and
1844     // claim until the end of the heap. In this case, just jump to the next region.
1845     HeapWord* end = curr_region != nullptr ? curr_region->end() : local_finger + G1HeapRegion::GrainWords;
1846 
1847     // Is the gap between reading the finger and doing the CAS too long?
1848     HeapWord* res = _finger.compare_exchange(local_finger, end);
1849     if (res == local_finger && curr_region != nullptr) {
1850       // We succeeded.
1851       HeapWord* bottom = curr_region->bottom();
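           // Only objects below TAMS (allocated before marking started) are scanned via
           // the bitmap; objects above TAMS are implicitly live.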
1852       HeapWord* limit = top_at_mark_start(curr_region);
1853 
1854       log_trace(gc, marking)("Claim region %u bottom " PTR_FORMAT " tams " PTR_FORMAT, curr_region->hrm_index(), p2i(curr_region->bottom()), p2i(top_at_mark_start(curr_region)));
1855       // Notice that _finger == end cannot be guaranteed here since
1856       // someone else might have moved the finger even further.
1857       assert(finger() >= end, "The finger should have moved forward");
1858 
1859       if (limit > bottom) {
1860         return curr_region;
1861       } else {
1862         assert(limit == bottom,
1863                "The region limit should be at bottom");
1864         // We return null and the caller should try calling
1865         // claim_region() again.
1866         return nullptr;
1867       }
1868     } else {
1869       // Read the finger again.
1870       HeapWord* next_finger = finger();
1871       assert(next_finger > local_finger, "The finger should have moved forward " PTR_FORMAT " " PTR_FORMAT, p2i(local_finger), p2i(next_finger));
1872       local_finger = next_finger;
1873     }
1874   }
1875 
1876   return nullptr;
1877 }
1878 
1879 #ifndef PRODUCT
1880 class VerifyNoCSetOops {
1881   G1CollectedHeap* _g1h;
1882   const char* _phase;
1883   int _info;
1884 
1885 public:
1886   VerifyNoCSetOops(const char* phase, int info = -1) :
1887     _g1h(G1CollectedHeap::heap()),
1888     _phase(phase),
1889     _info(info)
1890   { }
1891 
1892   void operator()(G1TaskQueueEntry task_entry) const {
1893     if (task_entry.is_partial_array_state()) {
1894       oop obj = task_entry.to_partial_array_state()->source();
1895       guarantee(_g1h->is_in_reserved(obj), "Partial Array " PTR_FORMAT " must be in heap.", p2i(obj));
1896       return;
1897     }
1898     guarantee(oopDesc::is_oop(task_entry.to_oop()),
1899               "Non-oop " PTR_FORMAT ", phase: %s, info: %d",
1900               p2i(task_entry.to_oop()), _phase, _info);
1901     G1HeapRegion* r = _g1h->heap_region_containing(task_entry.to_oop());
1902     guarantee(!(r->in_collection_set() || r->has_index_in_opt_cset()),
1903               "obj " PTR_FORMAT " from %s (%d) in region %u in (optional) collection set",
1904               p2i(task_entry.to_oop()), _phase, _info, r->hrm_index());
1905   }
1906 };
1907 
1908 void G1ConcurrentMark::verify_no_collection_set_oops() {
1909   assert(SafepointSynchronize::is_at_safepoint() || !is_init_completed(),
1910          "should be at a safepoint or initializing");
1911   if (!is_fully_initialized() || !_g1h->collector_state()->is_in_mark_or_rebuild()) {
1912     return;
1913   }
1914 
1915   // Verify entries on the global mark stack
1916   _global_mark_stack.iterate(VerifyNoCSetOops("Stack"));
1917 
1918   // Verify entries on the task queues
1919   for (uint i = 0; i < _max_num_tasks; ++i) {
1920     G1CMTaskQueue* queue = _task_queues->queue(i);
1921     queue->iterate(VerifyNoCSetOops("Queue", i));
1922   }
1923 
1924   // Verify the global finger
1925   HeapWord* global_finger = finger();
1926   if (global_finger != nullptr && global_finger < _heap.end()) {
1927     // Since we always iterate over all regions, we might get a null G1HeapRegion
1928     // here.
1929     G1HeapRegion* global_hr = _g1h->heap_region_containing_or_null(global_finger);
1930     guarantee(global_hr == nullptr || global_finger == global_hr->bottom(),
1931               "global finger: " PTR_FORMAT " region: " HR_FORMAT,
1932               p2i(global_finger), HR_FORMAT_PARAMS(global_hr));
1933   }
1934 
1935   // Verify the task fingers
1936   assert(_num_concurrent_workers <= _max_num_tasks, "sanity");
1937   for (uint i = 0; i < _num_concurrent_workers; ++i) {
1938     G1CMTask* task = _tasks[i];
1939     HeapWord* task_finger = task->finger();
1940     if (task_finger != nullptr && task_finger < _heap.end()) {
1941       // See above note on the global finger verification.
1942       G1HeapRegion* r = _g1h->heap_region_containing_or_null(task_finger);
1943       guarantee(r == nullptr || task_finger == r->bottom() ||
1944                 !r->in_collection_set() || !r->has_index_in_opt_cset(),
1945                 "task finger: " PTR_FORMAT " region: " HR_FORMAT,
1946                 p2i(task_finger), HR_FORMAT_PARAMS(r));
1947     }
1948   }
1949 }
1950 #endif // PRODUCT
1951 
1952 void G1ConcurrentMark::rebuild_and_scrub() {
1953   if (!needs_remembered_set_rebuild()) {
1954     log_debug(gc, marking)("Skipping Remembered Set Rebuild. No regions selected for rebuild, will only scrub");
1955   }
1956 
1957   G1ConcurrentRebuildAndScrub::rebuild_and_scrub(this, needs_remembered_set_rebuild(), _concurrent_workers);
1958 }
1959 
1960 void G1ConcurrentMark::print_stats() {
1961   if (!log_is_enabled(Debug, gc, stats)) {
1962     return;
1963   }
1964   log_debug(gc, stats)("---------------------------------------------------------------------");
1965   for (size_t i = 0; i < _num_active_tasks; ++i) {
1966     _tasks[i]->print_stats();
1967     log_debug(gc, stats)("---------------------------------------------------------------------");
1968   }
1969 }
1970 
1971 bool G1ConcurrentMark::concurrent_cycle_abort() {
1972   assert_at_safepoint_on_vm_thread();
1973   assert(_g1h->collector_state()->is_in_full_gc(), "must be");
1974 
1975   // If we start the compaction before the CM threads finish
1976   // scanning the root regions we might trip them over as we'll
1977   // be moving objects / updating references. Since the root region
1978   // scan is synchronized with the safepoint, just tell it to abort.
1979   // It will notice when the threads start up again later.
1980   abort_root_region_scan_at_safepoint();
1981 
1982   // If we have not started a concurrent cycle there is no need to do anything; we
1983   // might, however, have aborted the marking because of shutting down. In this case
1984   // the marking might have already completed the abort (so in_progress() below
1985   // returns false), but may still have left marking state, particularly in the
1986   // shared marking bitmap, that must be cleaned up.
1987   // If there are multiple full GCs during shutdown we do this work repeatedly for
1988   // nothing, but this situation should be extremely rare (a full GC after shutdown
1989   // has been signalled is already rare), and this work should be negligible compared
1990   // to actual full GC work.
1991 
1992   if (!is_fully_initialized() || (!cm_thread()->is_in_progress() && !cm_thread()->should_terminate())) {
1993     return false;
1994   }
1995 
1996   reset_marking_for_restart();
1997 
1998   abort_marking_threads();
1999 
2000   SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
2001   satb_mq_set.abandon_partial_marking();
2002   // This can be called either during or outside marking; we'll read
2003   // the expected_active value from the SATB queue set.
2004   satb_mq_set.set_active_all_threads(false, /* new active value */
2005                                      satb_mq_set.is_active() /* expected_active */);
2006   return true;
2007 }
2008 
2009 void G1ConcurrentMark::abort_marking_threads() {
2010   assert_root_region_scan_completed_or_aborted();
2011   _has_aborted.store_relaxed(true);
2012   _first_overflow_barrier_sync.abort();
2013   _second_overflow_barrier_sync.abort();
2014 }
2015 
2016 double G1ConcurrentMark::worker_threads_cpu_time_s() {
2017   class CountCpuTimeThreadClosure : public ThreadClosure {
2018   public:
2019     jlong _total_cpu_time;
2020 
2021     CountCpuTimeThreadClosure() : ThreadClosure(), _total_cpu_time(0) { }
2022 
2023     void do_thread(Thread* t) {
2024       _total_cpu_time += os::thread_cpu_time(t);
2025     }
2026   } cl;
2027 
2028   threads_do(&cl);
2029 
2030   return (double)cl._total_cpu_time / NANOSECS_PER_SEC;
2031 }
2032 
2033 static void print_ms_time_info(const char* prefix, const char* name,
2034                                NumberSeq& ns) {
2035   log_trace(gc, marking)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
2036                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
2037   if (ns.num() > 0) {
2038     log_trace(gc, marking)("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
2039                            prefix, ns.sd(), ns.maximum());
2040   }
2041 }
2042 
2043 void G1ConcurrentMark::print_summary_info() {
2044   Log(gc, marking) log;
2045   if (!log.is_trace()) {
2046     return;
2047   }
2048 
2049   log.trace(" Concurrent marking:");
2050   if (!is_fully_initialized()) {
2051     log.trace("    has not been initialized yet");
2052     return;
2053   }
2054   print_ms_time_info("  ", "remarks", _remark_times);
2055   {
2056     print_ms_time_info("     ", "final marks", _remark_mark_times);
2057     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
2058 
2059   }
2060   print_ms_time_info("  ", "cleanups", _cleanup_times);
2061   log.trace("    Finalize live data total time = %8.2f s (avg = %8.2f ms).",
2062             _cleanup_times.sum() / 1000.0, _cleanup_times.avg());
2063   log.trace("  Total stop_world time = %8.2f s.",
2064             (_remark_times.sum() + _cleanup_times.sum())/1000.0);
2065   log.trace("  Total concurrent time = %8.2f s (%8.2f s marking).",
2066             cm_thread()->total_mark_cpu_time_s(), cm_thread()->worker_threads_cpu_time_s());
2067 }
2068 
2069 void G1ConcurrentMark::threads_do(ThreadClosure* tc) const {
2070   if (is_fully_initialized()) { // they are initialized late
2071     tc->do_thread(_cm_thread);
2072     _concurrent_workers->threads_do(tc);
2073   }
2074 }
2075 
2076 void G1ConcurrentMark::print_on(outputStream* st) const {
2077   st->print_cr("Marking Bits: (CMBitMap*) " PTR_FORMAT, p2i(mark_bitmap()));
2078   _mark_bitmap.print_on(st, " Bits: ");
2079 }
2080 
2081 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) {
2082   ReferenceProcessor* result = g1h->ref_processor_cm();
2083   assert(result != nullptr, "CM reference processor should not be null");
2084   return result;
2085 }
2086 
2087 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
2088                                G1CMTask* task)
2089   : ClaimMetadataVisitingOopIterateClosure(ClassLoaderData::_claim_strong, get_cm_oop_closure_ref_processor(g1h)),
2090     _g1h(g1h), _task(task)
2091 { }
2092 
2093 void G1CMTask::setup_for_region(G1HeapRegion* hr) {
2094   assert(hr != nullptr,
2095         "claim_region() should have filtered out null regions");
2096   _curr_region  = hr;
2097   _finger       = hr->bottom();
2098   update_region_limit();
2099 }
2100 
2101 void G1CMTask::update_region_limit() {
2102   G1HeapRegion* hr = _curr_region;
2103   HeapWord* bottom = hr->bottom();
2104   HeapWord* limit = _cm->top_at_mark_start(hr);
2105 
2106   if (limit == bottom) {
2107     // The region was collected underneath our feet.
2108     // We set the finger to bottom to ensure that the bitmap
2109     // iteration that will follow this will not do anything.
2110     // (this is not a condition that holds when we set the region up,
2111     // as the region is not supposed to be empty in the first place)
2112     _finger = bottom;
2113   } else if (limit >= _region_limit) {
2114     assert(limit >= _finger, "peace of mind");
2115   } else {
2116     assert(limit < _region_limit, "only way to get here");
2117     // This can happen under some pretty unusual circumstances.  An
2118     // evacuation pause empties the region underneath our feet (TAMS
2119     // at bottom). We then do some allocation in the region (TAMS
2120     // stays at bottom), followed by the region being used as a GC
2121     // alloc region (TAMS will move to top() and the objects
2122     // originally below it will be greyed). All objects now marked in
2123     // the region are explicitly greyed, if below the global finger,
2124     // and we do not need in fact to scan anything else. So, we simply
2125     // set _finger to be limit to ensure that the bitmap iteration
2126     // doesn't do anything.
2127     _finger = limit;
2128   }
2129 
2130   _region_limit = limit;
2131 }
2132 
2133 void G1CMTask::giveup_current_region() {
2134   assert(_curr_region != nullptr, "invariant");
2135   clear_region_fields();
2136 }
2137 
2138 void G1CMTask::clear_region_fields() {
2139   // Values for these three fields that indicate that we're not
2140   // holding on to a region.
2141   _curr_region   = nullptr;
2142   _finger        = nullptr;
2143   _region_limit  = nullptr;
2144 }
2145 
2146 void G1CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
2147   if (cm_oop_closure == nullptr) {
2148     assert(_cm_oop_closure != nullptr, "invariant");
2149   } else {
2150     assert(_cm_oop_closure == nullptr, "invariant");
2151   }
2152   _cm_oop_closure = cm_oop_closure;
2153 }
2154 
2155 void G1CMTask::reset(G1CMBitMap* mark_bitmap) {
2156   guarantee(mark_bitmap != nullptr, "invariant");
2157   _mark_bitmap              = mark_bitmap;
2158   clear_region_fields();
2159 
2160   _calls                         = 0;
2161   _elapsed_time_ms               = 0.0;
2162   _termination_time_ms           = 0.0;
2163 
2164   _mark_stats_cache.reset();
2165 }
2166 
2167 void G1CMTask::reset_for_restart() {
2168   clear_region_fields();
2169   _task_queue->set_empty();
2170   TASKQUEUE_STATS_ONLY(_partial_array_splitter.stats()->reset());
2171   TASKQUEUE_STATS_ONLY(_task_queue->stats.reset());
2172 }
2173 
2174 void G1CMTask::register_partial_array_splitter() {
2175 
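       // Construct the splitter in place; unregister_partial_array_splitter() below
       // destroys it again explicitly.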
2176   ::new (&_partial_array_splitter) PartialArraySplitter(_cm->partial_array_state_manager(),
2177                                                         _cm->max_num_tasks());
2178 }
2179 
2180 void G1CMTask::unregister_partial_array_splitter() {
2181   _partial_array_splitter.~PartialArraySplitter();
2182 }
2183 
2184 bool G1CMTask::should_exit_termination() {
2185   if (!regular_clock_call()) {
2186     return true;
2187   }
2188 
2189   // This is called when we are in the termination protocol. We should
2190   // quit if, for some reason, this task wants to abort or the global
2191   // stack is not empty (this means that we can get work from it).
2192   return !_cm->mark_stack_empty() || has_aborted();
2193 }
2194 
2195 void G1CMTask::reached_limit() {
2196   assert(_words_scanned >= _words_scanned_limit ||
2197          _refs_reached >= _refs_reached_limit,
2198          "shouldn't have been called otherwise");
2199   abort_marking_if_regular_check_fail();
2200 }
2201 
2202 bool G1CMTask::regular_clock_call() {
2203   if (has_aborted()) {
2204     return false;
2205   }
2206 
2207   // First, we need to recalculate the words scanned and refs reached
2208   // limits for the next clock call.
2209   recalculate_limits();
2210 
2211   // During the regular clock call we do the following
2212 
2213   // (1) If an overflow has been flagged, then we abort.
2214   if (_cm->has_overflown()) {
2215     return false;
2216   }
2217 
2218   // If we are not concurrent (i.e. we're doing remark) we don't need
2219   // to check anything else. The other steps are only needed during
2220   // the concurrent marking phase.
2221   if (!_cm->concurrent()) {
2222     return true;
2223   }
2224 
2225   // (2) If marking has been aborted for Full GC, then we also abort.
2226   if (_cm->has_aborted()) {
2227     return false;
2228   }
2229 
2230   // (3) We check whether we should yield. If we have to, then we abort.
2231   if (SuspendibleThreadSet::should_yield()) {
2232     // We should yield. To do this we abort the task. The caller is
2233     // responsible for yielding.
2234     return false;
2235   }
2236 
2237   // (4) We check whether we've reached our time quota. If we have,
2238   // then we abort.
2239   double elapsed_time_ms = (double)(os::current_thread_cpu_time() - _start_cpu_time_ns) / NANOSECS_PER_MILLISEC;
2240   if (elapsed_time_ms > _time_target_ms) {
2241     _has_timed_out = true;
2242     return false;
2243   }
2244 
2245   // (5) Finally, we check whether there are enough completed SATB
2246   // buffers available for processing. If there are, we abort.
2247   SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
2248   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
2249     // We need to process SATB buffers, so we abort and restart
2250     // the marking task to do so.
2251     return false;
2252   }
2253   return true;
2254 }
2255 
2256 void G1CMTask::recalculate_limits() {
2257   _real_words_scanned_limit = _words_scanned + words_scanned_period;
2258   _words_scanned_limit      = _real_words_scanned_limit;
2259 
2260   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
2261   _refs_reached_limit       = _real_refs_reached_limit;
2262 }
2263 
2264 void G1CMTask::decrease_limits() {
2265   // This is called when we believe that we're going to do an infrequent
2266   // operation which will increase the per byte scanned cost (i.e. move
2267   // entries to/from the global stack). It basically tries to decrease the
2268   // scanning limit so that the clock is called earlier.
2269 
2270   _words_scanned_limit = _real_words_scanned_limit - 3 * words_scanned_period / 4;
2271   _refs_reached_limit  = _real_refs_reached_limit - 3 * refs_reached_period / 4;
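       // This leaves at most about a quarter of the normal period before the next clock call.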
2272 }
2273 
2274 void G1CMTask::move_entries_to_global_stack() {
2275   // Local array where we'll store the entries that will be popped
2276   // from the local queue.
2277   G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2278 
2279   size_t n = 0;
2280   G1TaskQueueEntry task_entry;
2281   while (n < G1CMMarkStack::EntriesPerChunk && _task_queue->pop_local(task_entry)) {
2282     buffer[n] = task_entry;
2283     ++n;
2284   }
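       // Terminate a partially filled chunk with an empty entry.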
2285   if (n < G1CMMarkStack::EntriesPerChunk) {
2286     buffer[n] = G1TaskQueueEntry();
2287   }
2288 
2289   if (n > 0) {
2290     if (!_cm->mark_stack_push(buffer)) {
2291       set_has_aborted();
2292     }
2293   }
2294 
2295   // This operation was quite expensive, so decrease the limits.
2296   decrease_limits();
2297 }
2298 
2299 bool G1CMTask::get_entries_from_global_stack() {
2300   // Local array where we'll store the entries that will be popped
2301   // from the global stack.
2302   G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2303 
2304   if (!_cm->mark_stack_pop(buffer)) {
2305     return false;
2306   }
2307 
2308   // We did actually pop at least one entry.
2309   for (size_t i = 0; i < G1CMMarkStack::EntriesPerChunk; ++i) {
2310     G1TaskQueueEntry task_entry = buffer[i];
2311     if (task_entry.is_null()) {
2312       break;
2313     }
2314     assert(task_entry.is_partial_array_state() || oopDesc::is_oop(task_entry.to_oop()), "Element " PTR_FORMAT " must be an array slice or oop", p2i(task_entry.to_oop()));
2315     bool success = _task_queue->push(task_entry);
2316     // We only call this when the local queue is empty or under a
2317     // given target limit. So, we do not expect this push to fail.
2318     assert(success, "invariant");
2319   }
2320 
2321   // This operation was quite expensive, so decrease the limits
2322   decrease_limits();
2323   return true;
2324 }
2325 
2326 void G1CMTask::drain_local_queue(bool partially) {
2327   if (has_aborted()) {
2328     return;
2329   }
2330 
2331   // Decide what the target size is, depending on whether we're going to
2332   // drain it partially (so that other tasks can steal if they run out
2333   // of things to do) or totally (at the very end).
2334   uint target_size;
2335   if (partially) {
2336     target_size = GCDrainStackTargetSize;
2337   } else {
2338     target_size = 0;
2339   }
2340 
2341   if (_task_queue->size() > target_size) {
2342     G1TaskQueueEntry entry;
2343     bool ret = _task_queue->pop_local(entry);
2344     while (ret) {
2345       process_entry(entry, false /* stolen */);
2346       if (_task_queue->size() <= target_size || has_aborted()) {
2347         ret = false;
2348       } else {
2349         ret = _task_queue->pop_local(entry);
2350       }
2351     }
2352   }
2353 }
2354 
2355 size_t G1CMTask::start_partial_array_processing(objArrayOop obj) {
2356   assert(obj->length() >= (int)ObjArrayMarkingStride, "Must be a large array object %d", obj->length());
2357 
2358   // Mark klass metadata
2359   process_klass(obj->klass());
2360 
2361   size_t array_length = obj->length();
2362   size_t initial_chunk_size = _partial_array_splitter.start(_task_queue, obj, nullptr, array_length, ObjArrayMarkingStride);
2363 
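       // The splitter has pushed the remainder of the array onto the task queue as
       // partial array tasks; the initial chunk is processed inline here.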
2364   process_array_chunk(obj, 0, initial_chunk_size);
2365 
2366   // Include object header size
2367   if (obj->is_refArray()) {
2368     return refArrayOopDesc::object_size(checked_cast<int>(initial_chunk_size));
2369   } else {
2370     FlatArrayKlass* fak = FlatArrayKlass::cast(obj->klass());
2371     return flatArrayOopDesc::object_size(fak->layout_helper(), checked_cast<int>(initial_chunk_size));
2372   }
2373 }
2374 
2375 size_t G1CMTask::process_partial_array(const G1TaskQueueEntry& task, bool stolen) {
2376   PartialArrayState* state = task.to_partial_array_state();
2377   // Access state before release by claim().
2378   objArrayOop obj = oop_cast<objArrayOop>(state->source());
2379 
2380   PartialArraySplitter::Claim claim =
2381     _partial_array_splitter.claim(state, _task_queue, stolen);
2382 
2383   process_array_chunk(obj, claim._start, claim._end);
2384 
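       // Return the number of heap words covered by the processed elements.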
2385   if (obj->is_refArray()) {
2386     return heap_word_size((claim._end - claim._start) * heapOopSize);
2387   } else {
2388     assert(obj->is_flatArray(), "Must be!");
2389     size_t element_byte_size = FlatArrayKlass::cast(obj->klass())->element_byte_size();
2390     size_t nof_elements = claim._end - claim._start;
2391     return heap_word_size(nof_elements * element_byte_size);
2392   }
2393 }
2394 
2395 void G1CMTask::drain_global_stack(bool partially) {
2396   if (has_aborted()) {
2397     return;
2398   }
2399 
2400   // We have a policy to drain the local queue before we attempt to
2401   // drain the global stack.
2402   assert(partially || _task_queue->size() == 0, "invariant");
2403 
2404   // Decide what the target size is, depending on whether we're going to
2405   // drain it partially (so that other tasks can steal if they run out
2406   // of things to do) or totally (at the very end).
2407   // Notice that when draining the global mark stack partially, due to the raciness
2408   // of the mark stack size update we might in fact drop below the target. But,
2409   // this is not a problem.
2410   // In case of total draining, we simply process until the global mark stack is
2411   // totally empty, disregarding the size counter.
2412   if (partially) {
2413     size_t const target_size = _cm->partial_mark_stack_size_target();
2414     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
2415       if (get_entries_from_global_stack()) {
2416         drain_local_queue(partially);
2417       }
2418     }
2419   } else {
2420     while (!has_aborted() && get_entries_from_global_stack()) {
2421       drain_local_queue(partially);
2422     }
2423   }
2424 }
2425 
2426 // The SATB queue code makes several assumptions about whether to call the par or
2427 // non-par versions of the methods. This is why some of the code is
2428 // replicated. We should really get rid of the single-threaded version
2429 // of the code to simplify things.
2430 void G1CMTask::drain_satb_buffers() {
2431   if (has_aborted()) {
2432     return;
2433   }
2434 
2435   // We set this so that the regular clock knows that we're in the
2436   // middle of draining buffers and doesn't set the abort flag when it
2437   // notices that SATB buffers are available for draining. It'd be
2438   // very counterproductive if it did that. :-)
2439   _draining_satb_buffers = true;
2440 
2441   G1CMSATBBufferClosure satb_cl(this, _g1h);
2442   SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
2443 
2444   // This keeps claiming and applying the closure to completed buffers
2445   // until we run out of buffers or we need to abort.
2446   while (!has_aborted() &&
2447          satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
2448     abort_marking_if_regular_check_fail();
2449   }
2450 
2451   // Can't assert qset is empty here, even if not aborted.  If concurrent,
2452   // some other thread might be adding to the queue.  If not concurrent,
2453   // some other thread might have won the race for the last buffer, but
2454   // has not yet decremented the count.
2455 
2456   _draining_satb_buffers = false;
2457 
2458   // Again, this was a potentially expensive operation; decrease the
2459   // limits to get the regular clock call earlier.
2460   decrease_limits();
2461 }
2462 
2463 void G1CMTask::clear_mark_stats_cache(uint region_idx) {
2464   _mark_stats_cache.reset(region_idx);
2465 }
2466 
2467 Pair<size_t, size_t> G1CMTask::flush_mark_stats_cache() {
2468   return _mark_stats_cache.evict_all();
2469 }
2470 
2471 void G1CMTask::print_stats() {
2472   log_debug(gc, stats)("Marking Stats, task = %u, calls = %u", _worker_id, _calls);
2473   log_debug(gc, stats)("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
2474                        _elapsed_time_ms, _termination_time_ms);
2475   log_debug(gc, stats)("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms max = %1.2lfms, total = %1.2lfms",
2476                        _step_times_ms.num(),
2477                        _step_times_ms.avg(),
2478                        _step_times_ms.sd(),
2479                        _step_times_ms.maximum(),
2480                        _step_times_ms.sum());
2481   size_t const hits = _mark_stats_cache.hits();
2482   size_t const misses = _mark_stats_cache.misses();
2483   log_debug(gc, stats)("  Mark Stats Cache: hits %zu misses %zu ratio %.3f",
2484                        hits, misses, percent_of(hits, hits + misses));
2485 }
2486 
2487 bool G1ConcurrentMark::try_stealing(uint worker_id, G1TaskQueueEntry& task_entry) {
2488   return _task_queues->steal(worker_id, task_entry);
2489 }
2490 
2491 void G1CMTask::process_current_region(G1CMBitMapClosure& bitmap_closure) {
2492   if (has_aborted() || _curr_region == nullptr) {
2493     return;
2494   }
2495 
2496   // This means that we're already holding on to a region.
2497   assert(_finger != nullptr, "if region is not null, then the finger "
2498          "should not be null either");
2499 
2500   // We might have restarted this task after an evacuation pause
2501   // which might have evacuated the region we're holding on to
2502   // underneath our feet. Let's read its limit again to make sure
2503   // that we do not iterate over a region of the heap that
2504   // contains garbage (update_region_limit() will also move
2505   // _finger to the start of the region if it is found empty).
2506   update_region_limit();
2507   // We will start from _finger not from the start of the region,
2508   // as we might be restarting this task after aborting half-way
2509   // through scanning this region. In this case, _finger points to
2510   // the address where we last found a marked object. If this is a
2511   // fresh region, _finger points to start().
2512   MemRegion mr = MemRegion(_finger, _region_limit);
2513 
2514   assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
2515          "humongous regions should go around loop once only");
2516 
2517   // Some special cases:
2518   // If the memory region is empty, we can just give up the region.
2519   // If the current region is humongous then we only need to check
2520   // the bitmap for the bit associated with the start of the object,
2521   // scan the object if it's live, and give up the region.
2522   // Otherwise, let's iterate over the bitmap of the part of the region
2523   // that is left.
2524   // If the iteration is successful, give up the region.
2525   if (mr.is_empty()) {
2526     giveup_current_region();
2527     abort_marking_if_regular_check_fail();
2528   } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
2529     if (_mark_bitmap->is_marked(mr.start())) {
2530       // The object is marked - apply the closure
2531       bitmap_closure.do_addr(mr.start());
2532     }
2533     // Even if this task aborted while scanning the humongous object
2534     // we can (and should) give up the current region.
2535     giveup_current_region();
2536     abort_marking_if_regular_check_fail();
2537   } else if (_mark_bitmap->iterate(&bitmap_closure, mr)) {
2538     giveup_current_region();
2539     abort_marking_if_regular_check_fail();
2540   } else {
2541     assert(has_aborted(), "currently the only way to do so");
2542     // The only way to abort the bitmap iteration is to return
2543     // false from the do_bit() method. However, inside the
2544     // do_bit() method we move the _finger to point to the
2545     // object currently being looked at. So, if we bail out, we
2546     // have definitely set _finger to something non-null.
2547     assert(_finger != nullptr, "invariant");
2548 
2549     // Region iteration was actually aborted. So now _finger
2550     // points to the address of the object we last scanned. If we
2551     // leave it there, when we restart this task, we will rescan
2552     // the object. It is easy to avoid this. We move the finger by
2553     // enough to point to the next possible object header.
2554     assert(_finger < _region_limit, "invariant");
2555     HeapWord* const new_finger = _finger + cast_to_oop(_finger)->size();
2556     if (new_finger >= _region_limit) {
2557       giveup_current_region();
2558     } else {
2559       move_finger_to(new_finger);
2560     }
2561   }
2562 }
2563 
2564 void G1CMTask::claim_new_region() {
2565   // Read the note on the claim_region() method on why it might
2566   // return null with potentially more regions available for
2567   // claiming and why we have to check out_of_regions() to determine
2568   // whether we're done or not.
2569   while (!has_aborted() && _curr_region == nullptr && !_cm->out_of_regions()) {
2570     // We are going to try to claim a new region. We should have
2571     // given up on the previous one.
2572     // Separated the asserts so that we know which one fires.
2573     assert(_curr_region  == nullptr, "invariant");
2574     assert(_finger       == nullptr, "invariant");
2575     assert(_region_limit == nullptr, "invariant");
2576     G1HeapRegion* claimed_region = _cm->claim_region(_worker_id);
2577     if (claimed_region != nullptr) {
2578       // Yes, we managed to claim one
2579       setup_for_region(claimed_region);
2580       assert(_curr_region == claimed_region, "invariant");
2581     }
2582     // It is important to call the regular clock here. It might take
2583     // a while to claim a region if, for example, we hit a large
2584     // block of empty regions. So we need to call the regular clock
2585     // method once round the loop to make sure it's called
2586     // frequently enough.
2587     abort_marking_if_regular_check_fail();
2588   }
2589 }
2590 
2591 void G1CMTask::attempt_stealing() {
2592   // We cannot check whether the global stack is empty, since other
2593   // tasks might be pushing objects to it concurrently.
2594   assert(_cm->out_of_regions() && _task_queue->size() == 0,
2595          "only way to reach here");
2596   while (!has_aborted()) {
2597     G1TaskQueueEntry entry;
2598     if (_cm->try_stealing(_worker_id, entry)) {
2599       process_entry(entry, true /* stolen */);
2600 
2601       // And since we're towards the end, let's totally drain the
2602       // local queue and global stack.
2603       drain_local_queue(false);
2604       drain_global_stack(false);
2605     } else {
2606       break;
2607     }
2608   }
2609 }
2610 
2611 void G1CMTask::attempt_termination(bool is_serial) {
2612   // We cannot check whether the global stack is empty, since other
2613   // tasks might be concurrently pushing objects on it.
2614   // Separated the asserts so that we know which one fires.
2615   assert(_cm->out_of_regions(), "only way to reach here");
2616   assert(_task_queue->size() == 0, "only way to reach here");
2617   double termination_start_time_ms = os::elapsedTime() * 1000.0;
2618 
2619   // The G1CMTask class also extends the TerminatorTerminator class,
2620   // hence its should_exit_termination() method will also decide
2621   // whether to exit the termination protocol or not.
2622   bool finished = (is_serial ||
2623                    _cm->terminator()->offer_termination(this));
2624   _termination_time_ms += (os::elapsedTime() * 1000.0 - termination_start_time_ms);
2625 
2626   if (finished) {
2627     // We're all done.
2628 
2629     // We can now guarantee that the global stack is empty, since
2630     // all other tasks have finished. We separated the guarantees so
2631     // that, if a condition is false, we can immediately find out
2632     // which one.
2633     guarantee(_cm->out_of_regions(), "only way to reach here");
2634     guarantee(_cm->mark_stack_empty(), "only way to reach here");
2635     guarantee(_task_queue->size() == 0, "only way to reach here");
2636     guarantee(!_cm->has_overflown(), "only way to reach here");
2637     guarantee(!has_aborted(), "should never happen if termination has completed");
2638   } else {
2639     // Apparently there's more work to do. Let's abort this task. We
2640     // will restart it and hopefully we can find more things to do.
2641     set_has_aborted();
2642   }
2643 }
2644 
2645 void G1CMTask::handle_abort(bool is_serial, double elapsed_time_ms) {
2646   if (_has_timed_out) {
2647     double diff_ms = elapsed_time_ms - _time_target_ms;
2648     // Keep statistics of how well we did with respect to hitting
2649     // our target only if we actually timed out (if we aborted for
2650     // other reasons, then the results might get skewed).
2651     _marking_step_diff_ms.add(diff_ms);
2652   }
2653 
2654   if (!_cm->has_overflown()) {
2655     return;
2656   }
2657 
2658   // This is the interesting one. We aborted because a global
2659   // overflow was raised. This means we have to restart the
2660   // marking phase and start iterating over regions. However, in
2661   // order to do this we have to make sure that all tasks stop
2662   // what they are doing and re-initialize in a safe manner. We
2663   // will achieve this with the use of two barrier sync points.
2664   if (!is_serial) {
2665     // We only need to enter the sync barrier if being called
2666     // from a parallel context
2667     _cm->enter_first_sync_barrier(_worker_id);
2668 
2669     // When we exit this sync barrier we know that all tasks have
2670     // stopped doing marking work. So, it's now safe to
2671     // re-initialize our data structures.
2672   }
2673 
2674   clear_region_fields();
2675   flush_mark_stats_cache();
2676 
2677   if (!is_serial) {
2678     // If we're executing the concurrent phase of marking, reset the marking
2679     // state; otherwise the marking state is reset after reference processing,
2680     // during the remark pause.
2681     // If we reset here as a result of an overflow during the remark we will
2682     // see assertion failures from any subsequent set_concurrency_and_phase()
2683     // calls.
2684     if (_cm->concurrent() && _worker_id == 0) {
2685       // Worker 0 is responsible for clearing the global data structures because
2686       // of an overflow. During STW we should not clear the overflow flag (in
2687       // G1ConcurrentMark::reset_marking_for_restart()) since we rely on it being true
2688       // when we exit this method to abort the pause and restart concurrent marking.
2689       _cm->reset_marking_for_restart();
2690 
2691       log_info(gc, marking)("Concurrent Mark reset for overflow");
2692     }
2693 
2694     // ...and enter the second barrier.
2695     _cm->enter_second_sync_barrier(_worker_id);
2696   }
2697 }
2698 
2699 /*****************************************************************************
2700 
2701     The do_marking_step(time_target_ms, ...) method is the building
2702     block of the parallel marking framework. It can be called in parallel
2703     with other invocations of do_marking_step() on different tasks
2704     (but only one per task, obviously) and concurrently with the
2705     mutator threads, or during remark, hence it eliminates the need
2706     for two versions of the code. When called during remark, it will
2707     pick up from where the task left off during the concurrent marking
2708     phase. Interestingly, tasks are also claimable during evacuation
2709     pauses, since do_marking_step() ensures that it aborts before
2710     it needs to yield.
2711 
2712     The data structures that it uses to do marking work are the
2713     following:
2714 
2715       (1) Marking Bitmap. If there are grey objects that appear only
2716       on the bitmap (this happens either when dealing with an overflow
2717       or when the concurrent start pause has simply marked the roots
2718       and didn't push them on the stack), then tasks claim heap
2719       regions whose bitmap they then scan to find grey objects. A
2720       global finger indicates where the end of the last claimed region
2721       is. A local finger indicates how far into the region a task has
2722       scanned. The two fingers are used to determine how to grey an
2723       object (i.e. whether simply marking it is OK, as it will be
2724       visited by a task in the future, or whether it needs to be also
2725       pushed on a stack).
2726 
2727       (2) Local Queue. The local queue of the task which is accessed
2728       reasonably efficiently by the task. Other tasks can steal from
2729       it when they run out of work. Throughout the marking phase, a
2730       task attempts to keep its local queue short but not totally
2731       empty, so that entries are available for stealing by other
2732       tasks. Only when there is no more work, a task will totally
2733       drain its local queue.
2734 
2735       (3) Global Mark Stack. This handles local queue overflow. During
2736       marking only sets of entries are moved between it and the local
2737       queues, as access to it requires a mutex and more fine-grain
2738       interaction with it which might cause contention. If it
2739       overflows, then the marking phase should restart and iterate
2740       over the bitmap to identify grey objects. Throughout the marking
2741       phase, tasks attempt to keep the global mark stack at a small
2742       length but not totally empty, so that entries are available for
2743       popping by other tasks. Only when there is no more work, tasks
2744       will totally drain the global mark stack.
2745 
2746       (4) SATB Buffer Queue. This is where completed SATB buffers are
2747       made available. Buffers are regularly removed from this queue
2748       and scanned for roots, so that the queue doesn't get too
2749       long. During remark, all completed buffers are processed, as
2750       well as the filled in parts of any uncompleted buffers.
2751 
2752     The do_marking_step() method tries to abort when the time target
2753     has been reached. There are a few other cases when the
2754     do_marking_step() method also aborts:
2755 
2756       (1) When the marking phase has been aborted (after a Full GC).
2757 
2758       (2) When a global overflow (on the global stack) has been
2759       triggered. Before the task aborts, it will actually sync up with
2760       the other tasks to ensure that all the marking data structures
2761       (local queues, stacks, fingers, etc.) are re-initialized so that
2762       when do_marking_step() completes, the marking phase can
2763       immediately restart.
2764 
2765       (3) When enough completed SATB buffers are available. The
2766       do_marking_step() method only tries to drain SATB buffers right
2767       at the beginning. So, if enough buffers are available, the
2768       marking step aborts and the SATB buffers are processed at
2769       the beginning of the next invocation.
2770 
2771       (4) To yield. When we have to yield, we abort and do the yield
2772       right at the end of do_marking_step(). This saves us a lot of
2773       hassle as, by yielding, we might allow a Full GC. If that
2774       happens, objects will be compacted underneath our feet, the
2775       heap might shrink, etc. We avoid having to check for all this
2776       by just aborting and doing the yield right at the end.
2777 
2778     From the above it follows that the do_marking_step() method should
2779     be called in a loop (or, otherwise, regularly) until it completes.
2780 
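         For illustration, the concurrent driver loops along these lines (a
         simplified sketch of what G1CMConcurrentMarkingTask does, not the
         exact code):

           do {
             task->do_marking_step(G1ConcMarkStepDurationMillis,
                                   true,    // do_termination
                                   false);  // is_serial
             // Yield to a pending safepoint (e.g. an evacuation pause),
             // if any, before resuming from where the step left off.
             cm->do_yield_check();
           } while (!cm->has_aborted() && task->has_aborted());
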
2781     If a marking step completes without its has_aborted() flag being
2782     true, it means it has completed the current marking phase (and
2783     also all other marking tasks have done so and have all synced up).
2784 
2785     A method called regular_clock_call() is invoked "regularly" (at
2786     sub-ms intervals) throughout marking. It is this clock method that
2787     checks all the abort conditions mentioned above and decides when
2788     the task should abort. A work-based scheme is used to trigger this
2789     clock method: it is called whenever the number of object words the
2790     marking phase has scanned or the number of references the marking
2791     phase has visited reaches a given limit. Additional invocations of
2792     the clock method have been planted in a few other strategic places
2793     too. The initial reason for the clock method was to avoid gathering
2794     CPU time too regularly, as it is quite expensive. So, once it was
2795     in place, it was natural to piggy-back all the other conditions on
2796     it too, rather than constantly checking them throughout the code.
2797 
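         Conceptually, the work-based trigger looks like this (pseudocode;
         the actual bookkeeping is spread across the object scanning and
         reference visiting paths):

           _words_scanned += size of the object just scanned;
           _refs_reached  += 1 for every reference visited;
           if (_words_scanned >= _words_scanned_limit ||
               _refs_reached  >= _refs_reached_limit) {
             regular_clock_call();  // check time target, SATB queue
                                    // length, overflow, pending yields, ...
           }
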
2798     If do_termination is true then do_marking_step will enter its
2799     termination protocol.
2800 
2801     The value of is_serial must be true when do_marking_step is being
2802     called serially (i.e. by the VMThread) and do_marking_step should
2803     skip any synchronization in the termination and overflow code.
2804     Examples include the serial remark code and the serial reference
2805     processing closures.
2806 
2807     The value of is_serial must be false when do_marking_step is
2808     being called by any of the worker threads.
2809     Examples include the concurrent marking code (CMMarkingTask),
2810     the MT remark code, and the MT reference processing closures.
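
         For example, the parallel remark worker tasks invoke it along these
         lines (simplified; the exact arguments live in the remark task
         code):

           task->do_marking_step(1000000000.0,  // practically unbounded
                                 true,          // do_termination
                                 false);        // is_serial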
2811 
2812  *****************************************************************************/
2813 
2814 void G1CMTask::do_marking_step(double time_target_ms,
2815                                bool do_termination,
2816                                bool is_serial) {
2817   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
2818 
2819   _start_cpu_time_ns = os::current_thread_cpu_time();
2820 
2821   // If do_stealing is true then do_marking_step will attempt to
2822   // steal work from the other G1CMTasks. It only makes sense to
2823   // enable stealing when the termination protocol is enabled
2824   // and do_marking_step() is not being called serially.
2825   bool do_stealing = do_termination && !is_serial;
2826 
2827   G1Predictions const& predictor = _g1h->policy()->predictor();
2828   double diff_prediction_ms = predictor.predict_zero_bounded(&_marking_step_diff_ms);
2829   _time_target_ms = time_target_ms - diff_prediction_ms;
2830 
2831   // set up the variables that are used in the work-based scheme to
2832   // call the regular clock method
2833   _words_scanned = 0;
2834   _refs_reached  = 0;
2835   recalculate_limits();
2836 
2837   // clear all flags
2838   clear_has_aborted();
2839   _has_timed_out = false;
2840   _draining_satb_buffers = false;
2841 
2842   ++_calls;
2843 
2844   // Set up the bitmap and oop closures. Anything that uses them is
2845   // eventually called from this method, so it is OK to allocate them
2846   // on the stack in this frame.
2847   G1CMBitMapClosure bitmap_closure(this, _cm);
2848   G1CMOopClosure cm_oop_closure(_g1h, this);
2849   set_cm_oop_closure(&cm_oop_closure);
2850 
2851   if (_cm->has_overflown()) {
2852     // This can happen if the mark stack overflows during a GC pause
2853     // and this task, after a yield point, restarts. We have to abort
2854     // as we need to get into the overflow protocol which happens
2855     // right at the end of this task.
2856     set_has_aborted();
2857   }
2858 
2859   // First drain any available SATB buffers. After this, we will not
2860   // look at SATB buffers before the next invocation of this method.
2861   // If enough completed SATB buffers are queued up, the regular clock
2862   // will abort this task so that it restarts.
2863   drain_satb_buffers();
2864   // ...then partially drain the local queue and the global stack
2865   drain_local_queue(true);
2866   drain_global_stack(true);
2867 
2868   do {
2869     process_current_region(bitmap_closure);
2870     // At this point we have either completed iterating over the
2871     // region we were holding on to, or we have aborted.
2872 
2873     // We then partially drain the local queue and the global stack.
2874     drain_local_queue(true);
2875     drain_global_stack(true);
2876 
2877     claim_new_region();
2878 
2879     assert(has_aborted() || _curr_region != nullptr || _cm->out_of_regions(),
2880            "at this point we should be out of regions");
2881   } while (_curr_region != nullptr && !has_aborted());
2882 
2883   // We cannot check whether the global stack is empty, since other
2884   // tasks might be pushing objects to it concurrently.
2885   assert(has_aborted() || _cm->out_of_regions(),
2886          "at this point we should be out of regions");
2887   // Try to reduce the number of available SATB buffers so that
2888   // remark has less work to do.
2889   drain_satb_buffers();
2890 
2891   // Since we've done everything else, we can now totally drain the
2892   // local queue and global stack.
2893   drain_local_queue(false);
2894   drain_global_stack(false);
2895 
2896   // Attempt work stealing from other tasks' queues.
2897   if (do_stealing && !has_aborted()) {
2898     // We have not aborted. This means that we have finished all that
2899     // we could. Let's try to do some stealing...
2900     attempt_stealing();
2901   }
2902 
2903   // We still haven't aborted. Now, let's try to get into the
2904   // termination protocol.
2905   if (do_termination && !has_aborted()) {
2906     attempt_termination(is_serial);
2907   }
2908 
2909   // Mainly for debugging purposes, to make sure that a pointer to the
2910   // closure which was stack-allocated in this frame doesn't escape it
2911   // by accident.
2912   set_cm_oop_closure(nullptr);
2913   jlong end_cpu_time_ns = os::current_thread_cpu_time();
2914   double elapsed_time_ms = (double)(end_cpu_time_ns - _start_cpu_time_ns) / NANOSECS_PER_MILLISEC;
2915   // Update the step history.
2916   _step_times_ms.add(elapsed_time_ms);
2917 
2918   if (has_aborted()) {
2919     // The task was aborted for some reason.
2920     handle_abort(is_serial, elapsed_time_ms);
2921   }
2922 }
2923 
2924 G1CMTask::G1CMTask(uint worker_id,
2925                    G1ConcurrentMark* cm,
2926                    G1CMTaskQueue* task_queue,
2927                    G1RegionMarkStats* mark_stats) :
2928   _worker_id(worker_id),
2929   _g1h(G1CollectedHeap::heap()),
2930   _cm(cm),
2931   _mark_bitmap(nullptr),
2932   _task_queue(task_queue),
2933   _partial_array_splitter(_cm->partial_array_state_manager(), _cm->max_num_tasks()),
2934   _mark_stats_cache(mark_stats, G1RegionMarkStatsCache::RegionMarkStatsCacheSize),
2935   _calls(0),
2936   _time_target_ms(0.0),
2937   _start_cpu_time_ns(0),
2938   _cm_oop_closure(nullptr),
2939   _curr_region(nullptr),
2940   _finger(nullptr),
2941   _region_limit(nullptr),
2942   _words_scanned(0),
2943   _words_scanned_limit(0),
2944   _real_words_scanned_limit(0),
2945   _refs_reached(0),
2946   _refs_reached_limit(0),
2947   _real_refs_reached_limit(0),
2948   _has_aborted(false),
2949   _has_timed_out(false),
2950   _draining_satb_buffers(false),
2951   _step_times_ms(),
2952   _elapsed_time_ms(0.0),
2953   _termination_time_ms(0.0),
2954   _marking_step_diff_ms()
2955 {
2956   guarantee(task_queue != nullptr, "invariant");
2957 
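       // Seed the sequence used to predict how much a marking step tends to
       // overshoot its time target; do_marking_step() consumes this
       // prediction when computing its effective time target.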
2958   _marking_step_diff_ms.add(0.5);
2959 }
2960 
2961 // These are formatting macros that are used below to ensure
2962 // consistent output. The *_H_* versions are used to format the
2963 // header for a particular value and should be kept consistent with
2964 // the corresponding non-header macro. Also note that most of the
2965 // macros add the necessary white space (as a prefix), which makes
2966 // them a bit easier to compose.
2967 
2968 // All the output lines are prefixed with this string to be able to
2969 // identify them easily in a large log file.
2970 #define G1PPRL_LINE_PREFIX            "###"
2971 
2972 #define G1PPRL_ADDR_BASE_FORMAT    " " PTR_FORMAT "-" PTR_FORMAT
2973 #ifdef _LP64
2974 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
2975 #else // _LP64
2976 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
2977 #endif // _LP64
2978 
2979 // For per-region info
2980 #define G1PPRL_TYPE_FORMAT            "   %-4s"
2981 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
2982 #define G1PPRL_STATE_FORMAT           "   %-5s"
2983 #define G1PPRL_STATE_H_FORMAT         "   %5s"
2984 #define G1PPRL_BYTE_FORMAT            "  %9zu"
2985 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
2986 #define G1PPRL_DOUBLE_FORMAT          "%14.1f"
2987 #define G1PPRL_GCEFF_H_FORMAT         "  %14s"
2988 #define G1PPRL_GID_H_FORMAT           "  %9s"
2989 #define G1PPRL_GID_FORMAT             "  " UINT32_FORMAT_W(9)
2990 #define G1PPRL_LEN_FORMAT             "  " UINT32_FORMAT_W(14)
2991 #define G1PPRL_LEN_H_FORMAT           "  %14s"
2992 #define G1PPRL_GID_GCEFF_FORMAT       "  %14.1f"
2993 #define G1PPRL_GID_LIVENESS_FORMAT    "  %9.2f"
2994 
2995 // For summary info
2996 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  " tag ":" G1PPRL_ADDR_BASE_FORMAT
2997 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  " tag ": %zu"
2998 #define G1PPRL_SUM_MB_FORMAT(tag)      "  " tag ": %1.2f MB"
2999 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"
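     // For example, G1PPRL_SUM_MB_PERC_FORMAT("used") expands, via adjacent
     // string literal concatenation, to "  used: %1.2f MB / %1.2f %%".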
3000 
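     // Typical usage: construct the closure with a phase name and pass it to
     // G1CollectedHeap::heap_region_iterate(); do_heap_region() then prints
     // one line per region and the destructor prints the summary. Output is
     // only produced when gc+liveness logging is enabled at trace level.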
3001 G1PrintRegionLivenessInfoClosure::G1PrintRegionLivenessInfoClosure(const char* phase_name) :
3002   _total_used_bytes(0),
3003   _total_capacity_bytes(0),
3004   _total_live_bytes(0),
3005   _total_remset_bytes(0),
3006   _total_code_roots_bytes(0)
3007 {
3008   if (!log_is_enabled(Trace, gc, liveness)) {
3009     return;
3010   }
3011 
3012   G1CollectedHeap* g1h = G1CollectedHeap::heap();
3013   MemRegion reserved = g1h->reserved();
3014   double now = os::elapsedTime();
3015 
3016   // Print the header of the output.
3017   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
3018   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP"
3019                           G1PPRL_SUM_ADDR_FORMAT("reserved")
3020                           G1PPRL_SUM_BYTE_FORMAT("region-size"),
3021                           p2i(reserved.start()), p2i(reserved.end()),
3022                           G1HeapRegion::GrainBytes);
3023   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3024   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3025                           G1PPRL_TYPE_H_FORMAT
3026                           G1PPRL_ADDR_BASE_H_FORMAT
3027                           G1PPRL_BYTE_H_FORMAT
3028                           G1PPRL_BYTE_H_FORMAT
3029                           G1PPRL_STATE_H_FORMAT
3030                           G1PPRL_BYTE_H_FORMAT
3031                           G1PPRL_GID_H_FORMAT,
3032                           "type", "address-range",
3033                           "used", "live",
3034                           "state", "code-roots",
3035                           "group-id");
3036   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3037                           G1PPRL_TYPE_H_FORMAT
3038                           G1PPRL_ADDR_BASE_H_FORMAT
3039                           G1PPRL_BYTE_H_FORMAT
3040                           G1PPRL_BYTE_H_FORMAT
3041                           G1PPRL_STATE_H_FORMAT
3042                           G1PPRL_BYTE_H_FORMAT
3043                           G1PPRL_GID_H_FORMAT,
3044                           "", "",
3045                           "(bytes)", "(bytes)",
3046                           "", "(bytes)", "");
3047 }
3048 
3049 bool G1PrintRegionLivenessInfoClosure::do_heap_region(G1HeapRegion* r) {
3050   if (!log_is_enabled(Trace, gc, liveness)) {
3051     return false;
3052   }
3053 
3054   const char* type       = r->get_type_str();
3055   HeapWord* bottom       = r->bottom();
3056   HeapWord* end          = r->end();
3057   size_t capacity_bytes  = r->capacity();
3058   size_t used_bytes      = r->used();
3059   size_t live_bytes      = r->live_bytes();
3060   size_t remset_bytes    = r->rem_set()->mem_size();
3061   size_t code_roots_bytes = r->rem_set()->code_roots_mem_size();
3062   const char* remset_type = r->rem_set()->get_short_state_str();
3063   uint cset_group_id     = r->rem_set()->has_cset_group()
3064                          ? r->rem_set()->cset_group_id()
3065                          : G1CSetCandidateGroup::NoRemSetId;
3066 
3067   _total_used_bytes      += used_bytes;
3068   _total_capacity_bytes  += capacity_bytes;
3069   _total_live_bytes      += live_bytes;
3070   _total_remset_bytes    += remset_bytes;
3071   _total_code_roots_bytes += code_roots_bytes;
3072 
3073   // Print a line for this particular region.
3074   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3075                         G1PPRL_TYPE_FORMAT
3076                         G1PPRL_ADDR_BASE_FORMAT
3077                         G1PPRL_BYTE_FORMAT
3078                         G1PPRL_BYTE_FORMAT
3079                         G1PPRL_STATE_FORMAT
3080                         G1PPRL_BYTE_FORMAT
3081                         G1PPRL_GID_FORMAT,
3082                         type, p2i(bottom), p2i(end),
3083                         used_bytes, live_bytes,
3084                         remset_type, code_roots_bytes,
3085                         cset_group_id);
3086 
3087   return false;
3088 }
3089 
3090 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
3091   if (!log_is_enabled(Trace, gc, liveness)) {
3092     return;
3093   }
3094 
3095   G1CollectedHeap* g1h = G1CollectedHeap::heap();
3096   _total_remset_bytes += g1h->card_set_freelist_pool()->mem_size();
3097   // add static memory usages to remembered set sizes
3098   _total_remset_bytes += G1HeapRegionRemSet::static_mem_size();
3099 
3100   log_cset_candidate_groups();
3101 
3102   // Print the footer of the output.
3103   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3104   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3105                          " SUMMARY"
3106                          G1PPRL_SUM_MB_FORMAT("capacity")
3107                          G1PPRL_SUM_MB_PERC_FORMAT("used")
3108                          G1PPRL_SUM_MB_PERC_FORMAT("live")
3109                          G1PPRL_SUM_MB_FORMAT("remset")
3110                          G1PPRL_SUM_MB_FORMAT("code-roots"),
3111                          bytes_to_mb(_total_capacity_bytes),
3112                          bytes_to_mb(_total_used_bytes),
3113                          percent_of(_total_used_bytes, _total_capacity_bytes),
3114                          bytes_to_mb(_total_live_bytes),
3115                          percent_of(_total_live_bytes, _total_capacity_bytes),
3116                          bytes_to_mb(_total_remset_bytes),
3117                          bytes_to_mb(_total_code_roots_bytes));
3118 }
3119 
3120 void G1PrintRegionLivenessInfoClosure::log_cset_candidate_group_add_total(G1CSetCandidateGroup* group, const char* type) {
3121   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3122                           G1PPRL_GID_FORMAT
3123                           G1PPRL_LEN_FORMAT
3124                           G1PPRL_GID_GCEFF_FORMAT
3125                           G1PPRL_GID_LIVENESS_FORMAT
3126                           G1PPRL_BYTE_FORMAT
3127                           G1PPRL_TYPE_H_FORMAT,
3128                           group->group_id(),
3129                           group->length(),
3130                           group->length() > 0 ? group->gc_efficiency() : 0.0,
3131                           group->length() > 0 ? group->liveness_percent() : 0.0,
3132                           group->card_set()->mem_size(),
3133                           type);
3134   _total_remset_bytes += group->card_set()->mem_size();
3135 }
3136 
3137 void G1PrintRegionLivenessInfoClosure::log_cset_candidate_grouplist(G1CSetCandidateGroupList& gl, const char* type) {
3138   for (G1CSetCandidateGroup* group : gl) {
3139     log_cset_candidate_group_add_total(group, type);
3140   }
3141 }
3142 
3143 void G1PrintRegionLivenessInfoClosure::log_cset_candidate_groups() {
3144   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3145   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" Collection Set Candidate Groups");
3146   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX " Types: Y=Young, M=From Marking Regions, R=Retained Regions");
3147   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3148                           G1PPRL_GID_H_FORMAT
3149                           G1PPRL_LEN_H_FORMAT
3150                           G1PPRL_GCEFF_H_FORMAT
3151                           G1PPRL_BYTE_H_FORMAT
3152                           G1PPRL_BYTE_H_FORMAT
3153                           G1PPRL_TYPE_H_FORMAT,
3154                           "group-id", "num-regions",
3155                           "gc-eff", "liveness",
3156                           "remset", "type");
3157 
3158   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3159                           G1PPRL_GID_H_FORMAT
3160                           G1PPRL_LEN_H_FORMAT
3161                           G1PPRL_GCEFF_H_FORMAT
3162                           G1PPRL_BYTE_H_FORMAT
3163                           G1PPRL_BYTE_H_FORMAT
3164                           G1PPRL_TYPE_H_FORMAT,
3165                           "", "",
3166                           "(bytes/ms)", "%",
3167                           "(bytes)", "");
3168 
3169   G1CollectedHeap* g1h = G1CollectedHeap::heap();
3170 
3171   log_cset_candidate_group_add_total(g1h->young_regions_cset_group(), "Y");
3172 
3173   G1CollectionSetCandidates* candidates = g1h->policy()->candidates();
3174   log_cset_candidate_grouplist(candidates->from_marking_groups(), "M");
3175   log_cset_candidate_grouplist(candidates->retained_groups(), "R");
3176 }