1 /*
   2  * Copyright (c) 2001, 2026, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "classfile/classLoaderData.hpp"
  26 #include "classfile/classLoaderDataGraph.hpp"
  27 #include "cppstdlib/new.hpp"
  28 #include "gc/g1/g1BarrierSet.hpp"
  29 #include "gc/g1/g1BatchedTask.hpp"
  30 #include "gc/g1/g1CardSetMemory.hpp"
  31 #include "gc/g1/g1CardTableClaimTable.inline.hpp"
  32 #include "gc/g1/g1CollectedHeap.inline.hpp"
  33 #include "gc/g1/g1CollectorState.inline.hpp"
  34 #include "gc/g1/g1ConcurrentMark.inline.hpp"
  35 #include "gc/g1/g1ConcurrentMarkRemarkTasks.hpp"
  36 #include "gc/g1/g1ConcurrentMarkThread.inline.hpp"
  37 #include "gc/g1/g1ConcurrentRebuildAndScrub.hpp"
  38 #include "gc/g1/g1ConcurrentRefine.hpp"
  39 #include "gc/g1/g1HeapRegion.inline.hpp"
  40 #include "gc/g1/g1HeapRegionManager.hpp"
  41 #include "gc/g1/g1HeapRegionPrinter.hpp"
  42 #include "gc/g1/g1HeapRegionRemSet.inline.hpp"
  43 #include "gc/g1/g1HeapRegionSet.inline.hpp"
  44 #include "gc/g1/g1HeapVerifier.hpp"
  45 #include "gc/g1/g1OopClosures.inline.hpp"
  46 #include "gc/g1/g1Policy.hpp"
  47 #include "gc/g1/g1RegionMarkStatsCache.inline.hpp"
  48 #include "gc/g1/g1ThreadLocalData.hpp"
  49 #include "gc/g1/g1Trace.hpp"
  50 #include "gc/shared/gcId.hpp"
  51 #include "gc/shared/gcTimer.hpp"
  52 #include "gc/shared/gcTraceTime.inline.hpp"
  53 #include "gc/shared/gcVMOperations.hpp"
  54 #include "gc/shared/partialArraySplitter.inline.hpp"
  55 #include "gc/shared/partialArrayState.hpp"
  56 #include "gc/shared/partialArrayTaskStats.hpp"
  57 #include "gc/shared/referencePolicy.hpp"
  58 #include "gc/shared/suspendibleThreadSet.hpp"
  59 #include "gc/shared/taskqueue.inline.hpp"
  60 #include "gc/shared/taskTerminator.hpp"
  61 #include "gc/shared/weakProcessor.inline.hpp"
  62 #include "gc/shared/workerPolicy.hpp"
  63 #include "jvm.h"
  64 #include "logging/log.hpp"
  65 #include "memory/allocation.hpp"
  66 #include "memory/iterator.hpp"
  67 #include "memory/metaspaceUtils.hpp"
  68 #include "memory/resourceArea.hpp"
  69 #include "memory/universe.hpp"
  70 #include "nmt/memTracker.hpp"
  71 #include "oops/access.inline.hpp"
  72 #include "oops/oop.inline.hpp"
  73 #include "runtime/globals_extension.hpp"
  74 #include "runtime/handles.inline.hpp"
  75 #include "runtime/java.hpp"
  76 #include "runtime/orderAccess.hpp"
  77 #include "runtime/os.hpp"
  78 #include "runtime/prefetch.inline.hpp"
  79 #include "runtime/threads.hpp"
  80 #include "utilities/align.hpp"
  81 #include "utilities/checkedCast.hpp"
  82 #include "utilities/formatBuffer.hpp"
  83 #include "utilities/growableArray.hpp"
  84 #include "utilities/powerOfTwo.hpp"
  85 
// Default-construct an unbound closure; it must be bound to a
// G1ConcurrentMark via initialize() before use.
G1CMIsAliveClosure::G1CMIsAliveClosure() : _cm(nullptr) { }
  87 
// Construct a closure immediately bound to the given (non-null)
// G1ConcurrentMark instance.
G1CMIsAliveClosure::G1CMIsAliveClosure(G1ConcurrentMark* cm) : _cm(cm) {
  assert(cm != nullptr, "must be");
}
  91 
// Late-bind this closure to the given concurrent mark instance. May be
// called exactly once, and only on a default-constructed closure.
void G1CMIsAliveClosure::initialize(G1ConcurrentMark* cm) {
  assert(cm != nullptr, "must be");
  assert(_cm == nullptr, "double initialize");
  _cm = cm;
}
  97 
// Process one marked address found during bitmap iteration: advance the
// task's local finger, push/process the object, and partially drain the
// task's queues. Returns false to abort the iteration when the task has
// aborted.
bool G1CMBitMapClosure::do_addr(HeapWord* const addr) {
  assert(addr < _cm->finger(), "invariant");
  assert(addr >= _task->finger(), "invariant");

  // We move that task's local finger along.
  _task->move_finger_to(addr);

  _task->process_entry(G1TaskQueueEntry(cast_to_oop(addr)), false /* stolen */);
  // we only partially drain the local queue and global stack
  _task->drain_local_queue(true);
  _task->drain_global_stack(true);

  // if the has_aborted flag has been raised, we need to bail out of
  // the iteration
  return !_task->has_aborted();
}
 114 
// Construct an empty mark stack. Backing memory is only reserved later in
// initialize().
G1CMMarkStack::G1CMMarkStack() :
  _chunk_allocator() {
  set_empty();
}
 119 
// Required alignment of mark stack capacities, expressed in number of
// G1TaskQueueEntry elements: the least common multiple of the VM allocation
// granularity and the chunk size.
size_t G1CMMarkStack::capacity_alignment() {
  return (size_t)lcm(os::vm_allocation_granularity(), sizeof(TaskQueueEntryChunk)) / sizeof(G1TaskQueueEntry);
}
 123 
// One-time initialization of the global mark stack, sized from the
// MarkStackSize/MarkStackSizeMax flags. The rounded/clamped results are
// written back to the flags ergonomically. Returns false if the initial
// backing memory could not be reserved.
bool G1CMMarkStack::initialize() {
  guarantee(_chunk_allocator.capacity() == 0, "G1CMMarkStack already initialized.");

  size_t initial_capacity = MarkStackSize;
  size_t max_capacity = MarkStackSizeMax;

  // Number of G1TaskQueueEntry elements per chunk.
  size_t const TaskEntryChunkSizeInVoidStar = sizeof(TaskQueueEntryChunk) / sizeof(G1TaskQueueEntry);

  // Convert the entry-based capacities into whole numbers of chunks.
  size_t max_num_chunks = align_up(max_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
  size_t initial_num_chunks = align_up(initial_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;

  // ChunkAllocator::initialize() requires a power-of-2 initial chunk count.
  initial_num_chunks = round_up_power_of_2(initial_num_chunks);
  max_num_chunks = MAX2(initial_num_chunks, max_num_chunks);

  // Clamp the entry capacities so they stay below INT_MAX.
  size_t limit = (INT_MAX - 1);
  max_capacity = MIN2((max_num_chunks * TaskEntryChunkSizeInVoidStar), limit);
  initial_capacity = MIN2((initial_num_chunks * TaskEntryChunkSizeInVoidStar), limit);

  FLAG_SET_ERGO(MarkStackSizeMax, max_capacity);
  FLAG_SET_ERGO(MarkStackSize, initial_capacity);

  log_trace(gc)("MarkStackSize: %uk  MarkStackSizeMax: %uk", (uint)(MarkStackSize / K), (uint)(MarkStackSizeMax / K));

  log_debug(gc)("Initialize mark stack with %zu chunks, maximum %zu",
                initial_num_chunks, max_capacity);

  return _chunk_allocator.initialize(initial_num_chunks, max_num_chunks);
}
 152 
// Allocate a new chunk from backing memory, expanding it under a lock if
// necessary and allowed (_should_grow). Returns nullptr when at maximum
// capacity or when expansion is declined/failed. May be called concurrently
// by multiple workers.
G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::ChunkAllocator::allocate_new_chunk() {
  // Cheap racy check to avoid bumping _size when we are obviously full.
  if (_size.load_relaxed() >= _max_capacity) {
    return nullptr;
  }

  // Claim the next chunk index. Racing threads may push _size past
  // _max_capacity, hence the re-check below.
  size_t cur_idx = _size.fetch_then_add(1u);

  if (cur_idx >= _max_capacity) {
    return nullptr;
  }

  size_t bucket = get_bucket(cur_idx);
  if (_buckets[bucket].load_acquire() == nullptr) {
    if (!_should_grow) {
      // Prefer to restart the CM.
      return nullptr;
    }

    // Re-check under the lock; another thread may have expanded meanwhile.
    MutexLocker x(G1MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
    if (_buckets[bucket].load_acquire() == nullptr) {
      size_t desired_capacity = bucket_size(bucket) * 2;
      if (!try_expand_to(desired_capacity)) {
        return nullptr;
      }
    }
  }

  size_t bucket_idx = get_bucket_index(cur_idx);
  // Placement-new the chunk into its slot within the bucket's backing array.
  TaskQueueEntryChunk* result = ::new (&_buckets[bucket].load_relaxed()[bucket_idx]) TaskQueueEntryChunk;
  result->next = nullptr;
  return result;
}
 185 
// Construct an empty allocator; all real setup happens in initialize().
G1CMMarkStack::ChunkAllocator::ChunkAllocator() :
  _min_capacity(0),
  _max_capacity(0),
  _capacity(0),
  _num_buckets(0),
  _should_grow(false),
  _buckets(nullptr),
  _size(0)
{ }
 195 
// Set up the bucket array and eagerly reserve memory for the first bucket
// only; further buckets are reserved on demand. initial_capacity must be a
// power of 2. Returns false if reserving the first bucket fails.
bool G1CMMarkStack::ChunkAllocator::initialize(size_t initial_capacity, size_t max_capacity) {
  guarantee(is_power_of_2(initial_capacity), "Invalid initial_capacity");

  _min_capacity = initial_capacity;
  _max_capacity = max_capacity;
  _num_buckets  = get_bucket(_max_capacity) + 1;

  _buckets = NEW_C_HEAP_ARRAY(Atomic<TaskQueueEntryChunk*>, _num_buckets, mtGC);

  for (size_t i = 0; i < _num_buckets; i++) {
    _buckets[i].store_relaxed(nullptr);
  }

  size_t new_capacity = bucket_size(0);

  if (!reserve(new_capacity)) {
    log_warning(gc)("Failed to reserve memory for new overflow mark stack with %zu chunks and size %zuB.", new_capacity, new_capacity * sizeof(TaskQueueEntryChunk));
    return false;
  }
  return true;
}
 217 
// Try to grow the reserved capacity towards desired_capacity (clamped to
// _max_capacity). Returns true on success, false when already at maximum or
// when reservation fails.
bool G1CMMarkStack::ChunkAllocator::try_expand_to(size_t desired_capacity) {
  if (_capacity == _max_capacity) {
    log_debug(gc)("Can not expand overflow mark stack further, already at maximum capacity of %zu chunks.", _capacity);
    return false;
  }

  size_t old_capacity = _capacity;
  desired_capacity = MIN2(desired_capacity, _max_capacity);

  if (reserve(desired_capacity)) {
    log_debug(gc)("Expanded the mark stack capacity from %zu to %zu chunks",
                  old_capacity, desired_capacity);
    return true;
  }
  return false;
}
 234 
 235 bool G1CMMarkStack::ChunkAllocator::try_expand() {
 236   size_t new_capacity = _capacity * 2;
 237   return try_expand_to(new_capacity);
 238 }
 239 
 240 G1CMMarkStack::ChunkAllocator::~ChunkAllocator() {
 241   if (_buckets == nullptr) {
 242     return;
 243   }
 244 
 245   for (size_t i = 0; i < _num_buckets; i++) {
 246     if (_buckets[i].load_relaxed() != nullptr) {
 247       MmapArrayAllocator<TaskQueueEntryChunk>::free(_buckets[i].load_relaxed(),  bucket_size(i));
 248       _buckets[i].store_relaxed(nullptr);
 249     }
 250   }
 251 
 252   FREE_C_HEAP_ARRAY(TaskQueueEntryChunk*, _buckets);
 253 }
 254 
// Reserve backing memory so the allocator can hold new_capacity chunks,
// allocating every missing bucket between the current capacity and the new
// one. _capacity is bumped as each bucket is added. Returns false on
// allocation failure; buckets reserved so far remain usable.
bool G1CMMarkStack::ChunkAllocator::reserve(size_t new_capacity) {
  assert(new_capacity <= _max_capacity, "Cannot expand overflow mark stack beyond the max_capacity of %zu chunks.", _max_capacity);

  size_t highest_bucket = get_bucket(new_capacity - 1);
  size_t i = get_bucket(_capacity);

  // Allocate all buckets associated with indexes between the current capacity (_capacity)
  // and the new capacity (new_capacity). This step ensures that there are no gaps in the
  // array and that the capacity accurately reflects the reserved memory.
  for (; i <= highest_bucket; i++) {
    if (_buckets[i].load_acquire() != nullptr) {
      continue; // Skip over already allocated buckets.
    }

    size_t bucket_capacity = bucket_size(i);

    // Trim bucket size so that we do not exceed the _max_capacity.
    bucket_capacity = (_capacity + bucket_capacity) <= _max_capacity ?
                      bucket_capacity :
                      _max_capacity - _capacity;


    TaskQueueEntryChunk* bucket_base = MmapArrayAllocator<TaskQueueEntryChunk>::allocate_or_null(bucket_capacity, mtGC);

    if (bucket_base == nullptr) {
      log_warning(gc)("Failed to reserve memory for increasing the overflow mark stack capacity with %zu chunks and size %zuB.",
                      bucket_capacity, bucket_capacity * sizeof(TaskQueueEntryChunk));
      return false;
    }
    _capacity += bucket_capacity;
    // Publish the bucket; pairs with the load_acquire checks in
    // allocate_new_chunk() and above.
    _buckets[i].release_store(bucket_base);
  }
  return true;
}
 289 
// Best-effort expansion of the mark stack's backing memory; failure is
// ignored here (the allocator logs it).
void G1CMMarkStack::expand() {
  _chunk_allocator.try_expand();
}
 293 
// Push elem onto the front of list. Not synchronized on its own; callers
// serialize through the respective list lock (see the wrappers below).
void G1CMMarkStack::add_chunk_to_list(Atomic<TaskQueueEntryChunk*>* list, TaskQueueEntryChunk* elem) {
  elem->next = list->load_relaxed();
  list->store_relaxed(elem);
}
 298 
// Push a filled chunk onto the chunk list, under G1MarkStackChunkList_lock,
// and track the list length.
void G1CMMarkStack::add_chunk_to_chunk_list(TaskQueueEntryChunk* elem) {
  MutexLocker x(G1MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
  add_chunk_to_list(&_chunk_list, elem);
  _chunks_in_chunk_list++;
}
 304 
// Return an empty chunk to the free list, under G1MarkStackFreeList_lock.
void G1CMMarkStack::add_chunk_to_free_list(TaskQueueEntryChunk* elem) {
  MutexLocker x(G1MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
  add_chunk_to_list(&_free_list, elem);
}
 309 
 310 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_list(Atomic<TaskQueueEntryChunk*>* list) {
 311   TaskQueueEntryChunk* result = list->load_relaxed();
 312   if (result != nullptr) {
 313     list->store_relaxed(list->load_relaxed()->next);
 314   }
 315   return result;
 316 }
 317 
// Pop a filled chunk from the chunk list, under G1MarkStackChunkList_lock,
// keeping the list-length counter in sync. Returns nullptr if empty.
G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_chunk_list() {
  MutexLocker x(G1MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
  TaskQueueEntryChunk* result = remove_chunk_from_list(&_chunk_list);
  if (result != nullptr) {
    _chunks_in_chunk_list--;
  }
  return result;
}
 326 
// Pop a reusable chunk from the free list, under G1MarkStackFreeList_lock.
// Returns nullptr if the free list is empty.
G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_free_list() {
  MutexLocker x(G1MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
  return remove_chunk_from_list(&_free_list);
}
 331 
// Push one chunk's worth (EntriesPerChunk) of entries from ptr_arr onto the
// global mark stack. Returns false if no chunk could be obtained, i.e. the
// stack has overflown.
bool G1CMMarkStack::par_push_chunk(G1TaskQueueEntry* ptr_arr) {
  // Get a new chunk.
  TaskQueueEntryChunk* new_chunk = remove_chunk_from_free_list();

  if (new_chunk == nullptr) {
    // Did not get a chunk from the free list. Allocate from backing memory.
    new_chunk = _chunk_allocator.allocate_new_chunk();

    if (new_chunk == nullptr) {
      return false;
    }
  }

  Copy::conjoint_memory_atomic(ptr_arr, new_chunk->data, EntriesPerChunk * sizeof(G1TaskQueueEntry));

  add_chunk_to_chunk_list(new_chunk);

  return true;
}
 351 
// Pop one chunk's worth (EntriesPerChunk) of entries from the global mark
// stack into ptr_arr, recycling the chunk onto the free list. Returns false
// if the stack is empty.
bool G1CMMarkStack::par_pop_chunk(G1TaskQueueEntry* ptr_arr) {
  TaskQueueEntryChunk* cur = remove_chunk_from_chunk_list();

  if (cur == nullptr) {
    return false;
  }

  Copy::conjoint_memory_atomic(cur->data, ptr_arr, EntriesPerChunk * sizeof(G1TaskQueueEntry));

  add_chunk_to_free_list(cur);
  return true;
}
 364 
// Reset the mark stack to the empty state, dropping all chunk lists and
// resetting the allocator. Not synchronized.
void G1CMMarkStack::set_empty() {
  _chunks_in_chunk_list = 0;
  _chunk_list.store_relaxed(nullptr);
  _free_list.store_relaxed(nullptr);
  _chunk_allocator.reset();
}
 371 
// Construct storage for up to max_regions root MemRegions.
G1CMRootMemRegions::G1CMRootMemRegions(uint const max_regions) :
    _root_regions(MemRegion::create_array(max_regions, mtGC)),
    _max_regions(max_regions),
    _num_regions(0),
    _num_claimed_regions(0) { }
 377 
// Free the backing MemRegion array.
G1CMRootMemRegions::~G1CMRootMemRegions() {
  MemRegion::destroy_array(_root_regions, _max_regions);
}
 381 
// Clear the added/claimed counters for a new cycle. Only called at a
// safepoint during a concurrent start GC.
void G1CMRootMemRegions::reset() {
  assert_at_safepoint();
  assert(G1CollectedHeap::heap()->collector_state()->is_in_concurrent_start_gc(), "must be");

  _num_regions.store_relaxed(0);
  _num_claimed_regions.store_relaxed(0);
}
 389 
// Record [start, end) as a root MemRegion. Only called at a safepoint; the
// slot index is claimed atomically so multiple threads may add in parallel.
void G1CMRootMemRegions::add(HeapWord* start, HeapWord* end) {
  assert_at_safepoint();
  uint idx = _num_regions.fetch_then_add(1u);
  assert(idx < _max_regions, "Trying to add more root MemRegions than there is space %u", _max_regions);
  assert(start != nullptr && end != nullptr && start <= end, "Start (" PTR_FORMAT ") should be less or equal to "
         "end (" PTR_FORMAT ")", p2i(start), p2i(end));
  _root_regions[idx].set_start(start);
  _root_regions[idx].set_end(end);
}
 399 
// Atomically claim the next unclaimed root region for scanning, or return
// nullptr when all regions have been claimed. Note that the claim counter
// may race past the region count; the post-increment check handles that.
const MemRegion* G1CMRootMemRegions::claim_next() {
  uint local_num_regions = num_regions();
  if (num_claimed_regions() >= local_num_regions) {
    return nullptr;
  }

  uint claimed_index = _num_claimed_regions.fetch_then_add(1u);
  if (claimed_index < local_num_regions) {
    return &_root_regions[claimed_index];
  }
  return nullptr;
}
 412 
 413 bool G1CMRootMemRegions::work_completed() const {
 414   return num_remaining_regions() == 0;
 415 }
 416 
 417 uint G1CMRootMemRegions::num_remaining_regions() const {
 418   uint total = num_regions();
 419   uint claimed = num_claimed_regions();
 420   return (total > claimed) ? total - claimed : 0;
 421 }
 422 
 423 bool G1CMRootMemRegions::contains(const MemRegion mr) const {
 424   uint local_num_root_regions = num_regions();
 425   for (uint i = 0; i < local_num_root_regions; i++) {
 426     if (_root_regions[i].equals(mr)) {
 427       return true;
 428     }
 429   }
 430   return false;
 431 }
 432 
// Construct the concurrent mark subsystem. Only state that is safe to set up
// early is created here; thread, worker, mark-stack and per-task creation is
// deferred to fully_initialize().
G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h,
                                   G1RegionToSpaceMapper* bitmap_storage) :
  _cm_thread(nullptr),
  _g1h(g1h),

  _mark_bitmap(),

  _heap(_g1h->reserved()),

  _root_regions(_g1h->max_num_regions()),
  _root_region_scan_aborted(false),

  _global_mark_stack(),

  _finger(nullptr), // _finger set in set_non_marking_state

  _worker_id_offset(G1ConcRefinementThreads), // The refinement control thread does not refine cards, so it's just the worker threads.
  _max_num_tasks(MAX2(ConcGCThreads, ParallelGCThreads)),
  _num_active_tasks(0), // _num_active_tasks set in set_non_marking_state()
  _tasks(nullptr), // _tasks set inside late_init()
  _task_queues(new G1CMTaskQueueSet(_max_num_tasks)),
  _terminator(_max_num_tasks, _task_queues),
  _partial_array_state_manager(new PartialArrayStateManager(_max_num_tasks)),

  _first_overflow_barrier_sync(),
  _second_overflow_barrier_sync(),

  _completed_mark_cycles(0),
  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _gc_timer_cm(new ConcurrentGCTimer()),
  _gc_tracer_cm(new G1OldTracer()),

  // _verbose_level set below

  _remark_times(),
  _remark_mark_times(),
  _remark_weak_ref_times(),
  _cleanup_times(),

  _concurrent_workers(nullptr),
  _num_concurrent_workers(0),
  _max_concurrent_workers(0),

  // Raw allocations; the elements are constructed in fully_initialize().
  _region_mark_stats(NEW_C_HEAP_ARRAY(G1RegionMarkStats, _g1h->max_num_regions(), mtGC)),
  _top_at_mark_starts(NEW_C_HEAP_ARRAY(Atomic<HeapWord*>, _g1h->max_num_regions(), mtGC)),
  _top_at_rebuild_starts(NEW_C_HEAP_ARRAY(Atomic<HeapWord*>, _g1h->max_num_regions(), mtGC)),
  _needs_remembered_set_rebuild(false)
{
  assert(G1CGC_lock != nullptr, "CGC_lock must be initialized");

  _mark_bitmap.initialize(g1h->reserved(), bitmap_storage);
}
 488 
// Complete initialization: create the concurrent mark thread, the worker
// threads, the global mark stack, the per-worker tasks and the per-region
// statistics arrays. Idempotent; returns immediately if already done.
void G1ConcurrentMark::fully_initialize() {
  if (is_fully_initialized()) {
    return;
  }

  // Create & start ConcurrentMark thread.
  _cm_thread = new G1ConcurrentMarkThread(this);
  if (_cm_thread->osthread() == nullptr) {
    vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
  }

  log_debug(gc)("ConcGCThreads: %u offset %u", ConcGCThreads, _worker_id_offset);
  log_debug(gc)("ParallelGCThreads: %u", ParallelGCThreads);

  _max_concurrent_workers = ConcGCThreads;

  _concurrent_workers = new WorkerThreads("G1 Conc", _max_concurrent_workers);
  _concurrent_workers->initialize_workers();
  _num_concurrent_workers = _concurrent_workers->active_workers();

  if (!_global_mark_stack.initialize()) {
    vm_exit_during_initialization("Failed to allocate initial concurrent mark overflow mark stack.");
  }

  _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_num_tasks, mtGC);

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _num_active_tasks = _max_num_tasks;

  for (uint i = 0; i < _max_num_tasks; ++i) {
    G1CMTaskQueue* task_queue = new G1CMTaskQueue();
    _task_queues->register_queue(i, task_queue);

    _tasks[i] = new G1CMTask(i, this, task_queue, _region_mark_stats);
  }

  // Value-initialize the raw arrays allocated in the constructor using
  // placement array-new.
  uint max_num_regions = _g1h->max_num_regions();
  ::new (_region_mark_stats) G1RegionMarkStats[max_num_regions]{};
  ::new (_top_at_mark_starts) Atomic<HeapWord*>[max_num_regions]{};
  ::new (_top_at_rebuild_starts) Atomic<HeapWord*>[max_num_regions]{};

  reset_at_marking_complete();
}
 532 
 533 bool G1ConcurrentMark::is_in_concurrent_cycle() const {
 534   return is_fully_initialized() ? _cm_thread->is_in_progress() : false;
 535 }
 536 
 537 bool G1ConcurrentMark::is_in_marking() const {
 538   return is_fully_initialized() ? cm_thread()->is_in_marking() : false;
 539 }
 540 
// Whether the marking thread is in the remembered-set rebuild/scrub phase.
// Requires full initialization (cm_thread() asserts it).
bool G1ConcurrentMark::is_in_rebuild_or_scrub() const {
  return cm_thread()->is_in_rebuild_or_scrub();
}
 544 
// Whether the marking thread is resetting state for the next cycle.
// Requires full initialization (cm_thread() asserts it).
bool G1ConcurrentMark::is_in_reset_for_next_cycle() const {
  return cm_thread()->is_in_reset_for_next_cycle();
}
 548 
// Accessor for the shared partial-array state manager used by the tasks.
PartialArrayStateManager* G1ConcurrentMark::partial_array_state_manager() const {
  return _partial_array_state_manager;
}
 552 
// Accessor for the concurrent mark thread; only valid after
// fully_initialize() has created it.
G1ConcurrentMarkThread* G1ConcurrentMark::cm_thread() const {
  assert(is_fully_initialized(), "must be");
  return _cm_thread;
}
 557 
// Reset the global marking state for a new marking cycle: clear the abort
// flag, restart-related state, all tasks, and the per-region data.
void G1ConcurrentMark::reset() {
  _has_aborted.store_relaxed(false);

  reset_marking_for_restart();

  // Reset all tasks, since different phases will use different number of active
  // threads. So, it's easiest to have all of them ready.
  for (uint i = 0; i < _max_num_tasks; ++i) {
    _tasks[i]->reset(mark_bitmap());
  }

  // Re-value-initialize the rebuild-start array and clear per-region stats.
  uint max_num_regions = _g1h->max_num_regions();
  ::new (_top_at_rebuild_starts) Atomic<HeapWord*>[max_num_regions]{};
  for (uint i = 0; i < max_num_regions; i++) {
    _region_mark_stats[i].clear();
  }

  _root_region_scan_aborted.store_relaxed(false);
  _root_regions.reset();
}
 578 
// Drop all marking statistics gathered for region r: each task's stats-cache
// entry, the top-at-rebuild-start value, and the region's mark stats.
void G1ConcurrentMark::clear_statistics(G1HeapRegion* r) {
  uint region_idx = r->hrm_index();
  for (uint j = 0; j < _max_num_tasks; ++j) {
    _tasks[j]->clear_mark_stats_cache(region_idx);
  }
  _top_at_rebuild_starts[region_idx].store_relaxed(nullptr);
  _region_mark_stats[region_idx].clear();
}
 587 
// Called when a humongous object starting in region r has been eagerly
// reclaimed at a safepoint: clear its mark bit and, if a mark/rebuild is in
// progress, discard the statistics of all its regions.
void G1ConcurrentMark::humongous_object_eagerly_reclaimed(G1HeapRegion* r) {
  assert_at_safepoint();
  assert(r->is_starts_humongous(), "Got humongous continues region here");

  // Need to clear mark bit of the humongous object. Doing this unconditionally is fine.
  mark_bitmap()->clear(r->bottom());

  if (!_g1h->collector_state()->is_in_mark_or_rebuild()) {
    return;
  }

  // Clear any statistics about the region gathered so far.
  _g1h->humongous_obj_regions_iterate(r,
                                      [&] (G1HeapRegion* r) {
                                        clear_statistics(r);
                                      });
}
 605 
// Reset marking state so marking can be restarted (e.g. after a mark stack
// overflow): empty the global stack, optionally expand it, reset the global
// finger and all tasks.
void G1ConcurrentMark::reset_marking_for_restart() {
  _global_mark_stack.set_empty();

  // Expand the marking stack, if we have to and if we can.
  if (has_overflown()) {
    _global_mark_stack.expand();

    // Overflow invalidates the gathered per-region mark stats.
    uint max_num_regions = _g1h->max_num_regions();
    for (uint i = 0; i < max_num_regions; i++) {
      _region_mark_stats[i].clear_during_overflow();
    }
  }

  clear_has_overflown();
  // Restart the global finger at the bottom of the heap.
  _finger.store_relaxed(_heap.start());

  for (uint i = 0; i < _max_num_tasks; ++i) {
    _tasks[i]->reset_for_restart();
  }
}
 626 
// Set the number of tasks active in the upcoming phase and size the
// terminator and the two overflow barriers accordingly.
void G1ConcurrentMark::set_concurrency(uint active_tasks) {
  assert(active_tasks <= _max_num_tasks, "we should not have more");

  _num_active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator.reset_for_reuse(active_tasks);
  _first_overflow_barrier_sync.set_n_workers(active_tasks);
  _second_overflow_barrier_sync.set_n_workers(active_tasks);
}
 637 
// Set the task count and whether the next phase runs concurrently. A
// non-concurrent phase must be a safepoint with marking complete (all
// regions below the global finger processed).
void G1ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
  set_concurrency(active_tasks);

  _concurrent.store_relaxed(concurrent);

  if (!concurrent) {
    // At this point we should be in a STW phase, and completed marking.
    assert_at_safepoint_on_vm_thread();
    assert(out_of_regions(),
           "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
           p2i(finger()), p2i(_heap.end()));
  }
}
 651 
#if TASKQUEUE_STATS
// Log the task queue and partial-array statistics gathered by all tasks,
// then reset them. Only compiled in TASKQUEUE_STATS builds.
void G1ConcurrentMark::print_and_reset_taskqueue_stats() {

  _task_queues->print_and_reset_taskqueue_stats("Concurrent Mark");

  auto get_pa_stats = [&](uint i) {
    return _tasks[i]->partial_array_task_stats();
  };

  PartialArrayTaskStats::log_set(_max_num_tasks, get_pa_stats,
                                 "Concurrent Mark Partial Array");

  for (uint i = 0; i < _max_num_tasks; ++i) {
    get_pa_stats(i)->reset();
  }
}
#endif
 669 
// Return the global marking state to its idle defaults once marking has
// finished (or been aborted).
void G1ConcurrentMark::reset_at_marking_complete() {
  TASKQUEUE_STATS_ONLY(print_and_reset_taskqueue_stats());
  // We set the global marking state to some default values when we're
  // not doing marking.
  reset_marking_for_restart();
  _num_active_tasks = 0;
}
 677 
// Frees the per-region arrays for completeness, but a G1ConcurrentMark
// instance is never actually destroyed at runtime — hence the
// ShouldNotReachHere() guard.
G1ConcurrentMark::~G1ConcurrentMark() {
  FREE_C_HEAP_ARRAY(Atomic<HeapWord*>, _top_at_mark_starts);
  FREE_C_HEAP_ARRAY(Atomic<HeapWord*>, _top_at_rebuild_starts);
  FREE_C_HEAP_ARRAY(G1RegionMarkStats, _region_mark_stats);
  // The G1ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}
 685 
// Worker task that clears the concurrent marking bitmap in parallel, region
// by region, in chunks of chunk_size() bytes so that suspendible workers can
// yield between chunks.
class G1ClearBitMapTask : public WorkerTask {
public:
  static size_t chunk_size() { return M; }

private:
  // Heap region closure used for clearing the _mark_bitmap.
  class G1ClearBitmapHRClosure : public G1HeapRegionClosure {
    G1ConcurrentMark* _cm;
    G1CMBitMap* _bitmap;
    bool _suspendible; // If suspendible, do yield checks.

    bool suspendible() {
      return _suspendible;
    }

    // Suspendible clearing during an undo cycle may use existing marking
    // data to limit how much of the bitmap actually needs clearing.
    bool is_clear_concurrent_undo() {
      return suspendible() && _cm->cm_thread()->is_in_undo_cycle();
    }

    // Yield (when suspendible) and report whether marking has been aborted.
    bool has_aborted() {
      if (suspendible()) {
        _cm->do_yield_check();
        return _cm->has_aborted();
      }
      return false;
    }

    HeapWord* region_clear_limit(G1HeapRegion* r) {
      // During a Concurrent Undo Mark cycle, the per region top_at_mark_start and
      // live_words data are current wrt to the _mark_bitmap. We use this information
      // to only clear ranges of the bitmap that require clearing.
      if (is_clear_concurrent_undo()) {
        // No need to clear bitmaps for empty regions (which includes regions we
        // did not mark through).
        if (!_cm->contains_live_object(r->hrm_index())) {
          assert(_bitmap->get_next_marked_addr(r->bottom(), r->end()) == r->end(), "Should not have marked bits");
          return r->bottom();
        }
        assert(_bitmap->get_next_marked_addr(_cm->top_at_mark_start(r), r->end()) == r->end(), "Should not have marked bits above tams");
      }
      return r->end();
    }

  public:
    G1ClearBitmapHRClosure(G1ConcurrentMark* cm, bool suspendible) :
      G1HeapRegionClosure(),
      _cm(cm),
      _bitmap(cm->mark_bitmap()),
      _suspendible(suspendible)
    { }

    // Clear the bitmap range of region r in chunk_size() steps, checking for
    // abort between chunks. Returns true to abort the region iteration.
    virtual bool do_heap_region(G1HeapRegion* r) {
      if (has_aborted()) {
        return true;
      }

      HeapWord* cur = r->bottom();
      HeapWord* const end = region_clear_limit(r);

      size_t const chunk_size_in_words = G1ClearBitMapTask::chunk_size() / HeapWordSize;

      while (cur < end) {

        MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
        _bitmap->clear_range(mr);

        cur += chunk_size_in_words;

        // Repeat the asserts from before the start of the closure. We will do them
        // as asserts here to minimize their overhead on the product. However, we
        // will have them as guarantees at the beginning / end of the bitmap
        // clearing to get some checking in the product.
        assert(!suspendible() || _cm->is_in_reset_for_next_cycle(), "invariant");

        // Abort iteration if necessary.
        if (has_aborted()) {
          return true;
        }
      }
      assert(cur >= end, "Must have completed iteration over the bitmap for region %u.", r->hrm_index());

      _cm->reset_top_at_mark_start(r);

      return false;
    }
  };

  G1ClearBitmapHRClosure _cl;
  G1HeapRegionClaimer _hr_claimer;
  bool _suspendible; // If the task is suspendible, workers must join the STS.

public:
  G1ClearBitMapTask(G1ConcurrentMark* cm, uint n_workers, bool suspendible) :
    WorkerTask("G1 Clear Bitmap"),
    _cl(cm, suspendible),
    _hr_claimer(n_workers),
    _suspendible(suspendible)
  { }

  void work(uint worker_id) {
    SuspendibleThreadSetJoiner sts_join(_suspendible);
    G1CollectedHeap::heap()->heap_region_par_iterate_from_worker_offset(&_cl, &_hr_claimer, worker_id);
  }
};
 790 
// Clear the mark bitmap using the given workers. When may_yield is true the
// clearing is suspendible (and may be aborted); otherwise it must run at a
// safepoint. The worker count is scaled by the amount of bitmap to clear.
void G1ConcurrentMark::clear_bitmap(WorkerThreads* workers, bool may_yield) {
  assert(may_yield || SafepointSynchronize::is_at_safepoint(), "Non-yielding bitmap clear only allowed at safepoint.");

  // Size of the bitmap covering the currently committed regions.
  size_t const num_bytes_to_clear = (G1HeapRegion::GrainBytes * _g1h->num_committed_regions()) / G1CMBitMap::heap_map_factor();
  size_t const num_chunks = align_up(num_bytes_to_clear, G1ClearBitMapTask::chunk_size()) / G1ClearBitMapTask::chunk_size();

  // No point in more workers than work units.
  uint const num_workers = (uint)MIN2(num_chunks, (size_t)workers->active_workers());

  G1ClearBitMapTask cl(this, num_workers, may_yield);

  log_debug(gc, ergo)("Running %s with %u workers for %zu work units.", cl.name(), num_workers, num_chunks);
  workers->run_task(&cl, num_workers);
}
 804 
// Concurrently clear the bitmap and reset partial-array state in preparation
// for the next marking cycle. Must run while the marking thread is in the
// reset-for-next-cycle state, before and after.
void G1ConcurrentMark::cleanup_for_next_mark() {
  // Make sure that the concurrent mark thread looks to still be in
  // the current cycle.
  guarantee(is_in_reset_for_next_cycle(), "invariant");

  clear_bitmap(_concurrent_workers, true);

  reset_partial_array_state_manager();

  // Should not have changed state yet (even if a Full GC interrupted us).
  guarantee(is_in_reset_for_next_cycle(), "invariant");
}
 817 
// Reset the shared partial-array state manager. All task splitters must be
// unregistered before the reset and re-registered afterwards.
void G1ConcurrentMark::reset_partial_array_state_manager() {
  for (uint i = 0; i < _max_num_tasks; ++i) {
    _tasks[i]->unregister_partial_array_splitter();
  }

  partial_array_state_manager()->reset();

  for (uint i = 0; i < _max_num_tasks; ++i) {
    _tasks[i]->register_partial_array_splitter();
  }
}
 829 
// Non-yielding bitmap clear for use at a safepoint (e.g. by a full GC),
// temporarily raising the active worker count to all created workers.
void G1ConcurrentMark::clear_bitmap(WorkerThreads* workers) {
  assert_at_safepoint_on_vm_thread();
  // To avoid fragmentation the full collection requesting to clear the bitmap
  // might use fewer workers than available. To ensure the bitmap is cleared
  // as efficiently as possible the number of active workers are temporarily
  // increased to include all currently created workers.
  WithActiveWorkers update(workers, workers->created_workers());
  clear_bitmap(workers, false);
}
 839 
// Batched task executed at the concurrent start pause: a serial sub-task
// resetting the marking state, and a parallel sub-task recording the start of
// marking (TAMS) for every region.
class G1PreConcurrentStartTask : public G1BatchedTask {
  // Reset marking state.
  class ResetMarkingStateTask;
  // For each region note start of marking.
  class NoteStartOfMarkTask;

public:
  G1PreConcurrentStartTask(GCCause::Cause cause, G1ConcurrentMark* cm);
};
 849 
// Serial sub-task that resets the global marking state; see
// G1ConcurrentMark::reset() called from do_work().
class G1PreConcurrentStartTask::ResetMarkingStateTask : public G1AbstractSubTask {
  G1ConcurrentMark* _cm;
public:
  ResetMarkingStateTask(G1ConcurrentMark* cm) : G1AbstractSubTask(G1GCPhaseTimes::ResetMarkingState), _cm(cm) { }

  // Serial work: always exactly one worker's worth.
  double worker_cost() const override { return 1.0; }
  void do_work(uint worker_id) override;
};
 858 
// Parallel sub-task recording the top-at-mark-start (TAMS) for each region at
// the start of marking.
class G1PreConcurrentStartTask::NoteStartOfMarkTask : public G1AbstractSubTask {

  class NoteStartOfMarkHRClosure : public G1HeapRegionClosure {
    G1ConcurrentMark* _cm;

  public:
    NoteStartOfMarkHRClosure() : G1HeapRegionClosure(), _cm(G1CollectedHeap::heap()->concurrent_mark()) { }

    bool do_heap_region(G1HeapRegion* r) override {
      // Old/humongous regions that are neither collection set candidates nor
      // in the collection set get an updated TAMS; all other regions have
      // their TAMS reset.
      if (r->is_old_or_humongous() && !r->is_collection_set_candidate() && !r->in_collection_set()) {
        _cm->update_top_at_mark_start(r);
      } else {
        _cm->reset_top_at_mark_start(r);
      }
      // Returning false continues the iteration over all regions.
      return false;
    }
  } _region_cl;

  G1HeapRegionClaimer _claimer;
public:
  NoteStartOfMarkTask() : G1AbstractSubTask(G1GCPhaseTimes::NoteStartOfMark), _region_cl(), _claimer(0) { }

  double worker_cost() const override {
    // The work done per region is very small, therefore we choose this magic number to cap the number
    // of threads used when there are few regions.
    const double regions_per_thread = 1000;
    return _claimer.n_regions() / regions_per_thread;
  }

  void set_max_workers(uint max_workers) override {
    _claimer.set_n_workers(max_workers);
  }

  void do_work(uint worker_id) override {
    G1CollectedHeap::heap()->heap_region_par_iterate_from_worker_offset(&_region_cl, &_claimer, worker_id);
  }
};
 896 
// Executed by a single worker (serial sub-task).
void G1PreConcurrentStartTask::ResetMarkingStateTask::do_work(uint worker_id) {
  // Reset marking state.
  _cm->reset();
}
 901 
 902 G1PreConcurrentStartTask::G1PreConcurrentStartTask(GCCause::Cause cause, G1ConcurrentMark* cm) :
 903   G1BatchedTask("Pre Concurrent Start", G1CollectedHeap::heap()->phase_times()) {
 904   add_serial_task(new ResetMarkingStateTask(cm));
 905   add_parallel_task(new NoteStartOfMarkTask());
 906 };
 907 
// Set up marking state during the concurrent start pause: start the code
// cache marking cycle, check that CLD claim marks are clear, and run the
// batched pre-concurrent-start task.
void G1ConcurrentMark::pre_concurrent_start(GCCause::Cause cause) {
  assert_at_safepoint_on_vm_thread();

  G1CollectedHeap::start_codecache_marking_cycle_if_inactive(true /* concurrent_mark_start */);

  ClassLoaderDataGraph::verify_claimed_marks_cleared(ClassLoaderData::_claim_strong);

  G1PreConcurrentStartTask cl(cause, this);
  G1CollectedHeap::heap()->run_batch_task(&cl);

  // Record the cause that triggered this marking cycle for tracing.
  _gc_tracer_cm->set_gc_cause(cause);
}
 920 
// Kick off a full concurrent marking cycle: start concurrent weak-reference
// discovery, activate the SATB queues of all threads, and signal the
// concurrent mark thread to begin work.
void G1ConcurrentMark::start_full_concurrent_cycle() {
  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = _g1h->ref_processor_cm();
  rp->start_discovery(false /* always_clear */);

  SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
  // This is the start of the marking cycle; we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // concurrent start pause to update the heap end, if the heap expands
  // during it. No need to call it here.

  // Signal the thread to start work.
  cm_thread()->start_full_cycle();
}
 940 
// Undo a concurrent start that will not be followed by actual marking:
// abort any pending root region scan and signal the concurrent mark thread
// to run the (cheap) undo cycle instead of a full cycle.
void G1ConcurrentMark::start_undo_concurrent_cycle() {
  assert_at_safepoint_on_vm_thread();
  // At this time this GC is not a concurrent start gc any more, can only check for young only gc/phase.
  assert(_g1h->collector_state()->is_in_young_only_phase(), "must be");

  abort_root_region_scan_at_safepoint();

  // Signal the thread to start work.
  cm_thread()->start_undo_cycle();
}
 951 
// Called when a concurrent cycle has finished; lets the concurrent mark
// thread go idle again.
void G1ConcurrentMark::notify_concurrent_cycle_completed() {
  cm_thread()->set_idle();
}
 955 
// Terminate the concurrent mark thread (VM shutdown). Only attempted when
// initialization fully completed, i.e. the thread actually exists.
void G1ConcurrentMark::stop() {
  if (is_fully_initialized()) {
    cm_thread()->stop();
  }
}
 961 
 962 /*
 963  * Notice that in the next two methods, we actually leave the STS
 964  * during the barrier sync and join it immediately afterwards. If we
 965  * do not do this, the following deadlock can occur: one thread could
 966  * be in the barrier sync code, waiting for the other thread to also
 967  * sync up, whereas another one could be trying to yield, while also
 968  * waiting for the other threads to sync up too.
 969  *
 970  * Note, however, that this code is also used during remark and in
 971  * this case we should not attempt to leave / enter the STS, otherwise
 972  * we'll either hit an assert (debug / fastdebug) or deadlock
 973  * (product). So we should only leave / enter the STS if we are
 974  * operating concurrently.
 975  *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible for it to be suspended while a Full GC or an evacuation
 * pause occurs. This is actually safe, since entering the sync
 * barrier is one of the last things do_marking_step() does, and it
 * doesn't manipulate any data structures afterwards.
 981  */
 982 
 983 void G1ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
 984   bool barrier_aborted;
 985   {
 986     SuspendibleThreadSetLeaver sts_leave(concurrent());
 987     barrier_aborted = !_first_overflow_barrier_sync.enter();
 988   }
 989 
 990   // at this point everyone should have synced up and not be doing any
 991   // more work
 992 
 993   if (barrier_aborted) {
 994     // If the barrier aborted we ignore the overflow condition and
 995     // just abort the whole marking phase as quickly as possible.
 996     return;
 997   }
 998 }
 999 
// Wait on the second overflow barrier. See the comment block above about
// leaving the STS while blocked on the barrier (only done when operating
// concurrently).
void G1ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
  SuspendibleThreadSetLeaver sts_leave(concurrent());
  _second_overflow_barrier_sync.enter();

  // at this point everything should be re-initialized and ready to go
}
1006 
1007 class G1CMConcurrentMarkingTask : public WorkerTask {
1008   G1ConcurrentMark*     _cm;
1009 
1010 public:
1011   void work(uint worker_id) {
1012     ResourceMark rm;
1013 
1014     SuspendibleThreadSetJoiner sts_join;
1015 
1016     assert(worker_id < _cm->active_tasks(), "invariant");
1017 
1018     G1CMTask* task = _cm->task(worker_id);
1019     task->record_start_time();
1020     if (!_cm->has_aborted()) {
1021       do {
1022         task->do_marking_step(G1ConcMarkStepDurationMillis,
1023                               true  /* do_termination */,
1024                               false /* is_serial*/);
1025 
1026         _cm->do_yield_check();
1027       } while (!_cm->has_aborted() && task->has_aborted());
1028     }
1029     task->record_end_time();
1030     guarantee(!task->has_aborted() || _cm->has_aborted(), "invariant");
1031   }
1032 
1033   G1CMConcurrentMarkingTask(G1ConcurrentMark* cm) :
1034       WorkerTask("Concurrent Mark"), _cm(cm) { }
1035 
1036   ~G1CMConcurrentMarkingTask() { }
1037 };
1038 
1039 uint G1ConcurrentMark::calc_active_marking_workers() {
1040   uint result = 0;
1041   if (!UseDynamicNumberOfGCThreads || !FLAG_IS_DEFAULT(ConcGCThreads)) {
1042     result = _max_concurrent_workers;
1043   } else {
1044     result =
1045       WorkerPolicy::calc_default_active_workers(_max_concurrent_workers,
1046                                                 1, /* Minimum workers */
1047                                                 _num_concurrent_workers,
1048                                                 Threads::number_of_non_daemon_threads());
1049     // Don't scale the result down by scale_concurrent_workers() because
1050     // that scaling has already gone into "_max_concurrent_workers".
1051   }
1052   assert(result > 0 && result <= _max_concurrent_workers,
1053          "Calculated number of marking workers must be larger than zero and at most the maximum %u, but is %u",
1054          _max_concurrent_workers, result);
1055   return result;
1056 }
1057 
// Returns whether the root region scan has been aborted. Relaxed load;
// callers tolerate racy reads of this flag (see scan_root_regions()).
bool G1ConcurrentMark::has_root_region_scan_aborted() const {
  return _root_region_scan_aborted.load_relaxed();
}
1061 
#ifndef PRODUCT
// Debug-only check that root region scanning either ran to completion or was
// explicitly aborted.
void G1ConcurrentMark::assert_root_region_scan_completed_or_aborted() {
  assert(root_regions()->work_completed() || has_root_region_scan_aborted(), "must be");
}
#endif
1067 
1068 void G1ConcurrentMark::scan_root_region(const MemRegion* region, uint worker_id) {
1069 #ifdef ASSERT
1070   HeapWord* last = region->last();
1071   G1HeapRegion* hr = _g1h->heap_region_containing(last);
1072   assert(hr->is_old() || top_at_mark_start(hr) == hr->bottom(),
1073          "Root regions must be old or survivor/eden but region %u is %s", hr->hrm_index(), hr->get_type_str());
1074   assert(top_at_mark_start(hr) == region->start(),
1075          "MemRegion start should be equal to TAMS");
1076 #endif
1077 
1078   G1RootRegionScanClosure cl(_g1h, this, worker_id);
1079 
1080   const uintx interval = PrefetchScanIntervalInBytes;
1081   HeapWord* curr = region->start();
1082   const HeapWord* end = region->end();
1083   while (curr < end) {
1084     Prefetch::read(curr, interval);
1085     oop obj = cast_to_oop(curr);
1086     size_t size = obj->oop_iterate_size(&cl);
1087     assert(size == obj->size(), "sanity");
1088     curr += size;
1089   }
1090 }
1091 
1092 class G1CMRootRegionScanTask : public WorkerTask {
1093   G1ConcurrentMark* _cm;
1094   bool _should_yield;
1095 
1096 public:
1097   G1CMRootRegionScanTask(G1ConcurrentMark* cm, bool should_yield) :
1098     WorkerTask("G1 Root Region Scan"), _cm(cm), _should_yield(should_yield) { }
1099 
1100   void work(uint worker_id) {
1101     SuspendibleThreadSetJoiner sts_join(_should_yield);
1102 
1103     while (true) {
1104       if (_cm->has_root_region_scan_aborted()) {
1105         return;
1106       }
1107       G1CMRootMemRegions* root_regions = _cm->root_regions();
1108       const MemRegion* region = root_regions->claim_next();
1109       if (region == nullptr) {
1110         return;
1111       }
1112       _cm->scan_root_region(region, worker_id);
1113       if (_should_yield) {
1114         SuspendibleThreadSet::yield();
1115         // If we yielded, a GC may have processed all root regions,
1116         // so this loop will naturally exit on the next claim_next() call.
1117         // Same if a Full GC signalled abort of the concurrent mark.
1118       }
1119     }
1120   }
1121 };
1122 
// Scan the root regions with the given workers unless the work has already
// been completed or aborted. Returns whether any scanning was started.
bool G1ConcurrentMark::scan_root_regions(WorkerThreads* workers, bool concurrent) {
  // We first check whether there is any work to do as we might have already aborted
  // the concurrent cycle, or ran into a GC that did the actual work when we reach here.
  // We want to avoid spinning up the worker threads if that happened.
  // (Note that due to races reading the abort-flag, we might spin up the threads anyway).
  //
  // Abort happens if a Full GC occurs right after starting the concurrent cycle or
  // a young gc doing the work.
  //
  // Concurrent gc threads enter an STS when starting the task, so they stop, then
  // continue after that safepoint.
  bool do_scan = !root_regions()->work_completed() && !has_root_region_scan_aborted();
  if (do_scan) {
    // Assign one worker to each root-region but subject to the max constraint.
    // The constraint is also important to avoid accesses beyond the allocated per-worker
    // marking helper data structures. We might get passed different WorkerThreads with
    // different number of threads (potential worker ids) than helper data structures when
    // completing this work during GC.
    const uint num_workers = MIN2(root_regions()->num_remaining_regions(),
                                  _max_concurrent_workers);
    assert(num_workers > 0, "no more remaining root regions to process");

    G1CMRootRegionScanTask task(this, concurrent);
    log_debug(gc, ergo)("Running %s using %u workers for %u work units.",
                        task.name(), num_workers, root_regions()->num_remaining_regions());
    workers->run_task(&task, num_workers);
  }

  assert_root_region_scan_completed_or_aborted();

  return do_scan;
}
1155 
// Concurrent-phase entry point for root region scanning, run on the
// concurrent mark thread with yielding enabled.
void G1ConcurrentMark::scan_root_regions_concurrently() {
  assert(Thread::current() == cm_thread(), "must be on Concurrent Mark Thread");
  scan_root_regions(_concurrent_workers, true /* concurrent */);
}
1160 
// Finish any remaining root region scan work inside a safepoint
// (non-yielding), using the GC workers. Returns whether scanning was started.
bool G1ConcurrentMark::complete_root_regions_scan_in_safepoint() {
  assert_at_safepoint_on_vm_thread();
  return scan_root_regions(_g1h->workers(), false /* concurrent */);
}
1165 
// Register the area [TAMS, top) of the given region as a root region to scan.
void G1ConcurrentMark::add_root_region(G1HeapRegion* r) {
  root_regions()->add(top_at_mark_start(r), r->top());
}
1169 
// Returns whether [TAMS, top) of the given region is registered as a root
// region.
bool G1ConcurrentMark::is_root_region(G1HeapRegion* r) {
  return root_regions()->contains(MemRegion(top_at_mark_start(r), r->top()));
}
1173 
// Request abort of root region scanning from outside a safepoint.
void G1ConcurrentMark::abort_root_region_scan() {
  assert_not_at_safepoint();

  _root_region_scan_aborted.store_relaxed(true);
}
1179 
// Request abort of root region scanning from within a safepoint (VM thread).
void G1ConcurrentMark::abort_root_region_scan_at_safepoint() {
  assert_at_safepoint_on_vm_thread();

  _root_region_scan_aborted.store_relaxed(true);
}
1185 
// Record the start of a concurrent marking cycle for timing and tracing
// purposes.
void G1ConcurrentMark::concurrent_cycle_start() {
  _gc_timer_cm->register_gc_start();

  _gc_tracer_cm->report_gc_start(GCCause::_no_gc /* first parameter is not used */, _gc_timer_cm->gc_start());

  _g1h->trace_heap_before_gc(_gc_tracer_cm);
}
1193 
// Number of fully completed mark cycles so far (relaxed read of the counter).
uint G1ConcurrentMark::completed_mark_cycles() const {
  return _completed_mark_cycles.load_relaxed();
}
1197 
// Record the end of a concurrent cycle: report the heap state, bump the
// completed-cycle counter when marking actually finished, and report a
// concurrent mode failure when it was aborted.
void G1ConcurrentMark::concurrent_cycle_end(bool mark_cycle_completed) {
  _g1h->trace_heap_after_gc(_gc_tracer_cm);

  if (mark_cycle_completed) {
    _completed_mark_cycles.add_then_fetch(1u, memory_order_relaxed);
  }

  if (has_aborted()) {
    log_info(gc, marking)("Concurrent Mark Abort");
    _gc_tracer_cm->report_concurrent_mode_failure();
  }

  _gc_timer_cm->register_gc_end();

  _gc_tracer_cm->report_gc_end(_gc_timer_cm->gc_end(), _gc_timer_cm->time_partitions());
}
1214 
// Run the concurrent marking phase proper: compute the number of marking
// workers, set up the concurrency state, and execute the marking task on the
// concurrent workers.
void G1ConcurrentMark::mark_from_roots() {
  _restart_for_overflow.store_relaxed(false);

  uint active_workers = calc_active_marking_workers();

  // Setting active workers is not guaranteed since fewer
  // worker threads may currently exist and more may not be
  // available.
  active_workers = _concurrent_workers->set_active_workers(active_workers);
  log_info(gc, task)("Concurrent Mark Using %u of %u Workers", active_workers, _concurrent_workers->max_workers());

  _num_concurrent_workers = active_workers;

  // Parallel task terminator is set in "set_concurrency_and_phase()"
  set_concurrency_and_phase(active_workers, true /* concurrent */);

  G1CMConcurrentMarkingTask marking_task(this);
  _concurrent_workers->run_task(&marking_task);
  print_stats();
}
1235 
// Map a VerifyLocation to a human-readable name for logging.
// NOTE: the array entries must stay in the same order as the VerifyLocation
// enumerators, since the enum's underlying value is used as the index.
const char* G1ConcurrentMark::verify_location_string(VerifyLocation location) {
  static const char* location_strings[] = { "Remark Before",
                                            "Remark After",
                                            "Remark Overflow",
                                            "Cleanup Before",
                                            "Cleanup After" };
  return location_strings[static_cast<std::underlying_type_t<VerifyLocation>>(location)];
}
1244 
// Optionally verify the heap during a marking-related pause (Remark/Cleanup).
// Region set verification is always attempted; full heap and bitmap
// verification only happen when VerifyDuringGC requests the given type.
void G1ConcurrentMark::verify_during_pause(G1HeapVerifier::G1VerifyType type,
                                           VerifyLocation location) {
  G1HeapVerifier* verifier = _g1h->verifier();

  verifier->verify_region_sets_optional();

  const char* caller = verify_location_string(location);

  if (VerifyDuringGC && G1HeapVerifier::should_verify(type)) {
    GCTraceTime(Debug, gc, phases) debug(caller, _gc_timer_cm);

    size_t const BufLen = 512;
    char buffer[BufLen];

    jio_snprintf(buffer, BufLen, "During GC (%s)", caller);
    verifier->verify(VerifyOption::G1UseConcMarking, buffer);

    // Only check bitmap in Remark, and not at After-Verification because the regions
    // already have their TAMS'es reset.
    if (location != VerifyLocation::RemarkAfter) {
      verifier->verify_bitmap_clear(true /* above_tams_only */);
    }
  }
}
1269 
1270 class G1ObjectCountIsAliveClosure: public BoolObjectClosure {
1271   G1CollectedHeap* _g1h;
1272 public:
1273   G1ObjectCountIsAliveClosure(G1CollectedHeap* g1h) : _g1h(g1h) {}
1274 
1275   bool do_object_b(oop obj) {
1276     return !_g1h->is_obj_dead(obj);
1277   }
1278 };
1279 
1280 void G1ConcurrentMark::remark() {
1281   assert_at_safepoint_on_vm_thread();
1282 
1283   // If a full collection has happened, we should not continue. However we might
1284   // have ended up here as the Remark VM operation has been scheduled already.
1285   if (has_aborted()) {
1286     return;
1287   }
1288 
1289   G1Policy* policy = _g1h->policy();
1290   policy->record_pause_start_time();
1291 
1292   double start = os::elapsedTime();
1293 
1294   verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyLocation::RemarkBefore);
1295 
1296   {
1297     GCTraceTime(Debug, gc, phases) debug("Finalize Marking", _gc_timer_cm);
1298     finalize_marking();
1299   }
1300 
1301   double mark_work_end = os::elapsedTime();
1302 
1303   bool const mark_finished = !has_overflown();
1304   if (mark_finished) {
1305     weak_refs_work();
1306 
1307     // Unload Klasses, String, Code Cache, etc.
1308     if (ClassUnloadingWithConcurrentMark) {
1309       G1CMIsAliveClosure is_alive(this);
1310       _g1h->unload_classes_and_code("Class Unloading", &is_alive, _gc_timer_cm);
1311     }
1312 
1313     SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
1314     // We're done with marking.
1315     // This is the end of the marking cycle, we're expected all
1316     // threads to have SATB queues with active set to true.
1317     satb_mq_set.set_active_all_threads(false, /* new active value */
1318                                        true /* expected_active */);
1319 
1320     {
1321       GCTraceTime(Debug, gc, phases) debug("Flush Task Caches", _gc_timer_cm);
1322       flush_all_task_caches();
1323     }
1324 
1325     // All marking completed. Check bitmap now as we will start to reset TAMSes
1326     // in parallel below so that we can not do this in the After-Remark verification.
1327     _g1h->verifier()->verify_bitmap_clear(true /* above_tams_only */);
1328 
1329     {
1330       GCTraceTime(Debug, gc, phases) debug("Select For Rebuild and Reclaim Empty Regions", _gc_timer_cm);
1331 
1332       G1UpdateRegionLivenessAndSelectForRebuildTask cl(_g1h, this, _g1h->workers()->active_workers());
1333       uint const num_workers = MIN2(G1UpdateRegionLivenessAndSelectForRebuildTask::desired_num_workers(_g1h->num_committed_regions()),
1334                                     _g1h->workers()->active_workers());
1335       log_debug(gc,ergo)("Running %s using %u workers for %u regions in heap", cl.name(), num_workers, _g1h->num_committed_regions());
1336       _g1h->workers()->run_task(&cl, num_workers);
1337 
1338       log_debug(gc, remset, tracking)("Remembered Set Tracking update regions total %u, selected %u",
1339                                       _g1h->num_committed_regions(), cl.total_selected_for_rebuild());
1340 
1341       _needs_remembered_set_rebuild = (cl.total_selected_for_rebuild() > 0);
1342 
1343       if (_needs_remembered_set_rebuild) {
1344         GrowableArrayCHeap<G1HeapRegion*, mtGC>* selected = cl.sort_and_prune_old_selected();
1345         _g1h->policy()->candidates()->set_candidates_from_marking(selected);
1346       }
1347     }
1348 
1349     if (log_is_enabled(Trace, gc, liveness)) {
1350       G1PrintRegionLivenessInfoClosure cl("Post-Marking");
1351       _g1h->heap_region_iterate(&cl);
1352     }
1353 
1354     // Potentially, some empty-regions have been reclaimed; make this a
1355     // "collection" so that pending allocation can retry before attempting a
1356     // GC pause.
1357     _g1h->increment_total_collections();
1358 
1359     // For Remark Pauses that may have been triggered by PeriodicGCs, we maintain
1360     // resizing based on MinHeapFreeRatio or MaxHeapFreeRatio. If a PeriodicGC is
1361     // triggered, it likely means there are very few regular GCs, making resizing
1362     // based on gc heuristics less effective.
1363     if (_g1h->last_gc_was_periodic()) {
1364       _g1h->resize_heap_after_full_collection(0 /* allocation_word_size */);
1365     }
1366 
1367     compute_new_sizes();
1368 
1369     verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyLocation::RemarkAfter);
1370 
1371     assert(!restart_for_overflow(), "sanity");
1372     // Completely reset the marking state (except bitmaps) since marking completed.
1373     reset_at_marking_complete();
1374 
1375     G1CollectedHeap::finish_codecache_marking_cycle();
1376 
1377     {
1378       GCTraceTime(Debug, gc, phases) debug("Report Object Count", _gc_timer_cm);
1379       G1ObjectCountIsAliveClosure is_alive(_g1h);
1380       _gc_tracer_cm->report_object_count_after_gc(&is_alive, _g1h->workers());
1381     }
1382 
1383     // Successfully completed marking, advance state.
1384     cm_thread()->set_full_cycle_rebuild_and_scrub();
1385   } else {
1386     // We overflowed.  Restart concurrent marking.
1387     _restart_for_overflow.store_relaxed(true);
1388 
1389     verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyLocation::RemarkOverflow);
1390 
1391     // Clear the marking state because we will be restarting
1392     // marking due to overflowing the global mark stack.
1393     reset_marking_for_restart();
1394   }
1395 
1396   // Statistics
1397   double now = os::elapsedTime();
1398   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1399   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1400   _remark_times.add((now - start) * 1000.0);
1401 
1402   _g1h->update_perf_counter_cpu_time();
1403 
1404   policy->record_concurrent_mark_remark_end();
1405 
1406   return;
1407 }
1408 
// Recompute heap-related sizes after marking-time reclamation: metaspace
// capacity, heap capacity/usage for the soft reference policy, and
// monitoring counters.
void G1ConcurrentMark::compute_new_sizes() {
  MetaspaceGC::compute_new_size();

  // Cleanup will have freed any regions completely full of garbage.
  // Update the soft reference policy with the new heap occupancy.
  Universe::heap()->update_capacity_and_used_at_gc();

  // We reclaimed old regions so we should calculate the sizes to make
  // sure we update the old gen/space data.
  _g1h->monitoring_support()->update_sizes();
}
1420 
// Heap region closure advancing each region's remembered set tracking state
// after the remembered set rebuild has finished.
class G1UpdateRegionsAfterRebuild : public G1HeapRegionClosure {
  G1CollectedHeap* _g1h;

public:
  G1UpdateRegionsAfterRebuild(G1CollectedHeap* g1h) : _g1h(g1h) { }

  bool do_heap_region(G1HeapRegion* r) override {
    // Update the remset tracking state from updating to complete
    // if remembered sets have been rebuilt.
    _g1h->policy()->remset_tracker()->update_after_rebuild(r);
    // Returning false continues the iteration over all regions.
    return false;
  }
};
1434 
1435 void G1ConcurrentMark::cleanup() {
1436   assert_at_safepoint_on_vm_thread();
1437 
1438   // If a full collection has happened, we shouldn't do this.
1439   if (has_aborted()) {
1440     return;
1441   }
1442 
1443   G1Policy* policy = _g1h->policy();
1444   policy->record_pause_start_time();
1445 
1446   double start = os::elapsedTime();
1447 
1448   verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyLocation::CleanupBefore);
1449 
1450   if (needs_remembered_set_rebuild()) {
1451     // Update the remset tracking information as well as marking all regions
1452     // as fully parsable.
1453     GCTraceTime(Debug, gc, phases) debug("Update Remembered Set Tracking After Rebuild", _gc_timer_cm);
1454     G1UpdateRegionsAfterRebuild cl(_g1h);
1455     _g1h->heap_region_iterate(&cl);
1456   } else {
1457     log_debug(gc, phases)("No Remembered Sets to update after rebuild");
1458   }
1459 
1460   verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyLocation::CleanupAfter);
1461 
1462   // Local statistics
1463   _cleanup_times.add((os::elapsedTime() - start) * 1000.0);
1464 
1465   {
1466     GCTraceTime(Debug, gc, phases) debug("Finalize Concurrent Mark Cleanup", _gc_timer_cm);
1467     policy->record_concurrent_mark_cleanup_end(needs_remembered_set_rebuild());
1468   }
1469 
1470   // Advance state.
1471   cm_thread()->set_full_cycle_reset_for_next_cycle();
1472   return;
1473 }
1474 
// 'Keep Alive' oop closure used by both serial and parallel reference processing.
1476 // Uses the G1CMTask associated with a worker thread (for serial reference
1477 // processing the G1CMTask for worker 0 is used) to preserve (mark) and
1478 // trace referent objects.
1479 //
1480 // Using the G1CMTask and embedded local queues avoids having the worker
1481 // threads operating on the global mark stack. This reduces the risk
1482 // of overflowing the stack - which we would rather avoid at this late
1483 // state. Also using the tasks' local queues removes the potential
1484 // of the workers interfering with each other that could occur if
1485 // operating on the global stack.
1486 
class G1CMKeepAliveAndDrainClosure : public OopClosure {
  G1ConcurrentMark* _cm;
  G1CMTask*         _task;              // Marking task used to preserve and trace referents.
  uint              _ref_counter_limit; // Drain after this many references (G1RefProcDrainInterval).
  uint              _ref_counter;       // Countdown until the next drain.
  bool              _is_serial;         // True when running single-threaded (task 0 only).
public:
  G1CMKeepAliveAndDrainClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
    _cm(cm), _task(task), _ref_counter_limit(G1RefProcDrainInterval),
    _ref_counter(_ref_counter_limit), _is_serial(is_serial) {
    assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
  }

  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop(      oop* p) { do_oop_work(p); }

  template <class T> void do_oop_work(T* p) {
    // If the global mark stack overflowed, marking will restart anyway;
    // do no further work.
    if (_cm->has_overflown()) {
      return;
    }
    if (!_task->deal_with_reference(p)) {
      // We did not add anything to the mark bitmap (or mark stack), so there is
      // no point trying to drain it.
      return;
    }
    _ref_counter--;

    if (_ref_counter == 0) {
      // We have dealt with _ref_counter_limit references, pushing them
      // and objects reachable from them on to the local stack (and
      // possibly the global stack). Call G1CMTask::do_marking_step() to
      // process these entries.
      //
      // We call G1CMTask::do_marking_step() in a loop, which we'll exit if
      // there's nothing more to do (i.e. we're done with the entries that
      // were pushed as a result of the G1CMTask::deal_with_reference() calls
      // above) or we overflow.
      //
      // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
      // flag while there may still be some work to do. (See the comment at
      // the beginning of G1CMTask::do_marking_step() for those conditions -
      // one of which is reaching the specified time target.) It is only
      // when G1CMTask::do_marking_step() returns without setting the
      // has_aborted() flag that the marking step has completed.
      do {
        double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
        _task->do_marking_step(mark_step_duration_ms,
                               false      /* do_termination */,
                               _is_serial);
      } while (_task->has_aborted() && !_cm->has_overflown());
      _ref_counter = _ref_counter_limit;
    }
  }
};
1541 
1542 // 'Drain' oop closure used by both serial and parallel reference processing.
1543 // Uses the G1CMTask associated with a given worker thread (for serial
// reference processing the G1CMTask for worker 0 is used). Calls the
1545 // do_marking_step routine, with an unbelievably large timeout value,
1546 // to drain the marking data structures of the remaining entries
1547 // added by the 'keep alive' oop closure above.
1548 
class G1CMDrainMarkingStackClosure : public VoidClosure {
  G1ConcurrentMark* _cm;
  G1CMTask*         _task;      // Marking task whose local queue is drained.
  bool              _is_serial; // True when running single-threaded (task 0 only).
 public:
  G1CMDrainMarkingStackClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
    _cm(cm), _task(task), _is_serial(is_serial) {
    assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
  }

  void do_void() {
    do {
      // We call G1CMTask::do_marking_step() to completely drain the local
      // and global marking stacks of entries pushed by the 'keep alive'
      // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
      //
      // G1CMTask::do_marking_step() is called in a loop, which we'll exit
      // if there's nothing more to do (i.e. we've completely drained the
      // entries that were pushed as a result of applying the 'keep alive'
      // closure to the entries on the discovered ref lists) or we overflow
      // the global marking stack.
      //
      // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
      // flag while there may still be some work to do. (See the comment at
      // the beginning of G1CMTask::do_marking_step() for those conditions -
      // one of which is reaching the specified time target.) It is only
      // when G1CMTask::do_marking_step() returns without setting the
      // has_aborted() flag that the marking step has completed.

      _task->do_marking_step(1000000000.0 /* something very large */,
                             true         /* do_termination */,
                             _is_serial);
    } while (_task->has_aborted() && !_cm->has_overflown());
  }
};
1584 
// Proxy task adapting the concurrent-mark closures (is-alive, keep-alive,
// drain) to the reference processor's worker interface.
class G1CMRefProcProxyTask : public RefProcProxyTask {
  G1CollectedHeap& _g1h;
  G1ConcurrentMark& _cm;

public:
  G1CMRefProcProxyTask(uint max_workers, G1CollectedHeap& g1h, G1ConcurrentMark &cm)
    : RefProcProxyTask("G1CMRefProcProxyTask", max_workers),
      _g1h(g1h),
      _cm(cm) {}

  void work(uint worker_id) override {
    assert(worker_id < _max_workers, "sanity");
    G1CMIsAliveClosure is_alive(&_cm);
    // In single-threaded mode all work is funneled through task 0.
    uint index = (_tm == RefProcThreadModel::Single) ? 0 : worker_id;
    G1CMKeepAliveAndDrainClosure keep_alive(&_cm, _cm.task(index), _tm == RefProcThreadModel::Single);
    BarrierEnqueueDiscoveredFieldClosure enqueue;
    G1CMDrainMarkingStackClosure complete_gc(&_cm, _cm.task(index), _tm == RefProcThreadModel::Single);
    _rp_task->rp_work(worker_id, &is_alive, &keep_alive, &enqueue, &complete_gc);
  }

  void prepare_run_task_hook() override {
    // We need to reset the concurrency level before each
    // proxy task execution, so that the termination protocol
    // and overflow handling in G1CMTask::do_marking_step() knows
    // how many workers to wait for.
    _cm.set_concurrency(_queue_count);
  }
};
1613 
// Performs reference processing and weak-oop processing for marking.
// If the global mark stack overflows during reference processing the
// is-alive information can no longer be trusted, so the VM exits fatally.
void G1ConcurrentMark::weak_refs_work() {
  ResourceMark rm;

  {
    GCTraceTime(Debug, gc, phases) debug("Reference Processing", _gc_timer_cm);

    ReferenceProcessor* rp = _g1h->ref_processor_cm();

    // See the comment in G1CollectedHeap::ref_processing_init()
    // about how reference processing currently works in G1.

    assert(_global_mark_stack.is_empty(), "mark stack should be empty");

    // Prefer to grow the stack until the max capacity.
    _global_mark_stack.set_should_grow();

    // Parallel processing task executor.
    G1CMRefProcProxyTask task(rp->max_num_queues(), *_g1h, *this);
    ReferenceProcessorPhaseTimes pt(_gc_timer_cm, rp->max_num_queues());

    // Process the weak references.
    const ReferenceProcessorStats& stats = rp->process_discovered_references(task, _g1h->workers(), pt);
    _gc_tracer_cm->report_gc_reference_stats(stats);
    pt.print_all_references();

    // The do_oop work routines of the keep_alive and drain_marking_stack
    // oop closures will set the has_overflown flag if we overflow the
    // global marking stack.

    assert(has_overflown() || _global_mark_stack.is_empty(),
           "Mark stack should be empty (unless it has overflown)");
  }

  if (has_overflown()) {
    // We can not trust g1_is_alive and the contents of the heap if the marking stack
    // overflowed while processing references. Exit the VM.
    fatal("Overflow during reference processing, can not continue. Current mark stack depth: "
          "%zu, MarkStackSize: %zu, MarkStackSizeMax: %zu. "
          "Please increase MarkStackSize and/or MarkStackSizeMax and restart.",
          _global_mark_stack.size(), MarkStackSize, MarkStackSizeMax);
    return;
  }

  assert(_global_mark_stack.is_empty(), "Marking should have completed");

  {
    GCTraceTime(Debug, gc, phases) debug("Weak Processing", _gc_timer_cm);
    G1CMIsAliveClosure is_alive(this);
    WeakProcessor::weak_oops_do(_g1h->workers(), &is_alive, &do_nothing_cl, 1);
  }
}
1665 
1666 class G1PrecleanYieldClosure : public YieldClosure {
1667   G1ConcurrentMark* _cm;
1668 
1669 public:
1670   G1PrecleanYieldClosure(G1ConcurrentMark* cm) : _cm(cm) { }
1671 
1672   virtual bool should_return() {
1673     return _cm->has_aborted();
1674   }
1675 
1676   virtual bool should_return_fine_grain() {
1677     _cm->do_yield_check();
1678     return _cm->has_aborted();
1679   }
1680 };
1681 
// Concurrently precleans the discovered reference lists to reduce the
// amount of reference processing work left for the Remark pause. Joins
// the suspendible thread set so this yields to safepoints.
void G1ConcurrentMark::preclean() {
  assert(G1UseReferencePrecleaning, "Precleaning must be enabled.");

  SuspendibleThreadSetJoiner joiner;

  BarrierEnqueueDiscoveredFieldClosure enqueue;

  // Single-threaded phase, so concurrency level 1.
  set_concurrency_and_phase(1, true);

  G1PrecleanYieldClosure yield_cl(this);

  ReferenceProcessor* rp = _g1h->ref_processor_cm();
  // Precleaning is single threaded. Temporarily disable MT discovery.
  ReferenceProcessorMTDiscoveryMutator rp_mut_discovery(rp, false);
  rp->preclean_discovered_references(rp->is_alive_non_header(),
                                     &enqueue,
                                     &yield_cl,
                                     _gc_timer_cm);
}
1701 
// Closure for marking entries in SATB buffers.
class G1CMSATBBufferClosure : public SATBBufferClosure {
private:
  G1CMTask* _task;
  G1CollectedHeap* _g1h;

  // This is very similar to G1CMTask::deal_with_reference, but with
  // more relaxed requirements for the argument, so this must be more
  // circumspect about treating the argument as an object.
  void do_entry(void* entry) const {
    _task->increment_refs_reached();
    oop const obj = cast_to_oop(entry);
    _task->make_reference_grey(obj);
  }

public:
  G1CMSATBBufferClosure(G1CMTask* task, G1CollectedHeap* g1h)
    : _task(task), _g1h(g1h) { }

  // Applies do_entry() to every entry of the given buffer.
  virtual void do_buffer(void** buffer, size_t size) {
    for (size_t i = 0; i < size; ++i) {
      do_entry(buffer[i]);
    }
  }
};
1727 
// Thread closure used during remark: flushes each thread's partial SATB
// buffer into the global queue set so it takes part in completed-buffer
// processing.
class G1RemarkThreadsClosure : public ThreadClosure {
  G1SATBMarkQueueSet& _qset;

 public:
  // NOTE: the g1h and task arguments are currently unused; only the
  // global SATB mark queue set is captured.
  G1RemarkThreadsClosure(G1CollectedHeap* g1h, G1CMTask* task) :
    _qset(G1BarrierSet::satb_mark_queue_set()) {}

  void do_thread(Thread* thread) {
    // Transfer any partial buffer to the qset for completed buffer processing.
    _qset.flush_queue(G1ThreadLocalData::satb_mark_queue(thread));
  }
};
1740 
// STW worker task doing the bulk of the Remark work: first flushes the
// per-thread partial SATB buffers, then runs do_marking_step() until it
// completes or the global mark stack overflows.
class G1CMRemarkTask : public WorkerTask {
  // For Threads::possibly_parallel_threads_do
  ThreadsClaimTokenScope _threads_claim_token_scope;
  G1ConcurrentMark* _cm;
public:
  void work(uint worker_id) {
    G1CMTask* task = _cm->task(worker_id);
    task->record_start_time();
    {
      ResourceMark rm;

      // Move all partial per-thread SATB buffers into the completed-buffer
      // set so do_marking_step() below processes them.
      G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
      Threads::possibly_parallel_threads_do(true /* is_par */, &threads_f);
    }

    do {
      task->do_marking_step(1000000000.0 /* something very large */,
                            true         /* do_termination       */,
                            false        /* is_serial            */);
    } while (task->has_aborted() && !_cm->has_overflown());
    // If we overflow, then we do not want to restart. We instead
    // want to abort remark and do concurrent marking again.
    task->record_end_time();
  }

  G1CMRemarkTask(G1ConcurrentMark* cm, uint active_workers) :
    WorkerTask("Par Remark"), _threads_claim_token_scope(), _cm(cm) {
    // Re-arm the termination protocol for the given number of workers.
    _cm->terminator()->reset_for_reuse(active_workers);
  }
};
1771 
// Finishes marking during the Remark pause, draining the SATB buffers and
// marking stacks with all active workers via G1CMRemarkTask.
void G1ConcurrentMark::finalize_marking() {
  ResourceMark rm;

  _g1h->ensure_parsability(false);

  // this is remark, so we'll use up all active threads
  uint active_workers = _g1h->workers()->active_workers();
  set_concurrency_and_phase(active_workers, false /* concurrent */);
  // Leave _parallel_marking_threads at its
  // value originally calculated in the G1ConcurrentMark
  // constructor and pass values of the active workers
  // through the task.

  {
    G1CMRemarkTask remarkTask(this, active_workers);
    // We will start all available threads, even if we decide that the
    // active_workers will be fewer. The extra ones will just bail out
    // immediately.
    _g1h->workers()->run_task(&remarkTask);
  }

  // Unless marking aborted due to overflow, all completed SATB buffers
  // must have been processed by now.
  SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
  guarantee(has_overflown() ||
            satb_mq_set.completed_buffers_num() == 0,
            "Invariant: has_overflown = %s, num buffers = %zu",
            BOOL_TO_STR(has_overflown()),
            satb_mq_set.completed_buffers_num());

  print_stats();
}
1802 
1803 void G1ConcurrentMark::flush_all_task_caches() {
1804   size_t hits = 0;
1805   size_t misses = 0;
1806   for (uint i = 0; i < _max_num_tasks; i++) {
1807     Pair<size_t, size_t> stats = _tasks[i]->flush_mark_stats_cache();
1808     hits += stats.first;
1809     misses += stats.second;
1810   }
1811   size_t sum = hits + misses;
1812   log_debug(gc, stats)("Mark stats cache hits %zu misses %zu ratio %1.3lf",
1813                        hits, misses, percent_of(hits, sum));
1814 }
1815 
1816 void G1ConcurrentMark::clear_bitmap_for_region(G1HeapRegion* hr) {
1817   assert_at_safepoint();
1818   _mark_bitmap.clear_range(MemRegion(hr->bottom(), hr->end()));
1819 }
1820 
// Claims the next region to scan by atomically advancing the global finger
// past it. Returns the claimed region, or null when the claim failed or the
// claimed region needs no scanning (TAMS at bottom); in both null cases the
// caller may retry until the finger reaches the end of the heap.
G1HeapRegion* G1ConcurrentMark::claim_region(uint worker_id) {
  // "Checkpoint" the finger.
  HeapWord* local_finger = finger();

  while (local_finger < _heap.end()) {
    assert(_g1h->is_in_reserved(local_finger), "invariant");

    G1HeapRegion* curr_region = _g1h->heap_region_containing_or_null(local_finger);
    // Make sure that the reads below do not float before loading curr_region.
    OrderAccess::loadload();
    // Above heap_region_containing may return null as we always scan claim
    // until the end of the heap. In this case, just jump to the next region.
    HeapWord* end = curr_region != nullptr ? curr_region->end() : local_finger + G1HeapRegion::GrainWords;

    // Is the gap between reading the finger and doing the CAS too long?
    HeapWord* res = _finger.compare_exchange(local_finger, end);
    if (res == local_finger && curr_region != nullptr) {
      // We succeeded.
      HeapWord* bottom = curr_region->bottom();
      HeapWord* limit = top_at_mark_start(curr_region);

      log_trace(gc, marking)("Claim region %u bottom " PTR_FORMAT " tams " PTR_FORMAT, curr_region->hrm_index(), p2i(curr_region->bottom()), p2i(top_at_mark_start(curr_region)));
      // Notice that _finger == end cannot be guaranteed here since,
      // someone else might have moved the finger even further.
      assert(finger() >= end, "The finger should have moved forward");

      if (limit > bottom) {
        return curr_region;
      } else {
        assert(limit == bottom,
               "The region limit should be at bottom");
        // We return null and the caller should try calling
        // claim_region() again.
        return nullptr;
      }
    } else {
      // The CAS lost (or the slot had no region): re-read the finger,
      // which some other worker must have advanced, and try again.
      HeapWord* next_finger = finger();
      assert(next_finger > local_finger, "The finger should have moved forward " PTR_FORMAT " " PTR_FORMAT, p2i(local_finger), p2i(next_finger));
      local_finger = next_finger;
    }
  }

  return nullptr;
}
1866 
1867 #ifndef PRODUCT
// Debug-only functor checking that a mark stack / task queue entry is a
// valid oop (or an in-heap partial array state) and does not refer into
// the (optional) collection set.
class VerifyNoCSetOops {
  G1CollectedHeap* _g1h;
  const char* _phase;   // Name of the data structure being verified, for messages.
  int _info;            // Extra info, e.g. queue index; -1 when not applicable.

public:
  VerifyNoCSetOops(const char* phase, int info = -1) :
    _g1h(G1CollectedHeap::heap()),
    _phase(phase),
    _info(info)
  { }

  void operator()(G1TaskQueueEntry task_entry) const {
    if (task_entry.is_partial_array_state()) {
      // Partial array entries only need their source object to be in the heap.
      oop obj = task_entry.to_partial_array_state()->source();
      guarantee(_g1h->is_in_reserved(obj), "Partial Array " PTR_FORMAT " must be in heap.", p2i(obj));
      return;
    }
    guarantee(oopDesc::is_oop(task_entry.to_oop()),
              "Non-oop " PTR_FORMAT ", phase: %s, info: %d",
              p2i(task_entry.to_oop()), _phase, _info);
    G1HeapRegion* r = _g1h->heap_region_containing(task_entry.to_oop());
    guarantee(!(r->in_collection_set() || r->has_index_in_opt_cset()),
              "obj " PTR_FORMAT " from %s (%d) in region %u in (optional) collection set",
              p2i(task_entry.to_oop()), _phase, _info, r->hrm_index());
  }
};
1895 
// Debug-only verification that no entry on the global mark stack, the task
// queues, or under the global/per-task fingers refers into the (optional)
// collection set. Only runs while in the mark-or-rebuild phase.
void G1ConcurrentMark::verify_no_collection_set_oops() {
  assert(SafepointSynchronize::is_at_safepoint() || !is_init_completed(),
         "should be at a safepoint or initializing");
  if (!is_fully_initialized() || !_g1h->collector_state()->is_in_mark_or_rebuild()) {
    return;
  }

  // Verify entries on the global mark stack
  _global_mark_stack.iterate(VerifyNoCSetOops("Stack"));

  // Verify entries on the task queues
  for (uint i = 0; i < _max_num_tasks; ++i) {
    G1CMTaskQueue* queue = _task_queues->queue(i);
    queue->iterate(VerifyNoCSetOops("Queue", i));
  }

  // Verify the global finger
  HeapWord* global_finger = finger();
  if (global_finger != nullptr && global_finger < _heap.end()) {
    // Since we always iterate over all regions, we might get a null G1HeapRegion
    // here.
    G1HeapRegion* global_hr = _g1h->heap_region_containing_or_null(global_finger);
    guarantee(global_hr == nullptr || global_finger == global_hr->bottom(),
              "global finger: " PTR_FORMAT " region: " HR_FORMAT,
              p2i(global_finger), HR_FORMAT_PARAMS(global_hr));
  }

  // Verify the task fingers
  assert(_num_concurrent_workers <= _max_num_tasks, "sanity");
  for (uint i = 0; i < _num_concurrent_workers; ++i) {
    G1CMTask* task = _tasks[i];
    HeapWord* task_finger = task->finger();
    if (task_finger != nullptr && task_finger < _heap.end()) {
      // See above note on the global finger verification.
      G1HeapRegion* r = _g1h->heap_region_containing_or_null(task_finger);
      // NOTE(review): this condition is a disjunction — it passes if the
      // region is merely not in the cset OR not in the optional cset —
      // unlike the stricter conjunction used by VerifyNoCSetOops above.
      // Confirm this weaker check is intended for task fingers.
      guarantee(r == nullptr || task_finger == r->bottom() ||
                !r->in_collection_set() || !r->has_index_in_opt_cset(),
                "task finger: " PTR_FORMAT " region: " HR_FORMAT,
                p2i(task_finger), HR_FORMAT_PARAMS(r));
    }
  }
}
1938 #endif // PRODUCT
1939 
1940 void G1ConcurrentMark::rebuild_and_scrub() {
1941   if (!needs_remembered_set_rebuild()) {
1942     log_debug(gc, marking)("Skipping Remembered Set Rebuild. No regions selected for rebuild, will only scrub");
1943   }
1944 
1945   G1ConcurrentRebuildAndScrub::rebuild_and_scrub(this, needs_remembered_set_rebuild(), _concurrent_workers);
1946 }
1947 
1948 void G1ConcurrentMark::print_stats() {
1949   if (!log_is_enabled(Debug, gc, stats)) {
1950     return;
1951   }
1952   log_debug(gc, stats)("---------------------------------------------------------------------");
1953   for (size_t i = 0; i < _num_active_tasks; ++i) {
1954     _tasks[i]->print_stats();
1955     log_debug(gc, stats)("---------------------------------------------------------------------");
1956   }
1957 }
1958 
// Aborts any in-progress concurrent marking because a full GC is about to
// move objects. Returns true if marking state was actually aborted/cleaned
// up, false if there was nothing to do.
bool G1ConcurrentMark::concurrent_cycle_abort() {
  assert_at_safepoint_on_vm_thread();
  assert(_g1h->collector_state()->is_in_full_gc(), "must be");

  // If we start the compaction before the CM threads finish
  // scanning the root regions we might trip them over as we'll
  // be moving objects / updating references. Since the root region
  // scan synchronized with the safepoint, just tell it to abort.
  // It will notice when the threads start up again later.
  abort_root_region_scan_at_safepoint();

  // We haven't started a concurrent cycle no need to do anything; we might have
  // aborted the marking because of shutting down though. In this case the marking
  // might have already completed the abort (leading to in_progress() below to
  // return false), however this still left marking state particularly in the
  // shared marking bitmap that must be cleaned up.
  // If there are multiple full gcs during shutdown we do this work repeatedly for
  // nothing, but this situation should be extremely rare (a full gc after shutdown
  // has been signalled is already rare), and this work should be negligible compared
  // to actual full gc work.

  if (!is_fully_initialized() || (!cm_thread()->is_in_progress() && !cm_thread()->should_terminate())) {
    return false;
  }

  reset_marking_for_restart();

  abort_marking_threads();

  // Discard all SATB marking data accumulated so far and deactivate the
  // SATB barrier on all threads.
  SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
  satb_mq_set.abandon_partial_marking();
  // This can be called either during or outside marking, we'll read
  // the expected_active value from the SATB queue set.
  satb_mq_set.set_active_all_threads(false, /* new active value */
                                     satb_mq_set.is_active() /* expected_active */);
  return true;
}
1996 
// Signals all marking threads to abort: raises the abort flag and breaks
// both overflow barrier syncs so no worker stays blocked waiting there.
void G1ConcurrentMark::abort_marking_threads() {
  assert_root_region_scan_completed_or_aborted();
  _has_aborted.store_relaxed(true);
  _first_overflow_barrier_sync.abort();
  _second_overflow_barrier_sync.abort();
}
2003 
2004 double G1ConcurrentMark::worker_threads_cpu_time_s() {
2005   class CountCpuTimeThreadClosure : public ThreadClosure {
2006   public:
2007     jlong _total_cpu_time;
2008 
2009     CountCpuTimeThreadClosure() : ThreadClosure(), _total_cpu_time(0) { }
2010 
2011     void do_thread(Thread* t) {
2012       _total_cpu_time += os::thread_cpu_time(t);
2013     }
2014   } cl;
2015 
2016   threads_do(&cl);
2017 
2018   return (double)cl._total_cpu_time / NANOSECS_PER_SEC;
2019 }
2020 
// Logs, at gc+marking trace level, summary statistics (count, total,
// average, and — when the series is non-empty — std. dev and max) for a
// series of millisecond measurements.
static void print_ms_time_info(const char* prefix, const char* name,
                               NumberSeq& ns) {
  log_trace(gc, marking)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
                         prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
  if (ns.num() > 0) {
    log_trace(gc, marking)("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
                           prefix, ns.sd(), ns.maximum());
  }
}
2030 
2031 void G1ConcurrentMark::print_summary_info() {
2032   Log(gc, marking) log;
2033   if (!log.is_trace()) {
2034     return;
2035   }
2036 
2037   log.trace(" Concurrent marking:");
2038   if (!is_fully_initialized()) {
2039     log.trace("    has not been initialized yet");
2040     return;
2041   }
2042   print_ms_time_info("  ", "remarks", _remark_times);
2043   {
2044     print_ms_time_info("     ", "final marks", _remark_mark_times);
2045     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
2046 
2047   }
2048   print_ms_time_info("  ", "cleanups", _cleanup_times);
2049   log.trace("    Finalize live data total time = %8.2f s (avg = %8.2f ms).",
2050             _cleanup_times.sum() / 1000.0, _cleanup_times.avg());
2051   log.trace("  Total stop_world time = %8.2f s.",
2052             (_remark_times.sum() + _cleanup_times.sum())/1000.0);
2053   log.trace("  Total concurrent time = %8.2f s (%8.2f s marking).",
2054             cm_thread()->total_mark_cpu_time_s(), cm_thread()->worker_threads_cpu_time_s());
2055 }
2056 
2057 void G1ConcurrentMark::threads_do(ThreadClosure* tc) const {
2058   if (is_fully_initialized()) { // they are initialized late
2059     tc->do_thread(_cm_thread);
2060     _concurrent_workers->threads_do(tc);
2061   }
2062 }
2063 
// Prints the marking bitmap's address and contents onto the given stream.
void G1ConcurrentMark::print_on(outputStream* st) const {
  st->print_cr("Marking Bits: (CMBitMap*) " PTR_FORMAT, p2i(mark_bitmap()));
  _mark_bitmap.print_on(st, " Bits: ");
}
2068 
2069 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) {
2070   ReferenceProcessor* result = g1h->ref_processor_cm();
2071   assert(result != nullptr, "CM reference processor should not be null");
2072   return result;
2073 }
2074 
// The concurrent-marking oop closure claims metadata strongly and uses the
// CM reference processor for reference discovery.
G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
                               G1CMTask* task)
  : ClaimMetadataVisitingOopIterateClosure(ClassLoaderData::_claim_strong, get_cm_oop_closure_ref_processor(g1h)),
    _g1h(g1h), _task(task)
{ }
2080 
// Makes hr the task's current scan region, placing the task-local finger
// at its bottom and computing the region's scan limit.
void G1CMTask::setup_for_region(G1HeapRegion* hr) {
  assert(hr != nullptr,
        "claim_region() should have filtered out null regions");
  _curr_region  = hr;
  _finger       = hr->bottom();
  update_region_limit();
}
2088 
// Recomputes the scan limit (TAMS) of the current region, handling the
// cases where the region was emptied or refilled concurrently.
void G1CMTask::update_region_limit() {
  G1HeapRegion* hr = _curr_region;
  HeapWord* bottom = hr->bottom();
  HeapWord* limit = _cm->top_at_mark_start(hr);

  if (limit == bottom) {
    // The region was collected underneath our feet.
    // We set the finger to bottom to ensure that the bitmap
    // iteration that will follow this will not do anything.
    // (this is not a condition that holds when we set the region up,
    // as the region is not supposed to be empty in the first place)
    _finger = bottom;
  } else if (limit >= _region_limit) {
    assert(limit >= _finger, "peace of mind");
  } else {
    assert(limit < _region_limit, "only way to get here");
    // This can happen under some pretty unusual circumstances.  An
    // evacuation pause empties the region underneath our feet (TAMS
    // at bottom). We then do some allocation in the region (TAMS
    // stays at bottom), followed by the region being used as a GC
    // alloc region (TAMS will move to top() and the objects
    // originally below it will be greyed). All objects now marked in
    // the region are explicitly greyed, if below the global finger,
    // and we do not need in fact to scan anything else. So, we simply
    // set _finger to be limit to ensure that the bitmap iteration
    // doesn't do anything.
    _finger = limit;
  }

  _region_limit = limit;
}
2120 
// Stops scanning the current region and clears all region-related fields.
void G1CMTask::giveup_current_region() {
  assert(_curr_region != nullptr, "invariant");
  clear_region_fields();
}
2125 
// Resets region, finger and limit to indicate the task holds no region.
void G1CMTask::clear_region_fields() {
  // Values for these three fields that indicate that we're not
  // holding on to a region.
  _curr_region   = nullptr;
  _finger        = nullptr;
  _region_limit  = nullptr;
}
2133 
2134 void G1CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
2135   if (cm_oop_closure == nullptr) {
2136     assert(_cm_oop_closure != nullptr, "invariant");
2137   } else {
2138     assert(_cm_oop_closure == nullptr, "invariant");
2139   }
2140   _cm_oop_closure = cm_oop_closure;
2141 }
2142 
// Re-initializes this task for a new marking cycle with the given bitmap:
// clears region state, timing counters and the mark stats cache.
void G1CMTask::reset(G1CMBitMap* mark_bitmap) {
  guarantee(mark_bitmap != nullptr, "invariant");
  _mark_bitmap              = mark_bitmap;
  clear_region_fields();

  _calls                         = 0;
  _elapsed_time_ms               = 0.0;
  _termination_time_ms           = 0.0;

  _mark_stats_cache.reset();
}
2154 
// Prepares the task for restarting marking: clears region state, empties
// the local task queue, and resets (optional) task queue statistics.
void G1CMTask::reset_for_restart() {
  clear_region_fields();
  _task_queue->set_empty();
  TASKQUEUE_STATS_ONLY(_partial_array_splitter.stats()->reset());
  TASKQUEUE_STATS_ONLY(_task_queue->stats.reset());
}
2161 
// (Re-)constructs the task's partial array splitter in place via placement
// new; paired with the explicit destructor call in
// unregister_partial_array_splitter().
void G1CMTask::register_partial_array_splitter() {

  ::new (&_partial_array_splitter) PartialArraySplitter(_cm->partial_array_state_manager(),
                                                        _cm->max_num_tasks(),
                                                        ObjArrayMarkingStride);
}
2168 
// Destroys the in-place constructed splitter; see
// register_partial_array_splitter() for the matching placement new.
void G1CMTask::unregister_partial_array_splitter() {
  _partial_array_splitter.~PartialArraySplitter();
}
2172 
2173 bool G1CMTask::should_exit_termination() {
2174   if (!regular_clock_call()) {
2175     return true;
2176   }
2177 
2178   // This is called when we are in the termination protocol. We should
2179   // quit if, for some reason, this task wants to abort or the global
2180   // stack is not empty (this means that we can get work from it).
2181   return !_cm->mark_stack_empty() || has_aborted();
2182 }
2183 
// Called when the words-scanned or refs-reached budget has been used up;
// runs the regular clock check and aborts marking if it says so.
void G1CMTask::reached_limit() {
  assert(_words_scanned >= _words_scanned_limit ||
         _refs_reached >= _refs_reached_limit ,
         "shouldn't have been called otherwise");
  abort_marking_if_regular_check_fail();
}
2190 
// Periodic check deciding whether this task may keep marking. Returns
// false when the current do_marking_step() should be aborted (overflow,
// full-gc abort, pending yield, exhausted time quota, or SATB buffers
// waiting), true to continue.
bool G1CMTask::regular_clock_call() {
  if (has_aborted()) {
    return false;
  }

  // First, we need to recalculate the words scanned and refs reached
  // limits for the next clock call.
  recalculate_limits();

  // During the regular clock call we do the following

  // (1) If an overflow has been flagged, then we abort.
  if (_cm->has_overflown()) {
    return false;
  }

  // If we are not concurrent (i.e. we're doing remark) we don't need
  // to check anything else. The other steps are only needed during
  // the concurrent marking phase.
  if (!_cm->concurrent()) {
    return true;
  }

  // (2) If marking has been aborted for Full GC, then we also abort.
  if (_cm->has_aborted()) {
    return false;
  }

  // (4) We check whether we should yield. If we have to, then we abort.
  if (SuspendibleThreadSet::should_yield()) {
    // We should yield. To do this we abort the task. The caller is
    // responsible for yielding.
    return false;
  }

  // (5) We check whether we've reached our time quota. If we have,
  // then we abort.
  double elapsed_time_ms = (double)(os::current_thread_cpu_time() - _start_cpu_time_ns) / NANOSECS_PER_MILLISEC;
  if (elapsed_time_ms > _time_target_ms) {
    _has_timed_out = true;
    return false;
  }

  // (6) Finally, we check whether there are enough completed SATB
  // buffers available for processing. If there are, we abort.
  SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
  if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
    // we do need to process SATB buffers, we'll abort and restart
    // the marking task to do so
    return false;
  }
  return true;
}
2244 
2245 void G1CMTask::recalculate_limits() {
2246   _real_words_scanned_limit = _words_scanned + words_scanned_period;
2247   _words_scanned_limit      = _real_words_scanned_limit;
2248 
2249   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
2250   _refs_reached_limit       = _real_refs_reached_limit;
2251 }
2252 
// Pulls the current budgets back by three quarters of a period below the
// real limits so the next regular clock call happens earlier.
void G1CMTask::decrease_limits() {
  // This is called when we believe that we're going to do an infrequent
  // operation which will increase the per byte scanned cost (i.e. move
  // entries to/from the global stack). It basically tries to decrease the
  // scanning limit so that the clock is called earlier.

  _words_scanned_limit = _real_words_scanned_limit - 3 * words_scanned_period / 4;
  _refs_reached_limit  = _real_refs_reached_limit - 3 * refs_reached_period / 4;
}
2262 
// Moves up to one chunk (EntriesPerChunk entries) from the local task queue
// to the global mark stack. Sets the abort flag if the global stack push
// fails (overflow).
void G1CMTask::move_entries_to_global_stack() {
  // Local array where we'll store the entries that will be popped
  // from the local queue.
  G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];

  size_t n = 0;
  G1TaskQueueEntry task_entry;
  while (n < G1CMMarkStack::EntriesPerChunk && _task_queue->pop_local(task_entry)) {
    buffer[n] = task_entry;
    ++n;
  }
  if (n < G1CMMarkStack::EntriesPerChunk) {
    // Terminate a partially filled chunk with a default (null) entry so
    // consumers know where the valid entries end.
    buffer[n] = G1TaskQueueEntry();
  }

  if (n > 0) {
    if (!_cm->mark_stack_push(buffer)) {
      set_has_aborted();
    }
  }

  // This operation was quite expensive, so decrease the limits.
  decrease_limits();
}
2287 
// Pops one chunk of entries from the global mark stack and pushes them onto
// the local task queue. Returns false when the global stack yielded nothing.
bool G1CMTask::get_entries_from_global_stack() {
  // Local array where we'll store the entries that will be popped
  // from the global stack.
  G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];

  if (!_cm->mark_stack_pop(buffer)) {
    return false;
  }

  // We did actually pop at least one entry.
  for (size_t i = 0; i < G1CMMarkStack::EntriesPerChunk; ++i) {
    G1TaskQueueEntry task_entry = buffer[i];
    // A null entry terminates a partially filled chunk.
    if (task_entry.is_null()) {
      break;
    }
    assert(task_entry.is_partial_array_state() || oopDesc::is_oop(task_entry.to_oop()), "Element " PTR_FORMAT " must be an array slice or oop", p2i(task_entry.to_oop()));
    bool success = _task_queue->push(task_entry);
    // We only call this when the local queue is empty or under a
    // given target limit. So, we do not expect this push to fail.
    assert(success, "invariant");
  }

  // This operation was quite expensive, so decrease the limits
  decrease_limits();
  return true;
}
2314 
2315 void G1CMTask::drain_local_queue(bool partially) {
2316   if (has_aborted()) {
2317     return;
2318   }
2319 
2320   // Decide what the target size is, depending whether we're going to
2321   // drain it partially (so that other tasks can steal if they run out
2322   // of things to do) or totally (at the very end).
2323   uint target_size;
2324   if (partially) {
2325     target_size = GCDrainStackTargetSize;
2326   } else {
2327     target_size = 0;
2328   }
2329 
2330   if (_task_queue->size() > target_size) {
2331     G1TaskQueueEntry entry;
2332     bool ret = _task_queue->pop_local(entry);
2333     while (ret) {
2334       process_entry(entry, false /* stolen */);
2335       if (_task_queue->size() <= target_size || has_aborted()) {
2336         ret = false;
2337       } else {
2338         ret = _task_queue->pop_local(entry);
2339       }
2340     }
2341   }
2342 }
2343 
// Begins sliced processing of a large objArray: publishes partial array
// state so other tasks can help, marks the array's klass, and scans the
// initial chunk. Returns the size in words of the initial chunk including
// the array header.
size_t G1CMTask::start_partial_array_processing(oop obj) {
  assert(should_be_sliced(obj), "Must be an array object %d and large %zu", obj->is_objArray(), obj->size());

  objArrayOop obj_array = objArrayOop(obj);
  size_t array_length = obj_array->length();

  size_t initial_chunk_size = _partial_array_splitter.start(_task_queue, obj_array, nullptr, array_length);

  // Mark objArray klass metadata
  if (_cm_oop_closure->do_metadata()) {
    _cm_oop_closure->do_klass(obj_array->klass());
  }

  process_array_chunk(obj_array, 0, initial_chunk_size);

  // Include object header size
  return objArrayOopDesc::object_size(checked_cast<int>(initial_chunk_size));
}
2362 
// Processes one claimed chunk of a partially-processed objArray. Returns
// the claimed chunk's size in words.
size_t G1CMTask::process_partial_array(const G1TaskQueueEntry& task, bool stolen) {
  PartialArrayState* state = task.to_partial_array_state();
  // Access state before release by claim().
  objArrayOop obj = objArrayOop(state->source());

  PartialArraySplitter::Claim claim =
    _partial_array_splitter.claim(state, _task_queue, stolen);

  process_array_chunk(obj, claim._start, claim._end);
  return heap_word_size((claim._end - claim._start) * heapOopSize);
}
2374 
// Transfers entries from the global mark stack to the local queue and
// processes them, either until the global stack falls to the partial
// target size or (for total draining) until it is completely empty.
void G1CMTask::drain_global_stack(bool partially) {
  if (has_aborted()) {
    return;
  }

  // We have a policy to drain the local queue before we attempt to
  // drain the global stack.
  assert(partially || _task_queue->size() == 0, "invariant");

  // Decide what the target size is, depending whether we're going to
  // drain it partially (so that other tasks can steal if they run out
  // of things to do) or totally (at the very end).
  // Notice that when draining the global mark stack partially, due to the racyness
  // of the mark stack size update we might in fact drop below the target. But,
  // this is not a problem.
  // In case of total draining, we simply process until the global mark stack is
  // totally empty, disregarding the size counter.
  if (partially) {
    size_t const target_size = _cm->partial_mark_stack_size_target();
    while (!has_aborted() && _cm->mark_stack_size() > target_size) {
      if (get_entries_from_global_stack()) {
        drain_local_queue(partially);
      }
    }
  } else {
    while (!has_aborted() && get_entries_from_global_stack()) {
      drain_local_queue(partially);
    }
  }
}
2405 
// SATB Queue has several assumptions on whether to call the par or
// non-par versions of the methods. This is why some of the code is
// replicated. We should really get rid of the single-threaded version
// of the code to simplify things.
// Claim and apply the SATB closure to completed SATB buffers until no more
// buffers are available or this task has to abort. While draining,
// _draining_satb_buffers is set so the regular clock does not abort the task
// just because completed buffers are available.
void G1CMTask::drain_satb_buffers() {
  if (has_aborted()) {
    return;
  }

  // We set this so that the regular clock knows that we're in the
  // middle of draining buffers and doesn't set the abort flag when it
  // notices that SATB buffers are available for draining. It'd be
  // very counter productive if it did that. :-)
  _draining_satb_buffers = true;

  G1CMSATBBufferClosure satb_cl(this, _g1h);
  SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();

  // This keeps claiming and applying the closure to completed buffers
  // until we run out of buffers or we need to abort.
  while (!has_aborted() &&
         satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
    abort_marking_if_regular_check_fail();
  }

  // Can't assert qset is empty here, even if not aborted.  If concurrent,
  // some other thread might be adding to the queue.  If not concurrent,
  // some other thread might have won the race for the last buffer, but
  // has not yet decremented the count.

  _draining_satb_buffers = false;

  // again, this was a potentially expensive operation, decrease the
  // limits to get the regular clock call early
  decrease_limits();
}
2442 
// Reset the cached marking statistics entry for the given region index.
void G1CMTask::clear_mark_stats_cache(uint region_idx) {
  _mark_stats_cache.reset(region_idx);
}
2446 
// Evict all entries from this task's mark stats cache. Returns the pair of
// counters produced by evict_all() (see G1RegionMarkStatsCache for their
// exact meaning).
Pair<size_t, size_t> G1CMTask::flush_mark_stats_cache() {
  return _mark_stats_cache.evict_all();
}
2450 
// Log per-task marking statistics (number of calls, elapsed/termination
// times, step time distribution, and mark stats cache hit ratio) on the
// gc+stats channel at debug level.
void G1CMTask::print_stats() {
  log_debug(gc, stats)("Marking Stats, task = %u, calls = %u", _worker_id, _calls);
  log_debug(gc, stats)("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
                       _elapsed_time_ms, _termination_time_ms);
  log_debug(gc, stats)("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms max = %1.2lfms, total = %1.2lfms",
                       _step_times_ms.num(),
                       _step_times_ms.avg(),
                       _step_times_ms.sd(),
                       _step_times_ms.maximum(),
                       _step_times_ms.sum());
  size_t const hits = _mark_stats_cache.hits();
  size_t const misses = _mark_stats_cache.misses();
  log_debug(gc, stats)("  Mark Stats Cache: hits %zu misses %zu ratio %.3f",
                       hits, misses, percent_of(hits, hits + misses));
}
2466 
// Try to steal a task queue entry from some other worker's queue on behalf
// of worker_id. Returns true and fills task_entry on success.
bool G1ConcurrentMark::try_stealing(uint worker_id, G1TaskQueueEntry& task_entry) {
  return _task_queues->steal(worker_id, task_entry);
}
2470 
// Scan the currently claimed region (if any) from the local finger up to the
// region limit using the given bitmap closure. Gives the region up once it
// is fully scanned (or immediately for empty/humongous cases); if the bitmap
// iteration is aborted, advances the finger past the last scanned object so
// that a restarted task does not rescan it.
void G1CMTask::process_current_region(G1CMBitMapClosure& bitmap_closure) {
  if (has_aborted() || _curr_region == nullptr) {
    return;
  }

  // This means that we're already holding on to a region.
  assert(_finger != nullptr, "if region is not null, then the finger "
         "should not be null either");

  // We might have restarted this task after an evacuation pause
  // which might have evacuated the region we're holding on to
  // underneath our feet. Let's read its limit again to make sure
  // that we do not iterate over a region of the heap that
  // contains garbage (update_region_limit() will also move
  // _finger to the start of the region if it is found empty).
  update_region_limit();
  // We will start from _finger not from the start of the region,
  // as we might be restarting this task after aborting half-way
  // through scanning this region. In this case, _finger points to
  // the address where we last found a marked object. If this is a
  // fresh region, _finger points to start().
  MemRegion mr = MemRegion(_finger, _region_limit);

  assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
         "humongous regions should go around loop once only");

  // Some special cases:
  // If the memory region is empty, we can just give up the region.
  // If the current region is humongous then we only need to check
  // the bitmap for the bit associated with the start of the object,
  // scan the object if it's live, and give up the region.
  // Otherwise, let's iterate over the bitmap of the part of the region
  // that is left.
  // If the iteration is successful, give up the region.
  if (mr.is_empty()) {
    giveup_current_region();
    abort_marking_if_regular_check_fail();
  } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
    if (_mark_bitmap->is_marked(mr.start())) {
      // The object is marked - apply the closure
      bitmap_closure.do_addr(mr.start());
    }
    // Even if this task aborted while scanning the humongous object
    // we can (and should) give up the current region.
    giveup_current_region();
    abort_marking_if_regular_check_fail();
  } else if (_mark_bitmap->iterate(&bitmap_closure, mr)) {
    giveup_current_region();
    abort_marking_if_regular_check_fail();
  } else {
    assert(has_aborted(), "currently the only way to do so");
    // The only way to abort the bitmap iteration is to return
    // false from the do_bit() method. However, inside the
    // do_bit() method we move the _finger to point to the
    // object currently being looked at. So, if we bail out, we
    // have definitely set _finger to something non-null.
    assert(_finger != nullptr, "invariant");

    // Region iteration was actually aborted. So now _finger
    // points to the address of the object we last scanned. If we
    // leave it there, when we restart this task, we will rescan
    // the object. It is easy to avoid this. We move the finger by
    // enough to point to the next possible object header.
    assert(_finger < _region_limit, "invariant");
    HeapWord* const new_finger = _finger + cast_to_oop(_finger)->size();
    if (new_finger >= _region_limit) {
      giveup_current_region();
    } else {
      move_finger_to(new_finger);
    }
  }
}
2543 
// Try to claim a new region for this task to scan, looping until a region is
// successfully claimed, the task aborts, or there are no regions left.
void G1CMTask::claim_new_region() {
  // Read the note on the claim_region() method on why it might
  // return null with potentially more regions available for
  // claiming and why we have to check out_of_regions() to determine
  // whether we're done or not.
  while (!has_aborted() && _curr_region == nullptr && !_cm->out_of_regions()) {
    // We are going to try to claim a new region. We should have
    // given up on the previous one.
    // Separated the asserts so that we know which one fires.
    assert(_curr_region  == nullptr, "invariant");
    assert(_finger       == nullptr, "invariant");
    assert(_region_limit == nullptr, "invariant");
    G1HeapRegion* claimed_region = _cm->claim_region(_worker_id);
    if (claimed_region != nullptr) {
      // Yes, we managed to claim one
      setup_for_region(claimed_region);
      assert(_curr_region == claimed_region, "invariant");
    }
    // It is important to call the regular clock here. It might take
    // a while to claim a region if, for example, we hit a large
    // block of empty regions. So we need to call the regular clock
    // method once round the loop to make sure it's called
    // frequently enough.
    abort_marking_if_regular_check_fail();
  }
}
2570 
2571 void G1CMTask::attempt_stealing() {
2572   // We cannot check whether the global stack is empty, since other
2573   // tasks might be pushing objects to it concurrently.
2574   assert(_cm->out_of_regions() && _task_queue->size() == 0,
2575          "only way to reach here");
2576   while (!has_aborted()) {
2577     G1TaskQueueEntry entry;
2578     if (_cm->try_stealing(_worker_id, entry)) {
2579       process_entry(entry, true /* stolen */);
2580 
2581       // And since we're towards the end, let's totally drain the
2582       // local queue and global stack.
2583       drain_local_queue(false);
2584       drain_global_stack(false);
2585     } else {
2586       break;
2587     }
2588   }
2589 }
2590 
// Enter the termination protocol (skipped when serial, which trivially
// "finishes"). If termination succeeds, all marking work is globally done
// and the global invariants are checked; otherwise this task is aborted so
// it can be restarted to pick up the remaining work.
void G1CMTask::attempt_termination(bool is_serial) {
  // We cannot check whether the global stack is empty, since other
  // tasks might be concurrently pushing objects on it.
  // Separated the asserts so that we know which one fires.
  assert(_cm->out_of_regions(), "only way to reach here");
  assert(_task_queue->size() == 0, "only way to reach here");
  double termination_start_time_ms = os::elapsedTime() * 1000.0;

  // The G1CMTask class also extends the TerminatorTerminator class,
  // hence its should_exit_termination() method will also decide
  // whether to exit the termination protocol or not.
  bool finished = (is_serial ||
                   _cm->terminator()->offer_termination(this));
  _termination_time_ms += (os::elapsedTime() * 1000.0 - termination_start_time_ms);

  if (finished) {
    // We're all done.

    // We can now guarantee that the global stack is empty, since
    // all other tasks have finished. We separated the guarantees so
    // that, if a condition is false, we can immediately find out
    // which one.
    guarantee(_cm->out_of_regions(), "only way to reach here");
    guarantee(_cm->mark_stack_empty(), "only way to reach here");
    guarantee(_task_queue->size() == 0, "only way to reach here");
    guarantee(!_cm->has_overflown(), "only way to reach here");
    guarantee(!has_aborted(), "should never happen if termination has completed");
  } else {
    // Apparently there's more work to do. Let's abort this task. We
    // will restart it and hopefully we can find more things to do.
    set_has_aborted();
  }
}
2624 
// Post-abort processing for a marking step: record how far off the time
// target we were (only when we actually timed out), and, if the abort was
// caused by a global mark stack overflow, synchronize all tasks via two
// barriers and reset marking state so the marking phase can restart.
void G1CMTask::handle_abort(bool is_serial, double elapsed_time_ms) {
  if (_has_timed_out) {
    double diff_ms = elapsed_time_ms - _time_target_ms;
    // Keep statistics of how well we did with respect to hitting
    // our target only if we actually timed out (if we aborted for
    // other reasons, then the results might get skewed).
    _marking_step_diff_ms.add(diff_ms);
  }

  if (!_cm->has_overflown()) {
    return;
  }

  // This is the interesting one. We aborted because a global
  // overflow was raised. This means we have to restart the
  // marking phase and start iterating over regions. However, in
  // order to do this we have to make sure that all tasks stop
  // what they are doing and re-initialize in a safe manner. We
  // will achieve this with the use of two barrier sync points.
  if (!is_serial) {
    // We only need to enter the sync barrier if being called
    // from a parallel context
    _cm->enter_first_sync_barrier(_worker_id);

    // When we exit this sync barrier we know that all tasks have
    // stopped doing marking work. So, it's now safe to
    // re-initialize our data structures.
  }

  clear_region_fields();
  flush_mark_stats_cache();

  if (!is_serial) {
    // If we're executing the concurrent phase of marking, reset the marking
    // state; otherwise the marking state is reset after reference processing,
    // during the remark pause.
    // If we reset here as a result of an overflow during the remark we will
    // see assertion failures from any subsequent set_concurrency_and_phase()
    // calls.
    if (_cm->concurrent() && _worker_id == 0) {
      // Worker 0 is responsible for clearing the global data structures because
      // of an overflow. During STW we should not clear the overflow flag (in
      // G1ConcurrentMark::reset_marking_state()) since we rely on it being true when we exit
      // method to abort the pause and restart concurrent marking.
      _cm->reset_marking_for_restart();

      log_info(gc, marking)("Concurrent Mark reset for overflow");
    }

    // ...and enter the second barrier.
    _cm->enter_second_sync_barrier(_worker_id);
  }
}
2678 
2679 /*****************************************************************************
2680 
2681     The do_marking_step(time_target_ms, ...) method is the building
2682     block of the parallel marking framework. It can be called in parallel
2683     with other invocations of do_marking_step() on different tasks
2684     (but only one per task, obviously) and concurrently with the
2685     mutator threads, or during remark, hence it eliminates the need
2686     for two versions of the code. When called during remark, it will
2687     pick up from where the task left off during the concurrent marking
2688     phase. Interestingly, tasks are also claimable during evacuation
2689     pauses too, since do_marking_step() ensures that it aborts before
2690     it needs to yield.
2691 
2692     The data structures that it uses to do marking work are the
2693     following:
2694 
2695       (1) Marking Bitmap. If there are grey objects that appear only
2696       on the bitmap (this happens either when dealing with an overflow
2697       or when the concurrent start pause has simply marked the roots
2698       and didn't push them on the stack), then tasks claim heap
2699       regions whose bitmap they then scan to find grey objects. A
2700       global finger indicates where the end of the last claimed region
2701       is. A local finger indicates how far into the region a task has
2702       scanned. The two fingers are used to determine how to grey an
2703       object (i.e. whether simply marking it is OK, as it will be
2704       visited by a task in the future, or whether it needs to be also
2705       pushed on a stack).
2706 
2707       (2) Local Queue. The local queue of the task which is accessed
2708       reasonably efficiently by the task. Other tasks can steal from
2709       it when they run out of work. Throughout the marking phase, a
2710       task attempts to keep its local queue short but not totally
2711       empty, so that entries are available for stealing by other
2712       tasks. Only when there is no more work, a task will totally
2713       drain its local queue.
2714 
2715       (3) Global Mark Stack. This handles local queue overflow. During
2716       marking only sets of entries are moved between it and the local
2717       queues, as access to it requires a mutex and more fine-grain
2718       interaction with it which might cause contention. If it
2719       overflows, then the marking phase should restart and iterate
2720       over the bitmap to identify grey objects. Throughout the marking
2721       phase, tasks attempt to keep the global mark stack at a small
2722       length but not totally empty, so that entries are available for
2723       popping by other tasks. Only when there is no more work, tasks
2724       will totally drain the global mark stack.
2725 
2726       (4) SATB Buffer Queue. This is where completed SATB buffers are
2727       made available. Buffers are regularly removed from this queue
2728       and scanned for roots, so that the queue doesn't get too
2729       long. During remark, all completed buffers are processed, as
2730       well as the filled in parts of any uncompleted buffers.
2731 
2732     The do_marking_step() method tries to abort when the time target
2733     has been reached. There are a few other cases when the
2734     do_marking_step() method also aborts:
2735 
2736       (1) When the marking phase has been aborted (after a Full GC).
2737 
2738       (2) When a global overflow (on the global stack) has been
2739       triggered. Before the task aborts, it will actually sync up with
2740       the other tasks to ensure that all the marking data structures
2741       (local queues, stacks, fingers etc.)  are re-initialized so that
2742       when do_marking_step() completes, the marking phase can
2743       immediately restart.
2744 
2745       (3) When enough completed SATB buffers are available. The
2746       do_marking_step() method only tries to drain SATB buffers right
2747       at the beginning. So, if enough buffers are available, the
2748       marking step aborts and the SATB buffers are processed at
2749       the beginning of the next invocation.
2750 
      (4) To yield. When we have to yield, we abort and yield
      right at the end of do_marking_step(). This saves us from a lot
2753       of hassle as, by yielding we might allow a Full GC. If this
2754       happens then objects will be compacted underneath our feet, the
2755       heap might shrink, etc. We save checking for this by just
2756       aborting and doing the yield right at the end.
2757 
2758     From the above it follows that the do_marking_step() method should
2759     be called in a loop (or, otherwise, regularly) until it completes.
2760 
2761     If a marking step completes without its has_aborted() flag being
2762     true, it means it has completed the current marking phase (and
2763     also all other marking tasks have done so and have all synced up).
2764 
2765     A method called regular_clock_call() is invoked "regularly" (in
2766     sub ms intervals) throughout marking. It is this clock method that
2767     checks all the abort conditions which were mentioned above and
2768     decides when the task should abort. A work-based scheme is used to
2769     trigger this clock method: when the number of object words the
2770     marking phase has scanned or the number of references the marking
2771     phase has visited reach a given limit. Additional invocations to
2772     the method clock have been planted in a few other strategic places
2773     too. The initial reason for the clock method was to avoid calling
2774     cpu time gathering too regularly, as it is quite expensive. So,
2775     once it was in place, it was natural to piggy-back all the other
2776     conditions on it too and not constantly check them throughout the code.
2777 
2778     If do_termination is true then do_marking_step will enter its
2779     termination protocol.
2780 
2781     The value of is_serial must be true when do_marking_step is being
2782     called serially (i.e. by the VMThread) and do_marking_step should
2783     skip any synchronization in the termination and overflow code.
2784     Examples include the serial remark code and the serial reference
2785     processing closures.
2786 
2787     The value of is_serial must be false when do_marking_step is
2788     being called by any of the worker threads.
2789     Examples include the concurrent marking code (CMMarkingTask),
2790     the MT remark code, and the MT reference processing closures.
2791 
2792  *****************************************************************************/
2793 
// Perform one (time-bounded) step of marking work for this task. See the
// extensive comment above for the overall design of the marking framework,
// the abort conditions, and the meaning of do_termination and is_serial.
void G1CMTask::do_marking_step(double time_target_ms,
                               bool do_termination,
                               bool is_serial) {
  assert(time_target_ms >= 1.0, "minimum granularity is 1ms");

  _start_cpu_time_ns = os::current_thread_cpu_time();

  // If do_stealing is true then do_marking_step will attempt to
  // steal work from the other G1CMTasks. It only makes sense to
  // enable stealing when the termination protocol is enabled
  // and do_marking_step() is not being called serially.
  bool do_stealing = do_termination && !is_serial;

  // Shorten the time target by the predicted overshoot from past steps.
  G1Predictions const& predictor = _g1h->policy()->predictor();
  double diff_prediction_ms = predictor.predict_zero_bounded(&_marking_step_diff_ms);
  _time_target_ms = time_target_ms - diff_prediction_ms;

  // set up the variables that are used in the work-based scheme to
  // call the regular clock method
  _words_scanned = 0;
  _refs_reached  = 0;
  recalculate_limits();

  // clear all flags
  clear_has_aborted();
  _has_timed_out = false;
  _draining_satb_buffers = false;

  ++_calls;

  // Set up the bitmap and oop closures. Anything that uses them is
  // eventually called from this method, so it is OK to allocate these
  // statically.
  G1CMBitMapClosure bitmap_closure(this, _cm);
  G1CMOopClosure cm_oop_closure(_g1h, this);
  set_cm_oop_closure(&cm_oop_closure);

  if (_cm->has_overflown()) {
    // This can happen if the mark stack overflows during a GC pause
    // and this task, after a yield point, restarts. We have to abort
    // as we need to get into the overflow protocol which happens
    // right at the end of this task.
    set_has_aborted();
  }

  // First drain any available SATB buffers. After this, we will not
  // look at SATB buffers before the next invocation of this method.
  // If enough completed SATB buffers are queued up, the regular clock
  // will abort this task so that it restarts.
  drain_satb_buffers();
  // ...then partially drain the local queue and the global stack
  drain_local_queue(true);
  drain_global_stack(true);

  // Main loop: scan the current region, partially drain the queues, and
  // claim a new region, until we abort or run out of regions.
  do {
    process_current_region(bitmap_closure);
    // At this point we have either completed iterating over the
    // region we were holding on to, or we have aborted.

    // We then partially drain the local queue and the global stack.
    drain_local_queue(true);
    drain_global_stack(true);

    claim_new_region();

    assert(has_aborted() || _curr_region != nullptr || _cm->out_of_regions(),
           "at this point we should be out of regions");
  } while ( _curr_region != nullptr && !has_aborted());

  // We cannot check whether the global stack is empty, since other
  // tasks might be pushing objects to it concurrently.
  assert(has_aborted() || _cm->out_of_regions(),
         "at this point we should be out of regions");
  // Try to reduce the number of available SATB buffers so that
  // remark has less work to do.
  drain_satb_buffers();

  // Since we've done everything else, we can now totally drain the
  // local queue and global stack.
  drain_local_queue(false);
  drain_global_stack(false);

  // Attempt at work stealing from other task's queues.
  if (do_stealing && !has_aborted()) {
    // We have not aborted. This means that we have finished all that
    // we could. Let's try to do some stealing...
    attempt_stealing();
  }

  // We still haven't aborted. Now, let's try to get into the
  // termination protocol.
  if (do_termination && !has_aborted()) {
    attempt_termination(is_serial);
  }

  // Mainly for debugging purposes to make sure that a pointer to the
  // closure which was statically allocated in this frame doesn't
  // escape it by accident.
  set_cm_oop_closure(nullptr);
  jlong end_cpu_time_ns = os::current_thread_cpu_time();
  double elapsed_time_ms = (double)(end_cpu_time_ns - _start_cpu_time_ns) / NANOSECS_PER_MILLISEC;
  // Update the step history.
  _step_times_ms.add(elapsed_time_ms);

  if (has_aborted()) {
    // The task was aborted for some reason.
    handle_abort(is_serial, elapsed_time_ms);
  }
}
2903 
// Construct a marking task for the given worker, wiring it up to the shared
// concurrent mark state, its task queue, and the region mark statistics.
G1CMTask::G1CMTask(uint worker_id,
                   G1ConcurrentMark* cm,
                   G1CMTaskQueue* task_queue,
                   G1RegionMarkStats* mark_stats) :
  _worker_id(worker_id),
  _g1h(G1CollectedHeap::heap()),
  _cm(cm),
  _mark_bitmap(nullptr),
  _task_queue(task_queue),
  _partial_array_splitter(_cm->partial_array_state_manager(), _cm->max_num_tasks(), ObjArrayMarkingStride),
  _mark_stats_cache(mark_stats, G1RegionMarkStatsCache::RegionMarkStatsCacheSize),
  _calls(0),
  _time_target_ms(0.0),
  _start_cpu_time_ns(0),
  _cm_oop_closure(nullptr),
  _curr_region(nullptr),
  _finger(nullptr),
  _region_limit(nullptr),
  _words_scanned(0),
  _words_scanned_limit(0),
  _real_words_scanned_limit(0),
  _refs_reached(0),
  _refs_reached_limit(0),
  _real_refs_reached_limit(0),
  _has_aborted(false),
  _has_timed_out(false),
  _draining_satb_buffers(false),
  _step_times_ms(),
  _elapsed_time_ms(0.0),
  _termination_time_ms(0.0),
  _marking_step_diff_ms()
{
  guarantee(task_queue != nullptr, "invariant");

  // Seed the step-time difference sequence with an initial sample so the
  // predictor has a value to work with on the very first marking step.
  _marking_step_diff_ms.add(0.5);
}
2940 
// These are formatting macros that are used below to ensure
// consistent formatting. The *_H_* versions are used to format the
// header for a particular value and they should be kept consistent
// with the corresponding macro. Also note that most of the macros add
// the necessary white space (as a prefix) which makes them a bit
// easier to compose.

// All the output lines are prefixed with this string to be able to
// identify them easily in a large log file.
#define G1PPRL_LINE_PREFIX            "###"

#define G1PPRL_ADDR_BASE_FORMAT    " " PTR_FORMAT "-" PTR_FORMAT
// The header width for the address range differs between 64-bit and 32-bit
// builds because pointers print at different widths.
#ifdef _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
#else // _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
#endif // _LP64

// For per-region info
#define G1PPRL_TYPE_FORMAT            "   %-4s"
#define G1PPRL_TYPE_H_FORMAT          "   %4s"
#define G1PPRL_STATE_FORMAT           "   %-5s"
#define G1PPRL_STATE_H_FORMAT         "   %5s"
#define G1PPRL_BYTE_FORMAT            "  %9zu"
#define G1PPRL_BYTE_H_FORMAT          "  %9s"
#define G1PPRL_DOUBLE_FORMAT          "%14.1f"
#define G1PPRL_GCEFF_H_FORMAT         "  %14s"
#define G1PPRL_GID_H_FORMAT           "  %9s"
#define G1PPRL_GID_FORMAT             "  " UINT32_FORMAT_W(9)
#define G1PPRL_LEN_FORMAT             "  " UINT32_FORMAT_W(14)
#define G1PPRL_LEN_H_FORMAT           "  %14s"
#define G1PPRL_GID_GCEFF_FORMAT       "  %14.1f"
#define G1PPRL_GID_LIVENESS_FORMAT    "  %9.2f"

// For summary info
#define G1PPRL_SUM_ADDR_FORMAT(tag)    "  " tag ":" G1PPRL_ADDR_BASE_FORMAT
#define G1PPRL_SUM_BYTE_FORMAT(tag)    "  " tag ": %zu"
#define G1PPRL_SUM_MB_FORMAT(tag)      "  " tag ": %1.2f MB"
#define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"
2980 
// Construct the closure and print the table header for the per-region
// liveness listing. All output is gated on trace-level gc+liveness logging;
// if that is disabled, nothing is printed here or in do_heap_region().
G1PrintRegionLivenessInfoClosure::G1PrintRegionLivenessInfoClosure(const char* phase_name) :
  _total_used_bytes(0),
  _total_capacity_bytes(0),
  _total_live_bytes(0),
  _total_remset_bytes(0),
  _total_code_roots_bytes(0)
{
  if (!log_is_enabled(Trace, gc, liveness)) {
    return;
  }

  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  MemRegion reserved = g1h->reserved();
  double now = os::elapsedTime();

  // Print the header of the output.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP"
                          G1PPRL_SUM_ADDR_FORMAT("reserved")
                          G1PPRL_SUM_BYTE_FORMAT("region-size"),
                          p2i(reserved.start()), p2i(reserved.end()),
                          G1HeapRegion::GrainBytes);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
  // Column titles...
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          G1PPRL_TYPE_H_FORMAT
                          G1PPRL_ADDR_BASE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_STATE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_GID_H_FORMAT,
                          "type", "address-range",
                          "used", "live",
                          "state", "code-roots",
                          "group-id");
  // ...and their units on a second header line.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          G1PPRL_TYPE_H_FORMAT
                          G1PPRL_ADDR_BASE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_STATE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_GID_H_FORMAT,
                          "", "",
                          "(bytes)", "(bytes)",
                          "", "(bytes)", "");
}
3028 
// Accumulate this region's sizes into the running totals and print one
// per-region line. Always returns false so heap region iteration continues
// over all regions.
bool G1PrintRegionLivenessInfoClosure::do_heap_region(G1HeapRegion* r) {
  if (!log_is_enabled(Trace, gc, liveness)) {
    return false;
  }

  const char* type       = r->get_type_str();
  HeapWord* bottom       = r->bottom();
  HeapWord* end          = r->end();
  size_t capacity_bytes  = r->capacity();
  size_t used_bytes      = r->used();
  size_t live_bytes      = r->live_bytes();
  size_t remset_bytes    = r->rem_set()->mem_size();
  size_t code_roots_bytes = r->rem_set()->code_roots_mem_size();
  const char* remset_type = r->rem_set()->get_short_state_str();
  // Regions not assigned to a collection set candidate group report the
  // NoRemSetId sentinel in the group-id column.
  uint cset_group_id     = r->rem_set()->has_cset_group()
                         ? r->rem_set()->cset_group_id()
                         : G1CSetCandidateGroup::NoRemSetId;

  _total_used_bytes      += used_bytes;
  _total_capacity_bytes  += capacity_bytes;
  _total_live_bytes      += live_bytes;
  _total_remset_bytes    += remset_bytes;
  _total_code_roots_bytes += code_roots_bytes;

  // Print a line for this particular region.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                        G1PPRL_TYPE_FORMAT
                        G1PPRL_ADDR_BASE_FORMAT
                        G1PPRL_BYTE_FORMAT
                        G1PPRL_BYTE_FORMAT
                        G1PPRL_STATE_FORMAT
                        G1PPRL_BYTE_FORMAT
                        G1PPRL_GID_FORMAT,
                        type, p2i(bottom), p2i(end),
                        used_bytes, live_bytes,
                        remset_type, code_roots_bytes,
                        cset_group_id);

  return false;
}
3069 
// Print the footer: collection set candidate group info and the summary
// totals accumulated during region iteration. Like the rest of this closure,
// output is gated on trace-level gc+liveness logging.
G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
  if (!log_is_enabled(Trace, gc, liveness)) {
    return;
  }

  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  // Account for memory held by the card set free list pool as well.
  _total_remset_bytes += g1h->card_set_freelist_pool()->mem_size();
  // add static memory usages to remembered set sizes
  _total_remset_bytes += G1HeapRegionRemSet::static_mem_size();

  log_cset_candidate_groups();

  // Print the footer of the output.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                         " SUMMARY"
                         G1PPRL_SUM_MB_FORMAT("capacity")
                         G1PPRL_SUM_MB_PERC_FORMAT("used")
                         G1PPRL_SUM_MB_PERC_FORMAT("live")
                         G1PPRL_SUM_MB_FORMAT("remset")
                         G1PPRL_SUM_MB_FORMAT("code-roots"),
                         bytes_to_mb(_total_capacity_bytes),
                         bytes_to_mb(_total_used_bytes),
                         percent_of(_total_used_bytes, _total_capacity_bytes),
                         bytes_to_mb(_total_live_bytes),
                         percent_of(_total_live_bytes, _total_capacity_bytes),
                         bytes_to_mb(_total_remset_bytes),
                         bytes_to_mb(_total_code_roots_bytes));
}
3099 
3100 void G1PrintRegionLivenessInfoClosure::log_cset_candidate_group_add_total(G1CSetCandidateGroup* group, const char* type) {
3101   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3102                           G1PPRL_GID_FORMAT
3103                           G1PPRL_LEN_FORMAT
3104                           G1PPRL_GID_GCEFF_FORMAT
3105                           G1PPRL_GID_LIVENESS_FORMAT
3106                           G1PPRL_BYTE_FORMAT
3107                           G1PPRL_TYPE_H_FORMAT,
3108                           group->group_id(),
3109                           group->length(),
3110                           group->length() > 0 ? group->gc_efficiency() : 0.0,
3111                           group->length() > 0 ? group->liveness_percent() : 0.0,
3112                           group->card_set()->mem_size(),
3113                           type);
3114   _total_remset_bytes += group->card_set()->mem_size();
3115 }
3116 
3117 void G1PrintRegionLivenessInfoClosure::log_cset_candidate_grouplist(G1CSetCandidateGroupList& gl, const char* type) {
3118   for (G1CSetCandidateGroup* group : gl) {
3119     log_cset_candidate_group_add_total(group, type);
3120   }
3121 }
3122 
3123 void G1PrintRegionLivenessInfoClosure::log_cset_candidate_groups() {
3124   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3125   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" Collection Set Candidate Groups");
3126   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX " Types: Y=Young, M=From Marking Regions, R=Retained Regions");
3127   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3128                           G1PPRL_GID_H_FORMAT
3129                           G1PPRL_LEN_H_FORMAT
3130                           G1PPRL_GCEFF_H_FORMAT
3131                           G1PPRL_BYTE_H_FORMAT
3132                           G1PPRL_BYTE_H_FORMAT
3133                           G1PPRL_TYPE_H_FORMAT,
3134                           "groud-id", "num-regions",
3135                           "gc-eff", "liveness",
3136                           "remset", "type");
3137 
3138   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3139                           G1PPRL_GID_H_FORMAT
3140                           G1PPRL_LEN_H_FORMAT
3141                           G1PPRL_GCEFF_H_FORMAT
3142                           G1PPRL_BYTE_H_FORMAT
3143                           G1PPRL_BYTE_H_FORMAT
3144                           G1PPRL_TYPE_H_FORMAT,
3145                           "", "",
3146                           "(bytes/ms)", "%",
3147                           "(bytes)", "");
3148 
3149   G1CollectedHeap* g1h = G1CollectedHeap::heap();
3150 
3151   log_cset_candidate_group_add_total(g1h->young_regions_cset_group(), "Y");
3152 
3153   G1CollectionSetCandidates* candidates = g1h->policy()->candidates();
3154   log_cset_candidate_grouplist(candidates->from_marking_groups(), "M");
3155   log_cset_candidate_grouplist(candidates->retained_groups(), "R");
3156 }