1 /*
   2  * Copyright (c) 2001, 2026, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "classfile/classLoaderData.hpp"
  26 #include "classfile/classLoaderDataGraph.hpp"
  27 #include "cppstdlib/new.hpp"
  28 #include "gc/g1/g1BarrierSet.hpp"
  29 #include "gc/g1/g1BatchedTask.hpp"
  30 #include "gc/g1/g1CardSetMemory.hpp"
  31 #include "gc/g1/g1CardTableClaimTable.inline.hpp"
  32 #include "gc/g1/g1CollectedHeap.inline.hpp"
  33 #include "gc/g1/g1CollectorState.inline.hpp"
  34 #include "gc/g1/g1ConcurrentMark.inline.hpp"
  35 #include "gc/g1/g1ConcurrentMarkRemarkTasks.hpp"
  36 #include "gc/g1/g1ConcurrentMarkThread.inline.hpp"
  37 #include "gc/g1/g1ConcurrentRebuildAndScrub.hpp"
  38 #include "gc/g1/g1ConcurrentRefine.hpp"
  39 #include "gc/g1/g1HeapRegion.inline.hpp"
  40 #include "gc/g1/g1HeapRegionManager.hpp"
  41 #include "gc/g1/g1HeapRegionPrinter.hpp"
  42 #include "gc/g1/g1HeapRegionRemSet.inline.hpp"
  43 #include "gc/g1/g1HeapRegionSet.inline.hpp"
  44 #include "gc/g1/g1HeapVerifier.hpp"
  45 #include "gc/g1/g1OopClosures.inline.hpp"
  46 #include "gc/g1/g1Policy.hpp"
  47 #include "gc/g1/g1RegionMarkStatsCache.inline.hpp"
  48 #include "gc/g1/g1ThreadLocalData.hpp"
  49 #include "gc/g1/g1Trace.hpp"
  50 #include "gc/shared/gcId.hpp"
  51 #include "gc/shared/gcTimer.hpp"
  52 #include "gc/shared/gcTraceTime.inline.hpp"
  53 #include "gc/shared/gcVMOperations.hpp"
  54 #include "gc/shared/partialArraySplitter.inline.hpp"
  55 #include "gc/shared/partialArrayState.hpp"
  56 #include "gc/shared/partialArrayTaskStats.hpp"
  57 #include "gc/shared/referencePolicy.hpp"
  58 #include "gc/shared/suspendibleThreadSet.hpp"
  59 #include "gc/shared/taskqueue.inline.hpp"
  60 #include "gc/shared/taskTerminator.hpp"
  61 #include "gc/shared/weakProcessor.inline.hpp"
  62 #include "gc/shared/workerPolicy.hpp"
  63 #include "jvm.h"
  64 #include "logging/log.hpp"
  65 #include "memory/allocation.hpp"
  66 #include "memory/iterator.hpp"
  67 #include "memory/metaspaceUtils.hpp"
  68 #include "memory/resourceArea.hpp"
  69 #include "memory/universe.hpp"
  70 #include "nmt/memTracker.hpp"
  71 #include "oops/access.inline.hpp"
  72 #include "oops/oop.inline.hpp"
  73 #include "oops/oopCast.inline.hpp"
  74 #include "runtime/globals_extension.hpp"
  75 #include "runtime/handles.inline.hpp"
  76 #include "runtime/java.hpp"
  77 #include "runtime/orderAccess.hpp"
  78 #include "runtime/os.hpp"
  79 #include "runtime/prefetch.inline.hpp"
  80 #include "runtime/threads.hpp"
  81 #include "utilities/align.hpp"
  82 #include "utilities/checkedCast.hpp"
  83 #include "utilities/formatBuffer.hpp"
  84 #include "utilities/growableArray.hpp"
  85 #include "utilities/powerOfTwo.hpp"
  86 
// Default-construct in an uninitialized state; initialize() must be called
// before the closure is used.
G1CMIsAliveClosure::G1CMIsAliveClosure() : _cm(nullptr) { }

G1CMIsAliveClosure::G1CMIsAliveClosure(G1ConcurrentMark* cm) : _cm(cm) {
  assert(cm != nullptr, "must be");
}

// Late initialization for closures created with the default constructor.
// May only be called once.
void G1CMIsAliveClosure::initialize(G1ConcurrentMark* cm) {
  assert(cm != nullptr, "must be");
  assert(_cm == nullptr, "double initialize");
  _cm = cm;
}
  98 
// Process one marked address found during a task's bitmap scan: advance the
// task's local finger, process the object and partially drain the queues.
// Returns false to abort the bitmap iteration once the task has aborted.
bool G1CMBitMapClosure::do_addr(HeapWord* const addr) {
  assert(addr < _cm->finger(), "invariant");
  assert(addr >= _task->finger(), "invariant");

  // We move that task's local finger along.
  _task->move_finger_to(addr);

  _task->process_entry(G1TaskQueueEntry(cast_to_oop(addr)), false /* stolen */);
  // we only partially drain the local queue and global stack
  _task->drain_local_queue(true);
  _task->drain_global_stack(true);

  // if the has_aborted flag has been raised, we need to bail out of
  // the iteration
  return !_task->has_aborted();
}
 115 
 116 G1CMMarkStack::G1CMMarkStack() :
 117   _chunk_allocator() {
 118   set_empty();
 119 }
 120 
 121 size_t G1CMMarkStack::capacity_alignment() {
 122   return (size_t)lcm(os::vm_allocation_granularity(), sizeof(TaskQueueEntryChunk)) / sizeof(G1TaskQueueEntry);
 123 }
 124 
// Set up the global mark stack from the MarkStackSize/MarkStackSizeMax flags.
// Capacities are rounded to chunk/allocation granularity, and the (possibly
// adjusted) values are written back to the flags ergonomically.
bool G1CMMarkStack::initialize() {
  guarantee(_chunk_allocator.capacity() == 0, "G1CMMarkStack already initialized.");

  size_t initial_capacity = MarkStackSize;
  size_t max_capacity = MarkStackSizeMax;

  size_t const TaskEntryChunkSizeInVoidStar = sizeof(TaskQueueEntryChunk) / sizeof(G1TaskQueueEntry);

  size_t max_num_chunks = align_up(max_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
  size_t initial_num_chunks = align_up(initial_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;

  // The chunk allocator requires a power-of-2 initial capacity (checked in
  // ChunkAllocator::initialize()); make sure the maximum is at least as large.
  initial_num_chunks = round_up_power_of_2(initial_num_chunks);
  max_num_chunks = MAX2(initial_num_chunks, max_num_chunks);

  // Clamp the capacities (in entries) to an int-representable range.
  size_t limit = (INT_MAX - 1);
  max_capacity = MIN2((max_num_chunks * TaskEntryChunkSizeInVoidStar), limit);
  initial_capacity = MIN2((initial_num_chunks * TaskEntryChunkSizeInVoidStar), limit);

  // Publish the adjusted sizes back to the flags.
  FLAG_SET_ERGO(MarkStackSizeMax, max_capacity);
  FLAG_SET_ERGO(MarkStackSize, initial_capacity);

  log_trace(gc)("MarkStackSize: %uk  MarkStackSizeMax: %uk", (uint)(MarkStackSize / K), (uint)(MarkStackSizeMax / K));

  log_debug(gc)("Initialize mark stack with %zu chunks, maximum %zu",
                initial_num_chunks, max_capacity);

  return _chunk_allocator.initialize(initial_num_chunks, max_num_chunks);
}
 153 
// Hand out a fresh chunk from the pre-reserved backing memory, growing the
// backing storage (under G1MarkStackChunkList_lock) if allowed and needed.
// Returns nullptr when at maximum capacity or when growing is disallowed.
G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::ChunkAllocator::allocate_new_chunk() {
  // Fast-path check to avoid bumping _size when already full.
  if (_size.load_relaxed() >= _max_capacity) {
    return nullptr;
  }

  // Claim the next chunk index; several threads may race here, so the bound
  // is re-checked after the increment.
  size_t cur_idx = _size.fetch_then_add(1u);

  if (cur_idx >= _max_capacity) {
    return nullptr;
  }

  size_t bucket = get_bucket(cur_idx);
  if (_buckets[bucket].load_acquire() == nullptr) {
    if (!_should_grow) {
      // Prefer to restart the CM.
      return nullptr;
    }

    // Double-checked locking: re-test the bucket under the lock since another
    // thread may have expanded concurrently.
    MutexLocker x(G1MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
    if (_buckets[bucket].load_acquire() == nullptr) {
      size_t desired_capacity = bucket_size(bucket) * 2;
      if (!try_expand_to(desired_capacity)) {
        return nullptr;
      }
    }
  }

  // Placement-new the chunk into its slot within the bucket.
  size_t bucket_idx = get_bucket_index(cur_idx);
  TaskQueueEntryChunk* result = ::new (&_buckets[bucket].load_relaxed()[bucket_idx]) TaskQueueEntryChunk;
  result->next = nullptr;
  return result;
}
 186 
// Zero-initialize all state; the real setup happens in initialize().
G1CMMarkStack::ChunkAllocator::ChunkAllocator() :
  _min_capacity(0),
  _max_capacity(0),
  _capacity(0),
  _num_buckets(0),
  _should_grow(false),
  _buckets(nullptr),
  _size(0)
{ }
 196 
// Allocate the bucket pointer array sized for up to max_capacity chunks and
// reserve the first bucket. initial_capacity must be a power of two.
// Returns false if the initial reservation fails.
bool G1CMMarkStack::ChunkAllocator::initialize(size_t initial_capacity, size_t max_capacity) {
  guarantee(is_power_of_2(initial_capacity), "Invalid initial_capacity");

  _min_capacity = initial_capacity;
  _max_capacity = max_capacity;
  _num_buckets  = get_bucket(_max_capacity) + 1;

  _buckets = NEW_C_HEAP_ARRAY(Atomic<TaskQueueEntryChunk*>, _num_buckets, mtGC);

  for (size_t i = 0; i < _num_buckets; i++) {
    _buckets[i].store_relaxed(nullptr);
  }

  // Reserve backing memory for the first bucket only; further buckets are
  // mapped on demand via try_expand_to().
  size_t new_capacity = bucket_size(0);

  if (!reserve(new_capacity)) {
    log_warning(gc)("Failed to reserve memory for new overflow mark stack with %zu chunks and size %zuB.", new_capacity, new_capacity * sizeof(TaskQueueEntryChunk));
    return false;
  }
  return true;
}
 218 
 219 bool G1CMMarkStack::ChunkAllocator::try_expand_to(size_t desired_capacity) {
 220   if (_capacity == _max_capacity) {
 221     log_debug(gc)("Can not expand overflow mark stack further, already at maximum capacity of %zu chunks.", _capacity);
 222     return false;
 223   }
 224 
 225   size_t old_capacity = _capacity;
 226   desired_capacity = MIN2(desired_capacity, _max_capacity);
 227 
 228   if (reserve(desired_capacity)) {
 229     log_debug(gc)("Expanded the mark stack capacity from %zu to %zu chunks",
 230                   old_capacity, desired_capacity);
 231     return true;
 232   }
 233   return false;
 234 }
 235 
 236 bool G1CMMarkStack::ChunkAllocator::try_expand() {
 237   size_t new_capacity = _capacity * 2;
 238   return try_expand_to(new_capacity);
 239 }
 240 
 241 G1CMMarkStack::ChunkAllocator::~ChunkAllocator() {
 242   if (_buckets == nullptr) {
 243     return;
 244   }
 245 
 246   for (size_t i = 0; i < _num_buckets; i++) {
 247     if (_buckets[i].load_relaxed() != nullptr) {
 248       MmapArrayAllocator<TaskQueueEntryChunk>::free(_buckets[i].load_relaxed(),  bucket_size(i));
 249       _buckets[i].store_relaxed(nullptr);
 250     }
 251   }
 252 
 253   FREE_C_HEAP_ARRAY(TaskQueueEntryChunk*, _buckets);
 254 }
 255 
// Map backing memory for all buckets needed to reach new_capacity chunks.
// Already-mapped buckets are skipped; _capacity tracks the number of mapped
// chunks. Returns false if any mapping fails (buckets mapped so far are kept).
bool G1CMMarkStack::ChunkAllocator::reserve(size_t new_capacity) {
  assert(new_capacity <= _max_capacity, "Cannot expand overflow mark stack beyond the max_capacity of %zu chunks.", _max_capacity);

  size_t highest_bucket = get_bucket(new_capacity - 1);
  size_t i = get_bucket(_capacity);

  // Allocate all buckets associated with indexes between the current capacity (_capacity)
  // and the new capacity (new_capacity). This step ensures that there are no gaps in the
  // array and that the capacity accurately reflects the reserved memory.
  for (; i <= highest_bucket; i++) {
    if (_buckets[i].load_acquire() != nullptr) {
      continue; // Skip over already allocated buckets.
    }

    size_t bucket_capacity = bucket_size(i);

    // Trim bucket size so that we do not exceed the _max_capacity.
    bucket_capacity = (_capacity + bucket_capacity) <= _max_capacity ?
                      bucket_capacity :
                      _max_capacity - _capacity;


    TaskQueueEntryChunk* bucket_base = MmapArrayAllocator<TaskQueueEntryChunk>::allocate_or_null(bucket_capacity, mtGC);

    if (bucket_base == nullptr) {
      log_warning(gc)("Failed to reserve memory for increasing the overflow mark stack capacity with %zu chunks and size %zuB.",
                      bucket_capacity, bucket_capacity * sizeof(TaskQueueEntryChunk));
      return false;
    }
    _capacity += bucket_capacity;
    // Publish the bucket with release semantics, pairing with the
    // load_acquire in allocate_new_chunk().
    _buckets[i].release_store(bucket_base);
  }
  return true;
}
 290 
// Attempt to grow the mark stack's backing storage; failure is tolerated.
void G1CMMarkStack::expand() {
  _chunk_allocator.try_expand();
}
 294 
 295 void G1CMMarkStack::add_chunk_to_list(Atomic<TaskQueueEntryChunk*>* list, TaskQueueEntryChunk* elem) {
 296   elem->next = list->load_relaxed();
 297   list->store_relaxed(elem);
 298 }
 299 
 300 void G1CMMarkStack::add_chunk_to_chunk_list(TaskQueueEntryChunk* elem) {
 301   MutexLocker x(G1MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
 302   add_chunk_to_list(&_chunk_list, elem);
 303   _chunks_in_chunk_list++;
 304 }
 305 
 306 void G1CMMarkStack::add_chunk_to_free_list(TaskQueueEntryChunk* elem) {
 307   MutexLocker x(G1MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
 308   add_chunk_to_list(&_free_list, elem);
 309 }
 310 
 311 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_list(Atomic<TaskQueueEntryChunk*>* list) {
 312   TaskQueueEntryChunk* result = list->load_relaxed();
 313   if (result != nullptr) {
 314     list->store_relaxed(list->load_relaxed()->next);
 315   }
 316   return result;
 317 }
 318 
 319 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_chunk_list() {
 320   MutexLocker x(G1MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
 321   TaskQueueEntryChunk* result = remove_chunk_from_list(&_chunk_list);
 322   if (result != nullptr) {
 323     _chunks_in_chunk_list--;
 324   }
 325   return result;
 326 }
 327 
 328 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_free_list() {
 329   MutexLocker x(G1MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
 330   return remove_chunk_from_list(&_free_list);
 331 }
 332 
 333 bool G1CMMarkStack::par_push_chunk(G1TaskQueueEntry* ptr_arr) {
 334   // Get a new chunk.
 335   TaskQueueEntryChunk* new_chunk = remove_chunk_from_free_list();
 336 
 337   if (new_chunk == nullptr) {
 338     // Did not get a chunk from the free list. Allocate from backing memory.
 339     new_chunk = _chunk_allocator.allocate_new_chunk();
 340 
 341     if (new_chunk == nullptr) {
 342       return false;
 343     }
 344   }
 345 
 346   Copy::conjoint_memory_atomic(ptr_arr, new_chunk->data, EntriesPerChunk * sizeof(G1TaskQueueEntry));
 347 
 348   add_chunk_to_chunk_list(new_chunk);
 349 
 350   return true;
 351 }
 352 
 353 bool G1CMMarkStack::par_pop_chunk(G1TaskQueueEntry* ptr_arr) {
 354   TaskQueueEntryChunk* cur = remove_chunk_from_chunk_list();
 355 
 356   if (cur == nullptr) {
 357     return false;
 358   }
 359 
 360   Copy::conjoint_memory_atomic(cur->data, ptr_arr, EntriesPerChunk * sizeof(G1TaskQueueEntry));
 361 
 362   add_chunk_to_free_list(cur);
 363   return true;
 364 }
 365 
 366 void G1CMMarkStack::set_empty() {
 367   _chunks_in_chunk_list = 0;
 368   _chunk_list.store_relaxed(nullptr);
 369   _free_list.store_relaxed(nullptr);
 370   _chunk_allocator.reset();
 371 }
 372 
// Pre-size the root region array for the maximum possible region count.
G1CMRootMemRegions::G1CMRootMemRegions(uint const max_regions) :
    _root_regions(MemRegion::create_array(max_regions, mtGC)),
    _max_regions(max_regions),
    _num_regions(0),
    _num_claimed_regions(0) { }

G1CMRootMemRegions::~G1CMRootMemRegions() {
  MemRegion::destroy_array(_root_regions, _max_regions);
}
 382 
// Forget all previously added root regions. Only valid at a safepoint during
// a concurrent start GC.
void G1CMRootMemRegions::reset() {
  assert_at_safepoint();
  assert(G1CollectedHeap::heap()->collector_state()->is_in_concurrent_start_gc(), "must be");

  _num_regions.store_relaxed(0);
  _num_claimed_regions.store_relaxed(0);
}

// Record [start, end) as a root region to scan. Slots are claimed with an
// atomic fetch_then_add, so multiple threads may add concurrently.
void G1CMRootMemRegions::add(HeapWord* start, HeapWord* end) {
  assert_at_safepoint();
  uint idx = _num_regions.fetch_then_add(1u);
  assert(idx < _max_regions, "Trying to add more root MemRegions than there is space %u", _max_regions);
  assert(start != nullptr && end != nullptr && start <= end, "Start (" PTR_FORMAT ") should be less or equal to "
         "end (" PTR_FORMAT ")", p2i(start), p2i(end));
  _root_regions[idx].set_start(start);
  _root_regions[idx].set_end(end);
}
 400 
// Claim the next unclaimed root region for scanning, or nullptr when all
// regions have been claimed. Claiming uses an atomic fetch_then_add, so
// this is safe to call from multiple threads.
const MemRegion* G1CMRootMemRegions::claim_next() {
  uint local_num_regions = num_regions();
  if (num_claimed_regions() >= local_num_regions) {
    return nullptr;
  }

  // A racing claimer may have taken the last region between the check above
  // and the increment below, hence the bounds re-check on the claimed index.
  uint claimed_index = _num_claimed_regions.fetch_then_add(1u);
  if (claimed_index < local_num_regions) {
    return &_root_regions[claimed_index];
  }
  return nullptr;
}

// True when every added root region has been claimed.
bool G1CMRootMemRegions::work_completed() const {
  return num_remaining_regions() == 0;
}

// Number of added-but-not-yet-claimed regions. The claimed counter can
// transiently exceed the total due to racing claims, hence the clamp to 0.
uint G1CMRootMemRegions::num_remaining_regions() const {
  uint total = num_regions();
  uint claimed = num_claimed_regions();
  return (total > claimed) ? total - claimed : 0;
}
 423 
 424 bool G1CMRootMemRegions::contains(const MemRegion mr) const {
 425   uint local_num_root_regions = num_regions();
 426   for (uint i = 0; i < local_num_root_regions; i++) {
 427     if (_root_regions[i].equals(mr)) {
 428       return true;
 429     }
 430   }
 431   return false;
 432 }
 433 
// Construct the global concurrent marking state. The per-region arrays are
// allocated raw here; they are default-constructed later in
// fully_initialize(), which also creates the marking thread and tasks.
G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h,
                                   G1RegionToSpaceMapper* bitmap_storage) :
  _cm_thread(nullptr),
  _g1h(g1h),

  _mark_bitmap(),

  _heap(_g1h->reserved()),

  _root_regions(_g1h->max_num_regions()),
  _root_region_scan_aborted(false),

  _global_mark_stack(),

  _finger(nullptr), // _finger set in set_non_marking_state

  _worker_id_offset(G1ConcRefinementThreads), // The refinement control thread does not refine cards, so it's just the worker threads.
  _max_num_tasks(MAX2(ConcGCThreads, ParallelGCThreads)),
  _num_active_tasks(0), // _num_active_tasks set in set_non_marking_state()
  _tasks(nullptr), // _tasks set inside late_init()
  _task_queues(new G1CMTaskQueueSet(_max_num_tasks)),
  _terminator(_max_num_tasks, _task_queues),
  _partial_array_state_manager(new PartialArrayStateManager(_max_num_tasks)),

  _first_overflow_barrier_sync(),
  _second_overflow_barrier_sync(),

  _completed_mark_cycles(0),
  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _gc_timer_cm(new ConcurrentGCTimer()),
  _gc_tracer_cm(new G1OldTracer()),

  // _verbose_level set below

  _remark_times(),
  _remark_mark_times(),
  _remark_weak_ref_times(),
  _cleanup_times(),

  _concurrent_workers(nullptr),
  _num_concurrent_workers(0),
  _max_concurrent_workers(0),

  _region_mark_stats(NEW_C_HEAP_ARRAY(G1RegionMarkStats, _g1h->max_num_regions(), mtGC)),
  _top_at_mark_starts(NEW_C_HEAP_ARRAY(Atomic<HeapWord*>, _g1h->max_num_regions(), mtGC)),
  _top_at_rebuild_starts(NEW_C_HEAP_ARRAY(Atomic<HeapWord*>, _g1h->max_num_regions(), mtGC)),
  _needs_remembered_set_rebuild(false)
{
  assert(G1CGC_lock != nullptr, "CGC_lock must be initialized");

  _mark_bitmap.initialize(g1h->reserved(), bitmap_storage);
}
 489 
// Complete initialization: create the marking thread, the concurrent worker
// threads, the global mark stack, and the per-worker marking tasks.
// Idempotent; returns immediately if already fully initialized.
void G1ConcurrentMark::fully_initialize() {
  if (is_fully_initialized()) {
    return;
  }

  // Create & start ConcurrentMark thread.
  _cm_thread = new G1ConcurrentMarkThread(this);
  if (_cm_thread->osthread() == nullptr) {
    vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
  }

  log_debug(gc)("ConcGCThreads: %u offset %u", ConcGCThreads, _worker_id_offset);
  log_debug(gc)("ParallelGCThreads: %u", ParallelGCThreads);

  _max_concurrent_workers = ConcGCThreads;

  _concurrent_workers = new WorkerThreads("G1 Conc", _max_concurrent_workers);
  _concurrent_workers->initialize_workers();
  _num_concurrent_workers = _concurrent_workers->active_workers();

  if (!_global_mark_stack.initialize()) {
    vm_exit_during_initialization("Failed to allocate initial concurrent mark overflow mark stack.");
  }

  _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_num_tasks, mtGC);

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _num_active_tasks = _max_num_tasks;

  for (uint i = 0; i < _max_num_tasks; ++i) {
    G1CMTaskQueue* task_queue = new G1CMTaskQueue();
    _task_queues->register_queue(i, task_queue);

    _tasks[i] = new G1CMTask(i, this, task_queue, _region_mark_stats);
  }

  // Default-construct the raw C-heap arrays allocated in the constructor.
  uint max_num_regions = _g1h->max_num_regions();
  ::new (_region_mark_stats) G1RegionMarkStats[max_num_regions]{};
  ::new (_top_at_mark_starts) Atomic<HeapWord*>[max_num_regions]{};
  ::new (_top_at_rebuild_starts) Atomic<HeapWord*>[max_num_regions]{};

  reset_at_marking_complete();
}
 533 
 534 bool G1ConcurrentMark::is_in_concurrent_cycle() const {
 535   return is_fully_initialized() ? _cm_thread->is_in_progress() : false;
 536 }
 537 
 538 bool G1ConcurrentMark::is_in_marking() const {
 539   return is_fully_initialized() ? cm_thread()->is_in_marking() : false;
 540 }
 541 
 542 bool G1ConcurrentMark::is_in_rebuild_or_scrub() const {
 543   return cm_thread()->is_in_rebuild_or_scrub();
 544 }
 545 
 546 bool G1ConcurrentMark::is_in_reset_for_next_cycle() const {
 547   return cm_thread()->is_in_reset_for_next_cycle();
 548 }
 549 
 550 PartialArrayStateManager* G1ConcurrentMark::partial_array_state_manager() const {
 551   return _partial_array_state_manager;
 552 }
 553 
 554 G1ConcurrentMarkThread* G1ConcurrentMark::cm_thread() const {
 555   assert(is_fully_initialized(), "must be");
 556   return _cm_thread;
 557 }
 558 
// Reset all global and per-task marking state in preparation for a new
// marking cycle.
void G1ConcurrentMark::reset() {
  _has_aborted.store_relaxed(false);

  reset_marking_for_restart();

  // Reset all tasks, since different phases will use different number of active
  // threads. So, it's easiest to have all of them ready.
  for (uint i = 0; i < _max_num_tasks; ++i) {
    _tasks[i]->reset(mark_bitmap());
  }

  // Re-run default construction over the top-at-rebuild-start array and
  // clear the per-region marking statistics.
  uint max_num_regions = _g1h->max_num_regions();
  ::new (_top_at_rebuild_starts) Atomic<HeapWord*>[max_num_regions]{};
  for (uint i = 0; i < max_num_regions; i++) {
    _region_mark_stats[i].clear();
  }

  _root_region_scan_aborted.store_relaxed(false);
  _root_regions.reset();
}
 579 
 580 void G1ConcurrentMark::clear_statistics(G1HeapRegion* r) {
 581   uint region_idx = r->hrm_index();
 582   for (uint j = 0; j < _max_num_tasks; ++j) {
 583     _tasks[j]->clear_mark_stats_cache(region_idx);
 584   }
 585   _top_at_rebuild_starts[region_idx].store_relaxed(nullptr);
 586   _region_mark_stats[region_idx].clear();
 587 }
 588 
// A humongous object starting in region r was eagerly reclaimed: clear its
// mark bit and, if a mark/rebuild is in progress, discard the marking
// statistics gathered for all of its regions.
void G1ConcurrentMark::humongous_object_eagerly_reclaimed(G1HeapRegion* r) {
  assert_at_safepoint();
  assert(r->is_starts_humongous(), "Got humongous continues region here");

  // Need to clear mark bit of the humongous object. Doing this unconditionally is fine.
  mark_bitmap()->clear(r->bottom());

  if (!_g1h->collector_state()->is_in_mark_or_rebuild()) {
    return;
  }

  // Clear any statistics about the region gathered so far.
  _g1h->humongous_obj_regions_iterate(r,
                                      [&] (G1HeapRegion* r) {
                                        clear_statistics(r);
                                      });
}
 606 
// Reset global marking state so marking can restart (e.g. after a mark stack
// overflow). Attempts to expand the mark stack if it overflowed.
void G1ConcurrentMark::reset_marking_for_restart() {
  _global_mark_stack.set_empty();

  // Expand the marking stack, if we have to and if we can.
  if (has_overflown()) {
    _global_mark_stack.expand();

    uint max_num_regions = _g1h->max_num_regions();
    for (uint i = 0; i < max_num_regions; i++) {
      _region_mark_stats[i].clear_during_overflow();
    }
  }

  clear_has_overflown();
  // Restart the global finger at the bottom of the heap.
  _finger.store_relaxed(_heap.start());

  for (uint i = 0; i < _max_num_tasks; ++i) {
    _tasks[i]->reset_for_restart();
  }
}
 627 
// Set the number of tasks active in the next phase and size the terminator
// and both overflow barriers accordingly.
void G1ConcurrentMark::set_concurrency(uint active_tasks) {
  assert(active_tasks <= _max_num_tasks, "we should not have more");

  _num_active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator.reset_for_reuse(active_tasks);
  _first_overflow_barrier_sync.set_n_workers(active_tasks);
  _second_overflow_barrier_sync.set_n_workers(active_tasks);
}

// As set_concurrency(), additionally recording whether the upcoming phase
// runs concurrently or inside a safepoint.
void G1ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
  set_concurrency(active_tasks);

  _concurrent.store_relaxed(concurrent);

  if (!concurrent) {
    // At this point we should be in a STW phase, and completed marking.
    assert_at_safepoint_on_vm_thread();
    assert(out_of_regions(),
           "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
           p2i(finger()), p2i(_heap.end()));
  }
}
 652 
 653 #if TASKQUEUE_STATS
// Log task queue and partial array task statistics for all tasks, then reset
// them for the next cycle. Only compiled when TASKQUEUE_STATS is enabled.
void G1ConcurrentMark::print_and_reset_taskqueue_stats() {

  _task_queues->print_and_reset_taskqueue_stats("Concurrent Mark");

  auto get_pa_stats = [&](uint i) {
    return _tasks[i]->partial_array_task_stats();
  };

  PartialArrayTaskStats::log_set(_max_num_tasks, get_pa_stats,
                                 "Concurrent Mark Partial Array");

  for (uint i = 0; i < _max_num_tasks; ++i) {
    get_pa_stats(i)->reset();
  }
}
 669 #endif
 670 
// Return the global marking state to its non-marking defaults once a marking
// cycle has completed (or been abandoned).
void G1ConcurrentMark::reset_at_marking_complete() {
  TASKQUEUE_STATS_ONLY(print_and_reset_taskqueue_stats());
  // We set the global marking state to some default values when we're
  // not doing marking.
  reset_marking_for_restart();
  _num_active_tasks = 0;
}

G1ConcurrentMark::~G1ConcurrentMark() {
  FREE_C_HEAP_ARRAY(Atomic<HeapWord*>, _top_at_mark_starts);
  FREE_C_HEAP_ARRAY(Atomic<HeapWord*>, _top_at_rebuild_starts);
  FREE_C_HEAP_ARRAY(G1RegionMarkStats, _region_mark_stats);
  // The G1ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}
 686 
// Worker task that clears the marking bitmap region by region, in fixed-size
// chunks so that suspendible workers can yield between chunks.
class G1ClearBitMapTask : public WorkerTask {
public:
  static size_t chunk_size() { return M; }

private:
  // Heap region closure used for clearing the _mark_bitmap.
  class G1ClearBitmapHRClosure : public G1HeapRegionClosure {
    G1ConcurrentMark* _cm;
    G1CMBitMap* _bitmap;
    bool _suspendible; // If suspendible, do yield checks.

    bool suspendible() {
      return _suspendible;
    }

    bool is_clear_concurrent_undo() {
      return suspendible() && _cm->cm_thread()->is_in_undo_cycle();
    }

    // Yield if requested and report whether concurrent marking has been
    // aborted. Aborting is only possible in the suspendible case.
    bool has_aborted() {
      if (suspendible()) {
        _cm->do_yield_check();
        return _cm->has_aborted();
      }
      return false;
    }

    // Upper bound of the bitmap range that actually needs clearing for r.
    HeapWord* region_clear_limit(G1HeapRegion* r) {
      // During a Concurrent Undo Mark cycle, the per region top_at_mark_start and
      // live_words data are current wrt to the _mark_bitmap. We use this information
      // to only clear ranges of the bitmap that require clearing.
      if (is_clear_concurrent_undo()) {
        // No need to clear bitmaps for empty regions (which includes regions we
        // did not mark through).
        if (!_cm->contains_live_object(r->hrm_index())) {
          assert(_bitmap->get_next_marked_addr(r->bottom(), r->end()) == r->end(), "Should not have marked bits");
          return r->bottom();
        }
        assert(_bitmap->get_next_marked_addr(_cm->top_at_mark_start(r), r->end()) == r->end(), "Should not have marked bits above tams");
      }
      return r->end();
    }

  public:
    G1ClearBitmapHRClosure(G1ConcurrentMark* cm, bool suspendible) :
      G1HeapRegionClosure(),
      _cm(cm),
      _bitmap(cm->mark_bitmap()),
      _suspendible(suspendible)
    { }

    // Clear the region's bitmap range in chunk_size() steps, checking for
    // abort between chunks. Returning true aborts the region iteration.
    virtual bool do_heap_region(G1HeapRegion* r) {
      if (has_aborted()) {
        return true;
      }

      HeapWord* cur = r->bottom();
      HeapWord* const end = region_clear_limit(r);

      size_t const chunk_size_in_words = G1ClearBitMapTask::chunk_size() / HeapWordSize;

      while (cur < end) {

        MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
        _bitmap->clear_range(mr);

        cur += chunk_size_in_words;

        // Repeat the asserts from before the start of the closure. We will do them
        // as asserts here to minimize their overhead on the product. However, we
        // will have them as guarantees at the beginning / end of the bitmap
        // clearing to get some checking in the product.
        assert(!suspendible() || _cm->is_in_reset_for_next_cycle(), "invariant");

        // Abort iteration if necessary.
        if (has_aborted()) {
          return true;
        }
      }
      assert(cur >= end, "Must have completed iteration over the bitmap for region %u.", r->hrm_index());

      _cm->reset_top_at_mark_start(r);

      return false;
    }
  };

  G1ClearBitmapHRClosure _cl;
  G1HeapRegionClaimer _hr_claimer;
  bool _suspendible; // If the task is suspendible, workers must join the STS.

public:
  G1ClearBitMapTask(G1ConcurrentMark* cm, uint n_workers, bool suspendible) :
    WorkerTask("G1 Clear Bitmap"),
    _cl(cm, suspendible),
    _hr_claimer(n_workers),
    _suspendible(suspendible)
  { }

  void work(uint worker_id) {
    SuspendibleThreadSetJoiner sts_join(_suspendible);
    G1CollectedHeap::heap()->heap_region_par_iterate_from_worker_offset(&_cl, &_hr_claimer, worker_id);
  }
};
 791 
// Clear the marking bitmap for all committed regions using the given worker
// threads. When may_yield is true the workers join the suspendible thread
// set and the clearing may be aborted; otherwise it must run at a safepoint.
void G1ConcurrentMark::clear_bitmap(WorkerThreads* workers, bool may_yield) {
  assert(may_yield || SafepointSynchronize::is_at_safepoint(), "Non-yielding bitmap clear only allowed at safepoint.");

  size_t const num_bytes_to_clear = (G1HeapRegion::GrainBytes * _g1h->num_committed_regions()) / G1CMBitMap::heap_map_factor();
  size_t const num_chunks = align_up(num_bytes_to_clear, G1ClearBitMapTask::chunk_size()) / G1ClearBitMapTask::chunk_size();

  // No point in using more workers than there are chunks of work.
  uint const num_workers = (uint)MIN2(num_chunks, (size_t)workers->active_workers());

  G1ClearBitMapTask cl(this, num_workers, may_yield);

  log_debug(gc, ergo)("Running %s with %u workers for %zu work units.", cl.name(), num_workers, num_chunks);
  workers->run_task(&cl, num_workers);
}
 805 
// Concurrently clear the bitmap and recycle the partial array state in
// preparation for the next marking cycle.
void G1ConcurrentMark::cleanup_for_next_mark() {
  // Make sure that the concurrent mark thread looks to still be in
  // the current cycle.
  guarantee(is_in_reset_for_next_cycle(), "invariant");

  clear_bitmap(_concurrent_workers, true);

  reset_partial_array_state_manager();

  // Should not have changed state yet (even if a Full GC interrupted us).
  guarantee(is_in_reset_for_next_cycle(), "invariant");
}

// Reset the partial array state manager: every task's splitter is
// unregistered before the manager reset and re-registered afterwards.
void G1ConcurrentMark::reset_partial_array_state_manager() {
  for (uint i = 0; i < _max_num_tasks; ++i) {
    _tasks[i]->unregister_partial_array_splitter();
  }

  partial_array_state_manager()->reset();

  for (uint i = 0; i < _max_num_tasks; ++i) {
    _tasks[i]->register_partial_array_splitter();
  }
}
 830 
// Clear the bitmap at a safepoint (non-yielding), e.g. on behalf of a full
// collection.
void G1ConcurrentMark::clear_bitmap(WorkerThreads* workers) {
  assert_at_safepoint_on_vm_thread();
  // To avoid fragmentation the full collection requesting to clear the bitmap
  // might use fewer workers than available. To ensure the bitmap is cleared
  // as efficiently as possible the number of active workers are temporarily
  // increased to include all currently created workers.
  WithActiveWorkers update(workers, workers->created_workers());
  clear_bitmap(workers, false);
}
 840 
// Batched task executed at the concurrent start pause before marking begins:
// resets the marking state (serially) and notes the start of marking for each
// region (in parallel).
class G1PreConcurrentStartTask : public G1BatchedTask {
  // Reset marking state.
  class ResetMarkingStateTask;
  // For each region note start of marking.
  class NoteStartOfMarkTask;

public:
  G1PreConcurrentStartTask(GCCause::Cause cause, G1ConcurrentMark* cm);
};
 850 
// Serial sub-task that resets the global marking state via
// G1ConcurrentMark::reset().
class G1PreConcurrentStartTask::ResetMarkingStateTask : public G1AbstractSubTask {
  G1ConcurrentMark* _cm;
public:
  ResetMarkingStateTask(G1ConcurrentMark* cm) : G1AbstractSubTask(G1GCPhaseTimes::ResetMarkingState), _cm(cm) { }

  // Serial work; one worker is sufficient.
  double worker_cost() const override { return 1.0; }
  void do_work(uint worker_id) override;
};
 859 
 860 class G1PreConcurrentStartTask::NoteStartOfMarkTask : public G1AbstractSubTask {
 861 
 862   class NoteStartOfMarkHRClosure : public G1HeapRegionClosure {
 863     G1ConcurrentMark* _cm;
 864 
 865   public:
 866     NoteStartOfMarkHRClosure() : G1HeapRegionClosure(), _cm(G1CollectedHeap::heap()->concurrent_mark()) { }
 867 
 868     bool do_heap_region(G1HeapRegion* r) override {
 869       if (r->is_old_or_humongous() && !r->is_collection_set_candidate() && !r->in_collection_set()) {
 870         _cm->update_top_at_mark_start(r);
 871       } else {
 872         _cm->reset_top_at_mark_start(r);
 873       }
 874       return false;
 875     }
 876   } _region_cl;
 877 
 878   G1HeapRegionClaimer _claimer;
 879 public:
 880   NoteStartOfMarkTask() : G1AbstractSubTask(G1GCPhaseTimes::NoteStartOfMark), _region_cl(), _claimer(0) { }
 881 
 882   double worker_cost() const override {
 883     // The work done per region is very small, therefore we choose this magic number to cap the number
 884     // of threads used when there are few regions.
 885     const double regions_per_thread = 1000;
 886     return _claimer.n_regions() / regions_per_thread;
 887   }
 888 
 889   void set_max_workers(uint max_workers) override {
 890     _claimer.set_n_workers(max_workers);
 891   }
 892 
 893   void do_work(uint worker_id) override {
 894     G1CollectedHeap::heap()->heap_region_par_iterate_from_worker_offset(&_region_cl, &_claimer, worker_id);
 895   }
 896 };
 897 
// Runs on a single worker (serial sub-task); worker_id is unused.
void G1PreConcurrentStartTask::ResetMarkingStateTask::do_work(uint worker_id) {
  // Reset marking state.
  _cm->reset();
}
 902 
 903 G1PreConcurrentStartTask::G1PreConcurrentStartTask(GCCause::Cause cause, G1ConcurrentMark* cm) :
 904   G1BatchedTask("Pre Concurrent Start", G1CollectedHeap::heap()->phase_times()) {
 905   add_serial_task(new ResetMarkingStateTask(cm));
 906   add_parallel_task(new NoteStartOfMarkTask());
 907 };
 908 
// Work done in the concurrent start pause before concurrent marking starts:
// begin the code cache marking cycle, check CLD claim marks, run the batched
// pre-start task and record the GC cause for tracing.
void G1ConcurrentMark::pre_concurrent_start(GCCause::Cause cause) {
  assert_at_safepoint_on_vm_thread();

  G1CollectedHeap::start_codecache_marking_cycle_if_inactive(true /* concurrent_mark_start */);

  ClassLoaderDataGraph::verify_claimed_marks_cleared(ClassLoaderData::_claim_strong);

  G1PreConcurrentStartTask cl(cause, this);
  G1CollectedHeap::heap()->run_batch_task(&cl);

  _gc_tracer_cm->set_gc_cause(cause);
}
 921 
// Kick off a full concurrent marking cycle: enable reference discovery,
// activate SATB queues and signal the concurrent mark thread.
void G1ConcurrentMark::start_full_concurrent_cycle() {
  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = _g1h->ref_processor_cm();
  rp->start_discovery(false /* always_clear */);

  SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
  // This is the start of the marking cycle; we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // concurrent start pause to update the heap end, if the heap expands
  // during it. No need to call it here.

  // Signal the thread to start work.
  cm_thread()->start_full_cycle();
}
 941 
// Start an "undo" cycle that backs out of a concurrent start that turned out
// to be unnecessary: abort root region scanning and signal the mark thread.
void G1ConcurrentMark::start_undo_concurrent_cycle() {
  assert_at_safepoint_on_vm_thread();
  // At this time this GC is not a concurrent start gc any more, can only check for young only gc/phase.
  assert(_g1h->collector_state()->is_in_young_only_phase(), "must be");

  abort_root_region_scan_at_safepoint();

  // Signal the thread to start work.
  cm_thread()->start_undo_cycle();
}
 952 
// The concurrent cycle has finished; return the mark thread to its idle state.
void G1ConcurrentMark::notify_concurrent_cycle_completed() {
  cm_thread()->set_idle();
}
 956 
// Stop the concurrent mark thread at VM shutdown. Guarded because the thread
// only exists when initialization fully completed.
void G1ConcurrentMark::stop() {
  if (is_fully_initialized()) {
    cm_thread()->stop();
  }
}
 962 
 963 /*
 964  * Notice that in the next two methods, we actually leave the STS
 965  * during the barrier sync and join it immediately afterwards. If we
 966  * do not do this, the following deadlock can occur: one thread could
 967  * be in the barrier sync code, waiting for the other thread to also
 968  * sync up, whereas another one could be trying to yield, while also
 969  * waiting for the other threads to sync up too.
 970  *
 971  * Note, however, that this code is also used during remark and in
 972  * this case we should not attempt to leave / enter the STS, otherwise
 973  * we'll either hit an assert (debug / fastdebug) or deadlock
 974  * (product). So we should only leave / enter the STS if we are
 975  * operating concurrently.
 976  *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible for it to be suspended for a Full GC, or for an evacuation
 * pause to occur. This is actually safe, since entering the sync
 * barrier is one of the last things do_marking_step() does, and it
 * doesn't manipulate any data structures afterwards.
 982  */
 983 
// First rendezvous after a mark-stack overflow. See the comment block above
// about leaving the STS for the duration of the barrier sync.
void G1ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
  bool barrier_aborted;
  {
    // Only leave the STS when operating concurrently (no-op during remark).
    SuspendibleThreadSetLeaver sts_leave(concurrent());
    barrier_aborted = !_first_overflow_barrier_sync.enter();
  }

  // at this point everyone should have synced up and not be doing any
  // more work

  if (barrier_aborted) {
    // If the barrier aborted we ignore the overflow condition and
    // just abort the whole marking phase as quickly as possible.
    return;
  }
}
1000 
// Second rendezvous after a mark-stack overflow, entered once the marking
// state has been re-initialized. See the comment block above about the STS.
void G1ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
  SuspendibleThreadSetLeaver sts_leave(concurrent());
  _second_overflow_barrier_sync.enter();

  // at this point everything should be re-initialized and ready to go
}
1007 
1008 class G1CMConcurrentMarkingTask : public WorkerTask {
1009   G1ConcurrentMark*     _cm;
1010 
1011 public:
1012   void work(uint worker_id) {
1013     ResourceMark rm;
1014 
1015     SuspendibleThreadSetJoiner sts_join;
1016 
1017     assert(worker_id < _cm->active_tasks(), "invariant");
1018 
1019     G1CMTask* task = _cm->task(worker_id);
1020     task->record_start_time();
1021     if (!_cm->has_aborted()) {
1022       do {
1023         task->do_marking_step(G1ConcMarkStepDurationMillis,
1024                               true  /* do_termination */,
1025                               false /* is_serial*/);
1026 
1027         _cm->do_yield_check();
1028       } while (!_cm->has_aborted() && task->has_aborted());
1029     }
1030     task->record_end_time();
1031     guarantee(!task->has_aborted() || _cm->has_aborted(), "invariant");
1032   }
1033 
1034   G1CMConcurrentMarkingTask(G1ConcurrentMark* cm) :
1035       WorkerTask("Concurrent Mark"), _cm(cm) { }
1036 
1037   ~G1CMConcurrentMarkingTask() { }
1038 };
1039 
1040 uint G1ConcurrentMark::calc_active_marking_workers() {
1041   uint result = 0;
1042   if (!UseDynamicNumberOfGCThreads || !FLAG_IS_DEFAULT(ConcGCThreads)) {
1043     result = _max_concurrent_workers;
1044   } else {
1045     result =
1046       WorkerPolicy::calc_default_active_workers(_max_concurrent_workers,
1047                                                 1, /* Minimum workers */
1048                                                 _num_concurrent_workers,
1049                                                 Threads::number_of_non_daemon_threads());
1050     // Don't scale the result down by scale_concurrent_workers() because
1051     // that scaling has already gone into "_max_concurrent_workers".
1052   }
1053   assert(result > 0 && result <= _max_concurrent_workers,
1054          "Calculated number of marking workers must be larger than zero and at most the maximum %u, but is %u",
1055          _max_concurrent_workers, result);
1056   return result;
1057 }
1058 
// Whether root region scanning has been aborted (relaxed read; set from the
// abort_root_region_scan* methods).
bool G1ConcurrentMark::has_root_region_scan_aborted() const {
  return _root_region_scan_aborted.load_relaxed();
}
1062 
#ifndef PRODUCT
// Debug-only check that root region scanning either ran to completion or was
// explicitly aborted.
void G1ConcurrentMark::assert_root_region_scan_completed_or_aborted() {
  assert(root_regions()->work_completed() || has_root_region_scan_aborted(), "must be");
}
#endif
1068 
1069 void G1ConcurrentMark::scan_root_region(const MemRegion* region, uint worker_id) {
1070 #ifdef ASSERT
1071   HeapWord* last = region->last();
1072   G1HeapRegion* hr = _g1h->heap_region_containing(last);
1073   assert(hr->is_old() || top_at_mark_start(hr) == hr->bottom(),
1074          "Root regions must be old or survivor/eden but region %u is %s", hr->hrm_index(), hr->get_type_str());
1075   assert(top_at_mark_start(hr) == region->start(),
1076          "MemRegion start should be equal to TAMS");
1077 #endif
1078 
1079   G1RootRegionScanClosure cl(_g1h, this, worker_id);
1080 
1081   const uintx interval = PrefetchScanIntervalInBytes;
1082   HeapWord* curr = region->start();
1083   const HeapWord* end = region->end();
1084   while (curr < end) {
1085     Prefetch::read(curr, interval);
1086     oop obj = cast_to_oop(curr);
1087     size_t size = obj->oop_iterate_size(&cl);
1088     assert(size == obj->size(), "sanity");
1089     curr += size;
1090   }
1091 }
1092 
1093 class G1CMRootRegionScanTask : public WorkerTask {
1094   G1ConcurrentMark* _cm;
1095   bool _should_yield;
1096 
1097 public:
1098   G1CMRootRegionScanTask(G1ConcurrentMark* cm, bool should_yield) :
1099     WorkerTask("G1 Root Region Scan"), _cm(cm), _should_yield(should_yield) { }
1100 
1101   void work(uint worker_id) {
1102     SuspendibleThreadSetJoiner sts_join(_should_yield);
1103 
1104     while (true) {
1105       if (_cm->has_root_region_scan_aborted()) {
1106         return;
1107       }
1108       G1CMRootMemRegions* root_regions = _cm->root_regions();
1109       const MemRegion* region = root_regions->claim_next();
1110       if (region == nullptr) {
1111         return;
1112       }
1113       _cm->scan_root_region(region, worker_id);
1114       if (_should_yield) {
1115         SuspendibleThreadSet::yield();
1116         // If we yielded, a GC may have processed all root regions,
1117         // so this loop will naturally exit on the next claim_next() call.
1118         // Same if a Full GC signalled abort of the concurrent mark.
1119       }
1120     }
1121   }
1122 };
1123 
// Scan all remaining root regions with the given workers. Returns whether any
// scanning work was actually started (false when the work was already
// completed or aborted).
bool G1ConcurrentMark::scan_root_regions(WorkerThreads* workers, bool concurrent) {
  // We first check whether there is any work to do as we might have already aborted
  // the concurrent cycle, or ran into a GC that did the actual work when we reach here.
  // We want to avoid spinning up the worker threads if that happened.
  // (Note that due to races reading the abort-flag, we might spin up the threads anyway).
  //
  // Abort happens if a Full GC occurs right after starting the concurrent cycle or
  // a young gc doing the work.
  //
  // Concurrent gc threads enter an STS when starting the task, so they stop, then
  // continue after that safepoint.
  bool do_scan = !root_regions()->work_completed() && !has_root_region_scan_aborted();
  if (do_scan) {
    // Assign one worker to each root-region but subject to the max constraint.
    // The constraint is also important to avoid accesses beyond the allocated per-worker
    // marking helper data structures. We might get passed different WorkerThreads with
    // different number of threads (potential worker ids) than helper data structures when
    // completing this work during GC.
    const uint num_workers = MIN2(root_regions()->num_remaining_regions(),
                                  _max_concurrent_workers);
    assert(num_workers > 0, "no more remaining root regions to process");

    G1CMRootRegionScanTask task(this, concurrent);
    log_debug(gc, ergo)("Running %s using %u workers for %u work units.",
                        task.name(), num_workers, root_regions()->num_remaining_regions());
    workers->run_task(&task, num_workers);
  }

  assert_root_region_scan_completed_or_aborted();

  return do_scan;
}
1156 
// Concurrent-phase entry point for root region scanning; may yield to
// safepoints. Only called by the concurrent mark thread.
void G1ConcurrentMark::scan_root_regions_concurrently() {
  assert(Thread::current() == cm_thread(), "must be on Concurrent Mark Thread");
  scan_root_regions(_concurrent_workers, true /* concurrent */);
}
1161 
// Finish any outstanding root region scanning inside a safepoint (no
// yielding), using the GC workers. Returns whether work was performed.
bool G1ConcurrentMark::complete_root_regions_scan_in_safepoint() {
  assert_at_safepoint_on_vm_thread();
  return scan_root_regions(_g1h->workers(), false /* concurrent */);
}
1166 
// Register the range [TAMS, top) of region r as a root region to scan.
void G1ConcurrentMark::add_root_region(G1HeapRegion* r) {
  root_regions()->add(top_at_mark_start(r), r->top());
}
1170 
// Whether region r's [TAMS, top) range is registered as a root region.
bool G1ConcurrentMark::is_root_region(G1HeapRegion* r) {
  return root_regions()->contains(MemRegion(top_at_mark_start(r), r->top()));
}
1174 
// Abort root region scanning from outside a safepoint.
void G1ConcurrentMark::abort_root_region_scan() {
  assert_not_at_safepoint();

  _root_region_scan_aborted.store_relaxed(true);
}
1180 
// Abort root region scanning while at a safepoint on the VM thread.
void G1ConcurrentMark::abort_root_region_scan_at_safepoint() {
  assert_at_safepoint_on_vm_thread();

  _root_region_scan_aborted.store_relaxed(true);
}
1186 
// Record the start of a concurrent cycle for timing and tracing.
void G1ConcurrentMark::concurrent_cycle_start() {
  _gc_timer_cm->register_gc_start();

  _gc_tracer_cm->report_gc_start(GCCause::_no_gc /* first parameter is not used */, _gc_timer_cm->gc_start());

  _g1h->trace_heap_before_gc(_gc_tracer_cm);
}
1194 
// Number of fully completed mark cycles (relaxed read; incremented in
// concurrent_cycle_end()).
uint G1ConcurrentMark::completed_mark_cycles() const {
  return _completed_mark_cycles.load_relaxed();
}
1198 
// Record the end of a concurrent cycle: bump the completed-cycle counter if
// marking finished, report an abort as a concurrent mode failure, and close
// out timing/tracing.
void G1ConcurrentMark::concurrent_cycle_end(bool mark_cycle_completed) {
  _g1h->trace_heap_after_gc(_gc_tracer_cm);

  if (mark_cycle_completed) {
    _completed_mark_cycles.add_then_fetch(1u, memory_order_relaxed);
  }

  if (has_aborted()) {
    log_info(gc, marking)("Concurrent Mark Abort");
    _gc_tracer_cm->report_concurrent_mode_failure();
  }

  _gc_timer_cm->register_gc_end();

  _gc_tracer_cm->report_gc_end(_gc_timer_cm->gc_end(), _gc_timer_cm->time_partitions());
}
1215 
// Run the concurrent marking phase: size the worker set, configure marking
// concurrency and execute the concurrent marking task.
void G1ConcurrentMark::mark_from_roots() {
  _restart_for_overflow.store_relaxed(false);

  uint active_workers = calc_active_marking_workers();

  // Setting active workers is not guaranteed since fewer
  // worker threads may currently exist and more may not be
  // available.
  active_workers = _concurrent_workers->set_active_workers(active_workers);
  log_info(gc, task)("Concurrent Mark Using %u of %u Workers", active_workers, _concurrent_workers->max_workers());

  _num_concurrent_workers = active_workers;

  // Parallel task terminator is set in "set_concurrency_and_phase()"
  set_concurrency_and_phase(active_workers, true /* concurrent */);

  G1CMConcurrentMarkingTask marking_task(this);
  _concurrent_workers->run_task(&marking_task);
  print_stats();
}
1236 
// Map a VerifyLocation to a human-readable name. The array order must match
// the declaration order of the VerifyLocation enumerators.
const char* G1ConcurrentMark::verify_location_string(VerifyLocation location) {
  static const char* location_strings[] = { "Remark Before",
                                            "Remark After",
                                            "Remark Overflow",
                                            "Cleanup Before",
                                            "Cleanup After" };
  return location_strings[static_cast<std::underlying_type_t<VerifyLocation>>(location)];
}
1245 
// Run optional heap verification during a marking-related pause (Remark or
// Cleanup), labelled with the given location.
void G1ConcurrentMark::verify_during_pause(G1HeapVerifier::G1VerifyType type,
                                           VerifyLocation location) {
  G1HeapVerifier* verifier = _g1h->verifier();

  verifier->verify_region_sets_optional();

  const char* caller = verify_location_string(location);

  if (VerifyDuringGC && G1HeapVerifier::should_verify(type)) {
    GCTraceTime(Debug, gc, phases) debug(caller, _gc_timer_cm);

    size_t const BufLen = 512;
    char buffer[BufLen];

    jio_snprintf(buffer, BufLen, "During GC (%s)", caller);
    verifier->verify(VerifyOption::G1UseConcMarking, buffer);

    // Only check bitmap in Remark, and not at After-Verification because the regions
    // already have their TAMS'es reset.
    if (location != VerifyLocation::RemarkAfter) {
      verifier->verify_bitmap_clear(true /* above_tams_only */);
    }
  }
}
1270 
// Liveness predicate used for object-count reporting after marking: an object
// is alive iff the heap does not consider it dead.
class G1ObjectCountIsAliveClosure: public BoolObjectClosure {
  G1CollectedHeap* _g1h;
public:
  G1ObjectCountIsAliveClosure(G1CollectedHeap* g1h) : _g1h(g1h) {}

  bool do_object_b(oop obj) {
    return !_g1h->is_obj_dead(obj);
  }
};
1280 
// The Remark pause: finalize marking; on success process weak references,
// unload classes, select regions for remembered set rebuild and advance the
// cycle state; on mark-stack overflow set up a restart of concurrent marking.
void G1ConcurrentMark::remark() {
  assert_at_safepoint_on_vm_thread();

  // If a full collection has happened, we should not continue. However we might
  // have ended up here as the Remark VM operation has been scheduled already.
  if (has_aborted()) {
    return;
  }

  G1Policy* policy = _g1h->policy();
  policy->record_pause_start_time();

  double start = os::elapsedTime();

  verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyLocation::RemarkBefore);

  {
    GCTraceTime(Debug, gc, phases) debug("Finalize Marking", _gc_timer_cm);
    finalize_marking();
  }

  double mark_work_end = os::elapsedTime();

  bool const mark_finished = !has_overflown();
  if (mark_finished) {
    weak_refs_work();

    // Unload Klasses, String, Code Cache, etc.
    if (ClassUnloadingWithConcurrentMark) {
      G1CMIsAliveClosure is_alive(this);
      _g1h->unload_classes_and_code("Class Unloading", &is_alive, _gc_timer_cm);
    }

    SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle; we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    {
      GCTraceTime(Debug, gc, phases) debug("Flush Task Caches", _gc_timer_cm);
      flush_all_task_caches();
    }

    // All marking completed. Check bitmap now as we will start to reset TAMSes
    // in parallel below so that we can not do this in the After-Remark verification.
    _g1h->verifier()->verify_bitmap_clear(true /* above_tams_only */);

    {
      GCTraceTime(Debug, gc, phases) debug("Select For Rebuild and Reclaim Empty Regions", _gc_timer_cm);

      G1UpdateRegionLivenessAndSelectForRebuildTask cl(_g1h, this, _g1h->workers()->active_workers());
      uint const num_workers = MIN2(G1UpdateRegionLivenessAndSelectForRebuildTask::desired_num_workers(_g1h->num_committed_regions()),
                                    _g1h->workers()->active_workers());
      log_debug(gc,ergo)("Running %s using %u workers for %u regions in heap", cl.name(), num_workers, _g1h->num_committed_regions());
      _g1h->workers()->run_task(&cl, num_workers);

      log_debug(gc, remset, tracking)("Remembered Set Tracking update regions total %u, selected %u",
                                      _g1h->num_committed_regions(), cl.total_selected_for_rebuild());

      // Rebuild is only needed if at least one region was selected.
      _needs_remembered_set_rebuild = (cl.total_selected_for_rebuild() > 0);

      if (_needs_remembered_set_rebuild) {
        GrowableArrayCHeap<G1HeapRegion*, mtGC>* selected = cl.sort_and_prune_old_selected();
        _g1h->policy()->candidates()->set_candidates_from_marking(selected);
      }
    }

    if (log_is_enabled(Trace, gc, liveness)) {
      G1PrintRegionLivenessInfoClosure cl("Post-Marking");
      _g1h->heap_region_iterate(&cl);
    }

    // Potentially, some empty-regions have been reclaimed; make this a
    // "collection" so that pending allocation can retry before attempting a
    // GC pause.
    _g1h->increment_total_collections();

    // For Remark Pauses that may have been triggered by PeriodicGCs, we maintain
    // resizing based on MinHeapFreeRatio or MaxHeapFreeRatio. If a PeriodicGC is
    // triggered, it likely means there are very few regular GCs, making resizing
    // based on gc heuristics less effective.
    if (_g1h->last_gc_was_periodic()) {
      _g1h->resize_heap_after_full_collection(0 /* allocation_word_size */);
    }

    compute_new_sizes();

    verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyLocation::RemarkAfter);

    assert(!restart_for_overflow(), "sanity");
    // Completely reset the marking state (except bitmaps) since marking completed.
    reset_at_marking_complete();

    G1CollectedHeap::finish_codecache_marking_cycle();

    {
      GCTraceTime(Debug, gc, phases) debug("Report Object Count", _gc_timer_cm);
      G1ObjectCountIsAliveClosure is_alive(_g1h);
      _gc_tracer_cm->report_object_count_after_gc(&is_alive, _g1h->workers());
    }

    // Successfully completed marking, advance state.
    cm_thread()->set_full_cycle_rebuild_and_scrub();
  } else {
    // We overflowed.  Restart concurrent marking.
    _restart_for_overflow.store_relaxed(true);

    verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyLocation::RemarkOverflow);

    // Clear the marking state because we will be restarting
    // marking due to overflowing the global mark stack.
    reset_marking_for_restart();
  }

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  _g1h->update_perf_counter_cpu_time();

  policy->record_concurrent_mark_remark_end();

  return;
}
1409 
// Recompute heap and metaspace sizing data after marking reclaimed regions.
void G1ConcurrentMark::compute_new_sizes() {
  MetaspaceGC::compute_new_size();

  // Cleanup will have freed any regions completely full of garbage.
  // Update the soft reference policy with the new heap occupancy.
  Universe::heap()->update_capacity_and_used_at_gc();

  // We reclaimed old regions so we should calculate the sizes to make
  // sure we update the old gen/space data.
  _g1h->monitoring_support()->update_sizes();
}
1421 
// Per-region closure applied after remembered set rebuild to advance each
// region's remset tracking state.
class G1UpdateRegionsAfterRebuild : public G1HeapRegionClosure {
  G1CollectedHeap* _g1h;

public:
  G1UpdateRegionsAfterRebuild(G1CollectedHeap* g1h) : _g1h(g1h) { }

  bool do_heap_region(G1HeapRegion* r) override {
    // Update the remset tracking state from updating to complete
    // if remembered sets have been rebuilt.
    _g1h->policy()->remset_tracker()->update_after_rebuild(r);
    return false; // Never abort the iteration.
  }
};
1435 
// The Cleanup pause: finish remembered set tracking updates after rebuild and
// advance the cycle to the reset-for-next-cycle state.
void G1ConcurrentMark::cleanup() {
  assert_at_safepoint_on_vm_thread();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    return;
  }

  G1Policy* policy = _g1h->policy();
  policy->record_pause_start_time();

  double start = os::elapsedTime();

  verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyLocation::CleanupBefore);

  if (needs_remembered_set_rebuild()) {
    // Update the remset tracking information as well as marking all regions
    // as fully parsable.
    GCTraceTime(Debug, gc, phases) debug("Update Remembered Set Tracking After Rebuild", _gc_timer_cm);
    G1UpdateRegionsAfterRebuild cl(_g1h);
    _g1h->heap_region_iterate(&cl);
  } else {
    log_debug(gc, phases)("No Remembered Sets to update after rebuild");
  }

  verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyLocation::CleanupAfter);

  // Local statistics
  _cleanup_times.add((os::elapsedTime() - start) * 1000.0);

  {
    GCTraceTime(Debug, gc, phases) debug("Finalize Concurrent Mark Cleanup", _gc_timer_cm);
    policy->record_concurrent_mark_cleanup_end(needs_remembered_set_rebuild());
  }

  // Advance state.
  cm_thread()->set_full_cycle_reset_for_next_cycle();
  return;
}
1475 
// 'Keep Alive' oop closure used by both serial and parallel reference processing.
1477 // Uses the G1CMTask associated with a worker thread (for serial reference
1478 // processing the G1CMTask for worker 0 is used) to preserve (mark) and
1479 // trace referent objects.
1480 //
1481 // Using the G1CMTask and embedded local queues avoids having the worker
1482 // threads operating on the global mark stack. This reduces the risk
1483 // of overflowing the stack - which we would rather avoid at this late
1484 // state. Also using the tasks' local queues removes the potential
1485 // of the workers interfering with each other that could occur if
1486 // operating on the global stack.
1487 
class G1CMKeepAliveAndDrainClosure : public OopClosure {
  G1ConcurrentMark* _cm;
  G1CMTask*         _task;
  uint              _ref_counter_limit; // Drain after this many marked references.
  uint              _ref_counter;       // Countdown until the next drain.
  bool              _is_serial;         // Serial reference processing uses task 0 only.
public:
  G1CMKeepAliveAndDrainClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
    _cm(cm), _task(task), _ref_counter_limit(G1RefProcDrainInterval),
    _ref_counter(_ref_counter_limit), _is_serial(is_serial) {
    assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
  }

  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
  virtual void do_oop(      oop* p) { do_oop_work(p); }

  // Mark the referent at *p; every _ref_counter_limit newly marked references
  // the task's local queues are drained via do_marking_step().
  template <class T> void do_oop_work(T* p) {
    // Once the global mark stack has overflown there is no point continuing.
    if (_cm->has_overflown()) {
      return;
    }
    if (!_task->deal_with_reference(p)) {
      // We did not add anything to the mark bitmap (or mark stack), so there is
      // no point trying to drain it.
      return;
    }
    _ref_counter--;

    if (_ref_counter == 0) {
      // We have dealt with _ref_counter_limit references, pushing them
      // and objects reachable from them on to the local stack (and
      // possibly the global stack). Call G1CMTask::do_marking_step() to
      // process these entries.
      //
      // We call G1CMTask::do_marking_step() in a loop, which we'll exit if
      // there's nothing more to do (i.e. we're done with the entries that
      // were pushed as a result of the G1CMTask::deal_with_reference() calls
      // above) or we overflow.
      //
      // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
      // flag while there may still be some work to do. (See the comment at
      // the beginning of G1CMTask::do_marking_step() for those conditions -
      // one of which is reaching the specified time target.) It is only
      // when G1CMTask::do_marking_step() returns without setting the
      // has_aborted() flag that the marking step has completed.
      do {
        double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
        _task->do_marking_step(mark_step_duration_ms,
                               false      /* do_termination */,
                               _is_serial);
      } while (_task->has_aborted() && !_cm->has_overflown());
      _ref_counter = _ref_counter_limit;
    }
  }
};
1542 
1543 // 'Drain' oop closure used by both serial and parallel reference processing.
1544 // Uses the G1CMTask associated with a given worker thread (for serial
// reference processing the G1CMTask for worker 0 is used). Calls the
1546 // do_marking_step routine, with an unbelievably large timeout value,
1547 // to drain the marking data structures of the remaining entries
1548 // added by the 'keep alive' oop closure above.
1549 
class G1CMDrainMarkingStackClosure : public VoidClosure {
  G1ConcurrentMark* _cm;
  G1CMTask*         _task;
  bool              _is_serial; // Serial reference processing uses task 0 only.
 public:
  G1CMDrainMarkingStackClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
    _cm(cm), _task(task), _is_serial(is_serial) {
    assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
  }

  // Fully drain the task's marking data structures (unless the global mark
  // stack overflows first).
  void do_void() {
    do {
      // We call G1CMTask::do_marking_step() to completely drain the local
      // and global marking stacks of entries pushed by the 'keep alive'
      // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
      //
      // G1CMTask::do_marking_step() is called in a loop, which we'll exit
      // if there's nothing more to do (i.e. we've completely drained the
      // entries that were pushed as a result of applying the 'keep alive'
      // closure to the entries on the discovered ref lists) or we overflow
      // the global marking stack.
      //
      // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
      // flag while there may still be some work to do. (See the comment at
      // the beginning of G1CMTask::do_marking_step() for those conditions -
      // one of which is reaching the specified time target.) It is only
      // when G1CMTask::do_marking_step() returns without setting the
      // has_aborted() flag that the marking step has completed.

      _task->do_marking_step(1000000000.0 /* something very large */,
                             true         /* do_termination */,
                             _is_serial);
    } while (_task->has_aborted() && !_cm->has_overflown());
  }
};
1585 
// Proxy task that adapts reference processing work to the concurrent marking
// infrastructure: each worker uses its G1CMTask (task 0 in single-threaded
// mode) for keep-alive marking and stack draining.
class G1CMRefProcProxyTask : public RefProcProxyTask {
  G1CollectedHeap& _g1h;
  G1ConcurrentMark& _cm;

public:
  G1CMRefProcProxyTask(uint max_workers, G1CollectedHeap& g1h, G1ConcurrentMark &cm)
    : RefProcProxyTask("G1CMRefProcProxyTask", max_workers),
      _g1h(g1h),
      _cm(cm) {}

  void work(uint worker_id) override {
    assert(worker_id < _max_workers, "sanity");
    G1CMIsAliveClosure is_alive(&_cm);
    // In single-threaded mode always use task 0 (see the closures' asserts).
    uint index = (_tm == RefProcThreadModel::Single) ? 0 : worker_id;
    G1CMKeepAliveAndDrainClosure keep_alive(&_cm, _cm.task(index), _tm == RefProcThreadModel::Single);
    BarrierEnqueueDiscoveredFieldClosure enqueue;
    G1CMDrainMarkingStackClosure complete_gc(&_cm, _cm.task(index), _tm == RefProcThreadModel::Single);
    _rp_task->rp_work(worker_id, &is_alive, &keep_alive, &enqueue, &complete_gc);
  }

  void prepare_run_task_hook() override {
    // We need to reset the concurrency level before each
    // proxy task execution, so that the termination protocol
    // and overflow handling in G1CMTask::do_marking_step() knows
    // how many workers to wait for.
    _cm.set_concurrency(_queue_count);
  }
};
1614 
// Process discovered (soft/weak/final/phantom) references and other weak
// roots at the end of marking. A mark-stack overflow during this phase is
// fatal: the liveness information can no longer be trusted.
void G1ConcurrentMark::weak_refs_work() {
  ResourceMark rm;

  {
    GCTraceTime(Debug, gc, phases) debug("Reference Processing", _gc_timer_cm);

    ReferenceProcessor* rp = _g1h->ref_processor_cm();

    // See the comment in G1CollectedHeap::ref_processing_init()
    // about how reference processing currently works in G1.

    assert(_global_mark_stack.is_empty(), "mark stack should be empty");

    // Prefer to grow the stack until the max capacity.
    _global_mark_stack.set_should_grow();

    // Parallel processing task executor.
    G1CMRefProcProxyTask task(rp->max_num_queues(), *_g1h, *this);
    ReferenceProcessorPhaseTimes pt(_gc_timer_cm, rp->max_num_queues());

    // Process the weak references.
    const ReferenceProcessorStats& stats = rp->process_discovered_references(task, _g1h->workers(), pt);
    _gc_tracer_cm->report_gc_reference_stats(stats);
    pt.print_all_references();

    // The do_oop work routines of the keep_alive and drain_marking_stack
    // oop closures will set the has_overflown flag if we overflow the
    // global marking stack.

    assert(has_overflown() || _global_mark_stack.is_empty(),
           "Mark stack should be empty (unless it has overflown)");
  }

  if (has_overflown()) {
    // We can not trust g1_is_alive and the contents of the heap if the marking stack
    // overflowed while processing references. Exit the VM.
    fatal("Overflow during reference processing, can not continue. Current mark stack depth: "
          "%zu, MarkStackSize: %zu, MarkStackSizeMax: %zu. "
          "Please increase MarkStackSize and/or MarkStackSizeMax and restart.",
          _global_mark_stack.size(), MarkStackSize, MarkStackSizeMax);
    return;
  }

  assert(_global_mark_stack.is_empty(), "Marking should have completed");

  {
    GCTraceTime(Debug, gc, phases) debug("Weak Processing", _gc_timer_cm);
    G1CMIsAliveClosure is_alive(this);
    WeakProcessor::weak_oops_do(_g1h->workers(), &is_alive, &do_nothing_cl, 1);
  }
}
1666 
1667 class G1PrecleanYieldClosure : public YieldClosure {
1668   G1ConcurrentMark* _cm;
1669 
1670 public:
1671   G1PrecleanYieldClosure(G1ConcurrentMark* cm) : _cm(cm) { }
1672 
1673   virtual bool should_return() {
1674     return _cm->has_aborted();
1675   }
1676 
1677   virtual bool should_return_fine_grain() {
1678     _cm->do_yield_check();
1679     return _cm->has_aborted();
1680   }
1681 };
1682 
// Concurrently preclean the discovered reference lists, dropping entries
// whose referents are already known to be live, so that less work remains
// for the remark pause. Runs single-threaded and yields to safepoints.
void G1ConcurrentMark::preclean() {
  assert(G1UseReferencePrecleaning, "Precleaning must be enabled.");

  SuspendibleThreadSetJoiner joiner;

  BarrierEnqueueDiscoveredFieldClosure enqueue;

  set_concurrency_and_phase(1, true); // single worker, concurrent phase

  G1PrecleanYieldClosure yield_cl(this);

  ReferenceProcessor* rp = _g1h->ref_processor_cm();
  // Precleaning is single threaded. Temporarily disable MT discovery.
  ReferenceProcessorMTDiscoveryMutator rp_mut_discovery(rp, false);
  rp->preclean_discovered_references(rp->is_alive_non_header(),
                                     &enqueue,
                                     &yield_cl,
                                     _gc_timer_cm);
}
1702 
// Closure for marking entries in SATB buffers.
class G1CMSATBBufferClosure : public SATBBufferClosure {
private:
  G1CMTask* _task;
  // NOTE(review): _g1h is stored but not referenced in this class as
  // visible here — presumably kept for debugging/interface symmetry;
  // confirm before removing.
  G1CollectedHeap* _g1h;

  // This is very similar to G1CMTask::deal_with_reference, but with
  // more relaxed requirements for the argument, so this must be more
  // circumspect about treating the argument as an object.
  void do_entry(void* entry) const {
    _task->increment_refs_reached();
    oop const obj = cast_to_oop(entry);
    _task->make_reference_grey(obj);
  }

public:
  G1CMSATBBufferClosure(G1CMTask* task, G1CollectedHeap* g1h)
    : _task(task), _g1h(g1h) { }

  // Mark every entry of a completed SATB buffer.
  virtual void do_buffer(void** buffer, size_t size) {
    for (size_t i = 0; i < size; ++i) {
      do_entry(buffer[i]);
    }
  }
};
1728 
// Thread closure used during remark: flushes each thread's partially
// filled SATB buffer into the global queue set so it is picked up by
// completed-buffer processing.
class G1RemarkThreadsClosure : public ThreadClosure {
  G1SATBMarkQueueSet& _qset;

 public:
  // NOTE(review): the g1h and task parameters are unused here — presumably
  // retained for signature symmetry with related closures; confirm.
  G1RemarkThreadsClosure(G1CollectedHeap* g1h, G1CMTask* task) :
    _qset(G1BarrierSet::satb_mark_queue_set()) {}

  void do_thread(Thread* thread) {
    // Transfer any partial buffer to the qset for completed buffer processing.
    _qset.flush_queue(G1ThreadLocalData::satb_mark_queue(thread));
  }
};
1741 
// Worker task for the (stop-the-world) remark pause: each worker first
// flushes the SATB buffers of its claimed threads, then runs marking
// steps until all work is drained or the global stack overflows.
class G1CMRemarkTask : public WorkerTask {
  // For Threads::possibly_parallel_threads_do
  ThreadsClaimTokenScope _threads_claim_token_scope;
  G1ConcurrentMark* _cm;
public:
  void work(uint worker_id) {
    G1CMTask* task = _cm->task(worker_id);
    task->record_start_time();
    {
      ResourceMark rm;

      // Flush per-thread partial SATB buffers for threads claimed by
      // this worker.
      G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
      Threads::possibly_parallel_threads_do(true /* is_par */, &threads_f);
    }

    do {
      // Very large time target: during remark we run to completion.
      task->do_marking_step(1000000000.0 /* something very large */,
                            true         /* do_termination       */,
                            false        /* is_serial            */);
    } while (task->has_aborted() && !_cm->has_overflown());
    // If we overflow, then we do not want to restart. We instead
    // want to abort remark and do concurrent marking again.
    task->record_end_time();
  }

  G1CMRemarkTask(G1ConcurrentMark* cm, uint active_workers) :
    WorkerTask("Par Remark"), _threads_claim_token_scope(), _cm(cm) {
    _cm->terminator()->reset_for_reuse(active_workers);
  }
};
1772 
// Finish marking during the remark pause: run G1CMRemarkTask on all
// active workers and verify that (unless the mark stack overflowed) no
// completed SATB buffers remain.
void G1ConcurrentMark::finalize_marking() {
  ResourceMark rm;

  _g1h->ensure_parsability(false);

  // this is remark, so we'll use up all active threads
  uint active_workers = _g1h->workers()->active_workers();
  set_concurrency_and_phase(active_workers, false /* concurrent */);
  // Leave _parallel_marking_threads at it's
  // value originally calculated in the G1ConcurrentMark
  // constructor and pass values of the active workers
  // through the task.

  {
    G1CMRemarkTask remarkTask(this, active_workers);
    // We will start all available threads, even if we decide that the
    // active_workers will be fewer. The extra ones will just bail out
    // immediately.
    _g1h->workers()->run_task(&remarkTask);
  }

  // After a successful remark all SATB buffers must have been consumed;
  // only an overflow (which forces a marking restart) excuses leftovers.
  SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
  guarantee(has_overflown() ||
            satb_mq_set.completed_buffers_num() == 0,
            "Invariant: has_overflown = %s, num buffers = %zu",
            BOOL_TO_STR(has_overflown()),
            satb_mq_set.completed_buffers_num());

  print_stats();
}
1803 
1804 void G1ConcurrentMark::flush_all_task_caches() {
1805   size_t hits = 0;
1806   size_t misses = 0;
1807   for (uint i = 0; i < _max_num_tasks; i++) {
1808     Pair<size_t, size_t> stats = _tasks[i]->flush_mark_stats_cache();
1809     hits += stats.first;
1810     misses += stats.second;
1811   }
1812   size_t sum = hits + misses;
1813   log_debug(gc, stats)("Mark stats cache hits %zu misses %zu ratio %1.3lf",
1814                        hits, misses, percent_of(hits, sum));
1815 }
1816 
// Clear the marking bitmap for the whole of the given region. Must run
// at a safepoint since it races with concurrent marking otherwise.
void G1ConcurrentMark::clear_bitmap_for_region(G1HeapRegion* hr) {
  assert_at_safepoint();
  _mark_bitmap.clear_range(MemRegion(hr->bottom(), hr->end()));
}
1821 
// Lock-free claiming of the next region to scan: advance the global
// finger past one region with a CAS. Returns the claimed region, or
// null if the claim failed or the claimed region is empty (caller
// should retry). Region order is bottom-up by address.
G1HeapRegion* G1ConcurrentMark::claim_region(uint worker_id) {
  // "Checkpoint" the finger.
  HeapWord* local_finger = finger();

  while (local_finger < _heap.end()) {
    assert(_g1h->is_in_reserved(local_finger), "invariant");

    G1HeapRegion* curr_region = _g1h->heap_region_containing_or_null(local_finger);
    // Make sure that the reads below do not float before loading curr_region.
    OrderAccess::loadload();
    // Above heap_region_containing may return null as we always scan claim
    // until the end of the heap. In this case, just jump to the next region.
    HeapWord* end = curr_region != nullptr ? curr_region->end() : local_finger + G1HeapRegion::GrainWords;

    // Is the gap between reading the finger and doing the CAS too long?
    HeapWord* res = _finger.compare_exchange(local_finger, end);
    if (res == local_finger && curr_region != nullptr) {
      // We succeeded.
      HeapWord* bottom = curr_region->bottom();
      HeapWord* limit = top_at_mark_start(curr_region);

      log_trace(gc, marking)("Claim region %u bottom " PTR_FORMAT " tams " PTR_FORMAT, curr_region->hrm_index(), p2i(curr_region->bottom()), p2i(top_at_mark_start(curr_region)));
      // Notice that _finger == end cannot be guaranteed here since,
      // someone else might have moved the finger even further.
      assert(finger() >= end, "The finger should have moved forward");

      if (limit > bottom) {
        return curr_region;
      } else {
        assert(limit == bottom,
               "The region limit should be at bottom");
        // We return null and the caller should try calling
        // claim_region() again.
        return nullptr;
      }
    } else {
      // CAS lost (or the slot had no region): re-read the finger, which
      // some other worker must have advanced, and retry from there.
      HeapWord* next_finger = finger();
      assert(next_finger > local_finger, "The finger should have moved forward " PTR_FORMAT " " PTR_FORMAT, p2i(local_finger), p2i(next_finger));
      local_finger = next_finger;
    }
  }

  return nullptr;
}
1867 
1868 #ifndef PRODUCT
// Debug-only functor applied to mark stack / task queue entries: checks
// that each entry is a valid in-heap oop (or partial array state) and is
// not located in the (optional) collection set.
class VerifyNoCSetOops {
  G1CollectedHeap* _g1h;
  const char* _phase;  // label for error messages ("Stack", "Queue", ...)
  int _info;           // extra id for messages (e.g. queue index), -1 if n/a

public:
  VerifyNoCSetOops(const char* phase, int info = -1) :
    _g1h(G1CollectedHeap::heap()),
    _phase(phase),
    _info(info)
  { }

  void operator()(G1TaskQueueEntry task_entry) const {
    if (task_entry.is_partial_array_state()) {
      // For partial array entries only the source object can be checked.
      oop obj = task_entry.to_partial_array_state()->source();
      guarantee(_g1h->is_in_reserved(obj), "Partial Array " PTR_FORMAT " must be in heap.", p2i(obj));
      return;
    }
    guarantee(oopDesc::is_oop(task_entry.to_oop()),
              "Non-oop " PTR_FORMAT ", phase: %s, info: %d",
              p2i(task_entry.to_oop()), _phase, _info);
    G1HeapRegion* r = _g1h->heap_region_containing(task_entry.to_oop());
    guarantee(!(r->in_collection_set() || r->has_index_in_opt_cset()),
              "obj " PTR_FORMAT " from %s (%d) in region %u in (optional) collection set",
              p2i(task_entry.to_oop()), _phase, _info, r->hrm_index());
  }
};
1896 
// Debug-only verification that no marking data structure (global stack,
// task queues, global and per-task fingers) refers into the collection
// set while a mark/rebuild is in progress.
void G1ConcurrentMark::verify_no_collection_set_oops() {
  assert(SafepointSynchronize::is_at_safepoint() || !is_init_completed(),
         "should be at a safepoint or initializing");
  if (!is_fully_initialized() || !_g1h->collector_state()->is_in_mark_or_rebuild()) {
    return;
  }

  // Verify entries on the global mark stack
  _global_mark_stack.iterate(VerifyNoCSetOops("Stack"));

  // Verify entries on the task queues
  for (uint i = 0; i < _max_num_tasks; ++i) {
    G1CMTaskQueue* queue = _task_queues->queue(i);
    queue->iterate(VerifyNoCSetOops("Queue", i));
  }

  // Verify the global finger
  HeapWord* global_finger = finger();
  if (global_finger != nullptr && global_finger < _heap.end()) {
    // Since we always iterate over all regions, we might get a null G1HeapRegion
    // here.
    G1HeapRegion* global_hr = _g1h->heap_region_containing_or_null(global_finger);
    guarantee(global_hr == nullptr || global_finger == global_hr->bottom(),
              "global finger: " PTR_FORMAT " region: " HR_FORMAT,
              p2i(global_finger), HR_FORMAT_PARAMS(global_hr));
  }

  // Verify the task fingers
  assert(_num_concurrent_workers <= _max_num_tasks, "sanity");
  for (uint i = 0; i < _num_concurrent_workers; ++i) {
    G1CMTask* task = _tasks[i];
    HeapWord* task_finger = task->finger();
    if (task_finger != nullptr && task_finger < _heap.end()) {
      // See above note on the global finger verification.
      G1HeapRegion* r = _g1h->heap_region_containing_or_null(task_finger);
      // NOTE(review): this disjunction only fails when the region is both
      // in the collection set AND in the optional cset (weaker than the
      // combined check used for stack entries above) — confirm intent.
      guarantee(r == nullptr || task_finger == r->bottom() ||
                !r->in_collection_set() || !r->has_index_in_opt_cset(),
                "task finger: " PTR_FORMAT " region: " HR_FORMAT,
                p2i(task_finger), HR_FORMAT_PARAMS(r));
    }
  }
}
1939 #endif // PRODUCT
1940 
1941 void G1ConcurrentMark::rebuild_and_scrub() {
1942   if (!needs_remembered_set_rebuild()) {
1943     log_debug(gc, marking)("Skipping Remembered Set Rebuild. No regions selected for rebuild, will only scrub");
1944   }
1945 
1946   G1ConcurrentRebuildAndScrub::rebuild_and_scrub(this, needs_remembered_set_rebuild(), _concurrent_workers);
1947 }
1948 
// Print per-task marking statistics, separated by divider lines; only
// emitted when gc+stats debug logging is enabled.
void G1ConcurrentMark::print_stats() {
  if (!log_is_enabled(Debug, gc, stats)) {
    return;
  }
  log_debug(gc, stats)("---------------------------------------------------------------------");
  for (size_t i = 0; i < _num_active_tasks; ++i) {
    _tasks[i]->print_stats();
    log_debug(gc, stats)("---------------------------------------------------------------------");
  }
}
1959 
// Abort an in-progress concurrent marking cycle because a Full GC is
// starting. Returns true if there was marking state to tear down.
bool G1ConcurrentMark::concurrent_cycle_abort() {
  assert_at_safepoint_on_vm_thread();
  assert(_g1h->collector_state()->is_in_full_gc(), "must be");

  // If we start the compaction before the CM threads finish
  // scanning the root regions we might trip them over as we'll
  // be moving objects / updating references. Since the root region
  // scan synchronized with the safepoint, just tell it to abort.
  // It will notice when the threads start up again later.
  abort_root_region_scan_at_safepoint();

  // We haven't started a concurrent cycle no need to do anything; we might have
  // aborted the marking because of shutting down though. In this case the marking
  // might have already completed the abort (leading to in_progress() below to
  // return false), however this still left marking state particularly in the
  // shared marking bitmap that must be cleaned up.
  // If there are multiple full gcs during shutdown we do this work repeatedly for
  // nothing, but this situation should be extremely rare (a full gc after shutdown
  // has been signalled is already rare), and this work should be negligible compared
  // to actual full gc work.

  if (!is_fully_initialized() || (!cm_thread()->is_in_progress() && !cm_thread()->should_terminate())) {
    return false;
  }

  reset_marking_for_restart();

  abort_marking_threads();

  // Drop all pending SATB work and deactivate the SATB barrier.
  SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
  satb_mq_set.abandon_partial_marking();
  // This can be called either during or outside marking, we'll read
  // the expected_active value from the SATB queue set.
  satb_mq_set.set_active_all_threads(false, /* new active value */
                                     satb_mq_set.is_active() /* expected_active */);
  return true;
}
1997 
// Signal all marking threads to abort: set the abort flag and break both
// overflow synchronization barriers so no thread stays blocked in them.
void G1ConcurrentMark::abort_marking_threads() {
  assert_root_region_scan_completed_or_aborted();
  _has_aborted.store_relaxed(true);
  _first_overflow_barrier_sync.abort();
  _second_overflow_barrier_sync.abort();
}
2004 
2005 double G1ConcurrentMark::worker_threads_cpu_time_s() {
2006   class CountCpuTimeThreadClosure : public ThreadClosure {
2007   public:
2008     jlong _total_cpu_time;
2009 
2010     CountCpuTimeThreadClosure() : ThreadClosure(), _total_cpu_time(0) { }
2011 
2012     void do_thread(Thread* t) {
2013       _total_cpu_time += os::thread_cpu_time(t);
2014     }
2015   } cl;
2016 
2017   threads_do(&cl);
2018 
2019   return (double)cl._total_cpu_time / NANOSECS_PER_SEC;
2020 }
2021 
// Log summary statistics (count, total, average, and — when non-empty —
// std. deviation and maximum) for a sequence of phase times in ms.
static void print_ms_time_info(const char* prefix, const char* name,
                               NumberSeq& ns) {
  log_trace(gc, marking)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
                         prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
  if (ns.num() > 0) {
    log_trace(gc, marking)("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
                           prefix, ns.sd(), ns.maximum());
  }
}
2031 
// Log a trace-level summary of accumulated concurrent-mark timing data
// (remarks, cleanups, and total stop-world/concurrent times).
void G1ConcurrentMark::print_summary_info() {
  Log(gc, marking) log;
  if (!log.is_trace()) {
    return;
  }

  log.trace(" Concurrent marking:");
  if (!is_fully_initialized()) {
    log.trace("    has not been initialized yet");
    return;
  }
  print_ms_time_info("  ", "remarks", _remark_times);
  {
    // Sub-phases of the remark pauses.
    print_ms_time_info("     ", "final marks", _remark_mark_times);
    print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);

  }
  print_ms_time_info("  ", "cleanups", _cleanup_times);
  log.trace("    Finalize live data total time = %8.2f s (avg = %8.2f ms).",
            _cleanup_times.sum() / 1000.0, _cleanup_times.avg());
  log.trace("  Total stop_world time = %8.2f s.",
            (_remark_times.sum() + _cleanup_times.sum())/1000.0);
  log.trace("  Total concurrent time = %8.2f s (%8.2f s marking).",
            cm_thread()->total_mark_cpu_time_s(), cm_thread()->worker_threads_cpu_time_s());
}
2057 
// Apply the closure to the concurrent mark control thread and all
// concurrent worker threads (no-op before full initialization).
void G1ConcurrentMark::threads_do(ThreadClosure* tc) const {
  if (is_fully_initialized()) { // they are initialized late
    tc->do_thread(_cm_thread);
    _concurrent_workers->threads_do(tc);
  }
}
2064 
// Print the address and contents of the marking bitmap (debug aid).
void G1ConcurrentMark::print_on(outputStream* st) const {
  st->print_cr("Marking Bits: (CMBitMap*) " PTR_FORMAT, p2i(mark_bitmap()));
  _mark_bitmap.print_on(st, " Bits: ");
}
2069 
// Fetch the concurrent-mark reference processor, asserting it exists;
// used to seed G1CMOopClosure's base-class constructor below.
static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) {
  ReferenceProcessor* result = g1h->ref_processor_cm();
  assert(result != nullptr, "CM reference processor should not be null");
  return result;
}
2075 
// Marking oop closure; claims metadata strongly and discovers references
// through the concurrent-mark reference processor.
G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
                               G1CMTask* task)
  : ClaimMetadataVisitingOopIterateClosure(ClassLoaderData::_claim_strong, get_cm_oop_closure_ref_processor(g1h)),
    _g1h(g1h), _task(task)
{ }
2081 
// Set this task up to scan the given (freshly claimed) region, starting
// the local finger at the region's bottom.
void G1CMTask::setup_for_region(G1HeapRegion* hr) {
  assert(hr != nullptr,
        "claim_region() should have filtered out null regions");
  _curr_region  = hr;
  _finger       = hr->bottom();
  update_region_limit();
}
2089 
// Refresh _region_limit from the region's current TAMS, adjusting the
// local finger when the region was emptied or re-used underneath us.
void G1CMTask::update_region_limit() {
  G1HeapRegion* hr = _curr_region;
  HeapWord* bottom = hr->bottom();
  HeapWord* limit = _cm->top_at_mark_start(hr);

  if (limit == bottom) {
    // The region was collected underneath our feet.
    // We set the finger to bottom to ensure that the bitmap
    // iteration that will follow this will not do anything.
    // (this is not a condition that holds when we set the region up,
    // as the region is not supposed to be empty in the first place)
    _finger = bottom;
  } else if (limit >= _region_limit) {
    assert(limit >= _finger, "peace of mind");
  } else {
    assert(limit < _region_limit, "only way to get here");
    // This can happen under some pretty unusual circumstances.  An
    // evacuation pause empties the region underneath our feet (TAMS
    // at bottom). We then do some allocation in the region (TAMS
    // stays at bottom), followed by the region being used as a GC
    // alloc region (TAMS will move to top() and the objects
    // originally below it will be greyed). All objects now marked in
    // the region are explicitly greyed, if below the global finger,
    // and we do not need in fact to scan anything else. So, we simply
    // set _finger to be limit to ensure that the bitmap iteration
    // doesn't do anything.
    _finger = limit;
  }

  _region_limit = limit;
}
2121 
// Abandon the region this task currently holds (e.g. on abort) by
// clearing the region-tracking fields.
void G1CMTask::giveup_current_region() {
  assert(_curr_region != nullptr, "invariant");
  clear_region_fields();
}
2126 
void G1CMTask::clear_region_fields() {
  // Values for these three fields that indicate that we're not
  // holding on to a region.
  _curr_region   = nullptr;
  _finger        = nullptr;
  _region_limit  = nullptr;
}
2134 
2135 void G1CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
2136   if (cm_oop_closure == nullptr) {
2137     assert(_cm_oop_closure != nullptr, "invariant");
2138   } else {
2139     assert(_cm_oop_closure == nullptr, "invariant");
2140   }
2141   _cm_oop_closure = cm_oop_closure;
2142 }
2143 
// Reset this task for a new marking cycle: adopt the (possibly new)
// bitmap and clear all per-cycle state and statistics.
void G1CMTask::reset(G1CMBitMap* mark_bitmap) {
  guarantee(mark_bitmap != nullptr, "invariant");
  _mark_bitmap              = mark_bitmap;
  clear_region_fields();

  _calls                         = 0;
  _elapsed_time_ms               = 0.0;
  _termination_time_ms           = 0.0;

  _mark_stats_cache.reset();
}
2155 
// Lightweight reset used when marking restarts (e.g. after overflow):
// drop the claimed region, empty the local queue, and reset queue stats.
void G1CMTask::reset_for_restart() {
  clear_region_fields();
  _task_queue->set_empty();
  TASKQUEUE_STATS_ONLY(_partial_array_splitter.stats()->reset());
  TASKQUEUE_STATS_ONLY(_task_queue->stats.reset());
}
2162 
// (Re-)construct the embedded partial array splitter in place via
// placement new; paired with the explicit destructor call in
// unregister_partial_array_splitter().
void G1CMTask::register_partial_array_splitter() {

  ::new (&_partial_array_splitter) PartialArraySplitter(_cm->partial_array_state_manager(),
                                                        _cm->max_num_tasks(),
                                                        ObjArrayMarkingStride);
}
2169 
// Explicitly destroy the embedded splitter constructed by
// register_partial_array_splitter().
void G1CMTask::unregister_partial_array_splitter() {
  _partial_array_splitter.~PartialArraySplitter();
}
2173 
2174 bool G1CMTask::should_exit_termination() {
2175   if (!regular_clock_call()) {
2176     return true;
2177   }
2178 
2179   // This is called when we are in the termination protocol. We should
2180   // quit if, for some reason, this task wants to abort or the global
2181   // stack is not empty (this means that we can get work from it).
2182   return !_cm->mark_stack_empty() || has_aborted();
2183 }
2184 
// Called when the words-scanned or refs-reached limit is hit: run the
// regular clock and abort marking if it says so.
void G1CMTask::reached_limit() {
  assert(_words_scanned >= _words_scanned_limit ||
         _refs_reached >= _refs_reached_limit ,
         "shouldn't have been called otherwise");
  abort_marking_if_regular_check_fail();
}
2191 
// Periodic "clock" invoked at the scanning limits; returns false when
// this task should abort its current marking step (overflow, full-gc
// abort, yield request, time quota reached, or pending SATB buffers).
bool G1CMTask::regular_clock_call() {
  if (has_aborted()) {
    return false;
  }

  // First, we need to recalculate the words scanned and refs reached
  // limits for the next clock call.
  recalculate_limits();

  // During the regular clock call we do the following

  // (1) If an overflow has been flagged, then we abort.
  if (_cm->has_overflown()) {
    return false;
  }

  // If we are not concurrent (i.e. we're doing remark) we don't need
  // to check anything else. The other steps are only needed during
  // the concurrent marking phase.
  if (!_cm->concurrent()) {
    return true;
  }

  // (2) If marking has been aborted for Full GC, then we also abort.
  if (_cm->has_aborted()) {
    return false;
  }

  // (3) We check whether we should yield. If we have to, then we abort.
  if (SuspendibleThreadSet::should_yield()) {
    // We should yield. To do this we abort the task. The caller is
    // responsible for yielding.
    return false;
  }

  // (4) We check whether we've reached our time quota. If we have,
  // then we abort.
  double elapsed_time_ms = (double)(os::current_thread_cpu_time() - _start_cpu_time_ns) / NANOSECS_PER_MILLISEC;
  if (elapsed_time_ms > _time_target_ms) {
    _has_timed_out = true;
    return false;
  }

  // (5) Finally, we check whether there are enough completed SATB
  // buffers available for processing. If there are, we abort.
  SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
  if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
    // we do need to process SATB buffers, we'll abort and restart
    // the marking task to do so
    return false;
  }
  return true;
}
2245 
2246 void G1CMTask::recalculate_limits() {
2247   _real_words_scanned_limit = _words_scanned + words_scanned_period;
2248   _words_scanned_limit      = _real_words_scanned_limit;
2249 
2250   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
2251   _refs_reached_limit       = _real_refs_reached_limit;
2252 }
2253 
void G1CMTask::decrease_limits() {
  // This is called when we believe that we're going to do an infrequent
  // operation which will increase the per byte scanned cost (i.e. move
  // entries to/from the global stack). It basically tries to decrease the
  // scanning limit so that the clock is called earlier.

  // Pull both limits back to one quarter of a period past the previous
  // trigger point.
  _words_scanned_limit = _real_words_scanned_limit - 3 * words_scanned_period / 4;
  _refs_reached_limit  = _real_refs_reached_limit - 3 * refs_reached_period / 4;
}
2263 
// Move up to one chunk's worth of entries from the local queue onto the
// global mark stack; aborts the task if the global stack push fails.
void G1CMTask::move_entries_to_global_stack() {
  // Local array where we'll store the entries that will be popped
  // from the local queue.
  G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];

  size_t n = 0;
  G1TaskQueueEntry task_entry;
  while (n < G1CMMarkStack::EntriesPerChunk && _task_queue->pop_local(task_entry)) {
    buffer[n] = task_entry;
    ++n;
  }
  if (n < G1CMMarkStack::EntriesPerChunk) {
    // Null-terminate a partially filled chunk; consumers stop at the
    // first null entry (see get_entries_from_global_stack()).
    buffer[n] = G1TaskQueueEntry();
  }

  if (n > 0) {
    if (!_cm->mark_stack_push(buffer)) {
      set_has_aborted();
    }
  }

  // This operation was quite expensive, so decrease the limits.
  decrease_limits();
}
2288 
// Pop one chunk of entries from the global mark stack and push them onto
// the local queue. Returns false if the global stack was empty.
bool G1CMTask::get_entries_from_global_stack() {
  // Local array where we'll store the entries that will be popped
  // from the global stack.
  G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];

  if (!_cm->mark_stack_pop(buffer)) {
    return false;
  }

  // We did actually pop at least one entry.
  for (size_t i = 0; i < G1CMMarkStack::EntriesPerChunk; ++i) {
    G1TaskQueueEntry task_entry = buffer[i];
    if (task_entry.is_null()) {
      // Null entry terminates a partially filled chunk (see
      // move_entries_to_global_stack()).
      break;
    }
    assert(task_entry.is_partial_array_state() || oopDesc::is_oop(task_entry.to_oop()), "Element " PTR_FORMAT " must be an array slice or oop", p2i(task_entry.to_oop()));
    bool success = _task_queue->push(task_entry);
    // We only call this when the local queue is empty or under a
    // given target limit. So, we do not expect this push to fail.
    assert(success, "invariant");
  }

  // This operation was quite expensive, so decrease the limits
  decrease_limits();
  return true;
}
2315 
2316 void G1CMTask::drain_local_queue(bool partially) {
2317   if (has_aborted()) {
2318     return;
2319   }
2320 
2321   // Decide what the target size is, depending whether we're going to
2322   // drain it partially (so that other tasks can steal if they run out
2323   // of things to do) or totally (at the very end).
2324   uint target_size;
2325   if (partially) {
2326     target_size = GCDrainStackTargetSize;
2327   } else {
2328     target_size = 0;
2329   }
2330 
2331   if (_task_queue->size() > target_size) {
2332     G1TaskQueueEntry entry;
2333     bool ret = _task_queue->pop_local(entry);
2334     while (ret) {
2335       process_entry(entry, false /* stolen */);
2336       if (_task_queue->size() <= target_size || has_aborted()) {
2337         ret = false;
2338       } else {
2339         ret = _task_queue->pop_local(entry);
2340       }
2341     }
2342   }
2343 }
2344 
// Begin sliced processing of a large object array: enqueue partial-array
// state for the remainder, process the initial chunk here, and return
// the number of words treated as scanned for this step.
size_t G1CMTask::start_partial_array_processing(oop obj) {
  assert(should_be_sliced(obj), "Must be an array object %d and large %zu", obj->is_objArray(), obj->size());

  objArrayOop obj_array = oop_cast<objArrayOop>(obj);
  size_t array_length = obj_array->length();

  size_t initial_chunk_size = _partial_array_splitter.start(_task_queue, obj_array, nullptr, array_length);

  // Mark objArray klass metadata
  if (_cm_oop_closure->do_metadata()) {
    _cm_oop_closure->do_klass(obj_array->klass());
  }

  process_array_chunk(obj_array, 0, initial_chunk_size);

  // Include object header size
  if (obj_array->is_refArray()) {
    return refArrayOopDesc::object_size(checked_cast<int>(initial_chunk_size));
  } else {
    // Non-ref arrays here are flat (inline-type element) arrays; size
    // depends on the flat element layout.
    FlatArrayKlass* fak = FlatArrayKlass::cast(obj_array->klass());
    return flatArrayOopDesc::object_size(fak->layout_helper(), checked_cast<int>(initial_chunk_size));
  }
}
2368 
// Process one claimed chunk of a partially processed object array and
// return the number of words scanned for that chunk.
size_t G1CMTask::process_partial_array(const G1TaskQueueEntry& task, bool stolen) {
  PartialArrayState* state = task.to_partial_array_state();
  // Access state before release by claim().
  objArrayOop obj = oop_cast<objArrayOop>(state->source());

  PartialArraySplitter::Claim claim =
    _partial_array_splitter.claim(state, _task_queue, stolen);

  process_array_chunk(obj, claim._start, claim._end);

  if (obj->is_refArray()) {
    // Chunk size in words: number of elements times oop size.
    return heap_word_size((claim._end - claim._start) * heapOopSize);
  } else {
    assert(obj->is_flatArray(), "Must be!");
    size_t element_byte_size = FlatArrayKlass::cast(obj->klass())->element_byte_size();
    size_t nof_elements = claim._end - claim._start;
    return heap_word_size(nof_elements * element_byte_size);
  }
}
2388 
// Drain the global mark stack through this task's local queue, either
// down to a partial target size or completely.
void G1CMTask::drain_global_stack(bool partially) {
  if (has_aborted()) {
    return;
  }

  // We have a policy to drain the local queue before we attempt to
  // drain the global stack.
  assert(partially || _task_queue->size() == 0, "invariant");

  // Decide what the target size is, depending whether we're going to
  // drain it partially (so that other tasks can steal if they run out
  // of things to do) or totally (at the very end).
  // Notice that when draining the global mark stack partially, due to the racyness
  // of the mark stack size update we might in fact drop below the target. But,
  // this is not a problem.
  // In case of total draining, we simply process until the global mark stack is
  // totally empty, disregarding the size counter.
  if (partially) {
    size_t const target_size = _cm->partial_mark_stack_size_target();
    while (!has_aborted() && _cm->mark_stack_size() > target_size) {
      if (get_entries_from_global_stack()) {
        drain_local_queue(partially);
      }
    }
  } else {
    while (!has_aborted() && get_entries_from_global_stack()) {
      drain_local_queue(partially);
    }
  }
}
2419 
2420 // SATB Queue has several assumptions on whether to call the par or
2421 // non-par versions of the methods. this is why some of the code is
2422 // replicated. We should really get rid of the single-threaded version
2423 // of the code to simplify things.
// SATB Queue has several assumptions on whether to call the par or
// non-par versions of the methods. this is why some of the code is
// replicated. We should really get rid of the single-threaded version
// of the code to simplify things.
//
// Claims and processes completed SATB buffers until none remain or this task
// needs to abort.
void G1CMTask::drain_satb_buffers() {
  if (has_aborted()) {
    return;
  }

  // We set this so that the regular clock knows that we're in the
  // middle of draining buffers and doesn't set the abort flag when it
  // notices that SATB buffers are available for draining. It'd be
  // very counter productive if it did that. :-)
  _draining_satb_buffers = true;

  G1CMSATBBufferClosure satb_cl(this, _g1h);
  SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();

  // This keeps claiming and applying the closure to completed buffers
  // until we run out of buffers or we need to abort.
  while (!has_aborted() &&
         satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
    abort_marking_if_regular_check_fail();
  }

  // Can't assert qset is empty here, even if not aborted.  If concurrent,
  // some other thread might be adding to the queue.  If not concurrent,
  // some other thread might have won the race for the last buffer, but
  // has not yet decremented the count.

  _draining_satb_buffers = false;

  // again, this was a potentially expensive operation, decrease the
  // limits to get the regular clock call early
  decrease_limits();
}
2456 
// Drops any cached marking statistics for the given region.
void G1CMTask::clear_mark_stats_cache(uint region_idx) {
  _mark_stats_cache.reset(region_idx);
}
2460 
// Evicts all entries from this task's mark stats cache into the shared
// statistics. Returns the pair produced by evict_all() — see
// G1RegionMarkStatsCache::evict_all() for its exact meaning.
Pair<size_t, size_t> G1CMTask::flush_mark_stats_cache() {
  return _mark_stats_cache.evict_all();
}
2464 
2465 void G1CMTask::print_stats() {
2466   log_debug(gc, stats)("Marking Stats, task = %u, calls = %u", _worker_id, _calls);
2467   log_debug(gc, stats)("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
2468                        _elapsed_time_ms, _termination_time_ms);
2469   log_debug(gc, stats)("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms max = %1.2lfms, total = %1.2lfms",
2470                        _step_times_ms.num(),
2471                        _step_times_ms.avg(),
2472                        _step_times_ms.sd(),
2473                        _step_times_ms.maximum(),
2474                        _step_times_ms.sum());
2475   size_t const hits = _mark_stats_cache.hits();
2476   size_t const misses = _mark_stats_cache.misses();
2477   log_debug(gc, stats)("  Mark Stats Cache: hits %zu misses %zu ratio %.3f",
2478                        hits, misses, percent_of(hits, hits + misses));
2479 }
2480 
// Attempts to steal a queue entry from another worker's task queue on behalf
// of the given worker. Returns true and fills task_entry on success.
bool G1ConcurrentMark::try_stealing(uint worker_id, G1TaskQueueEntry& task_entry) {
  return _task_queues->steal(worker_id, task_entry);
}
2484 
// Scans the remaining part of the currently claimed region
// ([_finger, _region_limit)) using the given bitmap closure, giving up the
// region when its scan completes. The scan may abort part-way through; in
// that case _finger records where to resume on the next invocation.
void G1CMTask::process_current_region(G1CMBitMapClosure& bitmap_closure) {
  if (has_aborted() || _curr_region == nullptr) {
    return;
  }

  // This means that we're already holding on to a region.
  assert(_finger != nullptr, "if region is not null, then the finger "
         "should not be null either");

  // We might have restarted this task after an evacuation pause
  // which might have evacuated the region we're holding on to
  // underneath our feet. Let's read its limit again to make sure
  // that we do not iterate over a region of the heap that
  // contains garbage (update_region_limit() will also move
  // _finger to the start of the region if it is found empty).
  update_region_limit();
  // We will start from _finger not from the start of the region,
  // as we might be restarting this task after aborting half-way
  // through scanning this region. In this case, _finger points to
  // the address where we last found a marked object. If this is a
  // fresh region, _finger points to start().
  MemRegion mr = MemRegion(_finger, _region_limit);

  assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
         "humongous regions should go around loop once only");

  // Some special cases:
  // If the memory region is empty, we can just give up the region.
  // If the current region is humongous then we only need to check
  // the bitmap for the bit associated with the start of the object,
  // scan the object if it's live, and give up the region.
  // Otherwise, let's iterate over the bitmap of the part of the region
  // that is left.
  // If the iteration is successful, give up the region.
  if (mr.is_empty()) {
    giveup_current_region();
    abort_marking_if_regular_check_fail();
  } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
    if (_mark_bitmap->is_marked(mr.start())) {
      // The object is marked - apply the closure
      bitmap_closure.do_addr(mr.start());
    }
    // Even if this task aborted while scanning the humongous object
    // we can (and should) give up the current region.
    giveup_current_region();
    abort_marking_if_regular_check_fail();
  } else if (_mark_bitmap->iterate(&bitmap_closure, mr)) {
    giveup_current_region();
    abort_marking_if_regular_check_fail();
  } else {
    assert(has_aborted(), "currently the only way to do so");
    // The only way to abort the bitmap iteration is to return
    // false from the do_bit() method. However, inside the
    // do_bit() method we move the _finger to point to the
    // object currently being looked at. So, if we bail out, we
    // have definitely set _finger to something non-null.
    assert(_finger != nullptr, "invariant");

    // Region iteration was actually aborted. So now _finger
    // points to the address of the object we last scanned. If we
    // leave it there, when we restart this task, we will rescan
    // the object. It is easy to avoid this. We move the finger by
    // enough to point to the next possible object header.
    assert(_finger < _region_limit, "invariant");
    HeapWord* const new_finger = _finger + cast_to_oop(_finger)->size();
    if (new_finger >= _region_limit) {
      giveup_current_region();
    } else {
      move_finger_to(new_finger);
    }
  }
}
2557 
// Tries to claim a new region to scan, looping until a region is claimed,
// marking is aborted, or no regions are left to claim.
void G1CMTask::claim_new_region() {
  // Read the note on the claim_region() method on why it might
  // return null with potentially more regions available for
  // claiming and why we have to check out_of_regions() to determine
  // whether we're done or not.
  while (!has_aborted() && _curr_region == nullptr && !_cm->out_of_regions()) {
    // We are going to try to claim a new region. We should have
    // given up on the previous one.
    // Separated the asserts so that we know which one fires.
    assert(_curr_region  == nullptr, "invariant");
    assert(_finger       == nullptr, "invariant");
    assert(_region_limit == nullptr, "invariant");
    G1HeapRegion* claimed_region = _cm->claim_region(_worker_id);
    if (claimed_region != nullptr) {
      // Yes, we managed to claim one
      setup_for_region(claimed_region);
      assert(_curr_region == claimed_region, "invariant");
    }
    // It is important to call the regular clock here. It might take
    // a while to claim a region if, for example, we hit a large
    // block of empty regions. So we need to call the regular clock
    // method once round the loop to make sure it's called
    // frequently enough.
    abort_marking_if_regular_check_fail();
  }
}
2584 
2585 void G1CMTask::attempt_stealing() {
2586   // We cannot check whether the global stack is empty, since other
2587   // tasks might be pushing objects to it concurrently.
2588   assert(_cm->out_of_regions() && _task_queue->size() == 0,
2589          "only way to reach here");
2590   while (!has_aborted()) {
2591     G1TaskQueueEntry entry;
2592     if (_cm->try_stealing(_worker_id, entry)) {
2593       process_entry(entry, true /* stolen */);
2594 
2595       // And since we're towards the end, let's totally drain the
2596       // local queue and global stack.
2597       drain_local_queue(false);
2598       drain_global_stack(false);
2599     } else {
2600       break;
2601     }
2602   }
2603 }
2604 
// Enters the termination protocol (unless serial). If termination succeeds,
// all marking work is globally finished; otherwise this task is aborted so
// that do_marking_step() can be re-invoked to pick up remaining work.
void G1CMTask::attempt_termination(bool is_serial) {
  // We cannot check whether the global stack is empty, since other
  // tasks might be concurrently pushing objects on it.
  // Separated the asserts so that we know which one fires.
  assert(_cm->out_of_regions(), "only way to reach here");
  assert(_task_queue->size() == 0, "only way to reach here");
  double termination_start_time_ms = os::elapsedTime() * 1000.0;

  // The G1CMTask class also extends the TerminatorTerminator class,
  // hence its should_exit_termination() method will also decide
  // whether to exit the termination protocol or not.
  bool finished = (is_serial ||
                   _cm->terminator()->offer_termination(this));
  _termination_time_ms += (os::elapsedTime() * 1000.0 - termination_start_time_ms);

  if (finished) {
    // We're all done.

    // We can now guarantee that the global stack is empty, since
    // all other tasks have finished. We separated the guarantees so
    // that, if a condition is false, we can immediately find out
    // which one.
    guarantee(_cm->out_of_regions(), "only way to reach here");
    guarantee(_cm->mark_stack_empty(), "only way to reach here");
    guarantee(_task_queue->size() == 0, "only way to reach here");
    guarantee(!_cm->has_overflown(), "only way to reach here");
    guarantee(!has_aborted(), "should never happen if termination has completed");
  } else {
    // Apparently there's more work to do. Let's abort this task. We
    // will restart it and hopefully we can find more things to do.
    set_has_aborted();
  }
}
2638 
// Post-abort bookkeeping at the end of do_marking_step(): records timing
// statistics when the abort was a time-out and, if the abort was caused by
// a global mark stack overflow, synchronizes with the other tasks via two
// barriers and resets the marking state so that marking can restart.
void G1CMTask::handle_abort(bool is_serial, double elapsed_time_ms) {
  if (_has_timed_out) {
    double diff_ms = elapsed_time_ms - _time_target_ms;
    // Keep statistics of how well we did with respect to hitting
    // our target only if we actually timed out (if we aborted for
    // other reasons, then the results might get skewed).
    _marking_step_diff_ms.add(diff_ms);
  }

  if (!_cm->has_overflown()) {
    return;
  }

  // This is the interesting one. We aborted because a global
  // overflow was raised. This means we have to restart the
  // marking phase and start iterating over regions. However, in
  // order to do this we have to make sure that all tasks stop
  // what they are doing and re-initialize in a safe manner. We
  // will achieve this with the use of two barrier sync points.
  if (!is_serial) {
    // We only need to enter the sync barrier if being called
    // from a parallel context
    _cm->enter_first_sync_barrier(_worker_id);

    // When we exit this sync barrier we know that all tasks have
    // stopped doing marking work. So, it's now safe to
    // re-initialize our data structures.
  }

  clear_region_fields();
  flush_mark_stats_cache();

  if (!is_serial) {
    // If we're executing the concurrent phase of marking, reset the marking
    // state; otherwise the marking state is reset after reference processing,
    // during the remark pause.
    // If we reset here as a result of an overflow during the remark we will
    // see assertion failures from any subsequent set_concurrency_and_phase()
    // calls.
    if (_cm->concurrent() && _worker_id == 0) {
      // Worker 0 is responsible for clearing the global data structures because
      // of an overflow. During STW we should not clear the overflow flag (in
      // G1ConcurrentMark::reset_marking_state()) since we rely on it being true when we exit
      // method to abort the pause and restart concurrent marking.
      _cm->reset_marking_for_restart();

      log_info(gc, marking)("Concurrent Mark reset for overflow");
    }

    // ...and enter the second barrier.
    _cm->enter_second_sync_barrier(_worker_id);
  }
}
2692 
2693 /*****************************************************************************
2694 
2695     The do_marking_step(time_target_ms, ...) method is the building
2696     block of the parallel marking framework. It can be called in parallel
2697     with other invocations of do_marking_step() on different tasks
2698     (but only one per task, obviously) and concurrently with the
2699     mutator threads, or during remark, hence it eliminates the need
2700     for two versions of the code. When called during remark, it will
2701     pick up from where the task left off during the concurrent marking
2702     phase. Interestingly, tasks are also claimable during evacuation
2703     pauses too, since do_marking_step() ensures that it aborts before
2704     it needs to yield.
2705 
2706     The data structures that it uses to do marking work are the
2707     following:
2708 
2709       (1) Marking Bitmap. If there are grey objects that appear only
2710       on the bitmap (this happens either when dealing with an overflow
2711       or when the concurrent start pause has simply marked the roots
2712       and didn't push them on the stack), then tasks claim heap
2713       regions whose bitmap they then scan to find grey objects. A
2714       global finger indicates where the end of the last claimed region
2715       is. A local finger indicates how far into the region a task has
2716       scanned. The two fingers are used to determine how to grey an
2717       object (i.e. whether simply marking it is OK, as it will be
2718       visited by a task in the future, or whether it needs to be also
2719       pushed on a stack).
2720 
2721       (2) Local Queue. The local queue of the task which is accessed
2722       reasonably efficiently by the task. Other tasks can steal from
2723       it when they run out of work. Throughout the marking phase, a
2724       task attempts to keep its local queue short but not totally
2725       empty, so that entries are available for stealing by other
2726       tasks. Only when there is no more work, a task will totally
2727       drain its local queue.
2728 
2729       (3) Global Mark Stack. This handles local queue overflow. During
2730       marking only sets of entries are moved between it and the local
2731       queues, as access to it requires a mutex and more fine-grain
2732       interaction with it which might cause contention. If it
2733       overflows, then the marking phase should restart and iterate
2734       over the bitmap to identify grey objects. Throughout the marking
2735       phase, tasks attempt to keep the global mark stack at a small
2736       length but not totally empty, so that entries are available for
2737       popping by other tasks. Only when there is no more work, tasks
2738       will totally drain the global mark stack.
2739 
2740       (4) SATB Buffer Queue. This is where completed SATB buffers are
2741       made available. Buffers are regularly removed from this queue
2742       and scanned for roots, so that the queue doesn't get too
2743       long. During remark, all completed buffers are processed, as
2744       well as the filled in parts of any uncompleted buffers.
2745 
2746     The do_marking_step() method tries to abort when the time target
2747     has been reached. There are a few other cases when the
2748     do_marking_step() method also aborts:
2749 
2750       (1) When the marking phase has been aborted (after a Full GC).
2751 
2752       (2) When a global overflow (on the global stack) has been
2753       triggered. Before the task aborts, it will actually sync up with
2754       the other tasks to ensure that all the marking data structures
2755       (local queues, stacks, fingers etc.)  are re-initialized so that
2756       when do_marking_step() completes, the marking phase can
2757       immediately restart.
2758 
2759       (3) When enough completed SATB buffers are available. The
2760       do_marking_step() method only tries to drain SATB buffers right
2761       at the beginning. So, if enough buffers are available, the
2762       marking step aborts and the SATB buffers are processed at
2763       the beginning of the next invocation.
2764 
      (4) To yield. When we have to yield, we abort and yield
2766       right at the end of do_marking_step(). This saves us from a lot
2767       of hassle as, by yielding we might allow a Full GC. If this
2768       happens then objects will be compacted underneath our feet, the
2769       heap might shrink, etc. We save checking for this by just
2770       aborting and doing the yield right at the end.
2771 
2772     From the above it follows that the do_marking_step() method should
2773     be called in a loop (or, otherwise, regularly) until it completes.
2774 
2775     If a marking step completes without its has_aborted() flag being
2776     true, it means it has completed the current marking phase (and
2777     also all other marking tasks have done so and have all synced up).
2778 
2779     A method called regular_clock_call() is invoked "regularly" (in
2780     sub ms intervals) throughout marking. It is this clock method that
2781     checks all the abort conditions which were mentioned above and
2782     decides when the task should abort. A work-based scheme is used to
2783     trigger this clock method: when the number of object words the
2784     marking phase has scanned or the number of references the marking
2785     phase has visited reach a given limit. Additional invocations to
2786     the method clock have been planted in a few other strategic places
2787     too. The initial reason for the clock method was to avoid calling
2788     cpu time gathering too regularly, as it is quite expensive. So,
2789     once it was in place, it was natural to piggy-back all the other
2790     conditions on it too and not constantly check them throughout the code.
2791 
2792     If do_termination is true then do_marking_step will enter its
2793     termination protocol.
2794 
2795     The value of is_serial must be true when do_marking_step is being
2796     called serially (i.e. by the VMThread) and do_marking_step should
2797     skip any synchronization in the termination and overflow code.
2798     Examples include the serial remark code and the serial reference
2799     processing closures.
2800 
2801     The value of is_serial must be false when do_marking_step is
2802     being called by any of the worker threads.
2803     Examples include the concurrent marking code (CMMarkingTask),
2804     the MT remark code, and the MT reference processing closures.
2805 
2806  *****************************************************************************/
2807 
// The central marking work method; see the large comment block above for a
// full description of the protocol, the data structures involved and the
// abort conditions.
void G1CMTask::do_marking_step(double time_target_ms,
                               bool do_termination,
                               bool is_serial) {
  assert(time_target_ms >= 1.0, "minimum granularity is 1ms");

  _start_cpu_time_ns = os::current_thread_cpu_time();

  // If do_stealing is true then do_marking_step will attempt to
  // steal work from the other G1CMTasks. It only makes sense to
  // enable stealing when the termination protocol is enabled
  // and do_marking_step() is not being called serially.
  bool do_stealing = do_termination && !is_serial;

  // Shrink the time target by the predicted overshoot of previous steps.
  G1Predictions const& predictor = _g1h->policy()->predictor();
  double diff_prediction_ms = predictor.predict_zero_bounded(&_marking_step_diff_ms);
  _time_target_ms = time_target_ms - diff_prediction_ms;

  // set up the variables that are used in the work-based scheme to
  // call the regular clock method
  _words_scanned = 0;
  _refs_reached  = 0;
  recalculate_limits();

  // clear all flags
  clear_has_aborted();
  _has_timed_out = false;
  _draining_satb_buffers = false;

  ++_calls;

  // Set up the bitmap and oop closures. Anything that uses them is
  // eventually called from this method, so it is OK to allocate them
  // locally on this frame's stack.
  G1CMBitMapClosure bitmap_closure(this, _cm);
  G1CMOopClosure cm_oop_closure(_g1h, this);
  set_cm_oop_closure(&cm_oop_closure);

  if (_cm->has_overflown()) {
    // This can happen if the mark stack overflows during a GC pause
    // and this task, after a yield point, restarts. We have to abort
    // as we need to get into the overflow protocol which happens
    // right at the end of this task.
    set_has_aborted();
  }

  // First drain any available SATB buffers. After this, we will not
  // look at SATB buffers before the next invocation of this method.
  // If enough completed SATB buffers are queued up, the regular clock
  // will abort this task so that it restarts.
  drain_satb_buffers();
  // ...then partially drain the local queue and the global stack
  drain_local_queue(true);
  drain_global_stack(true);

  // Main work loop: scan the current region, keep the queues short,
  // then claim the next region, until out of regions or aborted.
  do {
    process_current_region(bitmap_closure);
    // At this point we have either completed iterating over the
    // region we were holding on to, or we have aborted.

    // We then partially drain the local queue and the global stack.
    drain_local_queue(true);
    drain_global_stack(true);

    claim_new_region();

    assert(has_aborted() || _curr_region != nullptr || _cm->out_of_regions(),
           "at this point we should be out of regions");
  } while ( _curr_region != nullptr && !has_aborted());

  // We cannot check whether the global stack is empty, since other
  // tasks might be pushing objects to it concurrently.
  assert(has_aborted() || _cm->out_of_regions(),
         "at this point we should be out of regions");
  // Try to reduce the number of available SATB buffers so that
  // remark has less work to do.
  drain_satb_buffers();

  // Since we've done everything else, we can now totally drain the
  // local queue and global stack.
  drain_local_queue(false);
  drain_global_stack(false);

  // Attempt at work stealing from other task's queues.
  if (do_stealing && !has_aborted()) {
    // We have not aborted. This means that we have finished all that
    // we could. Let's try to do some stealing...
    attempt_stealing();
  }

  // We still haven't aborted. Now, let's try to get into the
  // termination protocol.
  if (do_termination && !has_aborted()) {
    attempt_termination(is_serial);
  }

  // Mainly for debugging purposes to make sure that a pointer to the
  // closure which was statically allocated in this frame doesn't
  // escape it by accident.
  set_cm_oop_closure(nullptr);
  jlong end_cpu_time_ns = os::current_thread_cpu_time();
  double elapsed_time_ms = (double)(end_cpu_time_ns - _start_cpu_time_ns) / NANOSECS_PER_MILLISEC;
  // Update the step history.
  _step_times_ms.add(elapsed_time_ms);

  if (has_aborted()) {
    // The task was aborted for some reason.
    handle_abort(is_serial, elapsed_time_ms);
  }
}
2917 
// Initializes a marking task for the given worker. Most of the per-step
// marking state is (re)set in do_marking_step(); this constructor only
// establishes the task's identity and its long-lived helper structures.
G1CMTask::G1CMTask(uint worker_id,
                   G1ConcurrentMark* cm,
                   G1CMTaskQueue* task_queue,
                   G1RegionMarkStats* mark_stats) :
  _worker_id(worker_id),
  _g1h(G1CollectedHeap::heap()),
  _cm(cm),
  _mark_bitmap(nullptr),
  _task_queue(task_queue),
  _partial_array_splitter(_cm->partial_array_state_manager(), _cm->max_num_tasks(), ObjArrayMarkingStride),
  _mark_stats_cache(mark_stats, G1RegionMarkStatsCache::RegionMarkStatsCacheSize),
  _calls(0),
  _time_target_ms(0.0),
  _start_cpu_time_ns(0),
  _cm_oop_closure(nullptr),
  _curr_region(nullptr),
  _finger(nullptr),
  _region_limit(nullptr),
  _words_scanned(0),
  _words_scanned_limit(0),
  _real_words_scanned_limit(0),
  _refs_reached(0),
  _refs_reached_limit(0),
  _real_refs_reached_limit(0),
  _has_aborted(false),
  _has_timed_out(false),
  _draining_satb_buffers(false),
  _step_times_ms(),
  _elapsed_time_ms(0.0),
  _termination_time_ms(0.0),
  _marking_step_diff_ms()
{
  guarantee(task_queue != nullptr, "invariant");

  // Seed the step-time difference sequence used by the predictor in
  // do_marking_step() with a small initial value.
  _marking_step_diff_ms.add(0.5);
}
2954 
2955 // These are formatting macros that are used below to ensure
2956 // consistent formatting. The *_H_* versions are used to format the
2957 // header for a particular value and they should be kept consistent
2958 // with the corresponding macro. Also note that most of the macros add
2959 // the necessary white space (as a prefix) which makes them a bit
2960 // easier to compose.
2961 
2962 // All the output lines are prefixed with this string to be able to
2963 // identify them easily in a large log file.
2964 #define G1PPRL_LINE_PREFIX            "###"
2965 
2966 #define G1PPRL_ADDR_BASE_FORMAT    " " PTR_FORMAT "-" PTR_FORMAT
2967 #ifdef _LP64
2968 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
2969 #else // _LP64
2970 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
2971 #endif // _LP64
2972 
2973 // For per-region info
2974 #define G1PPRL_TYPE_FORMAT            "   %-4s"
2975 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
2976 #define G1PPRL_STATE_FORMAT           "   %-5s"
2977 #define G1PPRL_STATE_H_FORMAT         "   %5s"
2978 #define G1PPRL_BYTE_FORMAT            "  %9zu"
2979 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
2980 #define G1PPRL_DOUBLE_FORMAT          "%14.1f"
2981 #define G1PPRL_GCEFF_H_FORMAT         "  %14s"
2982 #define G1PPRL_GID_H_FORMAT           "  %9s"
2983 #define G1PPRL_GID_FORMAT             "  " UINT32_FORMAT_W(9)
2984 #define G1PPRL_LEN_FORMAT             "  " UINT32_FORMAT_W(14)
2985 #define G1PPRL_LEN_H_FORMAT           "  %14s"
2986 #define G1PPRL_GID_GCEFF_FORMAT       "  %14.1f"
2987 #define G1PPRL_GID_LIVENESS_FORMAT    "  %9.2f"
2988 
2989 // For summary info
2990 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  " tag ":" G1PPRL_ADDR_BASE_FORMAT
2991 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  " tag ": %zu"
2992 #define G1PPRL_SUM_MB_FORMAT(tag)      "  " tag ": %1.2f MB"
2993 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"
2994 
// Prints the header of the per-region liveness listing if gc+liveness trace
// logging is enabled; otherwise the closure does nothing.
G1PrintRegionLivenessInfoClosure::G1PrintRegionLivenessInfoClosure(const char* phase_name) :
  _total_used_bytes(0),
  _total_capacity_bytes(0),
  _total_live_bytes(0),
  _total_remset_bytes(0),
  _total_code_roots_bytes(0)
{
  if (!log_is_enabled(Trace, gc, liveness)) {
    return;
  }

  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  MemRegion reserved = g1h->reserved();
  double now = os::elapsedTime();

  // Print the header of the output.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP"
                          G1PPRL_SUM_ADDR_FORMAT("reserved")
                          G1PPRL_SUM_BYTE_FORMAT("region-size"),
                          p2i(reserved.start()), p2i(reserved.end()),
                          G1HeapRegion::GrainBytes);
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
  // Column name row; kept in sync with the per-region line format in
  // do_heap_region().
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          G1PPRL_TYPE_H_FORMAT
                          G1PPRL_ADDR_BASE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_STATE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_GID_H_FORMAT,
                          "type", "address-range",
                          "used", "live",
                          "state", "code-roots",
                          "group-id");
  // Units row, aligned under the column names.
  log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
                          G1PPRL_TYPE_H_FORMAT
                          G1PPRL_ADDR_BASE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_STATE_H_FORMAT
                          G1PPRL_BYTE_H_FORMAT
                          G1PPRL_GID_H_FORMAT,
                          "", "",
                          "(bytes)", "(bytes)",
                          "", "(bytes)", "");
}
3042 
3043 bool G1PrintRegionLivenessInfoClosure::do_heap_region(G1HeapRegion* r) {
3044   if (!log_is_enabled(Trace, gc, liveness)) {
3045     return false;
3046   }
3047 
3048   const char* type       = r->get_type_str();
3049   HeapWord* bottom       = r->bottom();
3050   HeapWord* end          = r->end();
3051   size_t capacity_bytes  = r->capacity();
3052   size_t used_bytes      = r->used();
3053   size_t live_bytes      = r->live_bytes();
3054   size_t remset_bytes    = r->rem_set()->mem_size();
3055   size_t code_roots_bytes = r->rem_set()->code_roots_mem_size();
3056   const char* remset_type = r->rem_set()->get_short_state_str();
3057   uint cset_group_id     = r->rem_set()->has_cset_group()
3058                          ? r->rem_set()->cset_group_id()
3059                          : G1CSetCandidateGroup::NoRemSetId;
3060 
3061   _total_used_bytes      += used_bytes;
3062   _total_capacity_bytes  += capacity_bytes;
3063   _total_live_bytes      += live_bytes;
3064   _total_remset_bytes    += remset_bytes;
3065   _total_code_roots_bytes += code_roots_bytes;
3066 
3067   // Print a line for this particular region.
3068   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3069                         G1PPRL_TYPE_FORMAT
3070                         G1PPRL_ADDR_BASE_FORMAT
3071                         G1PPRL_BYTE_FORMAT
3072                         G1PPRL_BYTE_FORMAT
3073                         G1PPRL_STATE_FORMAT
3074                         G1PPRL_BYTE_FORMAT
3075                         G1PPRL_GID_FORMAT,
3076                         type, p2i(bottom), p2i(end),
3077                         used_bytes, live_bytes,
3078                         remset_type, code_roots_bytes,
3079                         cset_group_id);
3080 
3081   return false;
3082 }
3083 
3084 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
3085   if (!log_is_enabled(Trace, gc, liveness)) {
3086     return;
3087   }
3088 
3089   G1CollectedHeap* g1h = G1CollectedHeap::heap();
3090   _total_remset_bytes += g1h->card_set_freelist_pool()->mem_size();
3091   // add static memory usages to remembered set sizes
3092   _total_remset_bytes += G1HeapRegionRemSet::static_mem_size();
3093 
3094   log_cset_candidate_groups();
3095 
3096   // Print the footer of the output.
3097   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3098   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3099                          " SUMMARY"
3100                          G1PPRL_SUM_MB_FORMAT("capacity")
3101                          G1PPRL_SUM_MB_PERC_FORMAT("used")
3102                          G1PPRL_SUM_MB_PERC_FORMAT("live")
3103                          G1PPRL_SUM_MB_FORMAT("remset")
3104                          G1PPRL_SUM_MB_FORMAT("code-roots"),
3105                          bytes_to_mb(_total_capacity_bytes),
3106                          bytes_to_mb(_total_used_bytes),
3107                          percent_of(_total_used_bytes, _total_capacity_bytes),
3108                          bytes_to_mb(_total_live_bytes),
3109                          percent_of(_total_live_bytes, _total_capacity_bytes),
3110                          bytes_to_mb(_total_remset_bytes),
3111                          bytes_to_mb(_total_code_roots_bytes));
3112 }
3113 
3114 void G1PrintRegionLivenessInfoClosure::log_cset_candidate_group_add_total(G1CSetCandidateGroup* group, const char* type) {
3115   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3116                           G1PPRL_GID_FORMAT
3117                           G1PPRL_LEN_FORMAT
3118                           G1PPRL_GID_GCEFF_FORMAT
3119                           G1PPRL_GID_LIVENESS_FORMAT
3120                           G1PPRL_BYTE_FORMAT
3121                           G1PPRL_TYPE_H_FORMAT,
3122                           group->group_id(),
3123                           group->length(),
3124                           group->length() > 0 ? group->gc_efficiency() : 0.0,
3125                           group->length() > 0 ? group->liveness_percent() : 0.0,
3126                           group->card_set()->mem_size(),
3127                           type);
3128   _total_remset_bytes += group->card_set()->mem_size();
3129 }
3130 
3131 void G1PrintRegionLivenessInfoClosure::log_cset_candidate_grouplist(G1CSetCandidateGroupList& gl, const char* type) {
3132   for (G1CSetCandidateGroup* group : gl) {
3133     log_cset_candidate_group_add_total(group, type);
3134   }
3135 }
3136 
3137 void G1PrintRegionLivenessInfoClosure::log_cset_candidate_groups() {
3138   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3139   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" Collection Set Candidate Groups");
3140   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX " Types: Y=Young, M=From Marking Regions, R=Retained Regions");
3141   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3142                           G1PPRL_GID_H_FORMAT
3143                           G1PPRL_LEN_H_FORMAT
3144                           G1PPRL_GCEFF_H_FORMAT
3145                           G1PPRL_BYTE_H_FORMAT
3146                           G1PPRL_BYTE_H_FORMAT
3147                           G1PPRL_TYPE_H_FORMAT,
3148                           "groud-id", "num-regions",
3149                           "gc-eff", "liveness",
3150                           "remset", "type");
3151 
3152   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3153                           G1PPRL_GID_H_FORMAT
3154                           G1PPRL_LEN_H_FORMAT
3155                           G1PPRL_GCEFF_H_FORMAT
3156                           G1PPRL_BYTE_H_FORMAT
3157                           G1PPRL_BYTE_H_FORMAT
3158                           G1PPRL_TYPE_H_FORMAT,
3159                           "", "",
3160                           "(bytes/ms)", "%",
3161                           "(bytes)", "");
3162 
3163   G1CollectedHeap* g1h = G1CollectedHeap::heap();
3164 
3165   log_cset_candidate_group_add_total(g1h->young_regions_cset_group(), "Y");
3166 
3167   G1CollectionSetCandidates* candidates = g1h->policy()->candidates();
3168   log_cset_candidate_grouplist(candidates->from_marking_groups(), "M");
3169   log_cset_candidate_grouplist(candidates->retained_groups(), "R");
3170 }