1 /*
   2  * Copyright (c) 2001, 2026, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "classfile/classLoaderData.hpp"
  26 #include "classfile/classLoaderDataGraph.hpp"
  27 #include "cppstdlib/new.hpp"
  28 #include "gc/g1/g1BarrierSet.hpp"
  29 #include "gc/g1/g1BatchedTask.hpp"
  30 #include "gc/g1/g1CardSetMemory.hpp"
  31 #include "gc/g1/g1CardTableClaimTable.inline.hpp"
  32 #include "gc/g1/g1CollectedHeap.inline.hpp"
  33 #include "gc/g1/g1CollectorState.inline.hpp"
  34 #include "gc/g1/g1ConcurrentMark.inline.hpp"
  35 #include "gc/g1/g1ConcurrentMarkRemarkTasks.hpp"
  36 #include "gc/g1/g1ConcurrentMarkThread.inline.hpp"
  37 #include "gc/g1/g1ConcurrentRebuildAndScrub.hpp"
  38 #include "gc/g1/g1ConcurrentRefine.hpp"
  39 #include "gc/g1/g1HeapRegion.inline.hpp"
  40 #include "gc/g1/g1HeapRegionManager.hpp"
  41 #include "gc/g1/g1HeapRegionPrinter.hpp"
  42 #include "gc/g1/g1HeapRegionRemSet.inline.hpp"
  43 #include "gc/g1/g1HeapRegionSet.inline.hpp"
  44 #include "gc/g1/g1HeapVerifier.hpp"
  45 #include "gc/g1/g1OopClosures.inline.hpp"
  46 #include "gc/g1/g1Policy.hpp"
  47 #include "gc/g1/g1RegionMarkStatsCache.inline.hpp"
  48 #include "gc/g1/g1ThreadLocalData.hpp"
  49 #include "gc/g1/g1Trace.hpp"
  50 #include "gc/shared/gcId.hpp"
  51 #include "gc/shared/gcTimer.hpp"
  52 #include "gc/shared/gcTraceTime.inline.hpp"
  53 #include "gc/shared/gcVMOperations.hpp"
  54 #include "gc/shared/partialArraySplitter.inline.hpp"
  55 #include "gc/shared/partialArrayState.hpp"
  56 #include "gc/shared/partialArrayTaskStats.hpp"
  57 #include "gc/shared/referencePolicy.hpp"
  58 #include "gc/shared/suspendibleThreadSet.hpp"
  59 #include "gc/shared/taskqueue.inline.hpp"
  60 #include "gc/shared/taskTerminator.hpp"
  61 #include "gc/shared/weakProcessor.inline.hpp"
  62 #include "gc/shared/workerPolicy.hpp"
  63 #include "jvm.h"
  64 #include "logging/log.hpp"
  65 #include "memory/allocation.hpp"
  66 #include "memory/iterator.hpp"
  67 #include "memory/metaspaceUtils.hpp"
  68 #include "memory/resourceArea.hpp"
  69 #include "memory/universe.hpp"
  70 #include "nmt/memTracker.hpp"
  71 #include "oops/access.inline.hpp"
  72 #include "oops/oop.inline.hpp"
  73 #include "runtime/globals_extension.hpp"
  74 #include "runtime/handles.inline.hpp"
  75 #include "runtime/java.hpp"
  76 #include "runtime/orderAccess.hpp"
  77 #include "runtime/os.hpp"
  78 #include "runtime/prefetch.inline.hpp"
  79 #include "runtime/threads.hpp"
  80 #include "utilities/align.hpp"
  81 #include "utilities/checkedCast.hpp"
  82 #include "utilities/formatBuffer.hpp"
  83 #include "utilities/growableArray.hpp"
  84 #include "utilities/powerOfTwo.hpp"
  85 
  86 G1CMIsAliveClosure::G1CMIsAliveClosure() : _cm(nullptr) { }
  87 
  88 G1CMIsAliveClosure::G1CMIsAliveClosure(G1ConcurrentMark* cm) : _cm(cm) {
  89   assert(cm != nullptr, "must be");
  90 }
  91 
  92 void G1CMIsAliveClosure::initialize(G1ConcurrentMark* cm) {
  93   assert(cm != nullptr, "must be");
  94   assert(_cm == nullptr, "double initialize");
  95   _cm = cm;
  96 }
  97 
  98 bool G1CMBitMapClosure::do_addr(HeapWord* const addr) {
  99   assert(addr < _cm->finger(), "invariant");
 100   assert(addr >= _task->finger(), "invariant");
 101 
 102   // We move that task's local finger along.
 103   _task->move_finger_to(addr);
 104 
 105   _task->process_entry(G1TaskQueueEntry(cast_to_oop(addr)), false /* stolen */);
 106   // we only partially drain the local queue and global stack
 107   _task->drain_local_queue(true);
 108   _task->drain_global_stack(true);
 109 
 110   // if the has_aborted flag has been raised, we need to bail out of
 111   // the iteration
 112   return !_task->has_aborted();
 113 }
 114 
 115 G1CMMarkStack::G1CMMarkStack() :
 116   _chunk_allocator() {
 117   set_empty();
 118 }
 119 
 120 size_t G1CMMarkStack::capacity_alignment() {
 121   return (size_t)lcm(os::vm_allocation_granularity(), sizeof(TaskQueueEntryChunk)) / sizeof(G1TaskQueueEntry);
 122 }
 123 
 124 bool G1CMMarkStack::initialize() {
 125   guarantee(_chunk_allocator.capacity() == 0, "G1CMMarkStack already initialized.");
 126 
 127   size_t initial_capacity = MarkStackSize;
 128   size_t max_capacity = MarkStackSizeMax;
 129 
 130   size_t const TaskEntryChunkSizeInVoidStar = sizeof(TaskQueueEntryChunk) / sizeof(G1TaskQueueEntry);
 131 
 132   size_t max_num_chunks = align_up(max_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
 133   size_t initial_num_chunks = align_up(initial_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
 134 
 135   initial_num_chunks = round_up_power_of_2(initial_num_chunks);
 136   max_num_chunks = MAX2(initial_num_chunks, max_num_chunks);
 137 
 138   size_t limit = (INT_MAX - 1);
 139   max_capacity = MIN2((max_num_chunks * TaskEntryChunkSizeInVoidStar), limit);
 140   initial_capacity = MIN2((initial_num_chunks * TaskEntryChunkSizeInVoidStar), limit);
 141 
 142   FLAG_SET_ERGO(MarkStackSizeMax, max_capacity);
 143   FLAG_SET_ERGO(MarkStackSize, initial_capacity);
 144 
 145   log_trace(gc)("MarkStackSize: %uk  MarkStackSizeMax: %uk", (uint)(MarkStackSize / K), (uint)(MarkStackSizeMax / K));
 146 
 147   log_debug(gc)("Initialize mark stack with %zu chunks, maximum %zu",
 148                 initial_num_chunks, max_capacity);
 149 
 150   return _chunk_allocator.initialize(initial_num_chunks, max_num_chunks);
 151 }
 152 
 153 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::ChunkAllocator::allocate_new_chunk() {
 154   if (_size.load_relaxed() >= _max_capacity) {
 155     return nullptr;
 156   }
 157 
 158   size_t cur_idx = _size.fetch_then_add(1u);
 159 
 160   if (cur_idx >= _max_capacity) {
 161     return nullptr;
 162   }
 163 
 164   size_t bucket = get_bucket(cur_idx);
 165   if (_buckets[bucket].load_acquire() == nullptr) {
 166     if (!_should_grow) {
 167       // Prefer to restart the CM.
 168       return nullptr;
 169     }
 170 
 171     MutexLocker x(G1MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
 172     if (_buckets[bucket].load_acquire() == nullptr) {
 173       size_t desired_capacity = bucket_size(bucket) * 2;
 174       if (!try_expand_to(desired_capacity)) {
 175         return nullptr;
 176       }
 177     }
 178   }
 179 
 180   size_t bucket_idx = get_bucket_index(cur_idx);
 181   TaskQueueEntryChunk* result = ::new (&_buckets[bucket].load_relaxed()[bucket_idx]) TaskQueueEntryChunk;
 182   result->next = nullptr;
 183   return result;
 184 }
 185 
 186 G1CMMarkStack::ChunkAllocator::ChunkAllocator() :
 187   _min_capacity(0),
 188   _max_capacity(0),
 189   _capacity(0),
 190   _num_buckets(0),
 191   _should_grow(false),
 192   _buckets(nullptr),
 193   _size(0)
 194 { }
 195 
 196 bool G1CMMarkStack::ChunkAllocator::initialize(size_t initial_capacity, size_t max_capacity) {
 197   guarantee(is_power_of_2(initial_capacity), "Invalid initial_capacity");
 198 
 199   _min_capacity = initial_capacity;
 200   _max_capacity = max_capacity;
 201   _num_buckets  = get_bucket(_max_capacity) + 1;
 202 
 203   _buckets = NEW_C_HEAP_ARRAY(Atomic<TaskQueueEntryChunk*>, _num_buckets, mtGC);
 204 
 205   for (size_t i = 0; i < _num_buckets; i++) {
 206     _buckets[i].store_relaxed(nullptr);
 207   }
 208 
 209   size_t new_capacity = bucket_size(0);
 210 
 211   if (!reserve(new_capacity)) {
 212     log_warning(gc)("Failed to reserve memory for new overflow mark stack with %zu chunks and size %zuB.", new_capacity, new_capacity * sizeof(TaskQueueEntryChunk));
 213     return false;
 214   }
 215   return true;
 216 }
 217 
 218 bool G1CMMarkStack::ChunkAllocator::try_expand_to(size_t desired_capacity) {
 219   if (_capacity == _max_capacity) {
 220     log_debug(gc)("Can not expand overflow mark stack further, already at maximum capacity of %zu chunks.", _capacity);
 221     return false;
 222   }
 223 
 224   size_t old_capacity = _capacity;
 225   desired_capacity = MIN2(desired_capacity, _max_capacity);
 226 
 227   if (reserve(desired_capacity)) {
 228     log_debug(gc)("Expanded the mark stack capacity from %zu to %zu chunks",
 229                   old_capacity, desired_capacity);
 230     return true;
 231   }
 232   return false;
 233 }
 234 
 235 bool G1CMMarkStack::ChunkAllocator::try_expand() {
 236   size_t new_capacity = _capacity * 2;
 237   return try_expand_to(new_capacity);
 238 }
 239 
 240 G1CMMarkStack::ChunkAllocator::~ChunkAllocator() {
 241   if (_buckets == nullptr) {
 242     return;
 243   }
 244 
 245   for (size_t i = 0; i < _num_buckets; i++) {
 246     if (_buckets[i].load_relaxed() != nullptr) {
 247       MmapArrayAllocator<TaskQueueEntryChunk>::free(_buckets[i].load_relaxed(),  bucket_size(i));
 248       _buckets[i].store_relaxed(nullptr);
 249     }
 250   }
 251 
 252   FREE_C_HEAP_ARRAY(_buckets);
 253 }
 254 
 255 bool G1CMMarkStack::ChunkAllocator::reserve(size_t new_capacity) {
 256   assert(new_capacity <= _max_capacity, "Cannot expand overflow mark stack beyond the max_capacity of %zu chunks.", _max_capacity);
 257 
 258   size_t highest_bucket = get_bucket(new_capacity - 1);
 259   size_t i = get_bucket(_capacity);
 260 
 261   // Allocate all buckets associated with indexes between the current capacity (_capacity)
 262   // and the new capacity (new_capacity). This step ensures that there are no gaps in the
 263   // array and that the capacity accurately reflects the reserved memory.
 264   for (; i <= highest_bucket; i++) {
 265     if (_buckets[i].load_acquire() != nullptr) {
 266       continue; // Skip over already allocated buckets.
 267     }
 268 
 269     size_t bucket_capacity = bucket_size(i);
 270 
 271     // Trim bucket size so that we do not exceed the _max_capacity.
 272     bucket_capacity = (_capacity + bucket_capacity) <= _max_capacity ?
 273                       bucket_capacity :
 274                       _max_capacity - _capacity;
 275 
 276 
 277     TaskQueueEntryChunk* bucket_base = MmapArrayAllocator<TaskQueueEntryChunk>::allocate_or_null(bucket_capacity, mtGC);
 278 
 279     if (bucket_base == nullptr) {
 280       log_warning(gc)("Failed to reserve memory for increasing the overflow mark stack capacity with %zu chunks and size %zuB.",
 281                       bucket_capacity, bucket_capacity * sizeof(TaskQueueEntryChunk));
 282       return false;
 283     }
 284     _capacity += bucket_capacity;
 285     _buckets[i].release_store(bucket_base);
 286   }
 287   return true;
 288 }
 289 
 290 void G1CMMarkStack::expand() {
 291   _chunk_allocator.try_expand();
 292 }
 293 
 294 void G1CMMarkStack::add_chunk_to_list(Atomic<TaskQueueEntryChunk*>* list, TaskQueueEntryChunk* elem) {
 295   elem->next = list->load_relaxed();
 296   list->store_relaxed(elem);
 297 }
 298 
 299 void G1CMMarkStack::add_chunk_to_chunk_list(TaskQueueEntryChunk* elem) {
 300   MutexLocker x(G1MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
 301   add_chunk_to_list(&_chunk_list, elem);
 302   _chunks_in_chunk_list++;
 303 }
 304 
 305 void G1CMMarkStack::add_chunk_to_free_list(TaskQueueEntryChunk* elem) {
 306   MutexLocker x(G1MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
 307   add_chunk_to_list(&_free_list, elem);
 308 }
 309 
 310 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_list(Atomic<TaskQueueEntryChunk*>* list) {
 311   TaskQueueEntryChunk* result = list->load_relaxed();
 312   if (result != nullptr) {
 313     list->store_relaxed(list->load_relaxed()->next);
 314   }
 315   return result;
 316 }
 317 
 318 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_chunk_list() {
 319   MutexLocker x(G1MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
 320   TaskQueueEntryChunk* result = remove_chunk_from_list(&_chunk_list);
 321   if (result != nullptr) {
 322     _chunks_in_chunk_list--;
 323   }
 324   return result;
 325 }
 326 
 327 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_free_list() {
 328   MutexLocker x(G1MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
 329   return remove_chunk_from_list(&_free_list);
 330 }
 331 
 332 bool G1CMMarkStack::par_push_chunk(G1TaskQueueEntry* ptr_arr) {
 333   // Get a new chunk.
 334   TaskQueueEntryChunk* new_chunk = remove_chunk_from_free_list();
 335 
 336   if (new_chunk == nullptr) {
 337     // Did not get a chunk from the free list. Allocate from backing memory.
 338     new_chunk = _chunk_allocator.allocate_new_chunk();
 339 
 340     if (new_chunk == nullptr) {
 341       return false;
 342     }
 343   }
 344 
 345   Copy::conjoint_memory_atomic(ptr_arr, new_chunk->data, EntriesPerChunk * sizeof(G1TaskQueueEntry));
 346 
 347   add_chunk_to_chunk_list(new_chunk);
 348 
 349   return true;
 350 }
 351 
 352 bool G1CMMarkStack::par_pop_chunk(G1TaskQueueEntry* ptr_arr) {
 353   TaskQueueEntryChunk* cur = remove_chunk_from_chunk_list();
 354 
 355   if (cur == nullptr) {
 356     return false;
 357   }
 358 
 359   Copy::conjoint_memory_atomic(cur->data, ptr_arr, EntriesPerChunk * sizeof(G1TaskQueueEntry));
 360 
 361   add_chunk_to_free_list(cur);
 362   return true;
 363 }
 364 
 365 void G1CMMarkStack::set_empty() {
 366   _chunks_in_chunk_list = 0;
 367   _chunk_list.store_relaxed(nullptr);
 368   _free_list.store_relaxed(nullptr);
 369   _chunk_allocator.reset();
 370 }
 371 
 372 G1CMRootMemRegions::G1CMRootMemRegions(uint const max_regions) :
 373     _root_regions(MemRegion::create_array(max_regions, mtGC)),
 374     _max_regions(max_regions),
 375     _num_regions(0),
 376     _num_claimed_regions(0) { }
 377 
 378 G1CMRootMemRegions::~G1CMRootMemRegions() {
 379   MemRegion::destroy_array(_root_regions, _max_regions);
 380 }
 381 
 382 void G1CMRootMemRegions::reset() {
 383   assert_at_safepoint();
 384   assert(G1CollectedHeap::heap()->collector_state()->is_in_concurrent_start_gc(), "must be");
 385 
 386   _num_regions.store_relaxed(0);
 387   _num_claimed_regions.store_relaxed(0);
 388 }
 389 
 390 void G1CMRootMemRegions::add(HeapWord* start, HeapWord* end) {
 391   assert_at_safepoint();
 392   uint idx = _num_regions.fetch_then_add(1u);
 393   assert(idx < _max_regions, "Trying to add more root MemRegions than there is space %u", _max_regions);
 394   assert(start != nullptr && end != nullptr && start <= end, "Start (" PTR_FORMAT ") should be less or equal to "
 395          "end (" PTR_FORMAT ")", p2i(start), p2i(end));
 396   _root_regions[idx].set_start(start);
 397   _root_regions[idx].set_end(end);
 398 }
 399 
 400 const MemRegion* G1CMRootMemRegions::claim_next() {
 401   uint local_num_regions = num_regions();
 402   if (num_claimed_regions() >= local_num_regions) {
 403     return nullptr;
 404   }
 405 
 406   uint claimed_index = _num_claimed_regions.fetch_then_add(1u);
 407   if (claimed_index < local_num_regions) {
 408     return &_root_regions[claimed_index];
 409   }
 410   return nullptr;
 411 }
 412 
 413 bool G1CMRootMemRegions::work_completed() const {
 414   return num_remaining_regions() == 0;
 415 }
 416 
 417 uint G1CMRootMemRegions::num_remaining_regions() const {
 418   uint total = num_regions();
 419   uint claimed = num_claimed_regions();
 420   return (total > claimed) ? total - claimed : 0;
 421 }
 422 
 423 bool G1CMRootMemRegions::contains(const MemRegion mr) const {
 424   uint local_num_root_regions = num_regions();
 425   for (uint i = 0; i < local_num_root_regions; i++) {
 426     if (_root_regions[i].equals(mr)) {
 427       return true;
 428     }
 429   }
 430   return false;
 431 }
 432 
 433 G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h,
 434                                    G1RegionToSpaceMapper* bitmap_storage) :
 435   _cm_thread(nullptr),
 436   _g1h(g1h),
 437 
 438   _mark_bitmap(),
 439 
 440   _heap(_g1h->reserved()),
 441 
 442   _root_regions(_g1h->max_num_regions()),
 443   _root_region_scan_aborted(false),
 444 
 445   _global_mark_stack(),
 446 
 447   _finger(nullptr), // _finger set in set_non_marking_state
 448 
 449   _worker_id_offset(G1ConcRefinementThreads), // The refinement control thread does not refine cards, so it's just the worker threads.
 450   _max_num_tasks(MAX2(ConcGCThreads, ParallelGCThreads)),
 451   _num_active_tasks(0), // _num_active_tasks set in set_non_marking_state()
 452   _tasks(nullptr), // _tasks set inside late_init()
 453   _task_queues(new G1CMTaskQueueSet(_max_num_tasks)),
 454   _terminator(_max_num_tasks, _task_queues),
 455   _partial_array_state_manager(new PartialArrayStateManager(_max_num_tasks)),
 456 
 457   _first_overflow_barrier_sync(),
 458   _second_overflow_barrier_sync(),
 459 
 460   _completed_mark_cycles(0),
 461   _has_overflown(false),
 462   _concurrent(false),
 463   _has_aborted(false),
 464   _restart_for_overflow(false),
 465   _gc_timer_cm(new ConcurrentGCTimer()),
 466   _gc_tracer_cm(new G1OldTracer()),
 467 
 468   // _verbose_level set below
 469 
 470   _remark_times(),
 471   _remark_mark_times(),
 472   _remark_weak_ref_times(),
 473   _cleanup_times(),
 474 
 475   _concurrent_workers(nullptr),
 476   _num_concurrent_workers(0),
 477   _max_concurrent_workers(0),
 478 
 479   _region_mark_stats(NEW_C_HEAP_ARRAY(G1RegionMarkStats, _g1h->max_num_regions(), mtGC)),
 480   _top_at_mark_starts(NEW_C_HEAP_ARRAY(Atomic<HeapWord*>, _g1h->max_num_regions(), mtGC)),
 481   _top_at_rebuild_starts(NEW_C_HEAP_ARRAY(Atomic<HeapWord*>, _g1h->max_num_regions(), mtGC)),
 482   _needs_remembered_set_rebuild(false)
 483 {
 484   assert(G1CGC_lock != nullptr, "CGC_lock must be initialized");
 485 
 486   _mark_bitmap.initialize(g1h->reserved(), bitmap_storage);
 487 }
 488 
 489 void G1ConcurrentMark::fully_initialize() {
 490   if (is_fully_initialized()) {
 491     return;
 492   }
 493 
 494   // Create & start ConcurrentMark thread.
 495   _cm_thread = new G1ConcurrentMarkThread(this);
 496   if (_cm_thread->osthread() == nullptr) {
 497     vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
 498   }
 499 
 500   log_debug(gc)("ConcGCThreads: %u offset %u", ConcGCThreads, _worker_id_offset);
 501   log_debug(gc)("ParallelGCThreads: %u", ParallelGCThreads);
 502 
 503   _max_concurrent_workers = ConcGCThreads;
 504 
 505   _concurrent_workers = new WorkerThreads("G1 Conc", _max_concurrent_workers);
 506   _concurrent_workers->initialize_workers();
 507   _num_concurrent_workers = _concurrent_workers->active_workers();
 508 
 509   if (!_global_mark_stack.initialize()) {
 510     vm_exit_during_initialization("Failed to allocate initial concurrent mark overflow mark stack.");
 511   }
 512 
 513   _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_num_tasks, mtGC);
 514 
 515   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 516   _num_active_tasks = _max_num_tasks;
 517 
 518   for (uint i = 0; i < _max_num_tasks; ++i) {
 519     G1CMTaskQueue* task_queue = new G1CMTaskQueue();
 520     _task_queues->register_queue(i, task_queue);
 521 
 522     _tasks[i] = new G1CMTask(i, this, task_queue, _region_mark_stats);
 523   }
 524 
 525   uint max_num_regions = _g1h->max_num_regions();
 526   ::new (_region_mark_stats) G1RegionMarkStats[max_num_regions]{};
 527   ::new (_top_at_mark_starts) Atomic<HeapWord*>[max_num_regions]{};
 528   ::new (_top_at_rebuild_starts) Atomic<HeapWord*>[max_num_regions]{};
 529 
 530   reset_at_marking_complete();
 531 }
 532 
 533 bool G1ConcurrentMark::is_in_concurrent_cycle() const {
 534   return is_fully_initialized() ? _cm_thread->is_in_progress() : false;
 535 }
 536 
 537 bool G1ConcurrentMark::is_in_marking() const {
 538   return is_fully_initialized() ? cm_thread()->is_in_marking() : false;
 539 }
 540 
 541 bool G1ConcurrentMark::is_in_rebuild_or_scrub() const {
 542   return cm_thread()->is_in_rebuild_or_scrub();
 543 }
 544 
 545 bool G1ConcurrentMark::is_in_reset_for_next_cycle() const {
 546   return cm_thread()->is_in_reset_for_next_cycle();
 547 }
 548 
 549 PartialArrayStateManager* G1ConcurrentMark::partial_array_state_manager() const {
 550   return _partial_array_state_manager;
 551 }
 552 
 553 G1ConcurrentMarkThread* G1ConcurrentMark::cm_thread() const {
 554   assert(is_fully_initialized(), "must be");
 555   return _cm_thread;
 556 }
 557 
 558 void G1ConcurrentMark::reset() {
 559   _has_aborted.store_relaxed(false);
 560 
 561   reset_marking_for_restart();
 562 
 563   // Reset all tasks, since different phases will use different number of active
 564   // threads. So, it's easiest to have all of them ready.
 565   for (uint i = 0; i < _max_num_tasks; ++i) {
 566     _tasks[i]->reset(mark_bitmap());
 567   }
 568 
 569   uint max_num_regions = _g1h->max_num_regions();
 570   ::new (_top_at_rebuild_starts) Atomic<HeapWord*>[max_num_regions]{};
 571   for (uint i = 0; i < max_num_regions; i++) {
 572     _region_mark_stats[i].clear();
 573   }
 574 
 575   _root_region_scan_aborted.store_relaxed(false);
 576   _root_regions.reset();
 577 }
 578 
 579 void G1ConcurrentMark::clear_statistics(G1HeapRegion* r) {
 580   uint region_idx = r->hrm_index();
 581   for (uint j = 0; j < _max_num_tasks; ++j) {
 582     _tasks[j]->clear_mark_stats_cache(region_idx);
 583   }
 584   _top_at_rebuild_starts[region_idx].store_relaxed(nullptr);
 585   _region_mark_stats[region_idx].clear();
 586 }
 587 
 588 void G1ConcurrentMark::humongous_object_eagerly_reclaimed(G1HeapRegion* r) {
 589   assert_at_safepoint();
 590   assert(r->is_starts_humongous(), "Got humongous continues region here");
 591 
 592   // Need to clear mark bit of the humongous object. Doing this unconditionally is fine.
 593   mark_bitmap()->clear(r->bottom());
 594 
 595   if (!_g1h->collector_state()->is_in_mark_or_rebuild()) {
 596     return;
 597   }
 598 
 599   // Clear any statistics about the region gathered so far.
 600   _g1h->humongous_obj_regions_iterate(r,
 601                                       [&] (G1HeapRegion* r) {
 602                                         clear_statistics(r);
 603                                       });
 604 }
 605 
 606 void G1ConcurrentMark::reset_marking_for_restart() {
 607   _global_mark_stack.set_empty();
 608 
 609   // Expand the marking stack, if we have to and if we can.
 610   if (has_overflown()) {
 611     _global_mark_stack.expand();
 612 
 613     uint max_num_regions = _g1h->max_num_regions();
 614     for (uint i = 0; i < max_num_regions; i++) {
 615       _region_mark_stats[i].clear_during_overflow();
 616     }
 617   }
 618 
 619   clear_has_overflown();
 620   _finger.store_relaxed(_heap.start());
 621 
 622   for (uint i = 0; i < _max_num_tasks; ++i) {
 623     _tasks[i]->reset_for_restart();
 624   }
 625 }
 626 
 627 void G1ConcurrentMark::set_concurrency(uint active_tasks) {
 628   assert(active_tasks <= _max_num_tasks, "we should not have more");
 629 
 630   _num_active_tasks = active_tasks;
 631   // Need to update the three data structures below according to the
 632   // number of active threads for this phase.
 633   _terminator.reset_for_reuse(active_tasks);
 634   _first_overflow_barrier_sync.set_n_workers(active_tasks);
 635   _second_overflow_barrier_sync.set_n_workers(active_tasks);
 636 }
 637 
 638 void G1ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
 639   set_concurrency(active_tasks);
 640 
 641   _concurrent.store_relaxed(concurrent);
 642 
 643   if (!concurrent) {
 644     // At this point we should be in a STW phase, and completed marking.
 645     assert_at_safepoint_on_vm_thread();
 646     assert(out_of_regions(),
 647            "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
 648            p2i(finger()), p2i(_heap.end()));
 649   }
 650 }
 651 
 652 #if TASKQUEUE_STATS
 653 void G1ConcurrentMark::print_and_reset_taskqueue_stats() {
 654 
 655   _task_queues->print_and_reset_taskqueue_stats("Concurrent Mark");
 656 
 657   auto get_pa_stats = [&](uint i) {
 658     return _tasks[i]->partial_array_task_stats();
 659   };
 660 
 661   PartialArrayTaskStats::log_set(_max_num_tasks, get_pa_stats,
 662                                  "Concurrent Mark Partial Array");
 663 
 664   for (uint i = 0; i < _max_num_tasks; ++i) {
 665     get_pa_stats(i)->reset();
 666   }
 667 }
 668 #endif
 669 
 670 void G1ConcurrentMark::reset_at_marking_complete() {
 671   TASKQUEUE_STATS_ONLY(print_and_reset_taskqueue_stats());
 672   // We set the global marking state to some default values when we're
 673   // not doing marking.
 674   reset_marking_for_restart();
 675   _num_active_tasks = 0;
 676 }
 677 
 678 G1ConcurrentMark::~G1ConcurrentMark() {
 679   FREE_C_HEAP_ARRAY(_top_at_mark_starts);
 680   FREE_C_HEAP_ARRAY(_top_at_rebuild_starts);
 681   FREE_C_HEAP_ARRAY(_region_mark_stats);
 682   // The G1ConcurrentMark instance is never freed.
 683   ShouldNotReachHere();
 684 }
 685 
 686 class G1ClearBitMapTask : public WorkerTask {
 687 public:
 688   static size_t chunk_size() { return M; }
 689 
 690 private:
 691   // Heap region closure used for clearing the _mark_bitmap.
 692   class G1ClearBitmapHRClosure : public G1HeapRegionClosure {
 693     G1ConcurrentMark* _cm;
 694     G1CMBitMap* _bitmap;
 695     bool _suspendible; // If suspendible, do yield checks.
 696 
 697     bool suspendible() {
 698       return _suspendible;
 699     }
 700 
 701     bool is_clear_concurrent_undo() {
 702       return suspendible() && _cm->cm_thread()->is_in_undo_cycle();
 703     }
 704 
 705     bool has_aborted() {
 706       if (suspendible()) {
 707         _cm->do_yield_check();
 708         return _cm->has_aborted();
 709       }
 710       return false;
 711     }
 712 
 713     HeapWord* region_clear_limit(G1HeapRegion* r) {
 714       // During a Concurrent Undo Mark cycle, the per region top_at_mark_start and
 715       // live_words data are current wrt to the _mark_bitmap. We use this information
 716       // to only clear ranges of the bitmap that require clearing.
 717       if (is_clear_concurrent_undo()) {
 718         // No need to clear bitmaps for empty regions (which includes regions we
 719         // did not mark through).
 720         if (!_cm->contains_live_object(r->hrm_index())) {
 721           assert(_bitmap->get_next_marked_addr(r->bottom(), r->end()) == r->end(), "Should not have marked bits");
 722           return r->bottom();
 723         }
 724         assert(_bitmap->get_next_marked_addr(_cm->top_at_mark_start(r), r->end()) == r->end(), "Should not have marked bits above tams");
 725       }
 726       return r->end();
 727     }
 728 
 729   public:
 730     G1ClearBitmapHRClosure(G1ConcurrentMark* cm, bool suspendible) :
 731       G1HeapRegionClosure(),
 732       _cm(cm),
 733       _bitmap(cm->mark_bitmap()),
 734       _suspendible(suspendible)
 735     { }
 736 
 737     virtual bool do_heap_region(G1HeapRegion* r) {
 738       if (has_aborted()) {
 739         return true;
 740       }
 741 
 742       HeapWord* cur = r->bottom();
 743       HeapWord* const end = region_clear_limit(r);
 744 
 745       size_t const chunk_size_in_words = G1ClearBitMapTask::chunk_size() / HeapWordSize;
 746 
 747       while (cur < end) {
 748 
 749         MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
 750         _bitmap->clear_range(mr);
 751 
 752         cur += chunk_size_in_words;
 753 
 754         // Repeat the asserts from before the start of the closure. We will do them
 755         // as asserts here to minimize their overhead on the product. However, we
 756         // will have them as guarantees at the beginning / end of the bitmap
 757         // clearing to get some checking in the product.
 758         assert(!suspendible() || _cm->is_in_reset_for_next_cycle(), "invariant");
 759 
 760         // Abort iteration if necessary.
 761         if (has_aborted()) {
 762           return true;
 763         }
 764       }
 765       assert(cur >= end, "Must have completed iteration over the bitmap for region %u.", r->hrm_index());
 766 
 767       _cm->reset_top_at_mark_start(r);
 768 
 769       return false;
 770     }
 771   };
 772 
 773   G1ClearBitmapHRClosure _cl;
 774   G1HeapRegionClaimer _hr_claimer;
 775   bool _suspendible; // If the task is suspendible, workers must join the STS.
 776 
 777 public:
 778   G1ClearBitMapTask(G1ConcurrentMark* cm, uint n_workers, bool suspendible) :
 779     WorkerTask("G1 Clear Bitmap"),
 780     _cl(cm, suspendible),
 781     _hr_claimer(n_workers),
 782     _suspendible(suspendible)
 783   { }
 784 
 785   void work(uint worker_id) {
 786     SuspendibleThreadSetJoiner sts_join(_suspendible);
 787     G1CollectedHeap::heap()->heap_region_par_iterate_from_worker_offset(&_cl, &_hr_claimer, worker_id);
 788   }
 789 };
 790 
 791 void G1ConcurrentMark::clear_bitmap(WorkerThreads* workers, bool may_yield) {
 792   assert(may_yield || SafepointSynchronize::is_at_safepoint(), "Non-yielding bitmap clear only allowed at safepoint.");
 793 
 794   size_t const num_bytes_to_clear = (G1HeapRegion::GrainBytes * _g1h->num_committed_regions()) / G1CMBitMap::heap_map_factor();
 795   size_t const num_chunks = align_up(num_bytes_to_clear, G1ClearBitMapTask::chunk_size()) / G1ClearBitMapTask::chunk_size();
 796 
 797   uint const num_workers = (uint)MIN2(num_chunks, (size_t)workers->active_workers());
 798 
 799   G1ClearBitMapTask cl(this, num_workers, may_yield);
 800 
 801   log_debug(gc, ergo)("Running %s with %u workers for %zu work units.", cl.name(), num_workers, num_chunks);
 802   workers->run_task(&cl, num_workers);
 803 }
 804 
 805 void G1ConcurrentMark::cleanup_for_next_mark() {
 806   // Make sure that the concurrent mark thread looks to still be in
 807   // the current cycle.
 808   guarantee(is_in_reset_for_next_cycle(), "invariant");
 809 
 810   clear_bitmap(_concurrent_workers, true);
 811 
 812   reset_partial_array_state_manager();
 813 
 814   // Should not have changed state yet (even if a Full GC interrupted us).
 815   guarantee(is_in_reset_for_next_cycle(), "invariant");
 816 }
 817 
 818 void G1ConcurrentMark::reset_partial_array_state_manager() {
 819   for (uint i = 0; i < _max_num_tasks; ++i) {
 820     _tasks[i]->unregister_partial_array_splitter();
 821   }
 822 
 823   partial_array_state_manager()->reset();
 824 
 825   for (uint i = 0; i < _max_num_tasks; ++i) {
 826     _tasks[i]->register_partial_array_splitter();
 827   }
 828 }
 829 
 830 void G1ConcurrentMark::clear_bitmap(WorkerThreads* workers) {
 831   assert_at_safepoint_on_vm_thread();
 832   // To avoid fragmentation the full collection requesting to clear the bitmap
 833   // might use fewer workers than available. To ensure the bitmap is cleared
 834   // as efficiently as possible the number of active workers are temporarily
 835   // increased to include all currently created workers.
 836   WithActiveWorkers update(workers, workers->created_workers());
 837   clear_bitmap(workers, false);
 838 }
 839 
 840 class G1PreConcurrentStartTask : public G1BatchedTask {
 841   // Reset marking state.
 842   class ResetMarkingStateTask;
 843   // For each region note start of marking.
 844   class NoteStartOfMarkTask;
 845 
 846 public:
 847   G1PreConcurrentStartTask(GCCause::Cause cause, G1ConcurrentMark* cm);
 848 };
 849 
 850 class G1PreConcurrentStartTask::ResetMarkingStateTask : public G1AbstractSubTask {
 851   G1ConcurrentMark* _cm;
 852 public:
 853   ResetMarkingStateTask(G1ConcurrentMark* cm) : G1AbstractSubTask(G1GCPhaseTimes::ResetMarkingState), _cm(cm) { }
 854 
 855   double worker_cost() const override { return 1.0; }
 856   void do_work(uint worker_id) override;
 857 };
 858 
 859 class G1PreConcurrentStartTask::NoteStartOfMarkTask : public G1AbstractSubTask {
 860 
 861   class NoteStartOfMarkHRClosure : public G1HeapRegionClosure {
 862     G1ConcurrentMark* _cm;
 863 
 864   public:
 865     NoteStartOfMarkHRClosure() : G1HeapRegionClosure(), _cm(G1CollectedHeap::heap()->concurrent_mark()) { }
 866 
 867     bool do_heap_region(G1HeapRegion* r) override {
 868       if (r->is_old_or_humongous() && !r->is_collection_set_candidate() && !r->in_collection_set()) {
 869         _cm->update_top_at_mark_start(r);
 870       } else {
 871         _cm->reset_top_at_mark_start(r);
 872       }
 873       return false;
 874     }
 875   } _region_cl;
 876 
 877   G1HeapRegionClaimer _claimer;
 878 public:
 879   NoteStartOfMarkTask() : G1AbstractSubTask(G1GCPhaseTimes::NoteStartOfMark), _region_cl(), _claimer(0) { }
 880 
 881   double worker_cost() const override {
 882     // The work done per region is very small, therefore we choose this magic number to cap the number
 883     // of threads used when there are few regions.
 884     const double regions_per_thread = 1000;
 885     return _claimer.n_regions() / regions_per_thread;
 886   }
 887 
 888   void set_max_workers(uint max_workers) override {
 889     _claimer.set_n_workers(max_workers);
 890   }
 891 
 892   void do_work(uint worker_id) override {
 893     G1CollectedHeap::heap()->heap_region_par_iterate_from_worker_offset(&_region_cl, &_claimer, worker_id);
 894   }
 895 };
 896 
 897 void G1PreConcurrentStartTask::ResetMarkingStateTask::do_work(uint worker_id) {
 898   // Reset marking state.
 899   _cm->reset();
 900 }
 901 
 902 G1PreConcurrentStartTask::G1PreConcurrentStartTask(GCCause::Cause cause, G1ConcurrentMark* cm) :
 903   G1BatchedTask("Pre Concurrent Start", G1CollectedHeap::heap()->phase_times()) {
 904   add_serial_task(new ResetMarkingStateTask(cm));
 905   add_parallel_task(new NoteStartOfMarkTask());
 906 };
 907 
 908 void G1ConcurrentMark::pre_concurrent_start(GCCause::Cause cause) {
 909   assert_at_safepoint_on_vm_thread();
 910 
 911   G1CollectedHeap::start_codecache_marking_cycle_if_inactive(true /* concurrent_mark_start */);
 912 
 913   ClassLoaderDataGraph::verify_claimed_marks_cleared(ClassLoaderData::_claim_strong);
 914 
 915   G1PreConcurrentStartTask cl(cause, this);
 916   G1CollectedHeap::heap()->run_batch_task(&cl);
 917 
 918   _gc_tracer_cm->set_gc_cause(cause);
 919 }
 920 
 921 void G1ConcurrentMark::start_full_concurrent_cycle() {
 922   // Start Concurrent Marking weak-reference discovery.
 923   ReferenceProcessor* rp = _g1h->ref_processor_cm();
 924   rp->start_discovery(false /* always_clear */);
 925 
 926   SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
 927   // This is the start of  the marking cycle, we're expected all
 928   // threads to have SATB queues with active set to false.
 929   satb_mq_set.set_active_all_threads(true, /* new active value */
 930                                      false /* expected_active */);
 931 
 932   // update_g1_committed() will be called at the end of an evac pause
 933   // when marking is on. So, it's also called at the end of the
 934   // concurrent start pause to update the heap end, if the heap expands
 935   // during it. No need to call it here.
 936 
 937   // Signal the thread to start work.
 938   cm_thread()->start_full_cycle();
 939 }
 940 
 941 void G1ConcurrentMark::start_undo_concurrent_cycle() {
 942   assert_at_safepoint_on_vm_thread();
 943   // At this time this GC is not a concurrent start gc any more, can only check for young only gc/phase.
 944   assert(_g1h->collector_state()->is_in_young_only_phase(), "must be");
 945 
 946   abort_root_region_scan_at_safepoint();
 947 
 948   // Signal the thread to start work.
 949   cm_thread()->start_undo_cycle();
 950 }
 951 
 952 void G1ConcurrentMark::notify_concurrent_cycle_completed() {
 953   cm_thread()->set_idle();
 954 }
 955 
 956 void G1ConcurrentMark::stop() {
 957   if (is_fully_initialized()) {
 958     cm_thread()->stop();
 959   }
 960 }
 961 
 962 /*
 963  * Notice that in the next two methods, we actually leave the STS
 964  * during the barrier sync and join it immediately afterwards. If we
 965  * do not do this, the following deadlock can occur: one thread could
 966  * be in the barrier sync code, waiting for the other thread to also
 967  * sync up, whereas another one could be trying to yield, while also
 968  * waiting for the other threads to sync up too.
 969  *
 970  * Note, however, that this code is also used during remark and in
 971  * this case we should not attempt to leave / enter the STS, otherwise
 972  * we'll either hit an assert (debug / fastdebug) or deadlock
 973  * (product). So we should only leave / enter the STS if we are
 974  * operating concurrently.
 975  *
 976  * Because the thread that does the sync barrier has left the STS, it
 977  * is possible to be suspended for a Full GC or an evacuation pause
 978  * could occur. This is actually safe, since the entering the sync
 979  * barrier is one of the last things do_marking_step() does, and it
 980  * doesn't manipulate any data structures afterwards.
 981  */
 982 
 983 void G1ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
 984   bool barrier_aborted;
 985   {
 986     SuspendibleThreadSetLeaver sts_leave(concurrent());
 987     barrier_aborted = !_first_overflow_barrier_sync.enter();
 988   }
 989 
 990   // at this point everyone should have synced up and not be doing any
 991   // more work
 992 
 993   if (barrier_aborted) {
 994     // If the barrier aborted we ignore the overflow condition and
 995     // just abort the whole marking phase as quickly as possible.
 996     return;
 997   }
 998 }
 999 
1000 void G1ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
1001   SuspendibleThreadSetLeaver sts_leave(concurrent());
1002   _second_overflow_barrier_sync.enter();
1003 
1004   // at this point everything should be re-initialized and ready to go
1005 }
1006 
1007 class G1CMConcurrentMarkingTask : public WorkerTask {
1008   G1ConcurrentMark*     _cm;
1009 
1010 public:
1011   void work(uint worker_id) {
1012     ResourceMark rm;
1013 
1014     SuspendibleThreadSetJoiner sts_join;
1015 
1016     assert(worker_id < _cm->active_tasks(), "invariant");
1017 
1018     G1CMTask* task = _cm->task(worker_id);
1019     task->record_start_time();
1020     if (!_cm->has_aborted()) {
1021       do {
1022         task->do_marking_step(G1ConcMarkStepDurationMillis,
1023                               true  /* do_termination */,
1024                               false /* is_serial*/);
1025 
1026         _cm->do_yield_check();
1027       } while (!_cm->has_aborted() && task->has_aborted());
1028     }
1029     task->record_end_time();
1030     guarantee(!task->has_aborted() || _cm->has_aborted(), "invariant");
1031   }
1032 
1033   G1CMConcurrentMarkingTask(G1ConcurrentMark* cm) :
1034       WorkerTask("Concurrent Mark"), _cm(cm) { }
1035 
1036   ~G1CMConcurrentMarkingTask() { }
1037 };
1038 
1039 uint G1ConcurrentMark::calc_active_marking_workers() {
1040   uint result = 0;
1041   if (!UseDynamicNumberOfGCThreads || !FLAG_IS_DEFAULT(ConcGCThreads)) {
1042     result = _max_concurrent_workers;
1043   } else {
1044     result =
1045       WorkerPolicy::calc_default_active_workers(_max_concurrent_workers,
1046                                                 1, /* Minimum workers */
1047                                                 _num_concurrent_workers,
1048                                                 Threads::number_of_non_daemon_threads());
1049     // Don't scale the result down by scale_concurrent_workers() because
1050     // that scaling has already gone into "_max_concurrent_workers".
1051   }
1052   assert(result > 0 && result <= _max_concurrent_workers,
1053          "Calculated number of marking workers must be larger than zero and at most the maximum %u, but is %u",
1054          _max_concurrent_workers, result);
1055   return result;
1056 }
1057 
1058 bool G1ConcurrentMark::has_root_region_scan_aborted() const {
1059   return _root_region_scan_aborted.load_relaxed();
1060 }
1061 
1062 #ifndef PRODUCT
1063 void G1ConcurrentMark::assert_root_region_scan_completed_or_aborted() {
1064   assert(root_regions()->work_completed() || has_root_region_scan_aborted(), "must be");
1065 }
1066 #endif
1067 
1068 void G1ConcurrentMark::scan_root_region(const MemRegion* region, uint worker_id) {
1069 #ifdef ASSERT
1070   HeapWord* last = region->last();
1071   G1HeapRegion* hr = _g1h->heap_region_containing(last);
1072   assert(hr->is_old() || top_at_mark_start(hr) == hr->bottom(),
1073          "Root regions must be old or survivor/eden but region %u is %s", hr->hrm_index(), hr->get_type_str());
1074   assert(top_at_mark_start(hr) == region->start(),
1075          "MemRegion start should be equal to TAMS");
1076 #endif
1077 
1078   G1RootRegionScanClosure cl(_g1h, this, worker_id);
1079 
1080   const uintx interval = PrefetchScanIntervalInBytes;
1081   HeapWord* curr = region->start();
1082   const HeapWord* end = region->end();
1083   while (curr < end) {
1084     Prefetch::read(curr, interval);
1085     oop obj = cast_to_oop(curr);
1086     size_t size = obj->oop_iterate_size(&cl);
1087     assert(size == obj->size(), "sanity");
1088     curr += size;
1089   }
1090 }
1091 
1092 class G1CMRootRegionScanTask : public WorkerTask {
1093   G1ConcurrentMark* _cm;
1094   bool _should_yield;
1095 
1096 public:
1097   G1CMRootRegionScanTask(G1ConcurrentMark* cm, bool should_yield) :
1098     WorkerTask("G1 Root Region Scan"), _cm(cm), _should_yield(should_yield) { }
1099 
1100   void work(uint worker_id) {
1101     SuspendibleThreadSetJoiner sts_join(_should_yield);
1102 
1103     while (true) {
1104       if (_cm->has_root_region_scan_aborted()) {
1105         return;
1106       }
1107       G1CMRootMemRegions* root_regions = _cm->root_regions();
1108       const MemRegion* region = root_regions->claim_next();
1109       if (region == nullptr) {
1110         return;
1111       }
1112       _cm->scan_root_region(region, worker_id);
1113       if (_should_yield) {
1114         SuspendibleThreadSet::yield();
1115         // If we yielded, a GC may have processed all root regions,
1116         // so this loop will naturally exit on the next claim_next() call.
1117         // Same if a Full GC signalled abort of the concurrent mark.
1118       }
1119     }
1120   }
1121 };
1122 
1123 bool G1ConcurrentMark::scan_root_regions(WorkerThreads* workers, bool concurrent) {
1124   // We first check whether there is any work to do as we might have already aborted
1125   // the concurrent cycle, or ran into a GC that did the actual work when we reach here.
1126   // We want to avoid spinning up the worker threads if that happened.
1127   // (Note that due to races reading the abort-flag, we might spin up the threads anyway).
1128   //
1129   // Abort happens if a Full GC occurs right after starting the concurrent cycle or
1130   // a young gc doing the work.
1131   //
1132   // Concurrent gc threads enter an STS when starting the task, so they stop, then
1133   // continue after that safepoint.
1134   //
1135   // Must not use G1CMRootMemRegions::work_completed() here because we need to get a
1136   // consistent view of the value containing the number of remaining regions across the
1137   // usages below. The safepoint/gc may already be running and modifying it
1138   // while this code is still executing.
1139   uint num_remaining = root_regions()->num_remaining_regions();
1140   bool do_scan = num_remaining > 0 && !has_root_region_scan_aborted();
1141   if (do_scan) {
1142     // Assign one worker to each root-region but subject to the max constraint.
1143     // The constraint is also important to avoid accesses beyond the allocated per-worker
1144     // marking helper data structures. We might get passed different WorkerThreads with
1145     // different number of threads (potential worker ids) than helper data structures when
1146     // completing this work during GC.
1147     const uint num_workers = MIN2(num_remaining,
1148                                   _max_concurrent_workers);
1149     assert(num_workers > 0, "no more remaining root regions to process");
1150 
1151     G1CMRootRegionScanTask task(this, concurrent);
1152     log_debug(gc, ergo)("Running %s using %u workers for %u work units.",
1153                         task.name(), num_workers, num_remaining);
1154     workers->run_task(&task, num_workers);
1155   }
1156 
1157   // At the end of this method, we can re-read num_remaining() in the assert: either
1158   // we got non-zero above and we processed all root regions (and it must be zero
1159   // after the worker task synchronization) or it had already been zero. We also
1160   // can't have started another concurrent cycle that could have set it to something else
1161   // while still in the concurrent cycle (if called concurrently).
1162   assert_root_region_scan_completed_or_aborted();
1163 
1164   return do_scan;
1165 }
1166 
1167 void G1ConcurrentMark::scan_root_regions_concurrently() {
1168   assert(Thread::current() == cm_thread(), "must be on Concurrent Mark Thread");
1169   scan_root_regions(_concurrent_workers, true /* concurrent */);
1170 }
1171 
1172 bool G1ConcurrentMark::complete_root_regions_scan_in_safepoint() {
1173   assert_at_safepoint_on_vm_thread();
1174   return scan_root_regions(_g1h->workers(), false /* concurrent */);
1175 }
1176 
1177 void G1ConcurrentMark::add_root_region(G1HeapRegion* r) {
1178   root_regions()->add(top_at_mark_start(r), r->top());
1179 }
1180 
1181 bool G1ConcurrentMark::is_root_region(G1HeapRegion* r) {
1182   return root_regions()->contains(MemRegion(top_at_mark_start(r), r->top()));
1183 }
1184 
1185 void G1ConcurrentMark::abort_root_region_scan() {
1186   assert_not_at_safepoint();
1187 
1188   _root_region_scan_aborted.store_relaxed(true);
1189 }
1190 
1191 void G1ConcurrentMark::abort_root_region_scan_at_safepoint() {
1192   assert_at_safepoint_on_vm_thread();
1193 
1194   _root_region_scan_aborted.store_relaxed(true);
1195 }
1196 
1197 void G1ConcurrentMark::concurrent_cycle_start() {
1198   _gc_timer_cm->register_gc_start();
1199 
1200   _gc_tracer_cm->report_gc_start(GCCause::_no_gc /* first parameter is not used */, _gc_timer_cm->gc_start());
1201 
1202   _g1h->trace_heap_before_gc(_gc_tracer_cm);
1203 }
1204 
1205 uint G1ConcurrentMark::completed_mark_cycles() const {
1206   return _completed_mark_cycles.load_relaxed();
1207 }
1208 
1209 void G1ConcurrentMark::concurrent_cycle_end(bool mark_cycle_completed) {
1210   _g1h->trace_heap_after_gc(_gc_tracer_cm);
1211 
1212   if (mark_cycle_completed) {
1213     _completed_mark_cycles.add_then_fetch(1u, memory_order_relaxed);
1214   }
1215 
1216   if (has_aborted()) {
1217     log_info(gc, marking)("Concurrent Mark Abort");
1218     _gc_tracer_cm->report_concurrent_mode_failure();
1219   }
1220 
1221   _gc_timer_cm->register_gc_end();
1222 
1223   _gc_tracer_cm->report_gc_end(_gc_timer_cm->gc_end(), _gc_timer_cm->time_partitions());
1224 }
1225 
1226 void G1ConcurrentMark::mark_from_roots() {
1227   _restart_for_overflow.store_relaxed(false);
1228 
1229   uint active_workers = calc_active_marking_workers();
1230 
1231   // Setting active workers is not guaranteed since fewer
1232   // worker threads may currently exist and more may not be
1233   // available.
1234   active_workers = _concurrent_workers->set_active_workers(active_workers);
1235   log_info(gc, task)("Concurrent Mark Using %u of %u Workers", active_workers, _concurrent_workers->max_workers());
1236 
1237   _num_concurrent_workers = active_workers;
1238 
1239   // Parallel task terminator is set in "set_concurrency_and_phase()"
1240   set_concurrency_and_phase(active_workers, true /* concurrent */);
1241 
1242   G1CMConcurrentMarkingTask marking_task(this);
1243   _concurrent_workers->run_task(&marking_task);
1244   print_stats();
1245 }
1246 
1247 const char* G1ConcurrentMark::verify_location_string(VerifyLocation location) {
1248   static const char* location_strings[] = { "Remark Before",
1249                                             "Remark After",
1250                                             "Remark Overflow",
1251                                             "Cleanup Before",
1252                                             "Cleanup After" };
1253   return location_strings[static_cast<std::underlying_type_t<VerifyLocation>>(location)];
1254 }
1255 
1256 void G1ConcurrentMark::verify_during_pause(G1HeapVerifier::G1VerifyType type,
1257                                            VerifyLocation location) {
1258   G1HeapVerifier* verifier = _g1h->verifier();
1259 
1260   verifier->verify_region_sets_optional();
1261 
1262   const char* caller = verify_location_string(location);
1263 
1264   if (VerifyDuringGC && G1HeapVerifier::should_verify(type)) {
1265     GCTraceTime(Debug, gc, phases) debug(caller, _gc_timer_cm);
1266 
1267     size_t const BufLen = 512;
1268     char buffer[BufLen];
1269 
1270     jio_snprintf(buffer, BufLen, "During GC (%s)", caller);
1271     verifier->verify(VerifyOption::G1UseConcMarking, buffer);
1272 
1273     // Only check bitmap in Remark, and not at After-Verification because the regions
1274     // already have their TAMS'es reset.
1275     if (location != VerifyLocation::RemarkAfter) {
1276       verifier->verify_bitmap_clear(true /* above_tams_only */);
1277     }
1278   }
1279 }
1280 
1281 class G1ObjectCountIsAliveClosure: public BoolObjectClosure {
1282   G1CollectedHeap* _g1h;
1283 public:
1284   G1ObjectCountIsAliveClosure(G1CollectedHeap* g1h) : _g1h(g1h) {}
1285 
1286   bool do_object_b(oop obj) {
1287     return !_g1h->is_obj_dead(obj);
1288   }
1289 };
1290 
1291 void G1ConcurrentMark::remark() {
1292   assert_at_safepoint_on_vm_thread();
1293 
1294   // If a full collection has happened, we should not continue. However we might
1295   // have ended up here as the Remark VM operation has been scheduled already.
1296   if (has_aborted()) {
1297     return;
1298   }
1299 
1300   G1Policy* policy = _g1h->policy();
1301   policy->record_pause_start_time();
1302 
1303   double start = os::elapsedTime();
1304 
1305   verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyLocation::RemarkBefore);
1306 
1307   {
1308     GCTraceTime(Debug, gc, phases) debug("Finalize Marking", _gc_timer_cm);
1309     finalize_marking();
1310   }
1311 
1312   double mark_work_end = os::elapsedTime();
1313 
1314   bool const mark_finished = !has_overflown();
1315   if (mark_finished) {
1316     weak_refs_work();
1317 
1318     // Unload Klasses, String, Code Cache, etc.
1319     if (ClassUnloadingWithConcurrentMark) {
1320       G1CMIsAliveClosure is_alive(this);
1321       _g1h->unload_classes_and_code("Class Unloading", &is_alive, _gc_timer_cm);
1322     }
1323 
1324     SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
1325     // We're done with marking.
1326     // This is the end of the marking cycle, we're expected all
1327     // threads to have SATB queues with active set to true.
1328     satb_mq_set.set_active_all_threads(false, /* new active value */
1329                                        true /* expected_active */);
1330 
1331     {
1332       GCTraceTime(Debug, gc, phases) debug("Flush Task Caches", _gc_timer_cm);
1333       flush_all_task_caches();
1334     }
1335 
1336     // All marking completed. Check bitmap now as we will start to reset TAMSes
1337     // in parallel below so that we can not do this in the After-Remark verification.
1338     _g1h->verifier()->verify_bitmap_clear(true /* above_tams_only */);
1339 
1340     {
1341       GCTraceTime(Debug, gc, phases) debug("Select For Rebuild and Reclaim Empty Regions", _gc_timer_cm);
1342 
1343       G1UpdateRegionLivenessAndSelectForRebuildTask cl(_g1h, this, _g1h->workers()->active_workers());
1344       uint const num_workers = MIN2(G1UpdateRegionLivenessAndSelectForRebuildTask::desired_num_workers(_g1h->num_committed_regions()),
1345                                     _g1h->workers()->active_workers());
1346       log_debug(gc,ergo)("Running %s using %u workers for %u regions in heap", cl.name(), num_workers, _g1h->num_committed_regions());
1347       _g1h->workers()->run_task(&cl, num_workers);
1348 
1349       log_debug(gc, remset, tracking)("Remembered Set Tracking update regions total %u, selected %u",
1350                                       _g1h->num_committed_regions(), cl.total_selected_for_rebuild());
1351 
1352       _needs_remembered_set_rebuild = (cl.total_selected_for_rebuild() > 0);
1353 
1354       if (_needs_remembered_set_rebuild) {
1355         GrowableArrayCHeap<G1HeapRegion*, mtGC>* selected = cl.sort_and_prune_old_selected();
1356         _g1h->policy()->candidates()->set_candidates_from_marking(selected);
1357       }
1358     }
1359 
1360     if (log_is_enabled(Trace, gc, liveness)) {
1361       G1PrintRegionLivenessInfoClosure cl("Post-Marking");
1362       _g1h->heap_region_iterate(&cl);
1363     }
1364 
1365     // Potentially, some empty-regions have been reclaimed; make this a
1366     // "collection" so that pending allocation can retry before attempting a
1367     // GC pause.
1368     _g1h->increment_total_collections();
1369 
1370     // For Remark Pauses that may have been triggered by PeriodicGCs, we maintain
1371     // resizing based on MinHeapFreeRatio or MaxHeapFreeRatio. If a PeriodicGC is
1372     // triggered, it likely means there are very few regular GCs, making resizing
1373     // based on gc heuristics less effective.
1374     if (_g1h->last_gc_was_periodic()) {
1375       _g1h->resize_heap_after_full_collection(0 /* allocation_word_size */);
1376     }
1377 
1378     compute_new_sizes();
1379 
1380     verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyLocation::RemarkAfter);
1381 
1382     assert(!restart_for_overflow(), "sanity");
1383     // Completely reset the marking state (except bitmaps) since marking completed.
1384     reset_at_marking_complete();
1385 
1386     G1CollectedHeap::finish_codecache_marking_cycle();
1387 
1388     {
1389       GCTraceTime(Debug, gc, phases) debug("Report Object Count", _gc_timer_cm);
1390       G1ObjectCountIsAliveClosure is_alive(_g1h);
1391       _gc_tracer_cm->report_object_count_after_gc(&is_alive, _g1h->workers());
1392     }
1393 
1394     // Successfully completed marking, advance state.
1395     cm_thread()->set_full_cycle_rebuild_and_scrub();
1396   } else {
1397     // We overflowed.  Restart concurrent marking.
1398     _restart_for_overflow.store_relaxed(true);
1399 
1400     verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyLocation::RemarkOverflow);
1401 
1402     // Clear the marking state because we will be restarting
1403     // marking due to overflowing the global mark stack.
1404     reset_marking_for_restart();
1405   }
1406 
1407   // Statistics
1408   double now = os::elapsedTime();
1409   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1410   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1411   _remark_times.add((now - start) * 1000.0);
1412 
1413   _g1h->update_perf_counter_cpu_time();
1414 
1415   policy->record_concurrent_mark_remark_end();
1416 
1417   return;
1418 }
1419 
1420 void G1ConcurrentMark::compute_new_sizes() {
1421   MetaspaceGC::compute_new_size();
1422 
1423   // Cleanup will have freed any regions completely full of garbage.
1424   // Update the soft reference policy with the new heap occupancy.
1425   Universe::heap()->update_capacity_and_used_at_gc();
1426 
1427   // We reclaimed old regions so we should calculate the sizes to make
1428   // sure we update the old gen/space data.
1429   _g1h->monitoring_support()->update_sizes();
1430 }
1431 
1432 class G1UpdateRegionsAfterRebuild : public G1HeapRegionClosure {
1433   G1CollectedHeap* _g1h;
1434 
1435 public:
1436   G1UpdateRegionsAfterRebuild(G1CollectedHeap* g1h) : _g1h(g1h) { }
1437 
1438   bool do_heap_region(G1HeapRegion* r) override {
1439     // Update the remset tracking state from updating to complete
1440     // if remembered sets have been rebuilt.
1441     _g1h->policy()->remset_tracker()->update_after_rebuild(r);
1442     return false;
1443   }
1444 };
1445 
1446 void G1ConcurrentMark::cleanup() {
1447   assert_at_safepoint_on_vm_thread();
1448 
1449   // If a full collection has happened, we shouldn't do this.
1450   if (has_aborted()) {
1451     return;
1452   }
1453 
1454   G1Policy* policy = _g1h->policy();
1455   policy->record_pause_start_time();
1456 
1457   double start = os::elapsedTime();
1458 
1459   verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyLocation::CleanupBefore);
1460 
1461   if (needs_remembered_set_rebuild()) {
1462     // Update the remset tracking information as well as marking all regions
1463     // as fully parsable.
1464     GCTraceTime(Debug, gc, phases) debug("Update Remembered Set Tracking After Rebuild", _gc_timer_cm);
1465     G1UpdateRegionsAfterRebuild cl(_g1h);
1466     _g1h->heap_region_iterate(&cl);
1467   } else {
1468     log_debug(gc, phases)("No Remembered Sets to update after rebuild");
1469   }
1470 
1471   verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyLocation::CleanupAfter);
1472 
1473   // Local statistics
1474   _cleanup_times.add((os::elapsedTime() - start) * 1000.0);
1475 
1476   {
1477     GCTraceTime(Debug, gc, phases) debug("Finalize Concurrent Mark Cleanup", _gc_timer_cm);
1478     policy->record_concurrent_mark_cleanup_end(needs_remembered_set_rebuild());
1479   }
1480 
1481   // Advance state.
1482   cm_thread()->set_full_cycle_reset_for_next_cycle();
1483   return;
1484 }
1485 
1486 // 'Keep Alive' oop closure used by both serial parallel reference processing.
1487 // Uses the G1CMTask associated with a worker thread (for serial reference
1488 // processing the G1CMTask for worker 0 is used) to preserve (mark) and
1489 // trace referent objects.
1490 //
1491 // Using the G1CMTask and embedded local queues avoids having the worker
1492 // threads operating on the global mark stack. This reduces the risk
1493 // of overflowing the stack - which we would rather avoid at this late
1494 // state. Also using the tasks' local queues removes the potential
1495 // of the workers interfering with each other that could occur if
1496 // operating on the global stack.
1497 
1498 class G1CMKeepAliveAndDrainClosure : public OopClosure {
1499   G1ConcurrentMark* _cm;
1500   G1CMTask*         _task;
1501   uint              _ref_counter_limit;
1502   uint              _ref_counter;
1503   bool              _is_serial;
1504 public:
1505   G1CMKeepAliveAndDrainClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
1506     _cm(cm), _task(task), _ref_counter_limit(G1RefProcDrainInterval),
1507     _ref_counter(_ref_counter_limit), _is_serial(is_serial) {
1508     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1509   }
1510 
1511   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1512   virtual void do_oop(      oop* p) { do_oop_work(p); }
1513 
1514   template <class T> void do_oop_work(T* p) {
1515     if (_cm->has_overflown()) {
1516       return;
1517     }
1518     if (!_task->deal_with_reference(p)) {
1519       // We did not add anything to the mark bitmap (or mark stack), so there is
1520       // no point trying to drain it.
1521       return;
1522     }
1523     _ref_counter--;
1524 
1525     if (_ref_counter == 0) {
1526       // We have dealt with _ref_counter_limit references, pushing them
1527       // and objects reachable from them on to the local stack (and
1528       // possibly the global stack). Call G1CMTask::do_marking_step() to
1529       // process these entries.
1530       //
1531       // We call G1CMTask::do_marking_step() in a loop, which we'll exit if
1532       // there's nothing more to do (i.e. we're done with the entries that
1533       // were pushed as a result of the G1CMTask::deal_with_reference() calls
1534       // above) or we overflow.
1535       //
1536       // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
1537       // flag while there may still be some work to do. (See the comment at
1538       // the beginning of G1CMTask::do_marking_step() for those conditions -
1539       // one of which is reaching the specified time target.) It is only
1540       // when G1CMTask::do_marking_step() returns without setting the
1541       // has_aborted() flag that the marking step has completed.
1542       do {
1543         double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1544         _task->do_marking_step(mark_step_duration_ms,
1545                                false      /* do_termination */,
1546                                _is_serial);
1547       } while (_task->has_aborted() && !_cm->has_overflown());
1548       _ref_counter = _ref_counter_limit;
1549     }
1550   }
1551 };
1552 
1553 // 'Drain' oop closure used by both serial and parallel reference processing.
1554 // Uses the G1CMTask associated with a given worker thread (for serial
1555 // reference processing the G1CMtask for worker 0 is used). Calls the
1556 // do_marking_step routine, with an unbelievably large timeout value,
1557 // to drain the marking data structures of the remaining entries
1558 // added by the 'keep alive' oop closure above.
1559 
1560 class G1CMDrainMarkingStackClosure : public VoidClosure {
1561   G1ConcurrentMark* _cm;
1562   G1CMTask*         _task;
1563   bool              _is_serial;
1564  public:
1565   G1CMDrainMarkingStackClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
1566     _cm(cm), _task(task), _is_serial(is_serial) {
1567     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1568   }
1569 
1570   void do_void() {
1571     do {
1572       // We call G1CMTask::do_marking_step() to completely drain the local
1573       // and global marking stacks of entries pushed by the 'keep alive'
1574       // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
1575       //
1576       // G1CMTask::do_marking_step() is called in a loop, which we'll exit
1577       // if there's nothing more to do (i.e. we've completely drained the
1578       // entries that were pushed as a result of applying the 'keep alive'
1579       // closure to the entries on the discovered ref lists) or we overflow
1580       // the global marking stack.
1581       //
1582       // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
1583       // flag while there may still be some work to do. (See the comment at
1584       // the beginning of G1CMTask::do_marking_step() for those conditions -
1585       // one of which is reaching the specified time target.) It is only
1586       // when G1CMTask::do_marking_step() returns without setting the
1587       // has_aborted() flag that the marking step has completed.
1588 
1589       _task->do_marking_step(1000000000.0 /* something very large */,
1590                              true         /* do_termination */,
1591                              _is_serial);
1592     } while (_task->has_aborted() && !_cm->has_overflown());
1593   }
1594 };
1595 
1596 class G1CMRefProcProxyTask : public RefProcProxyTask {
1597   G1CollectedHeap& _g1h;
1598   G1ConcurrentMark& _cm;
1599 
1600 public:
1601   G1CMRefProcProxyTask(uint max_workers, G1CollectedHeap& g1h, G1ConcurrentMark &cm)
1602     : RefProcProxyTask("G1CMRefProcProxyTask", max_workers),
1603       _g1h(g1h),
1604       _cm(cm) {}
1605 
1606   void work(uint worker_id) override {
1607     assert(worker_id < _max_workers, "sanity");
1608     G1CMIsAliveClosure is_alive(&_cm);
1609     uint index = (_tm == RefProcThreadModel::Single) ? 0 : worker_id;
1610     G1CMKeepAliveAndDrainClosure keep_alive(&_cm, _cm.task(index), _tm == RefProcThreadModel::Single);
1611     BarrierEnqueueDiscoveredFieldClosure enqueue;
1612     G1CMDrainMarkingStackClosure complete_gc(&_cm, _cm.task(index), _tm == RefProcThreadModel::Single);
1613     _rp_task->rp_work(worker_id, &is_alive, &keep_alive, &enqueue, &complete_gc);
1614   }
1615 
1616   void prepare_run_task_hook() override {
1617     // We need to reset the concurrency level before each
1618     // proxy task execution, so that the termination protocol
1619     // and overflow handling in G1CMTask::do_marking_step() knows
1620     // how many workers to wait for.
1621     _cm.set_concurrency(_queue_count);
1622   }
1623 };
1624 
1625 void G1ConcurrentMark::weak_refs_work() {
1626   ResourceMark rm;
1627 
1628   {
1629     GCTraceTime(Debug, gc, phases) debug("Reference Processing", _gc_timer_cm);
1630 
1631     ReferenceProcessor* rp = _g1h->ref_processor_cm();
1632 
1633     // See the comment in G1CollectedHeap::ref_processing_init()
1634     // about how reference processing currently works in G1.
1635 
1636     assert(_global_mark_stack.is_empty(), "mark stack should be empty");
1637 
1638     // Prefer to grow the stack until the max capacity.
1639     _global_mark_stack.set_should_grow();
1640 
1641     // Parallel processing task executor.
1642     G1CMRefProcProxyTask task(rp->max_num_queues(), *_g1h, *this);
1643     ReferenceProcessorPhaseTimes pt(_gc_timer_cm, rp->max_num_queues());
1644 
1645     // Process the weak references.
1646     const ReferenceProcessorStats& stats = rp->process_discovered_references(task, _g1h->workers(), pt);
1647     _gc_tracer_cm->report_gc_reference_stats(stats);
1648     pt.print_all_references();
1649 
1650     // The do_oop work routines of the keep_alive and drain_marking_stack
1651     // oop closures will set the has_overflown flag if we overflow the
1652     // global marking stack.
1653 
1654     assert(has_overflown() || _global_mark_stack.is_empty(),
1655            "Mark stack should be empty (unless it has overflown)");
1656   }
1657 
1658   if (has_overflown()) {
1659     // We can not trust g1_is_alive and the contents of the heap if the marking stack
1660     // overflowed while processing references. Exit the VM.
1661     fatal("Overflow during reference processing, can not continue. Current mark stack depth: "
1662           "%zu, MarkStackSize: %zu, MarkStackSizeMax: %zu. "
1663           "Please increase MarkStackSize and/or MarkStackSizeMax and restart.",
1664           _global_mark_stack.size(), MarkStackSize, MarkStackSizeMax);
1665     return;
1666   }
1667 
1668   assert(_global_mark_stack.is_empty(), "Marking should have completed");
1669 
1670   {
1671     GCTraceTime(Debug, gc, phases) debug("Weak Processing", _gc_timer_cm);
1672     G1CMIsAliveClosure is_alive(this);
1673     WeakProcessor::weak_oops_do(_g1h->workers(), &is_alive, &do_nothing_cl, 1);
1674   }
1675 }
1676 
1677 class G1PrecleanYieldClosure : public YieldClosure {
1678   G1ConcurrentMark* _cm;
1679 
1680 public:
1681   G1PrecleanYieldClosure(G1ConcurrentMark* cm) : _cm(cm) { }
1682 
1683   virtual bool should_return() {
1684     return _cm->has_aborted();
1685   }
1686 
1687   virtual bool should_return_fine_grain() {
1688     _cm->do_yield_check();
1689     return _cm->has_aborted();
1690   }
1691 };
1692 
1693 void G1ConcurrentMark::preclean() {
1694   assert(G1UseReferencePrecleaning, "Precleaning must be enabled.");
1695 
1696   SuspendibleThreadSetJoiner joiner;
1697 
1698   BarrierEnqueueDiscoveredFieldClosure enqueue;
1699 
1700   set_concurrency_and_phase(1, true);
1701 
1702   G1PrecleanYieldClosure yield_cl(this);
1703 
1704   ReferenceProcessor* rp = _g1h->ref_processor_cm();
1705   // Precleaning is single threaded. Temporarily disable MT discovery.
1706   ReferenceProcessorMTDiscoveryMutator rp_mut_discovery(rp, false);
1707   rp->preclean_discovered_references(rp->is_alive_non_header(),
1708                                      &enqueue,
1709                                      &yield_cl,
1710                                      _gc_timer_cm);
1711 }
1712 
1713 // Closure for marking entries in SATB buffers.
1714 class G1CMSATBBufferClosure : public SATBBufferClosure {
1715 private:
1716   G1CMTask* _task;
1717   G1CollectedHeap* _g1h;
1718 
1719   // This is very similar to G1CMTask::deal_with_reference, but with
1720   // more relaxed requirements for the argument, so this must be more
1721   // circumspect about treating the argument as an object.
1722   void do_entry(void* entry) const {
1723     _task->increment_refs_reached();
1724     oop const obj = cast_to_oop(entry);
1725     _task->make_reference_grey(obj);
1726   }
1727 
1728 public:
1729   G1CMSATBBufferClosure(G1CMTask* task, G1CollectedHeap* g1h)
1730     : _task(task), _g1h(g1h) { }
1731 
1732   virtual void do_buffer(void** buffer, size_t size) {
1733     for (size_t i = 0; i < size; ++i) {
1734       do_entry(buffer[i]);
1735     }
1736   }
1737 };
1738 
1739 class G1RemarkThreadsClosure : public ThreadClosure {
1740   G1SATBMarkQueueSet& _qset;
1741 
1742  public:
1743   G1RemarkThreadsClosure(G1CollectedHeap* g1h, G1CMTask* task) :
1744     _qset(G1BarrierSet::satb_mark_queue_set()) {}
1745 
1746   void do_thread(Thread* thread) {
1747     // Transfer any partial buffer to the qset for completed buffer processing.
1748     _qset.flush_queue(G1ThreadLocalData::satb_mark_queue(thread));
1749   }
1750 };
1751 
1752 class G1CMRemarkTask : public WorkerTask {
1753   // For Threads::possibly_parallel_threads_do
1754   ThreadsClaimTokenScope _threads_claim_token_scope;
1755   G1ConcurrentMark* _cm;
1756 public:
1757   void work(uint worker_id) {
1758     G1CMTask* task = _cm->task(worker_id);
1759     task->record_start_time();
1760     {
1761       ResourceMark rm;
1762 
1763       G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
1764       Threads::possibly_parallel_threads_do(true /* is_par */, &threads_f);
1765     }
1766 
1767     do {
1768       task->do_marking_step(1000000000.0 /* something very large */,
1769                             true         /* do_termination       */,
1770                             false        /* is_serial            */);
1771     } while (task->has_aborted() && !_cm->has_overflown());
1772     // If we overflow, then we do not want to restart. We instead
1773     // want to abort remark and do concurrent marking again.
1774     task->record_end_time();
1775   }
1776 
1777   G1CMRemarkTask(G1ConcurrentMark* cm, uint active_workers) :
1778     WorkerTask("Par Remark"), _threads_claim_token_scope(), _cm(cm) {
1779     _cm->terminator()->reset_for_reuse(active_workers);
1780   }
1781 };
1782 
1783 void G1ConcurrentMark::finalize_marking() {
1784   ResourceMark rm;
1785 
1786   _g1h->ensure_parsability(false);
1787 
1788   // this is remark, so we'll use up all active threads
1789   uint active_workers = _g1h->workers()->active_workers();
1790   set_concurrency_and_phase(active_workers, false /* concurrent */);
1791   // Leave _parallel_marking_threads at it's
1792   // value originally calculated in the G1ConcurrentMark
1793   // constructor and pass values of the active workers
1794   // through the task.
1795 
1796   {
1797     G1CMRemarkTask remarkTask(this, active_workers);
1798     // We will start all available threads, even if we decide that the
1799     // active_workers will be fewer. The extra ones will just bail out
1800     // immediately.
1801     _g1h->workers()->run_task(&remarkTask);
1802   }
1803 
1804   SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
1805   guarantee(has_overflown() ||
1806             satb_mq_set.completed_buffers_num() == 0,
1807             "Invariant: has_overflown = %s, num buffers = %zu",
1808             BOOL_TO_STR(has_overflown()),
1809             satb_mq_set.completed_buffers_num());
1810 
1811   print_stats();
1812 }
1813 
1814 void G1ConcurrentMark::flush_all_task_caches() {
1815   size_t hits = 0;
1816   size_t misses = 0;
1817   for (uint i = 0; i < _max_num_tasks; i++) {
1818     Pair<size_t, size_t> stats = _tasks[i]->flush_mark_stats_cache();
1819     hits += stats.first;
1820     misses += stats.second;
1821   }
1822   size_t sum = hits + misses;
1823   log_debug(gc, stats)("Mark stats cache hits %zu misses %zu ratio %1.3lf",
1824                        hits, misses, percent_of(hits, sum));
1825 }
1826 
1827 void G1ConcurrentMark::clear_bitmap_for_region(G1HeapRegion* hr) {
1828   assert_at_safepoint();
1829   _mark_bitmap.clear_range(MemRegion(hr->bottom(), hr->end()));
1830 }
1831 
1832 G1HeapRegion* G1ConcurrentMark::claim_region(uint worker_id) {
1833   // "Checkpoint" the finger.
1834   HeapWord* local_finger = finger();
1835 
1836   while (local_finger < _heap.end()) {
1837     assert(_g1h->is_in_reserved(local_finger), "invariant");
1838 
1839     G1HeapRegion* curr_region = _g1h->heap_region_containing_or_null(local_finger);
1840     // Make sure that the reads below do not float before loading curr_region.
1841     OrderAccess::loadload();
1842     // Above heap_region_containing may return null as we always scan claim
1843     // until the end of the heap. In this case, just jump to the next region.
1844     HeapWord* end = curr_region != nullptr ? curr_region->end() : local_finger + G1HeapRegion::GrainWords;
1845 
1846     // Is the gap between reading the finger and doing the CAS too long?
1847     HeapWord* res = _finger.compare_exchange(local_finger, end);
1848     if (res == local_finger && curr_region != nullptr) {
1849       // We succeeded.
1850       HeapWord* bottom = curr_region->bottom();
1851       HeapWord* limit = top_at_mark_start(curr_region);
1852 
1853       log_trace(gc, marking)("Claim region %u bottom " PTR_FORMAT " tams " PTR_FORMAT, curr_region->hrm_index(), p2i(curr_region->bottom()), p2i(top_at_mark_start(curr_region)));
1854       // Notice that _finger == end cannot be guaranteed here since,
1855       // someone else might have moved the finger even further.
1856       assert(finger() >= end, "The finger should have moved forward");
1857 
1858       if (limit > bottom) {
1859         return curr_region;
1860       } else {
1861         assert(limit == bottom,
1862                "The region limit should be at bottom");
1863         // We return null and the caller should try calling
1864         // claim_region() again.
1865         return nullptr;
1866       }
1867     } else {
1868       // Read the finger again.
1869       HeapWord* next_finger = finger();
1870       assert(next_finger > local_finger, "The finger should have moved forward " PTR_FORMAT " " PTR_FORMAT, p2i(local_finger), p2i(next_finger));
1871       local_finger = next_finger;
1872     }
1873   }
1874 
1875   return nullptr;
1876 }
1877 
1878 #ifndef PRODUCT
1879 class VerifyNoCSetOops {
1880   G1CollectedHeap* _g1h;
1881   const char* _phase;
1882   int _info;
1883 
1884 public:
1885   VerifyNoCSetOops(const char* phase, int info = -1) :
1886     _g1h(G1CollectedHeap::heap()),
1887     _phase(phase),
1888     _info(info)
1889   { }
1890 
1891   void operator()(G1TaskQueueEntry task_entry) const {
1892     if (task_entry.is_partial_array_state()) {
1893       oop obj = task_entry.to_partial_array_state()->source();
1894       guarantee(_g1h->is_in_reserved(obj), "Partial Array " PTR_FORMAT " must be in heap.", p2i(obj));
1895       return;
1896     }
1897     guarantee(oopDesc::is_oop(task_entry.to_oop()),
1898               "Non-oop " PTR_FORMAT ", phase: %s, info: %d",
1899               p2i(task_entry.to_oop()), _phase, _info);
1900     G1HeapRegion* r = _g1h->heap_region_containing(task_entry.to_oop());
1901     guarantee(!(r->in_collection_set() || r->has_index_in_opt_cset()),
1902               "obj " PTR_FORMAT " from %s (%d) in region %u in (optional) collection set",
1903               p2i(task_entry.to_oop()), _phase, _info, r->hrm_index());
1904   }
1905 };
1906 
1907 void G1ConcurrentMark::verify_no_collection_set_oops() {
1908   assert(SafepointSynchronize::is_at_safepoint() || !is_init_completed(),
1909          "should be at a safepoint or initializing");
1910   if (!is_fully_initialized() || !_g1h->collector_state()->is_in_mark_or_rebuild()) {
1911     return;
1912   }
1913 
1914   // Verify entries on the global mark stack
1915   _global_mark_stack.iterate(VerifyNoCSetOops("Stack"));
1916 
1917   // Verify entries on the task queues
1918   for (uint i = 0; i < _max_num_tasks; ++i) {
1919     G1CMTaskQueue* queue = _task_queues->queue(i);
1920     queue->iterate(VerifyNoCSetOops("Queue", i));
1921   }
1922 
1923   // Verify the global finger
1924   HeapWord* global_finger = finger();
1925   if (global_finger != nullptr && global_finger < _heap.end()) {
1926     // Since we always iterate over all regions, we might get a null G1HeapRegion
1927     // here.
1928     G1HeapRegion* global_hr = _g1h->heap_region_containing_or_null(global_finger);
1929     guarantee(global_hr == nullptr || global_finger == global_hr->bottom(),
1930               "global finger: " PTR_FORMAT " region: " HR_FORMAT,
1931               p2i(global_finger), HR_FORMAT_PARAMS(global_hr));
1932   }
1933 
1934   // Verify the task fingers
1935   assert(_num_concurrent_workers <= _max_num_tasks, "sanity");
1936   for (uint i = 0; i < _num_concurrent_workers; ++i) {
1937     G1CMTask* task = _tasks[i];
1938     HeapWord* task_finger = task->finger();
1939     if (task_finger != nullptr && task_finger < _heap.end()) {
1940       // See above note on the global finger verification.
1941       G1HeapRegion* r = _g1h->heap_region_containing_or_null(task_finger);
1942       guarantee(r == nullptr || task_finger == r->bottom() ||
1943                 !r->in_collection_set() || !r->has_index_in_opt_cset(),
1944                 "task finger: " PTR_FORMAT " region: " HR_FORMAT,
1945                 p2i(task_finger), HR_FORMAT_PARAMS(r));
1946     }
1947   }
1948 }
1949 #endif // PRODUCT
1950 
1951 void G1ConcurrentMark::rebuild_and_scrub() {
1952   if (!needs_remembered_set_rebuild()) {
1953     log_debug(gc, marking)("Skipping Remembered Set Rebuild. No regions selected for rebuild, will only scrub");
1954   }
1955 
1956   G1ConcurrentRebuildAndScrub::rebuild_and_scrub(this, needs_remembered_set_rebuild(), _concurrent_workers);
1957 }
1958 
1959 void G1ConcurrentMark::print_stats() {
1960   if (!log_is_enabled(Debug, gc, stats)) {
1961     return;
1962   }
1963   log_debug(gc, stats)("---------------------------------------------------------------------");
1964   for (size_t i = 0; i < _num_active_tasks; ++i) {
1965     _tasks[i]->print_stats();
1966     log_debug(gc, stats)("---------------------------------------------------------------------");
1967   }
1968 }
1969 
1970 bool G1ConcurrentMark::concurrent_cycle_abort() {
1971   assert_at_safepoint_on_vm_thread();
1972   assert(_g1h->collector_state()->is_in_full_gc(), "must be");
1973 
1974   // If we start the compaction before the CM threads finish
1975   // scanning the root regions we might trip them over as we'll
1976   // be moving objects / updating references. Since the root region
1977   // scan synchronized with the safepoint, just tell it to abort.
1978   // It will notice when the threads start up again later.
1979   abort_root_region_scan_at_safepoint();
1980 
1981   // We haven't started a concurrent cycle no need to do anything; we might have
1982   // aborted the marking because of shutting down though. In this case the marking
1983   // might have already completed the abort (leading to in_progress() below to
1984   // return false), however this still left marking state particularly in the
1985   // shared marking bitmap that must be cleaned up.
1986   // If there are multiple full gcs during shutdown we do this work repeatedly for
1987   // nothing, but this situation should be extremely rare (a full gc after shutdown
1988   // has been signalled is already rare), and this work should be negligible compared
1989   // to actual full gc work.
1990 
1991   if (!is_fully_initialized() || (!cm_thread()->is_in_progress() && !cm_thread()->should_terminate())) {
1992     return false;
1993   }
1994 
1995   reset_marking_for_restart();
1996 
1997   abort_marking_threads();
1998 
1999   SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
2000   satb_mq_set.abandon_partial_marking();
2001   // This can be called either during or outside marking, we'll read
2002   // the expected_active value from the SATB queue set.
2003   satb_mq_set.set_active_all_threads(false, /* new active value */
2004                                      satb_mq_set.is_active() /* expected_active */);
2005   return true;
2006 }
2007 
2008 void G1ConcurrentMark::abort_marking_threads() {
2009   assert_root_region_scan_completed_or_aborted();
2010   _has_aborted.store_relaxed(true);
2011   _first_overflow_barrier_sync.abort();
2012   _second_overflow_barrier_sync.abort();
2013 }
2014 
2015 double G1ConcurrentMark::worker_threads_cpu_time_s() {
2016   class CountCpuTimeThreadClosure : public ThreadClosure {
2017   public:
2018     jlong _total_cpu_time;
2019 
2020     CountCpuTimeThreadClosure() : ThreadClosure(), _total_cpu_time(0) { }
2021 
2022     void do_thread(Thread* t) {
2023       _total_cpu_time += os::thread_cpu_time(t);
2024     }
2025   } cl;
2026 
2027   threads_do(&cl);
2028 
2029   return (double)cl._total_cpu_time / NANOSECS_PER_SEC;
2030 }
2031 
2032 static void print_ms_time_info(const char* prefix, const char* name,
2033                                NumberSeq& ns) {
2034   log_trace(gc, marking)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
2035                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
2036   if (ns.num() > 0) {
2037     log_trace(gc, marking)("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
2038                            prefix, ns.sd(), ns.maximum());
2039   }
2040 }
2041 
2042 void G1ConcurrentMark::print_summary_info() {
2043   Log(gc, marking) log;
2044   if (!log.is_trace()) {
2045     return;
2046   }
2047 
2048   log.trace(" Concurrent marking:");
2049   if (!is_fully_initialized()) {
2050     log.trace("    has not been initialized yet");
2051     return;
2052   }
2053   print_ms_time_info("  ", "remarks", _remark_times);
2054   {
2055     print_ms_time_info("     ", "final marks", _remark_mark_times);
2056     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
2057 
2058   }
2059   print_ms_time_info("  ", "cleanups", _cleanup_times);
2060   log.trace("    Finalize live data total time = %8.2f s (avg = %8.2f ms).",
2061             _cleanup_times.sum() / 1000.0, _cleanup_times.avg());
2062   log.trace("  Total stop_world time = %8.2f s.",
2063             (_remark_times.sum() + _cleanup_times.sum())/1000.0);
2064   log.trace("  Total concurrent time = %8.2f s (%8.2f s marking).",
2065             cm_thread()->total_mark_cpu_time_s(), cm_thread()->worker_threads_cpu_time_s());
2066 }
2067 
2068 void G1ConcurrentMark::threads_do(ThreadClosure* tc) const {
2069   if (is_fully_initialized()) { // they are initialized late
2070     tc->do_thread(_cm_thread);
2071     _concurrent_workers->threads_do(tc);
2072   }
2073 }
2074 
2075 void G1ConcurrentMark::print_on(outputStream* st) const {
2076   st->print_cr("Marking Bits: (CMBitMap*) " PTR_FORMAT, p2i(mark_bitmap()));
2077   _mark_bitmap.print_on(st, " Bits: ");
2078 }
2079 
2080 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) {
2081   ReferenceProcessor* result = g1h->ref_processor_cm();
2082   assert(result != nullptr, "CM reference processor should not be null");
2083   return result;
2084 }
2085 
2086 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
2087                                G1CMTask* task)
2088   : ClaimMetadataVisitingOopIterateClosure(ClassLoaderData::_claim_strong, get_cm_oop_closure_ref_processor(g1h)),
2089     _g1h(g1h), _task(task)
2090 { }
2091 
2092 void G1CMTask::setup_for_region(G1HeapRegion* hr) {
2093   assert(hr != nullptr,
2094         "claim_region() should have filtered out null regions");
2095   _curr_region  = hr;
2096   _finger       = hr->bottom();
2097   update_region_limit();
2098 }
2099 
2100 void G1CMTask::update_region_limit() {
2101   G1HeapRegion* hr = _curr_region;
2102   HeapWord* bottom = hr->bottom();
2103   HeapWord* limit = _cm->top_at_mark_start(hr);
2104 
2105   if (limit == bottom) {
2106     // The region was collected underneath our feet.
2107     // We set the finger to bottom to ensure that the bitmap
2108     // iteration that will follow this will not do anything.
2109     // (this is not a condition that holds when we set the region up,
2110     // as the region is not supposed to be empty in the first place)
2111     _finger = bottom;
2112   } else if (limit >= _region_limit) {
2113     assert(limit >= _finger, "peace of mind");
2114   } else {
2115     assert(limit < _region_limit, "only way to get here");
2116     // This can happen under some pretty unusual circumstances.  An
2117     // evacuation pause empties the region underneath our feet (TAMS
2118     // at bottom). We then do some allocation in the region (TAMS
2119     // stays at bottom), followed by the region being used as a GC
2120     // alloc region (TAMS will move to top() and the objects
2121     // originally below it will be greyed). All objects now marked in
2122     // the region are explicitly greyed, if below the global finger,
2123     // and we do not need in fact to scan anything else. So, we simply
2124     // set _finger to be limit to ensure that the bitmap iteration
2125     // doesn't do anything.
2126     _finger = limit;
2127   }
2128 
2129   _region_limit = limit;
2130 }
2131 
2132 void G1CMTask::giveup_current_region() {
2133   assert(_curr_region != nullptr, "invariant");
2134   clear_region_fields();
2135 }
2136 
2137 void G1CMTask::clear_region_fields() {
2138   // Values for these three fields that indicate that we're not
2139   // holding on to a region.
2140   _curr_region   = nullptr;
2141   _finger        = nullptr;
2142   _region_limit  = nullptr;
2143 }
2144 
2145 void G1CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
2146   if (cm_oop_closure == nullptr) {
2147     assert(_cm_oop_closure != nullptr, "invariant");
2148   } else {
2149     assert(_cm_oop_closure == nullptr, "invariant");
2150   }
2151   _cm_oop_closure = cm_oop_closure;
2152 }
2153 
2154 void G1CMTask::reset(G1CMBitMap* mark_bitmap) {
2155   guarantee(mark_bitmap != nullptr, "invariant");
2156   _mark_bitmap              = mark_bitmap;
2157   clear_region_fields();
2158 
2159   _calls                         = 0;
2160   _elapsed_time_ms               = 0.0;
2161   _termination_time_ms           = 0.0;
2162 
2163   _mark_stats_cache.reset();
2164 }
2165 
2166 void G1CMTask::reset_for_restart() {
2167   clear_region_fields();
2168   _task_queue->set_empty();
2169   TASKQUEUE_STATS_ONLY(_partial_array_splitter.stats()->reset());
2170   TASKQUEUE_STATS_ONLY(_task_queue->stats.reset());
2171 }
2172 
2173 void G1CMTask::register_partial_array_splitter() {
2174 
2175   ::new (&_partial_array_splitter) PartialArraySplitter(_cm->partial_array_state_manager(),
2176                                                         _cm->max_num_tasks());
2177 }
2178 
2179 void G1CMTask::unregister_partial_array_splitter() {
2180   _partial_array_splitter.~PartialArraySplitter();
2181 }
2182 
2183 bool G1CMTask::should_exit_termination() {
2184   if (!regular_clock_call()) {
2185     return true;
2186   }
2187 
2188   // This is called when we are in the termination protocol. We should
2189   // quit if, for some reason, this task wants to abort or the global
2190   // stack is not empty (this means that we can get work from it).
2191   return !_cm->mark_stack_empty() || has_aborted();
2192 }
2193 
2194 void G1CMTask::reached_limit() {
2195   assert(_words_scanned >= _words_scanned_limit ||
2196          _refs_reached >= _refs_reached_limit ,
2197          "shouldn't have been called otherwise");
2198   abort_marking_if_regular_check_fail();
2199 }
2200 
2201 bool G1CMTask::regular_clock_call() {
2202   if (has_aborted()) {
2203     return false;
2204   }
2205 
2206   // First, we need to recalculate the words scanned and refs reached
2207   // limits for the next clock call.
2208   recalculate_limits();
2209 
2210   // During the regular clock call we do the following
2211 
2212   // (1) If an overflow has been flagged, then we abort.
2213   if (_cm->has_overflown()) {
2214     return false;
2215   }
2216 
2217   // If we are not concurrent (i.e. we're doing remark) we don't need
2218   // to check anything else. The other steps are only needed during
2219   // the concurrent marking phase.
2220   if (!_cm->concurrent()) {
2221     return true;
2222   }
2223 
2224   // (2) If marking has been aborted for Full GC, then we also abort.
2225   if (_cm->has_aborted()) {
2226     return false;
2227   }
2228 
2229   // (4) We check whether we should yield. If we have to, then we abort.
2230   if (SuspendibleThreadSet::should_yield()) {
2231     // We should yield. To do this we abort the task. The caller is
2232     // responsible for yielding.
2233     return false;
2234   }
2235 
2236   // (5) We check whether we've reached our time quota. If we have,
2237   // then we abort.
2238   double elapsed_time_ms = (double)(os::current_thread_cpu_time() - _start_cpu_time_ns) / NANOSECS_PER_MILLISEC;
2239   if (elapsed_time_ms > _time_target_ms) {
2240     _has_timed_out = true;
2241     return false;
2242   }
2243 
2244   // (6) Finally, we check whether there are enough completed STAB
2245   // buffers available for processing. If there are, we abort.
2246   SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
2247   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
2248     // we do need to process SATB buffers, we'll abort and restart
2249     // the marking task to do so
2250     return false;
2251   }
2252   return true;
2253 }
2254 
2255 void G1CMTask::recalculate_limits() {
2256   _real_words_scanned_limit = _words_scanned + words_scanned_period;
2257   _words_scanned_limit      = _real_words_scanned_limit;
2258 
2259   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
2260   _refs_reached_limit       = _real_refs_reached_limit;
2261 }
2262 
2263 void G1CMTask::decrease_limits() {
2264   // This is called when we believe that we're going to do an infrequent
2265   // operation which will increase the per byte scanned cost (i.e. move
2266   // entries to/from the global stack). It basically tries to decrease the
2267   // scanning limit so that the clock is called earlier.
2268 
2269   _words_scanned_limit = _real_words_scanned_limit - 3 * words_scanned_period / 4;
2270   _refs_reached_limit  = _real_refs_reached_limit - 3 * refs_reached_period / 4;
2271 }
2272 
2273 void G1CMTask::move_entries_to_global_stack() {
2274   // Local array where we'll store the entries that will be popped
2275   // from the local queue.
2276   G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2277 
2278   size_t n = 0;
2279   G1TaskQueueEntry task_entry;
2280   while (n < G1CMMarkStack::EntriesPerChunk && _task_queue->pop_local(task_entry)) {
2281     buffer[n] = task_entry;
2282     ++n;
2283   }
2284   if (n < G1CMMarkStack::EntriesPerChunk) {
2285     buffer[n] = G1TaskQueueEntry();
2286   }
2287 
2288   if (n > 0) {
2289     if (!_cm->mark_stack_push(buffer)) {
2290       set_has_aborted();
2291     }
2292   }
2293 
2294   // This operation was quite expensive, so decrease the limits.
2295   decrease_limits();
2296 }
2297 
2298 bool G1CMTask::get_entries_from_global_stack() {
2299   // Local array where we'll store the entries that will be popped
2300   // from the global stack.
2301   G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2302 
2303   if (!_cm->mark_stack_pop(buffer)) {
2304     return false;
2305   }
2306 
2307   // We did actually pop at least one entry.
2308   for (size_t i = 0; i < G1CMMarkStack::EntriesPerChunk; ++i) {
2309     G1TaskQueueEntry task_entry = buffer[i];
2310     if (task_entry.is_null()) {
2311       break;
2312     }
2313     assert(task_entry.is_partial_array_state() || oopDesc::is_oop(task_entry.to_oop()), "Element " PTR_FORMAT " must be an array slice or oop", p2i(task_entry.to_oop()));
2314     bool success = _task_queue->push(task_entry);
2315     // We only call this when the local queue is empty or under a
2316     // given target limit. So, we do not expect this push to fail.
2317     assert(success, "invariant");
2318   }
2319 
2320   // This operation was quite expensive, so decrease the limits
2321   decrease_limits();
2322   return true;
2323 }
2324 
2325 void G1CMTask::drain_local_queue(bool partially) {
2326   if (has_aborted()) {
2327     return;
2328   }
2329 
2330   // Decide what the target size is, depending whether we're going to
2331   // drain it partially (so that other tasks can steal if they run out
2332   // of things to do) or totally (at the very end).
2333   uint target_size;
2334   if (partially) {
2335     target_size = GCDrainStackTargetSize;
2336   } else {
2337     target_size = 0;
2338   }
2339 
2340   if (_task_queue->size() > target_size) {
2341     G1TaskQueueEntry entry;
2342     bool ret = _task_queue->pop_local(entry);
2343     while (ret) {
2344       process_entry(entry, false /* stolen */);
2345       if (_task_queue->size() <= target_size || has_aborted()) {
2346         ret = false;
2347       } else {
2348         ret = _task_queue->pop_local(entry);
2349       }
2350     }
2351   }
2352 }
2353 
2354 size_t G1CMTask::start_partial_array_processing(objArrayOop obj) {
2355   assert(obj->length() >= (int)ObjArrayMarkingStride, "Must be a large array object %d", obj->length());
2356 
2357   // Mark objArray klass metadata
2358   process_klass(obj->klass());
2359 
2360   size_t array_length = obj->length();
2361   size_t initial_chunk_size = _partial_array_splitter.start(_task_queue, obj, nullptr, array_length, ObjArrayMarkingStride);
2362 
2363   process_array_chunk(obj, 0, initial_chunk_size);
2364 
2365   // Include object header size
2366   return objArrayOopDesc::object_size(checked_cast<int>(initial_chunk_size));
2367 }
2368 
2369 size_t G1CMTask::process_partial_array(const G1TaskQueueEntry& task, bool stolen) {
2370   PartialArrayState* state = task.to_partial_array_state();
2371   // Access state before release by claim().
2372   objArrayOop obj = objArrayOop(state->source());
2373 
2374   PartialArraySplitter::Claim claim =
2375     _partial_array_splitter.claim(state, _task_queue, stolen);
2376 
2377   process_array_chunk(obj, claim._start, claim._end);
2378   return heap_word_size((claim._end - claim._start) * heapOopSize);
2379 }
2380 
2381 void G1CMTask::drain_global_stack(bool partially) {
2382   if (has_aborted()) {
2383     return;
2384   }
2385 
2386   // We have a policy to drain the local queue before we attempt to
2387   // drain the global stack.
2388   assert(partially || _task_queue->size() == 0, "invariant");
2389 
2390   // Decide what the target size is, depending whether we're going to
2391   // drain it partially (so that other tasks can steal if they run out
2392   // of things to do) or totally (at the very end).
2393   // Notice that when draining the global mark stack partially, due to the racyness
2394   // of the mark stack size update we might in fact drop below the target. But,
2395   // this is not a problem.
2396   // In case of total draining, we simply process until the global mark stack is
2397   // totally empty, disregarding the size counter.
2398   if (partially) {
2399     size_t const target_size = _cm->partial_mark_stack_size_target();
2400     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
2401       if (get_entries_from_global_stack()) {
2402         drain_local_queue(partially);
2403       }
2404     }
2405   } else {
2406     while (!has_aborted() && get_entries_from_global_stack()) {
2407       drain_local_queue(partially);
2408     }
2409   }
2410 }
2411 
2412 // SATB Queue has several assumptions on whether to call the par or
2413 // non-par versions of the methods. this is why some of the code is
2414 // replicated. We should really get rid of the single-threaded version
2415 // of the code to simplify things.
2416 void G1CMTask::drain_satb_buffers() {
2417   if (has_aborted()) {
2418     return;
2419   }
2420 
2421   // We set this so that the regular clock knows that we're in the
2422   // middle of draining buffers and doesn't set the abort flag when it
2423   // notices that SATB buffers are available for draining. It'd be
2424   // very counter productive if it did that. :-)
2425   _draining_satb_buffers = true;
2426 
2427   G1CMSATBBufferClosure satb_cl(this, _g1h);
2428   SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
2429 
2430   // This keeps claiming and applying the closure to completed buffers
2431   // until we run out of buffers or we need to abort.
2432   while (!has_aborted() &&
2433          satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
2434     abort_marking_if_regular_check_fail();
2435   }
2436 
2437   // Can't assert qset is empty here, even if not aborted.  If concurrent,
2438   // some other thread might be adding to the queue.  If not concurrent,
2439   // some other thread might have won the race for the last buffer, but
2440   // has not yet decremented the count.
2441 
2442   _draining_satb_buffers = false;
2443 
2444   // again, this was a potentially expensive operation, decrease the
2445   // limits to get the regular clock call early
2446   decrease_limits();
2447 }
2448 
2449 void G1CMTask::clear_mark_stats_cache(uint region_idx) {
2450   _mark_stats_cache.reset(region_idx);
2451 }
2452 
2453 Pair<size_t, size_t> G1CMTask::flush_mark_stats_cache() {
2454   return _mark_stats_cache.evict_all();
2455 }
2456 
2457 void G1CMTask::print_stats() {
2458   log_debug(gc, stats)("Marking Stats, task = %u, calls = %u", _worker_id, _calls);
2459   log_debug(gc, stats)("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
2460                        _elapsed_time_ms, _termination_time_ms);
2461   log_debug(gc, stats)("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms max = %1.2lfms, total = %1.2lfms",
2462                        _step_times_ms.num(),
2463                        _step_times_ms.avg(),
2464                        _step_times_ms.sd(),
2465                        _step_times_ms.maximum(),
2466                        _step_times_ms.sum());
2467   size_t const hits = _mark_stats_cache.hits();
2468   size_t const misses = _mark_stats_cache.misses();
2469   log_debug(gc, stats)("  Mark Stats Cache: hits %zu misses %zu ratio %.3f",
2470                        hits, misses, percent_of(hits, hits + misses));
2471 }
2472 
2473 bool G1ConcurrentMark::try_stealing(uint worker_id, G1TaskQueueEntry& task_entry) {
2474   return _task_queues->steal(worker_id, task_entry);
2475 }
2476 
2477 void G1CMTask::process_current_region(G1CMBitMapClosure& bitmap_closure) {
2478   if (has_aborted() || _curr_region == nullptr) {
2479     return;
2480   }
2481 
2482   // This means that we're already holding on to a region.
2483   assert(_finger != nullptr, "if region is not null, then the finger "
2484          "should not be null either");
2485 
2486   // We might have restarted this task after an evacuation pause
2487   // which might have evacuated the region we're holding on to
2488   // underneath our feet. Let's read its limit again to make sure
2489   // that we do not iterate over a region of the heap that
2490   // contains garbage (update_region_limit() will also move
2491   // _finger to the start of the region if it is found empty).
2492   update_region_limit();
2493   // We will start from _finger not from the start of the region,
2494   // as we might be restarting this task after aborting half-way
2495   // through scanning this region. In this case, _finger points to
2496   // the address where we last found a marked object. If this is a
2497   // fresh region, _finger points to start().
2498   MemRegion mr = MemRegion(_finger, _region_limit);
2499 
2500   assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
2501          "humongous regions should go around loop once only");
2502 
2503   // Some special cases:
2504   // If the memory region is empty, we can just give up the region.
2505   // If the current region is humongous then we only need to check
2506   // the bitmap for the bit associated with the start of the object,
2507   // scan the object if it's live, and give up the region.
2508   // Otherwise, let's iterate over the bitmap of the part of the region
2509   // that is left.
2510   // If the iteration is successful, give up the region.
2511   if (mr.is_empty()) {
2512     giveup_current_region();
2513     abort_marking_if_regular_check_fail();
2514   } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
2515     if (_mark_bitmap->is_marked(mr.start())) {
2516       // The object is marked - apply the closure
2517       bitmap_closure.do_addr(mr.start());
2518     }
2519     // Even if this task aborted while scanning the humongous object
2520     // we can (and should) give up the current region.
2521     giveup_current_region();
2522     abort_marking_if_regular_check_fail();
2523   } else if (_mark_bitmap->iterate(&bitmap_closure, mr)) {
2524     giveup_current_region();
2525     abort_marking_if_regular_check_fail();
2526   } else {
2527     assert(has_aborted(), "currently the only way to do so");
2528     // The only way to abort the bitmap iteration is to return
2529     // false from the do_bit() method. However, inside the
2530     // do_bit() method we move the _finger to point to the
2531     // object currently being looked at. So, if we bail out, we
2532     // have definitely set _finger to something non-null.
2533     assert(_finger != nullptr, "invariant");
2534 
2535     // Region iteration was actually aborted. So now _finger
2536     // points to the address of the object we last scanned. If we
2537     // leave it there, when we restart this task, we will rescan
2538     // the object. It is easy to avoid this. We move the finger by
2539     // enough to point to the next possible object header.
2540     assert(_finger < _region_limit, "invariant");
2541     HeapWord* const new_finger = _finger + cast_to_oop(_finger)->size();
2542     if (new_finger >= _region_limit) {
2543       giveup_current_region();
2544     } else {
2545       move_finger_to(new_finger);
2546     }
2547   }
2548 }
2549 
2550 void G1CMTask::claim_new_region() {
2551   // Read the note on the claim_region() method on why it might
2552   // return null with potentially more regions available for
2553   // claiming and why we have to check out_of_regions() to determine
2554   // whether we're done or not.
2555   while (!has_aborted() && _curr_region == nullptr && !_cm->out_of_regions()) {
2556     // We are going to try to claim a new region. We should have
2557     // given up on the previous one.
2558     // Separated the asserts so that we know which one fires.
2559     assert(_curr_region  == nullptr, "invariant");
2560     assert(_finger       == nullptr, "invariant");
2561     assert(_region_limit == nullptr, "invariant");
2562     G1HeapRegion* claimed_region = _cm->claim_region(_worker_id);
2563     if (claimed_region != nullptr) {
2564       // Yes, we managed to claim one
2565       setup_for_region(claimed_region);
2566       assert(_curr_region == claimed_region, "invariant");
2567     }
2568     // It is important to call the regular clock here. It might take
2569     // a while to claim a region if, for example, we hit a large
2570     // block of empty regions. So we need to call the regular clock
2571     // method once round the loop to make sure it's called
2572     // frequently enough.
2573     abort_marking_if_regular_check_fail();
2574   }
2575 }
2576 
2577 void G1CMTask::attempt_stealing() {
2578   // We cannot check whether the global stack is empty, since other
2579   // tasks might be pushing objects to it concurrently.
2580   assert(_cm->out_of_regions() && _task_queue->size() == 0,
2581          "only way to reach here");
2582   while (!has_aborted()) {
2583     G1TaskQueueEntry entry;
2584     if (_cm->try_stealing(_worker_id, entry)) {
2585       process_entry(entry, true /* stolen */);
2586 
2587       // And since we're towards the end, let's totally drain the
2588       // local queue and global stack.
2589       drain_local_queue(false);
2590       drain_global_stack(false);
2591     } else {
2592       break;
2593     }
2594   }
2595 }
2596 
2597 void G1CMTask::attempt_termination(bool is_serial) {
2598   // We cannot check whether the global stack is empty, since other
2599   // tasks might be concurrently pushing objects on it.
2600   // Separated the asserts so that we know which one fires.
2601   assert(_cm->out_of_regions(), "only way to reach here");
2602   assert(_task_queue->size() == 0, "only way to reach here");
2603   double termination_start_time_ms = os::elapsedTime() * 1000.0;
2604 
2605   // The G1CMTask class also extends the TerminatorTerminator class,
2606   // hence its should_exit_termination() method will also decide
2607   // whether to exit the termination protocol or not.
2608   bool finished = (is_serial ||
2609                    _cm->terminator()->offer_termination(this));
2610   _termination_time_ms += (os::elapsedTime() * 1000.0 - termination_start_time_ms);
2611 
2612   if (finished) {
2613     // We're all done.
2614 
2615     // We can now guarantee that the global stack is empty, since
2616     // all other tasks have finished. We separated the guarantees so
2617     // that, if a condition is false, we can immediately find out
2618     // which one.
2619     guarantee(_cm->out_of_regions(), "only way to reach here");
2620     guarantee(_cm->mark_stack_empty(), "only way to reach here");
2621     guarantee(_task_queue->size() == 0, "only way to reach here");
2622     guarantee(!_cm->has_overflown(), "only way to reach here");
2623     guarantee(!has_aborted(), "should never happen if termination has completed");
2624   } else {
2625     // Apparently there's more work to do. Let's abort this task. We
2626     // will restart it and hopefully we can find more things to do.
2627     set_has_aborted();
2628   }
2629 }
2630 
2631 void G1CMTask::handle_abort(bool is_serial, double elapsed_time_ms) {
2632   if (_has_timed_out) {
2633     double diff_ms = elapsed_time_ms - _time_target_ms;
2634     // Keep statistics of how well we did with respect to hitting
2635     // our target only if we actually timed out (if we aborted for
2636     // other reasons, then the results might get skewed).
2637     _marking_step_diff_ms.add(diff_ms);
2638   }
2639 
2640   if (!_cm->has_overflown()) {
2641     return;
2642   }
2643 
2644   // This is the interesting one. We aborted because a global
2645   // overflow was raised. This means we have to restart the
2646   // marking phase and start iterating over regions. However, in
2647   // order to do this we have to make sure that all tasks stop
2648   // what they are doing and re-initialize in a safe manner. We
2649   // will achieve this with the use of two barrier sync points.
2650   if (!is_serial) {
2651     // We only need to enter the sync barrier if being called
2652     // from a parallel context
2653     _cm->enter_first_sync_barrier(_worker_id);
2654 
2655     // When we exit this sync barrier we know that all tasks have
2656     // stopped doing marking work. So, it's now safe to
2657     // re-initialize our data structures.
2658   }
2659 
2660   clear_region_fields();
2661   flush_mark_stats_cache();
2662 
2663   if (!is_serial) {
2664     // If we're executing the concurrent phase of marking, reset the marking
2665     // state; otherwise the marking state is reset after reference processing,
2666     // during the remark pause.
2667     // If we reset here as a result of an overflow during the remark we will
2668     // see assertion failures from any subsequent set_concurrency_and_phase()
2669     // calls.
2670     if (_cm->concurrent() && _worker_id == 0) {
2671       // Worker 0 is responsible for clearing the global data structures because
2672       // of an overflow. During STW we should not clear the overflow flag (in
2673       // G1ConcurrentMark::reset_marking_state()) since we rely on it being true when we exit
2674       // method to abort the pause and restart concurrent marking.
2675       _cm->reset_marking_for_restart();
2676 
2677       log_info(gc, marking)("Concurrent Mark reset for overflow");
2678     }
2679 
2680     // ...and enter the second barrier.
2681     _cm->enter_second_sync_barrier(_worker_id);
2682   }
2683 }
2684 
2685 /*****************************************************************************
2686 
2687     The do_marking_step(time_target_ms, ...) method is the building
2688     block of the parallel marking framework. It can be called in parallel
2689     with other invocations of do_marking_step() on different tasks
2690     (but only one per task, obviously) and concurrently with the
2691     mutator threads, or during remark, hence it eliminates the need
2692     for two versions of the code. When called during remark, it will
2693     pick up from where the task left off during the concurrent marking
2694     phase. Interestingly, tasks are also claimable during evacuation
2695     pauses too, since do_marking_step() ensures that it aborts before
2696     it needs to yield.
2697 
2698     The data structures that it uses to do marking work are the
2699     following:
2700 
2701       (1) Marking Bitmap. If there are grey objects that appear only
2702       on the bitmap (this happens either when dealing with an overflow
2703       or when the concurrent start pause has simply marked the roots
2704       and didn't push them on the stack), then tasks claim heap
2705       regions whose bitmap they then scan to find grey objects. A
2706       global finger indicates where the end of the last claimed region
2707       is. A local finger indicates how far into the region a task has
2708       scanned. The two fingers are used to determine how to grey an
2709       object (i.e. whether simply marking it is OK, as it will be
2710       visited by a task in the future, or whether it needs to be also
2711       pushed on a stack).
2712 
2713       (2) Local Queue. The local queue of the task which is accessed
2714       reasonably efficiently by the task. Other tasks can steal from
2715       it when they run out of work. Throughout the marking phase, a
2716       task attempts to keep its local queue short but not totally
2717       empty, so that entries are available for stealing by other
2718       tasks. Only when there is no more work, a task will totally
2719       drain its local queue.
2720 
2721       (3) Global Mark Stack. This handles local queue overflow. During
2722       marking only sets of entries are moved between it and the local
2723       queues, as access to it requires a mutex and more fine-grain
2724       interaction with it which might cause contention. If it
2725       overflows, then the marking phase should restart and iterate
2726       over the bitmap to identify grey objects. Throughout the marking
2727       phase, tasks attempt to keep the global mark stack at a small
2728       length but not totally empty, so that entries are available for
2729       popping by other tasks. Only when there is no more work, tasks
2730       will totally drain the global mark stack.
2731 
2732       (4) SATB Buffer Queue. This is where completed SATB buffers are
2733       made available. Buffers are regularly removed from this queue
2734       and scanned for roots, so that the queue doesn't get too
2735       long. During remark, all completed buffers are processed, as
2736       well as the filled in parts of any uncompleted buffers.
2737 
2738     The do_marking_step() method tries to abort when the time target
2739     has been reached. There are a few other cases when the
2740     do_marking_step() method also aborts:
2741 
2742       (1) When the marking phase has been aborted (after a Full GC).
2743 
2744       (2) When a global overflow (on the global stack) has been
2745       triggered. Before the task aborts, it will actually sync up with
2746       the other tasks to ensure that all the marking data structures
2747       (local queues, stacks, fingers etc.)  are re-initialized so that
2748       when do_marking_step() completes, the marking phase can
2749       immediately restart.
2750 
2751       (3) When enough completed SATB buffers are available. The
2752       do_marking_step() method only tries to drain SATB buffers right
2753       at the beginning. So, if enough buffers are available, the
2754       marking step aborts and the SATB buffers are processed at
2755       the beginning of the next invocation.
2756 
2757       (4) To yield. when we have to yield then we abort and yield
2758       right at the end of do_marking_step(). This saves us from a lot
2759       of hassle as, by yielding we might allow a Full GC. If this
2760       happens then objects will be compacted underneath our feet, the
2761       heap might shrink, etc. We save checking for this by just
2762       aborting and doing the yield right at the end.
2763 
2764     From the above it follows that the do_marking_step() method should
2765     be called in a loop (or, otherwise, regularly) until it completes.
2766 
2767     If a marking step completes without its has_aborted() flag being
2768     true, it means it has completed the current marking phase (and
2769     also all other marking tasks have done so and have all synced up).
2770 
2771     A method called regular_clock_call() is invoked "regularly" (in
2772     sub ms intervals) throughout marking. It is this clock method that
2773     checks all the abort conditions which were mentioned above and
2774     decides when the task should abort. A work-based scheme is used to
2775     trigger this clock method: when the number of object words the
2776     marking phase has scanned or the number of references the marking
2777     phase has visited reach a given limit. Additional invocations to
2778     the method clock have been planted in a few other strategic places
2779     too. The initial reason for the clock method was to avoid calling
2780     cpu time gathering too regularly, as it is quite expensive. So,
2781     once it was in place, it was natural to piggy-back all the other
2782     conditions on it too and not constantly check them throughout the code.
2783 
2784     If do_termination is true then do_marking_step will enter its
2785     termination protocol.
2786 
2787     The value of is_serial must be true when do_marking_step is being
2788     called serially (i.e. by the VMThread) and do_marking_step should
2789     skip any synchronization in the termination and overflow code.
2790     Examples include the serial remark code and the serial reference
2791     processing closures.
2792 
2793     The value of is_serial must be false when do_marking_step is
2794     being called by any of the worker threads.
2795     Examples include the concurrent marking code (CMMarkingTask),
2796     the MT remark code, and the MT reference processing closures.
2797 
2798  *****************************************************************************/
2799 
2800 void G1CMTask::do_marking_step(double time_target_ms,
2801                                bool do_termination,
2802                                bool is_serial) {
2803   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
2804 
2805   _start_cpu_time_ns = os::current_thread_cpu_time();
2806 
2807   // If do_stealing is true then do_marking_step will attempt to
2808   // steal work from the other G1CMTasks. It only makes sense to
2809   // enable stealing when the termination protocol is enabled
2810   // and do_marking_step() is not being called serially.
2811   bool do_stealing = do_termination && !is_serial;
2812 
2813   G1Predictions const& predictor = _g1h->policy()->predictor();
2814   double diff_prediction_ms = predictor.predict_zero_bounded(&_marking_step_diff_ms);
2815   _time_target_ms = time_target_ms - diff_prediction_ms;
2816 
2817   // set up the variables that are used in the work-based scheme to
2818   // call the regular clock method
2819   _words_scanned = 0;
2820   _refs_reached  = 0;
2821   recalculate_limits();
2822 
2823   // clear all flags
2824   clear_has_aborted();
2825   _has_timed_out = false;
2826   _draining_satb_buffers = false;
2827 
2828   ++_calls;
2829 
2830   // Set up the bitmap and oop closures. Anything that uses them is
2831   // eventually called from this method, so it is OK to allocate these
2832   // statically.
2833   G1CMBitMapClosure bitmap_closure(this, _cm);
2834   G1CMOopClosure cm_oop_closure(_g1h, this);
2835   set_cm_oop_closure(&cm_oop_closure);
2836 
2837   if (_cm->has_overflown()) {
2838     // This can happen if the mark stack overflows during a GC pause
2839     // and this task, after a yield point, restarts. We have to abort
2840     // as we need to get into the overflow protocol which happens
2841     // right at the end of this task.
2842     set_has_aborted();
2843   }
2844 
2845   // First drain any available SATB buffers. After this, we will not
2846   // look at SATB buffers before the next invocation of this method.
2847   // If enough completed SATB buffers are queued up, the regular clock
2848   // will abort this task so that it restarts.
2849   drain_satb_buffers();
2850   // ...then partially drain the local queue and the global stack
2851   drain_local_queue(true);
2852   drain_global_stack(true);
2853 
2854   do {
2855     process_current_region(bitmap_closure);
2856     // At this point we have either completed iterating over the
2857     // region we were holding on to, or we have aborted.
2858 
2859     // We then partially drain the local queue and the global stack.
2860     drain_local_queue(true);
2861     drain_global_stack(true);
2862 
2863     claim_new_region();
2864 
2865     assert(has_aborted() || _curr_region != nullptr || _cm->out_of_regions(),
2866            "at this point we should be out of regions");
2867   } while ( _curr_region != nullptr && !has_aborted());
2868 
2869   // We cannot check whether the global stack is empty, since other
2870   // tasks might be pushing objects to it concurrently.
2871   assert(has_aborted() || _cm->out_of_regions(),
2872          "at this point we should be out of regions");
2873   // Try to reduce the number of available SATB buffers so that
2874   // remark has less work to do.
2875   drain_satb_buffers();
2876 
2877   // Since we've done everything else, we can now totally drain the
2878   // local queue and global stack.
2879   drain_local_queue(false);
2880   drain_global_stack(false);
2881 
2882   // Attempt at work stealing from other task's queues.
2883   if (do_stealing && !has_aborted()) {
2884     // We have not aborted. This means that we have finished all that
2885     // we could. Let's try to do some stealing...
2886     attempt_stealing();
2887   }
2888 
2889   // We still haven't aborted. Now, let's try to get into the
2890   // termination protocol.
2891   if (do_termination && !has_aborted()) {
2892     attempt_termination(is_serial);
2893   }
2894 
2895   // Mainly for debugging purposes to make sure that a pointer to the
2896   // closure which was statically allocated in this frame doesn't
2897   // escape it by accident.
2898   set_cm_oop_closure(nullptr);
2899   jlong end_cpu_time_ns = os::current_thread_cpu_time();
2900   double elapsed_time_ms = (double)(end_cpu_time_ns - _start_cpu_time_ns) / NANOSECS_PER_MILLISEC;
2901   // Update the step history.
2902   _step_times_ms.add(elapsed_time_ms);
2903 
2904   if (has_aborted()) {
2905     // The task was aborted for some reason.
2906     handle_abort(is_serial, elapsed_time_ms);
2907   }
2908 }
2909 
2910 G1CMTask::G1CMTask(uint worker_id,
2911                    G1ConcurrentMark* cm,
2912                    G1CMTaskQueue* task_queue,
2913                    G1RegionMarkStats* mark_stats) :
2914   _worker_id(worker_id),
2915   _g1h(G1CollectedHeap::heap()),
2916   _cm(cm),
2917   _mark_bitmap(nullptr),
2918   _task_queue(task_queue),
2919   _partial_array_splitter(_cm->partial_array_state_manager(), _cm->max_num_tasks()),
2920   _mark_stats_cache(mark_stats, G1RegionMarkStatsCache::RegionMarkStatsCacheSize),
2921   _calls(0),
2922   _time_target_ms(0.0),
2923   _start_cpu_time_ns(0),
2924   _cm_oop_closure(nullptr),
2925   _curr_region(nullptr),
2926   _finger(nullptr),
2927   _region_limit(nullptr),
2928   _words_scanned(0),
2929   _words_scanned_limit(0),
2930   _real_words_scanned_limit(0),
2931   _refs_reached(0),
2932   _refs_reached_limit(0),
2933   _real_refs_reached_limit(0),
2934   _has_aborted(false),
2935   _has_timed_out(false),
2936   _draining_satb_buffers(false),
2937   _step_times_ms(),
2938   _elapsed_time_ms(0.0),
2939   _termination_time_ms(0.0),
2940   _marking_step_diff_ms()
2941 {
2942   guarantee(task_queue != nullptr, "invariant");
2943 
2944   _marking_step_diff_ms.add(0.5);
2945 }
2946 
2947 // These are formatting macros that are used below to ensure
2948 // consistent formatting. The *_H_* versions are used to format the
2949 // header for a particular value and they should be kept consistent
2950 // with the corresponding macro. Also note that most of the macros add
2951 // the necessary white space (as a prefix) which makes them a bit
2952 // easier to compose.
2953 
2954 // All the output lines are prefixed with this string to be able to
2955 // identify them easily in a large log file.
2956 #define G1PPRL_LINE_PREFIX            "###"
2957 
2958 #define G1PPRL_ADDR_BASE_FORMAT    " " PTR_FORMAT "-" PTR_FORMAT
2959 #ifdef _LP64
2960 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
2961 #else // _LP64
2962 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
2963 #endif // _LP64
2964 
2965 // For per-region info
2966 #define G1PPRL_TYPE_FORMAT            "   %-4s"
2967 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
2968 #define G1PPRL_STATE_FORMAT           "   %-5s"
2969 #define G1PPRL_STATE_H_FORMAT         "   %5s"
2970 #define G1PPRL_BYTE_FORMAT            "  %9zu"
2971 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
2972 #define G1PPRL_DOUBLE_FORMAT          "%14.1f"
2973 #define G1PPRL_GCEFF_H_FORMAT         "  %14s"
2974 #define G1PPRL_GID_H_FORMAT           "  %9s"
2975 #define G1PPRL_GID_FORMAT             "  " UINT32_FORMAT_W(9)
2976 #define G1PPRL_LEN_FORMAT             "  " UINT32_FORMAT_W(14)
2977 #define G1PPRL_LEN_H_FORMAT           "  %14s"
2978 #define G1PPRL_GID_GCEFF_FORMAT       "  %14.1f"
2979 #define G1PPRL_GID_LIVENESS_FORMAT    "  %9.2f"
2980 
2981 // For summary info
2982 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  " tag ":" G1PPRL_ADDR_BASE_FORMAT
2983 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  " tag ": %zu"
2984 #define G1PPRL_SUM_MB_FORMAT(tag)      "  " tag ": %1.2f MB"
2985 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"
2986 
2987 G1PrintRegionLivenessInfoClosure::G1PrintRegionLivenessInfoClosure(const char* phase_name) :
2988   _total_used_bytes(0),
2989   _total_capacity_bytes(0),
2990   _total_live_bytes(0),
2991   _total_remset_bytes(0),
2992   _total_code_roots_bytes(0)
2993 {
2994   if (!log_is_enabled(Trace, gc, liveness)) {
2995     return;
2996   }
2997 
2998   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2999   MemRegion reserved = g1h->reserved();
3000   double now = os::elapsedTime();
3001 
3002   // Print the header of the output.
3003   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
3004   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP"
3005                           G1PPRL_SUM_ADDR_FORMAT("reserved")
3006                           G1PPRL_SUM_BYTE_FORMAT("region-size"),
3007                           p2i(reserved.start()), p2i(reserved.end()),
3008                           G1HeapRegion::GrainBytes);
3009   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3010   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3011                           G1PPRL_TYPE_H_FORMAT
3012                           G1PPRL_ADDR_BASE_H_FORMAT
3013                           G1PPRL_BYTE_H_FORMAT
3014                           G1PPRL_BYTE_H_FORMAT
3015                           G1PPRL_STATE_H_FORMAT
3016                           G1PPRL_BYTE_H_FORMAT
3017                           G1PPRL_GID_H_FORMAT,
3018                           "type", "address-range",
3019                           "used", "live",
3020                           "state", "code-roots",
3021                           "group-id");
3022   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3023                           G1PPRL_TYPE_H_FORMAT
3024                           G1PPRL_ADDR_BASE_H_FORMAT
3025                           G1PPRL_BYTE_H_FORMAT
3026                           G1PPRL_BYTE_H_FORMAT
3027                           G1PPRL_STATE_H_FORMAT
3028                           G1PPRL_BYTE_H_FORMAT
3029                           G1PPRL_GID_H_FORMAT,
3030                           "", "",
3031                           "(bytes)", "(bytes)",
3032                           "", "(bytes)", "");
3033 }
3034 
3035 bool G1PrintRegionLivenessInfoClosure::do_heap_region(G1HeapRegion* r) {
3036   if (!log_is_enabled(Trace, gc, liveness)) {
3037     return false;
3038   }
3039 
3040   const char* type       = r->get_type_str();
3041   HeapWord* bottom       = r->bottom();
3042   HeapWord* end          = r->end();
3043   size_t capacity_bytes  = r->capacity();
3044   size_t used_bytes      = r->used();
3045   size_t live_bytes      = r->live_bytes();
3046   size_t remset_bytes    = r->rem_set()->mem_size();
3047   size_t code_roots_bytes = r->rem_set()->code_roots_mem_size();
3048   const char* remset_type = r->rem_set()->get_short_state_str();
3049   uint cset_group_id     = r->rem_set()->has_cset_group()
3050                          ? r->rem_set()->cset_group_id()
3051                          : G1CSetCandidateGroup::NoRemSetId;
3052 
3053   _total_used_bytes      += used_bytes;
3054   _total_capacity_bytes  += capacity_bytes;
3055   _total_live_bytes      += live_bytes;
3056   _total_remset_bytes    += remset_bytes;
3057   _total_code_roots_bytes += code_roots_bytes;
3058 
3059   // Print a line for this particular region.
3060   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3061                         G1PPRL_TYPE_FORMAT
3062                         G1PPRL_ADDR_BASE_FORMAT
3063                         G1PPRL_BYTE_FORMAT
3064                         G1PPRL_BYTE_FORMAT
3065                         G1PPRL_STATE_FORMAT
3066                         G1PPRL_BYTE_FORMAT
3067                         G1PPRL_GID_FORMAT,
3068                         type, p2i(bottom), p2i(end),
3069                         used_bytes, live_bytes,
3070                         remset_type, code_roots_bytes,
3071                         cset_group_id);
3072 
3073   return false;
3074 }
3075 
3076 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
3077   if (!log_is_enabled(Trace, gc, liveness)) {
3078     return;
3079   }
3080 
3081   G1CollectedHeap* g1h = G1CollectedHeap::heap();
3082   _total_remset_bytes += g1h->card_set_freelist_pool()->mem_size();
3083   // add static memory usages to remembered set sizes
3084   _total_remset_bytes += G1HeapRegionRemSet::static_mem_size();
3085 
3086   log_cset_candidate_groups();
3087 
3088   // Print the footer of the output.
3089   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3090   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3091                          " SUMMARY"
3092                          G1PPRL_SUM_MB_FORMAT("capacity")
3093                          G1PPRL_SUM_MB_PERC_FORMAT("used")
3094                          G1PPRL_SUM_MB_PERC_FORMAT("live")
3095                          G1PPRL_SUM_MB_FORMAT("remset")
3096                          G1PPRL_SUM_MB_FORMAT("code-roots"),
3097                          bytes_to_mb(_total_capacity_bytes),
3098                          bytes_to_mb(_total_used_bytes),
3099                          percent_of(_total_used_bytes, _total_capacity_bytes),
3100                          bytes_to_mb(_total_live_bytes),
3101                          percent_of(_total_live_bytes, _total_capacity_bytes),
3102                          bytes_to_mb(_total_remset_bytes),
3103                          bytes_to_mb(_total_code_roots_bytes));
3104 }
3105 
3106 void G1PrintRegionLivenessInfoClosure::log_cset_candidate_group_add_total(G1CSetCandidateGroup* group, const char* type) {
3107   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3108                           G1PPRL_GID_FORMAT
3109                           G1PPRL_LEN_FORMAT
3110                           G1PPRL_GID_GCEFF_FORMAT
3111                           G1PPRL_GID_LIVENESS_FORMAT
3112                           G1PPRL_BYTE_FORMAT
3113                           G1PPRL_TYPE_H_FORMAT,
3114                           group->group_id(),
3115                           group->length(),
3116                           group->length() > 0 ? group->gc_efficiency() : 0.0,
3117                           group->length() > 0 ? group->liveness_percent() : 0.0,
3118                           group->card_set()->mem_size(),
3119                           type);
3120   _total_remset_bytes += group->card_set()->mem_size();
3121 }
3122 
3123 void G1PrintRegionLivenessInfoClosure::log_cset_candidate_grouplist(G1CSetCandidateGroupList& gl, const char* type) {
3124   for (G1CSetCandidateGroup* group : gl) {
3125     log_cset_candidate_group_add_total(group, type);
3126   }
3127 }
3128 
3129 void G1PrintRegionLivenessInfoClosure::log_cset_candidate_groups() {
3130   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3131   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" Collection Set Candidate Groups");
3132   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX " Types: Y=Young, M=From Marking Regions, R=Retained Regions");
3133   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3134                           G1PPRL_GID_H_FORMAT
3135                           G1PPRL_LEN_H_FORMAT
3136                           G1PPRL_GCEFF_H_FORMAT
3137                           G1PPRL_BYTE_H_FORMAT
3138                           G1PPRL_BYTE_H_FORMAT
3139                           G1PPRL_TYPE_H_FORMAT,
3140                           "groud-id", "num-regions",
3141                           "gc-eff", "liveness",
3142                           "remset", "type");
3143 
3144   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3145                           G1PPRL_GID_H_FORMAT
3146                           G1PPRL_LEN_H_FORMAT
3147                           G1PPRL_GCEFF_H_FORMAT
3148                           G1PPRL_BYTE_H_FORMAT
3149                           G1PPRL_BYTE_H_FORMAT
3150                           G1PPRL_TYPE_H_FORMAT,
3151                           "", "",
3152                           "(bytes/ms)", "%",
3153                           "(bytes)", "");
3154 
3155   G1CollectedHeap* g1h = G1CollectedHeap::heap();
3156 
3157   log_cset_candidate_group_add_total(g1h->young_regions_cset_group(), "Y");
3158 
3159   G1CollectionSetCandidates* candidates = g1h->policy()->candidates();
3160   log_cset_candidate_grouplist(candidates->from_marking_groups(), "M");
3161   log_cset_candidate_grouplist(candidates->retained_groups(), "R");
3162 }