1 /*
   2  * Copyright (c) 2001, 2026, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "classfile/classLoaderData.hpp"
  26 #include "classfile/classLoaderDataGraph.hpp"
  27 #include "cppstdlib/new.hpp"
  28 #include "gc/g1/g1BarrierSet.hpp"
  29 #include "gc/g1/g1BatchedTask.hpp"
  30 #include "gc/g1/g1CardSetMemory.hpp"
  31 #include "gc/g1/g1CardTableClaimTable.inline.hpp"
  32 #include "gc/g1/g1CollectedHeap.inline.hpp"
  33 #include "gc/g1/g1CollectorState.inline.hpp"
  34 #include "gc/g1/g1ConcurrentMark.inline.hpp"
  35 #include "gc/g1/g1ConcurrentMarkRemarkTasks.hpp"
  36 #include "gc/g1/g1ConcurrentMarkThread.inline.hpp"
  37 #include "gc/g1/g1ConcurrentRebuildAndScrub.hpp"
  38 #include "gc/g1/g1ConcurrentRefine.hpp"
  39 #include "gc/g1/g1HeapRegion.inline.hpp"
  40 #include "gc/g1/g1HeapRegionManager.hpp"
  41 #include "gc/g1/g1HeapRegionPrinter.hpp"
  42 #include "gc/g1/g1HeapRegionRemSet.inline.hpp"
  43 #include "gc/g1/g1HeapRegionSet.inline.hpp"
  44 #include "gc/g1/g1HeapVerifier.hpp"
  45 #include "gc/g1/g1OopClosures.inline.hpp"
  46 #include "gc/g1/g1Policy.hpp"
  47 #include "gc/g1/g1RegionMarkStatsCache.inline.hpp"
  48 #include "gc/g1/g1ThreadLocalData.hpp"
  49 #include "gc/g1/g1Trace.hpp"
  50 #include "gc/shared/gcId.hpp"
  51 #include "gc/shared/gcTimer.hpp"
  52 #include "gc/shared/gcTraceTime.inline.hpp"
  53 #include "gc/shared/gcVMOperations.hpp"
  54 #include "gc/shared/partialArraySplitter.inline.hpp"
  55 #include "gc/shared/partialArrayState.hpp"
  56 #include "gc/shared/partialArrayTaskStats.hpp"
  57 #include "gc/shared/referencePolicy.hpp"
  58 #include "gc/shared/suspendibleThreadSet.hpp"
  59 #include "gc/shared/taskqueue.inline.hpp"
  60 #include "gc/shared/taskTerminator.hpp"
  61 #include "gc/shared/weakProcessor.inline.hpp"
  62 #include "gc/shared/workerPolicy.hpp"
  63 #include "jvm.h"
  64 #include "logging/log.hpp"
  65 #include "memory/allocation.hpp"
  66 #include "memory/iterator.hpp"
  67 #include "memory/metaspaceUtils.hpp"
  68 #include "memory/resourceArea.hpp"
  69 #include "memory/universe.hpp"
  70 #include "nmt/memTracker.hpp"
  71 #include "oops/access.inline.hpp"
  72 #include "oops/oop.inline.hpp"
  73 #include "runtime/globals_extension.hpp"
  74 #include "runtime/handles.inline.hpp"
  75 #include "runtime/java.hpp"
  76 #include "runtime/orderAccess.hpp"
  77 #include "runtime/os.hpp"
  78 #include "runtime/prefetch.inline.hpp"
  79 #include "runtime/threads.hpp"
  80 #include "utilities/align.hpp"
  81 #include "utilities/checkedCast.hpp"
  82 #include "utilities/formatBuffer.hpp"
  83 #include "utilities/growableArray.hpp"
  84 #include "utilities/powerOfTwo.hpp"
  85 
  86 G1CMIsAliveClosure::G1CMIsAliveClosure() : _cm(nullptr) { }
  87 
  88 G1CMIsAliveClosure::G1CMIsAliveClosure(G1ConcurrentMark* cm) : _cm(cm) {
  89   assert(cm != nullptr, "must be");
  90 }
  91 
  92 void G1CMIsAliveClosure::initialize(G1ConcurrentMark* cm) {
  93   assert(cm != nullptr, "must be");
  94   assert(_cm == nullptr, "double initialize");
  95   _cm = cm;
  96 }
  97 
  98 bool G1CMBitMapClosure::do_addr(HeapWord* const addr) {
  99   assert(addr < _cm->finger(), "invariant");
 100   assert(addr >= _task->finger(), "invariant");
 101 
 102   // We move that task's local finger along.
 103   _task->move_finger_to(addr);
 104 
 105   _task->process_entry(G1TaskQueueEntry(cast_to_oop(addr)), false /* stolen */);
 106   // we only partially drain the local queue and global stack
 107   _task->drain_local_queue(true);
 108   _task->drain_global_stack(true);
 109 
 110   // if the has_aborted flag has been raised, we need to bail out of
 111   // the iteration
 112   return !_task->has_aborted();
 113 }
 114 
 115 G1CMMarkStack::G1CMMarkStack() :
 116   _chunk_allocator() {
 117   set_empty();
 118 }
 119 
 120 size_t G1CMMarkStack::capacity_alignment() {
 121   return (size_t)lcm(os::vm_allocation_granularity(), sizeof(TaskQueueEntryChunk)) / sizeof(G1TaskQueueEntry);
 122 }
 123 
 124 bool G1CMMarkStack::initialize() {
 125   guarantee(_chunk_allocator.capacity() == 0, "G1CMMarkStack already initialized.");
 126 
 127   size_t initial_capacity = MarkStackSize;
 128   size_t max_capacity = MarkStackSizeMax;
 129 
 130   size_t const TaskEntryChunkSizeInVoidStar = sizeof(TaskQueueEntryChunk) / sizeof(G1TaskQueueEntry);
 131 
 132   size_t max_num_chunks = align_up(max_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
 133   size_t initial_num_chunks = align_up(initial_capacity, capacity_alignment()) / TaskEntryChunkSizeInVoidStar;
 134 
 135   initial_num_chunks = round_up_power_of_2(initial_num_chunks);
 136   max_num_chunks = MAX2(initial_num_chunks, max_num_chunks);
 137 
 138   size_t limit = (INT_MAX - 1);
 139   max_capacity = MIN2((max_num_chunks * TaskEntryChunkSizeInVoidStar), limit);
 140   initial_capacity = MIN2((initial_num_chunks * TaskEntryChunkSizeInVoidStar), limit);
 141 
 142   FLAG_SET_ERGO(MarkStackSizeMax, max_capacity);
 143   FLAG_SET_ERGO(MarkStackSize, initial_capacity);
 144 
 145   log_trace(gc)("MarkStackSize: %uk  MarkStackSizeMax: %uk", (uint)(MarkStackSize / K), (uint)(MarkStackSizeMax / K));
 146 
 147   log_debug(gc)("Initialize mark stack with %zu chunks, maximum %zu",
 148                 initial_num_chunks, max_capacity);
 149 
 150   return _chunk_allocator.initialize(initial_num_chunks, max_num_chunks);
 151 }
 152 
 153 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::ChunkAllocator::allocate_new_chunk() {
 154   if (_size.load_relaxed() >= _max_capacity) {
 155     return nullptr;
 156   }
 157 
 158   size_t cur_idx = _size.fetch_then_add(1u);
 159 
 160   if (cur_idx >= _max_capacity) {
 161     return nullptr;
 162   }
 163 
 164   size_t bucket = get_bucket(cur_idx);
 165   if (_buckets[bucket].load_acquire() == nullptr) {
 166     if (!_should_grow) {
 167       // Prefer to restart the CM.
 168       return nullptr;
 169     }
 170 
 171     MutexLocker x(G1MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
 172     if (_buckets[bucket].load_acquire() == nullptr) {
 173       size_t desired_capacity = bucket_size(bucket) * 2;
 174       if (!try_expand_to(desired_capacity)) {
 175         return nullptr;
 176       }
 177     }
 178   }
 179 
 180   size_t bucket_idx = get_bucket_index(cur_idx);
 181   TaskQueueEntryChunk* result = ::new (&_buckets[bucket].load_relaxed()[bucket_idx]) TaskQueueEntryChunk;
 182   result->next = nullptr;
 183   return result;
 184 }
 185 
 186 G1CMMarkStack::ChunkAllocator::ChunkAllocator() :
 187   _min_capacity(0),
 188   _max_capacity(0),
 189   _capacity(0),
 190   _num_buckets(0),
 191   _should_grow(false),
 192   _buckets(nullptr),
 193   _size(0)
 194 { }
 195 
 196 bool G1CMMarkStack::ChunkAllocator::initialize(size_t initial_capacity, size_t max_capacity) {
 197   guarantee(is_power_of_2(initial_capacity), "Invalid initial_capacity");
 198 
 199   _min_capacity = initial_capacity;
 200   _max_capacity = max_capacity;
 201   _num_buckets  = get_bucket(_max_capacity) + 1;
 202 
 203   _buckets = NEW_C_HEAP_ARRAY(Atomic<TaskQueueEntryChunk*>, _num_buckets, mtGC);
 204 
 205   for (size_t i = 0; i < _num_buckets; i++) {
 206     _buckets[i].store_relaxed(nullptr);
 207   }
 208 
 209   size_t new_capacity = bucket_size(0);
 210 
 211   if (!reserve(new_capacity)) {
 212     log_warning(gc)("Failed to reserve memory for new overflow mark stack with %zu chunks and size %zuB.", new_capacity, new_capacity * sizeof(TaskQueueEntryChunk));
 213     return false;
 214   }
 215   return true;
 216 }
 217 
 218 bool G1CMMarkStack::ChunkAllocator::try_expand_to(size_t desired_capacity) {
 219   if (_capacity == _max_capacity) {
 220     log_debug(gc)("Can not expand overflow mark stack further, already at maximum capacity of %zu chunks.", _capacity);
 221     return false;
 222   }
 223 
 224   size_t old_capacity = _capacity;
 225   desired_capacity = MIN2(desired_capacity, _max_capacity);
 226 
 227   if (reserve(desired_capacity)) {
 228     log_debug(gc)("Expanded the mark stack capacity from %zu to %zu chunks",
 229                   old_capacity, desired_capacity);
 230     return true;
 231   }
 232   return false;
 233 }
 234 
 235 bool G1CMMarkStack::ChunkAllocator::try_expand() {
 236   size_t new_capacity = _capacity * 2;
 237   return try_expand_to(new_capacity);
 238 }
 239 
 240 G1CMMarkStack::ChunkAllocator::~ChunkAllocator() {
 241   if (_buckets == nullptr) {
 242     return;
 243   }
 244 
 245   for (size_t i = 0; i < _num_buckets; i++) {
 246     if (_buckets[i].load_relaxed() != nullptr) {
 247       MmapArrayAllocator<TaskQueueEntryChunk>::free(_buckets[i].load_relaxed(),  bucket_size(i));
 248       _buckets[i].store_relaxed(nullptr);
 249     }
 250   }
 251 
 252   FREE_C_HEAP_ARRAY(_buckets);
 253 }
 254 
 255 bool G1CMMarkStack::ChunkAllocator::reserve(size_t new_capacity) {
 256   assert(new_capacity <= _max_capacity, "Cannot expand overflow mark stack beyond the max_capacity of %zu chunks.", _max_capacity);
 257 
 258   size_t highest_bucket = get_bucket(new_capacity - 1);
 259   size_t i = get_bucket(_capacity);
 260 
 261   // Allocate all buckets associated with indexes between the current capacity (_capacity)
 262   // and the new capacity (new_capacity). This step ensures that there are no gaps in the
 263   // array and that the capacity accurately reflects the reserved memory.
 264   for (; i <= highest_bucket; i++) {
 265     if (_buckets[i].load_acquire() != nullptr) {
 266       continue; // Skip over already allocated buckets.
 267     }
 268 
 269     size_t bucket_capacity = bucket_size(i);
 270 
 271     // Trim bucket size so that we do not exceed the _max_capacity.
 272     bucket_capacity = (_capacity + bucket_capacity) <= _max_capacity ?
 273                       bucket_capacity :
 274                       _max_capacity - _capacity;
 275 
 276 
 277     TaskQueueEntryChunk* bucket_base = MmapArrayAllocator<TaskQueueEntryChunk>::allocate_or_null(bucket_capacity, mtGC);
 278 
 279     if (bucket_base == nullptr) {
 280       log_warning(gc)("Failed to reserve memory for increasing the overflow mark stack capacity with %zu chunks and size %zuB.",
 281                       bucket_capacity, bucket_capacity * sizeof(TaskQueueEntryChunk));
 282       return false;
 283     }
 284     _capacity += bucket_capacity;
 285     _buckets[i].release_store(bucket_base);
 286   }
 287   return true;
 288 }
 289 
 290 void G1CMMarkStack::expand() {
 291   _chunk_allocator.try_expand();
 292 }
 293 
 294 void G1CMMarkStack::add_chunk_to_list(Atomic<TaskQueueEntryChunk*>* list, TaskQueueEntryChunk* elem) {
 295   elem->next = list->load_relaxed();
 296   list->store_relaxed(elem);
 297 }
 298 
 299 void G1CMMarkStack::add_chunk_to_chunk_list(TaskQueueEntryChunk* elem) {
 300   MutexLocker x(G1MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
 301   add_chunk_to_list(&_chunk_list, elem);
 302   _chunks_in_chunk_list++;
 303 }
 304 
 305 void G1CMMarkStack::add_chunk_to_free_list(TaskQueueEntryChunk* elem) {
 306   MutexLocker x(G1MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
 307   add_chunk_to_list(&_free_list, elem);
 308 }
 309 
 310 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_list(Atomic<TaskQueueEntryChunk*>* list) {
 311   TaskQueueEntryChunk* result = list->load_relaxed();
 312   if (result != nullptr) {
 313     list->store_relaxed(list->load_relaxed()->next);
 314   }
 315   return result;
 316 }
 317 
 318 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_chunk_list() {
 319   MutexLocker x(G1MarkStackChunkList_lock, Mutex::_no_safepoint_check_flag);
 320   TaskQueueEntryChunk* result = remove_chunk_from_list(&_chunk_list);
 321   if (result != nullptr) {
 322     _chunks_in_chunk_list--;
 323   }
 324   return result;
 325 }
 326 
 327 G1CMMarkStack::TaskQueueEntryChunk* G1CMMarkStack::remove_chunk_from_free_list() {
 328   MutexLocker x(G1MarkStackFreeList_lock, Mutex::_no_safepoint_check_flag);
 329   return remove_chunk_from_list(&_free_list);
 330 }
 331 
 332 bool G1CMMarkStack::par_push_chunk(G1TaskQueueEntry* ptr_arr) {
 333   // Get a new chunk.
 334   TaskQueueEntryChunk* new_chunk = remove_chunk_from_free_list();
 335 
 336   if (new_chunk == nullptr) {
 337     // Did not get a chunk from the free list. Allocate from backing memory.
 338     new_chunk = _chunk_allocator.allocate_new_chunk();
 339 
 340     if (new_chunk == nullptr) {
 341       return false;
 342     }
 343   }
 344 
 345   Copy::conjoint_memory_atomic(ptr_arr, new_chunk->data, EntriesPerChunk * sizeof(G1TaskQueueEntry));
 346 
 347   add_chunk_to_chunk_list(new_chunk);
 348 
 349   return true;
 350 }
 351 
 352 bool G1CMMarkStack::par_pop_chunk(G1TaskQueueEntry* ptr_arr) {
 353   TaskQueueEntryChunk* cur = remove_chunk_from_chunk_list();
 354 
 355   if (cur == nullptr) {
 356     return false;
 357   }
 358 
 359   Copy::conjoint_memory_atomic(cur->data, ptr_arr, EntriesPerChunk * sizeof(G1TaskQueueEntry));
 360 
 361   add_chunk_to_free_list(cur);
 362   return true;
 363 }
 364 
 365 void G1CMMarkStack::set_empty() {
 366   _chunks_in_chunk_list = 0;
 367   _chunk_list.store_relaxed(nullptr);
 368   _free_list.store_relaxed(nullptr);
 369   _chunk_allocator.reset();
 370 }
 371 
 372 G1CMRootMemRegions::G1CMRootMemRegions(uint const max_regions) :
 373     _root_regions(MemRegion::create_array(max_regions, mtGC)),
 374     _max_regions(max_regions),
 375     _num_regions(0),
 376     _num_claimed_regions(0) { }
 377 
 378 G1CMRootMemRegions::~G1CMRootMemRegions() {
 379   MemRegion::destroy_array(_root_regions, _max_regions);
 380 }
 381 
 382 void G1CMRootMemRegions::reset() {
 383   assert_at_safepoint();
 384   assert(G1CollectedHeap::heap()->collector_state()->is_in_concurrent_start_gc(), "must be");
 385 
 386   _num_regions.store_relaxed(0);
 387   _num_claimed_regions.store_relaxed(0);
 388 }
 389 
 390 void G1CMRootMemRegions::add(HeapWord* start, HeapWord* end) {
 391   assert_at_safepoint();
 392   uint idx = _num_regions.fetch_then_add(1u);
 393   assert(idx < _max_regions, "Trying to add more root MemRegions than there is space %u", _max_regions);
 394   assert(start != nullptr && end != nullptr && start <= end, "Start (" PTR_FORMAT ") should be less or equal to "
 395          "end (" PTR_FORMAT ")", p2i(start), p2i(end));
 396   _root_regions[idx].set_start(start);
 397   _root_regions[idx].set_end(end);
 398 }
 399 
 400 const MemRegion* G1CMRootMemRegions::claim_next() {
 401   uint local_num_regions = num_regions();
 402   if (num_claimed_regions() >= local_num_regions) {
 403     return nullptr;
 404   }
 405 
 406   uint claimed_index = _num_claimed_regions.fetch_then_add(1u);
 407   if (claimed_index < local_num_regions) {
 408     return &_root_regions[claimed_index];
 409   }
 410   return nullptr;
 411 }
 412 
 413 bool G1CMRootMemRegions::work_completed() const {
 414   return num_remaining_regions() == 0;
 415 }
 416 
 417 uint G1CMRootMemRegions::num_remaining_regions() const {
 418   uint total = num_regions();
 419   uint claimed = num_claimed_regions();
 420   return (total > claimed) ? total - claimed : 0;
 421 }
 422 
 423 bool G1CMRootMemRegions::contains(const MemRegion mr) const {
 424   uint local_num_root_regions = num_regions();
 425   for (uint i = 0; i < local_num_root_regions; i++) {
 426     if (_root_regions[i].equals(mr)) {
 427       return true;
 428     }
 429   }
 430   return false;
 431 }
 432 
 433 G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h,
 434                                    G1RegionToSpaceMapper* bitmap_storage) :
 435   _cm_thread(nullptr),
 436   _g1h(g1h),
 437 
 438   _mark_bitmap(),
 439 
 440   _heap(_g1h->reserved()),
 441 
 442   _root_regions(_g1h->max_num_regions()),
 443   _root_region_scan_aborted(false),
 444 
 445   _global_mark_stack(),
 446 
 447   _finger(nullptr), // _finger set in set_non_marking_state
 448 
 449   _worker_id_offset(G1ConcRefinementThreads), // The refinement control thread does not refine cards, so it's just the worker threads.
 450   _max_num_tasks(MAX2(ConcGCThreads, ParallelGCThreads)),
 451   _num_active_tasks(0), // _num_active_tasks set in set_non_marking_state()
 452   _tasks(nullptr), // _tasks set inside late_init()
 453   _task_queues(new G1CMTaskQueueSet(_max_num_tasks)),
 454   _terminator(_max_num_tasks, _task_queues),
 455   _partial_array_state_manager(new PartialArrayStateManager(_max_num_tasks)),
 456 
 457   _first_overflow_barrier_sync(),
 458   _second_overflow_barrier_sync(),
 459 
 460   _completed_mark_cycles(0),
 461   _has_overflown(false),
 462   _concurrent(false),
 463   _has_aborted(false),
 464   _restart_for_overflow(false),
 465   _gc_timer_cm(new ConcurrentGCTimer()),
 466   _gc_tracer_cm(new G1OldTracer()),
 467 
 468   // _verbose_level set below
 469 
 470   _remark_times(),
 471   _remark_mark_times(),
 472   _remark_weak_ref_times(),
 473   _cleanup_times(),
 474 
 475   _concurrent_workers(nullptr),
 476   _num_concurrent_workers(0),
 477   _max_concurrent_workers(0),
 478 
 479   _region_mark_stats(NEW_C_HEAP_ARRAY(G1RegionMarkStats, _g1h->max_num_regions(), mtGC)),
 480   _top_at_mark_starts(NEW_C_HEAP_ARRAY(Atomic<HeapWord*>, _g1h->max_num_regions(), mtGC)),
 481   _top_at_rebuild_starts(NEW_C_HEAP_ARRAY(Atomic<HeapWord*>, _g1h->max_num_regions(), mtGC)),
 482   _needs_remembered_set_rebuild(false)
 483 {
 484   assert(G1CGC_lock != nullptr, "CGC_lock must be initialized");
 485 
 486   _mark_bitmap.initialize(g1h->reserved(), bitmap_storage);
 487 }
 488 
 489 void G1ConcurrentMark::fully_initialize() {
 490   if (is_fully_initialized()) {
 491     return;
 492   }
 493 
 494   // Create & start ConcurrentMark thread.
 495   _cm_thread = new G1ConcurrentMarkThread(this);
 496   if (_cm_thread->osthread() == nullptr) {
 497     vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
 498   }
 499 
 500   log_debug(gc)("ConcGCThreads: %u offset %u", ConcGCThreads, _worker_id_offset);
 501   log_debug(gc)("ParallelGCThreads: %u", ParallelGCThreads);
 502 
 503   _max_concurrent_workers = ConcGCThreads;
 504 
 505   _concurrent_workers = new WorkerThreads("G1 Conc", _max_concurrent_workers);
 506   _concurrent_workers->initialize_workers();
 507   _num_concurrent_workers = _concurrent_workers->active_workers();
 508 
 509   if (!_global_mark_stack.initialize()) {
 510     vm_exit_during_initialization("Failed to allocate initial concurrent mark overflow mark stack.");
 511   }
 512 
 513   _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_num_tasks, mtGC);
 514 
 515   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 516   _num_active_tasks = _max_num_tasks;
 517 
 518   for (uint i = 0; i < _max_num_tasks; ++i) {
 519     G1CMTaskQueue* task_queue = new G1CMTaskQueue();
 520     _task_queues->register_queue(i, task_queue);
 521 
 522     _tasks[i] = new G1CMTask(i, this, task_queue, _region_mark_stats);
 523   }
 524 
 525   uint max_num_regions = _g1h->max_num_regions();
 526   ::new (_region_mark_stats) G1RegionMarkStats[max_num_regions]{};
 527   for (uint i = 0; i < max_num_regions; i++) {
 528     ::new (&_top_at_mark_starts[i]) Atomic<HeapWord*>(_g1h->bottom_addr_for_region(i));
 529   }
 530   // Contrary to TAMS, the default value of _top_at_rebuild_starts needs to be null.
 531   ::new (_top_at_rebuild_starts) Atomic<HeapWord*>[max_num_regions]{};
 532 
 533   reset_at_marking_complete();
 534 }
 535 
 536 bool G1ConcurrentMark::is_in_concurrent_cycle() const {
 537   return is_fully_initialized() ? _cm_thread->is_in_progress() : false;
 538 }
 539 
 540 bool G1ConcurrentMark::is_in_marking() const {
 541   return is_fully_initialized() ? cm_thread()->is_in_marking() : false;
 542 }
 543 
 544 bool G1ConcurrentMark::is_in_rebuild_or_scrub() const {
 545   return cm_thread()->is_in_rebuild_or_scrub();
 546 }
 547 
 548 bool G1ConcurrentMark::is_in_reset_for_next_cycle() const {
 549   return cm_thread()->is_in_reset_for_next_cycle();
 550 }
 551 
 552 PartialArrayStateManager* G1ConcurrentMark::partial_array_state_manager() const {
 553   return _partial_array_state_manager;
 554 }
 555 
 556 G1ConcurrentMarkThread* G1ConcurrentMark::cm_thread() const {
 557   assert(is_fully_initialized(), "must be");
 558   return _cm_thread;
 559 }
 560 
 561 void G1ConcurrentMark::reset() {
 562   _has_aborted.store_relaxed(false);
 563 
 564   reset_marking_for_restart();
 565 
 566   // Reset all tasks, since different phases will use different number of active
 567   // threads. So, it's easiest to have all of them ready.
 568   for (uint i = 0; i < _max_num_tasks; ++i) {
 569     _tasks[i]->reset(mark_bitmap());
 570   }
 571 
 572   uint max_num_regions = _g1h->max_num_regions();
 573   ::new (_top_at_rebuild_starts) Atomic<HeapWord*>[max_num_regions]{};
 574   for (uint i = 0; i < max_num_regions; i++) {
 575     _region_mark_stats[i].clear();
 576   }
 577 
 578   _root_region_scan_aborted.store_relaxed(false);
 579   _root_regions.reset();
 580 }
 581 
 582 void G1ConcurrentMark::clear_statistics(G1HeapRegion* r) {
 583   uint region_idx = r->hrm_index();
 584   for (uint j = 0; j < _max_num_tasks; ++j) {
 585     _tasks[j]->clear_mark_stats_cache(region_idx);
 586   }
 587   _top_at_rebuild_starts[region_idx].store_relaxed(nullptr);
 588   _region_mark_stats[region_idx].clear();
 589 }
 590 
 591 void G1ConcurrentMark::humongous_object_eagerly_reclaimed(G1HeapRegion* r) {
 592   assert_at_safepoint();
 593   assert(r->is_starts_humongous(), "Got humongous continues region here");
 594 
 595   // Need to clear mark bit of the humongous object. Doing this unconditionally is fine.
 596   mark_bitmap()->clear(r->bottom());
 597 
 598   if (!_g1h->collector_state()->is_in_mark_or_rebuild()) {
 599     return;
 600   }
 601 
 602   // Clear any statistics about the region gathered so far.
 603   _g1h->humongous_obj_regions_iterate(r,
 604                                       [&] (G1HeapRegion* r) {
 605                                         clear_statistics(r);
 606                                       });
 607 }
 608 
 609 void G1ConcurrentMark::reset_marking_for_restart() {
 610   _global_mark_stack.set_empty();
 611 
 612   // Expand the marking stack, if we have to and if we can.
 613   if (has_overflown()) {
 614     _global_mark_stack.expand();
 615 
 616     uint max_num_regions = _g1h->max_num_regions();
 617     for (uint i = 0; i < max_num_regions; i++) {
 618       _region_mark_stats[i].clear_during_overflow();
 619     }
 620   }
 621 
 622   clear_has_overflown();
 623   _finger.store_relaxed(_heap.start());
 624 
 625   for (uint i = 0; i < _max_num_tasks; ++i) {
 626     _tasks[i]->reset_for_restart();
 627   }
 628 }
 629 
 630 void G1ConcurrentMark::set_concurrency(uint active_tasks) {
 631   assert(active_tasks <= _max_num_tasks, "we should not have more");
 632 
 633   _num_active_tasks = active_tasks;
 634   // Need to update the three data structures below according to the
 635   // number of active threads for this phase.
 636   _terminator.reset_for_reuse(active_tasks);
 637   _first_overflow_barrier_sync.set_n_workers(active_tasks);
 638   _second_overflow_barrier_sync.set_n_workers(active_tasks);
 639 }
 640 
 641 void G1ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
 642   set_concurrency(active_tasks);
 643 
 644   _concurrent.store_relaxed(concurrent);
 645 
 646   if (!concurrent) {
 647     // At this point we should be in a STW phase, and completed marking.
 648     assert_at_safepoint_on_vm_thread();
 649     assert(out_of_regions(),
 650            "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
 651            p2i(finger()), p2i(_heap.end()));
 652   }
 653 }
 654 
 655 #if TASKQUEUE_STATS
 656 void G1ConcurrentMark::print_and_reset_taskqueue_stats() {
 657 
 658   _task_queues->print_and_reset_taskqueue_stats("Concurrent Mark");
 659 
 660   auto get_pa_stats = [&](uint i) {
 661     return _tasks[i]->partial_array_task_stats();
 662   };
 663 
 664   PartialArrayTaskStats::log_set(_max_num_tasks, get_pa_stats,
 665                                  "Concurrent Mark Partial Array");
 666 
 667   for (uint i = 0; i < _max_num_tasks; ++i) {
 668     get_pa_stats(i)->reset();
 669   }
 670 }
 671 #endif
 672 
 673 void G1ConcurrentMark::reset_at_marking_complete() {
 674   TASKQUEUE_STATS_ONLY(print_and_reset_taskqueue_stats());
 675   // We set the global marking state to some default values when we're
 676   // not doing marking.
 677   reset_marking_for_restart();
 678   _num_active_tasks = 0;
 679 }
 680 
 681 G1ConcurrentMark::~G1ConcurrentMark() {
 682   FREE_C_HEAP_ARRAY(_top_at_mark_starts);
 683   FREE_C_HEAP_ARRAY(_top_at_rebuild_starts);
 684   FREE_C_HEAP_ARRAY(_region_mark_stats);
 685   // The G1ConcurrentMark instance is never freed.
 686   ShouldNotReachHere();
 687 }
 688 
 689 class G1ClearBitMapTask : public WorkerTask {
 690 public:
 691   static size_t chunk_size() { return M; }
 692 
 693 private:
 694   // Heap region closure used for clearing the _mark_bitmap.
 695   class G1ClearBitmapHRClosure : public G1HeapRegionClosure {
 696     G1ConcurrentMark* _cm;
 697     G1CMBitMap* _bitmap;
 698     bool _suspendible; // If suspendible, do yield checks.
 699 
 700     bool suspendible() {
 701       return _suspendible;
 702     }
 703 
 704     bool is_clear_concurrent_undo() {
 705       return suspendible() && _cm->cm_thread()->is_in_undo_cycle();
 706     }
 707 
 708     bool has_aborted() {
 709       if (suspendible()) {
 710         _cm->do_yield_check();
 711         return _cm->has_aborted();
 712       }
 713       return false;
 714     }
 715 
 716     HeapWord* region_clear_limit(G1HeapRegion* r) {
 717       // During a Concurrent Undo Mark cycle, the per region top_at_mark_start and
 718       // live_words data are current wrt to the _mark_bitmap. We use this information
 719       // to only clear ranges of the bitmap that require clearing.
 720       if (is_clear_concurrent_undo()) {
 721         // No need to clear bitmaps for empty regions (which includes regions we
 722         // did not mark through).
 723         if (!_cm->contains_live_object(r->hrm_index())) {
 724           assert(_bitmap->get_next_marked_addr(r->bottom(), r->end()) == r->end(), "Should not have marked bits");
 725           return r->bottom();
 726         }
 727         assert(_bitmap->get_next_marked_addr(_cm->top_at_mark_start(r), r->end()) == r->end(), "Should not have marked bits above tams");
 728       }
 729       return r->end();
 730     }
 731 
 732   public:
 733     G1ClearBitmapHRClosure(G1ConcurrentMark* cm, bool suspendible) :
 734       G1HeapRegionClosure(),
 735       _cm(cm),
 736       _bitmap(cm->mark_bitmap()),
 737       _suspendible(suspendible)
 738     { }
 739 
 740     virtual bool do_heap_region(G1HeapRegion* r) {
 741       if (has_aborted()) {
 742         return true;
 743       }
 744 
 745       HeapWord* cur = r->bottom();
 746       HeapWord* const end = region_clear_limit(r);
 747 
 748       size_t const chunk_size_in_words = G1ClearBitMapTask::chunk_size() / HeapWordSize;
 749 
 750       while (cur < end) {
 751 
 752         MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
 753         _bitmap->clear_range(mr);
 754 
 755         cur += chunk_size_in_words;
 756 
 757         // Repeat the asserts from before the start of the closure. We will do them
 758         // as asserts here to minimize their overhead on the product. However, we
 759         // will have them as guarantees at the beginning / end of the bitmap
 760         // clearing to get some checking in the product.
 761         assert(!suspendible() || _cm->is_in_reset_for_next_cycle(), "invariant");
 762 
 763         // Abort iteration if necessary.
 764         if (has_aborted()) {
 765           return true;
 766         }
 767       }
 768       assert(cur >= end, "Must have completed iteration over the bitmap for region %u.", r->hrm_index());
 769 
 770       _cm->reset_top_at_mark_start(r);
 771 
 772       return false;
 773     }
 774   };
 775 
 776   G1ClearBitmapHRClosure _cl;
 777   G1HeapRegionClaimer _hr_claimer;
 778   bool _suspendible; // If the task is suspendible, workers must join the STS.
 779 
 780 public:
 781   G1ClearBitMapTask(G1ConcurrentMark* cm, uint n_workers, bool suspendible) :
 782     WorkerTask("G1 Clear Bitmap"),
 783     _cl(cm, suspendible),
 784     _hr_claimer(n_workers),
 785     _suspendible(suspendible)
 786   { }
 787 
 788   void work(uint worker_id) {
 789     SuspendibleThreadSetJoiner sts_join(_suspendible);
 790     G1CollectedHeap::heap()->heap_region_par_iterate_from_worker_offset(&_cl, &_hr_claimer, worker_id);
 791   }
 792 };
 793 
 794 void G1ConcurrentMark::clear_bitmap(WorkerThreads* workers, bool may_yield) {
 795   assert(may_yield || SafepointSynchronize::is_at_safepoint(), "Non-yielding bitmap clear only allowed at safepoint.");
 796 
 797   size_t const num_bytes_to_clear = (G1HeapRegion::GrainBytes * _g1h->num_committed_regions()) / G1CMBitMap::heap_map_factor();
 798   size_t const num_chunks = align_up(num_bytes_to_clear, G1ClearBitMapTask::chunk_size()) / G1ClearBitMapTask::chunk_size();
 799 
 800   uint const num_workers = (uint)MIN2(num_chunks, (size_t)workers->active_workers());
 801 
 802   G1ClearBitMapTask cl(this, num_workers, may_yield);
 803 
 804   log_debug(gc, ergo)("Running %s with %u workers for %zu work units.", cl.name(), num_workers, num_chunks);
 805   workers->run_task(&cl, num_workers);
 806 }
 807 
 808 void G1ConcurrentMark::cleanup_for_next_mark() {
 809   // Make sure that the concurrent mark thread looks to still be in
 810   // the current cycle.
 811   guarantee(is_in_reset_for_next_cycle(), "invariant");
 812 
 813   clear_bitmap(_concurrent_workers, true);
 814 
 815   reset_partial_array_state_manager();
 816 
 817   // Should not have changed state yet (even if a Full GC interrupted us).
 818   guarantee(is_in_reset_for_next_cycle(), "invariant");
 819 }
 820 
 821 void G1ConcurrentMark::reset_partial_array_state_manager() {
 822   for (uint i = 0; i < _max_num_tasks; ++i) {
 823     _tasks[i]->unregister_partial_array_splitter();
 824   }
 825 
 826   partial_array_state_manager()->reset();
 827 
 828   for (uint i = 0; i < _max_num_tasks; ++i) {
 829     _tasks[i]->register_partial_array_splitter();
 830   }
 831 }
 832 
 833 void G1ConcurrentMark::clear_bitmap(WorkerThreads* workers) {
 834   assert_at_safepoint_on_vm_thread();
 835   // To avoid fragmentation the full collection requesting to clear the bitmap
 836   // might use fewer workers than available. To ensure the bitmap is cleared
 837   // as efficiently as possible the number of active workers are temporarily
 838   // increased to include all currently created workers.
 839   WithActiveWorkers update(workers, workers->created_workers());
 840   clear_bitmap(workers, false);
 841 }
 842 
 843 class G1PreConcurrentStartTask : public G1BatchedTask {
 844   // Reset marking state.
 845   class ResetMarkingStateTask;
 846   // For each region note start of marking.
 847   class NoteStartOfMarkTask;
 848 
 849 public:
 850   G1PreConcurrentStartTask(GCCause::Cause cause, G1ConcurrentMark* cm);
 851 };
 852 
 853 class G1PreConcurrentStartTask::ResetMarkingStateTask : public G1AbstractSubTask {
 854   G1ConcurrentMark* _cm;
 855 public:
 856   ResetMarkingStateTask(G1ConcurrentMark* cm) : G1AbstractSubTask(G1GCPhaseTimes::ResetMarkingState), _cm(cm) { }
 857 
 858   double worker_cost() const override { return 1.0; }
 859   void do_work(uint worker_id) override;
 860 };
 861 
 862 class G1PreConcurrentStartTask::NoteStartOfMarkTask : public G1AbstractSubTask {
 863 
 864   class NoteStartOfMarkHRClosure : public G1HeapRegionClosure {
 865     G1ConcurrentMark* _cm;
 866 
 867   public:
 868     NoteStartOfMarkHRClosure() : G1HeapRegionClosure(), _cm(G1CollectedHeap::heap()->concurrent_mark()) { }
 869 
 870     bool do_heap_region(G1HeapRegion* r) override {
 871       if (r->is_old_or_humongous() && !r->is_collection_set_candidate() && !r->in_collection_set()) {
 872         _cm->update_top_at_mark_start(r);
 873       } else {
 874         _cm->reset_top_at_mark_start(r);
 875       }
 876       return false;
 877     }
 878   } _region_cl;
 879 
 880   G1HeapRegionClaimer _claimer;
 881 public:
 882   NoteStartOfMarkTask() : G1AbstractSubTask(G1GCPhaseTimes::NoteStartOfMark), _region_cl(), _claimer(0) { }
 883 
 884   double worker_cost() const override {
 885     // The work done per region is very small, therefore we choose this magic number to cap the number
 886     // of threads used when there are few regions.
 887     const double regions_per_thread = 1000;
 888     return _claimer.n_regions() / regions_per_thread;
 889   }
 890 
 891   void set_max_workers(uint max_workers) override {
 892     _claimer.set_n_workers(max_workers);
 893   }
 894 
 895   void do_work(uint worker_id) override {
 896     G1CollectedHeap::heap()->heap_region_par_iterate_from_worker_offset(&_region_cl, &_claimer, worker_id);
 897   }
 898 };
 899 
 900 void G1PreConcurrentStartTask::ResetMarkingStateTask::do_work(uint worker_id) {
 901   // Reset marking state.
 902   _cm->reset();
 903 }
 904 
 905 G1PreConcurrentStartTask::G1PreConcurrentStartTask(GCCause::Cause cause, G1ConcurrentMark* cm) :
 906   G1BatchedTask("Pre Concurrent Start", G1CollectedHeap::heap()->phase_times()) {
 907   add_serial_task(new ResetMarkingStateTask(cm));
 908   add_parallel_task(new NoteStartOfMarkTask());
 909 };
 910 
 911 void G1ConcurrentMark::pre_concurrent_start(GCCause::Cause cause) {
 912   assert_at_safepoint_on_vm_thread();
 913 
 914   G1CollectedHeap::start_codecache_marking_cycle_if_inactive(true /* concurrent_mark_start */);
 915 
 916   ClassLoaderDataGraph::verify_claimed_marks_cleared(ClassLoaderData::_claim_strong);
 917 
 918   G1PreConcurrentStartTask cl(cause, this);
 919   G1CollectedHeap::heap()->run_batch_task(&cl);
 920 
 921   _gc_tracer_cm->set_gc_cause(cause);
 922 }
 923 
 924 void G1ConcurrentMark::start_full_concurrent_cycle() {
 925   // Start Concurrent Marking weak-reference discovery.
 926   ReferenceProcessor* rp = _g1h->ref_processor_cm();
 927   rp->start_discovery(false /* always_clear */);
 928 
 929   SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
 930   // This is the start of  the marking cycle, we're expected all
 931   // threads to have SATB queues with active set to false.
 932   satb_mq_set.set_active_all_threads(true, /* new active value */
 933                                      false /* expected_active */);
 934 
 935   // update_g1_committed() will be called at the end of an evac pause
 936   // when marking is on. So, it's also called at the end of the
 937   // concurrent start pause to update the heap end, if the heap expands
 938   // during it. No need to call it here.
 939 
 940   // Signal the thread to start work.
 941   cm_thread()->start_full_cycle();
 942 }
 943 
 944 void G1ConcurrentMark::start_undo_concurrent_cycle() {
 945   assert_at_safepoint_on_vm_thread();
 946   // At this time this GC is not a concurrent start gc any more, can only check for young only gc/phase.
 947   assert(_g1h->collector_state()->is_in_young_only_phase(), "must be");
 948 
 949   abort_root_region_scan_at_safepoint();
 950 
 951   // Signal the thread to start work.
 952   cm_thread()->start_undo_cycle();
 953 }
 954 
 955 void G1ConcurrentMark::notify_concurrent_cycle_completed() {
 956   cm_thread()->set_idle();
 957 }
 958 
 959 void G1ConcurrentMark::stop() {
 960   if (is_fully_initialized()) {
 961     cm_thread()->stop();
 962   }
 963 }
 964 
 965 /*
 966  * Notice that in the next two methods, we actually leave the STS
 967  * during the barrier sync and join it immediately afterwards. If we
 968  * do not do this, the following deadlock can occur: one thread could
 969  * be in the barrier sync code, waiting for the other thread to also
 970  * sync up, whereas another one could be trying to yield, while also
 971  * waiting for the other threads to sync up too.
 972  *
 973  * Note, however, that this code is also used during remark and in
 974  * this case we should not attempt to leave / enter the STS, otherwise
 975  * we'll either hit an assert (debug / fastdebug) or deadlock
 976  * (product). So we should only leave / enter the STS if we are
 977  * operating concurrently.
 978  *
 979  * Because the thread that does the sync barrier has left the STS, it
 980  * is possible to be suspended for a Full GC or an evacuation pause
 981  * could occur. This is actually safe, since the entering the sync
 982  * barrier is one of the last things do_marking_step() does, and it
 983  * doesn't manipulate any data structures afterwards.
 984  */
 985 
 986 void G1ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
 987   bool barrier_aborted;
 988   {
 989     SuspendibleThreadSetLeaver sts_leave(concurrent());
 990     barrier_aborted = !_first_overflow_barrier_sync.enter();
 991   }
 992 
 993   // at this point everyone should have synced up and not be doing any
 994   // more work
 995 
 996   if (barrier_aborted) {
 997     // If the barrier aborted we ignore the overflow condition and
 998     // just abort the whole marking phase as quickly as possible.
 999     return;
1000   }
1001 }
1002 
1003 void G1ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
1004   SuspendibleThreadSetLeaver sts_leave(concurrent());
1005   _second_overflow_barrier_sync.enter();
1006 
1007   // at this point everything should be re-initialized and ready to go
1008 }
1009 
1010 class G1CMConcurrentMarkingTask : public WorkerTask {
1011   G1ConcurrentMark*     _cm;
1012 
1013 public:
1014   void work(uint worker_id) {
1015     ResourceMark rm;
1016 
1017     SuspendibleThreadSetJoiner sts_join;
1018 
1019     assert(worker_id < _cm->active_tasks(), "invariant");
1020 
1021     G1CMTask* task = _cm->task(worker_id);
1022     task->record_start_time();
1023     if (!_cm->has_aborted()) {
1024       do {
1025         task->do_marking_step(G1ConcMarkStepDurationMillis,
1026                               true  /* do_termination */,
1027                               false /* is_serial*/);
1028 
1029         _cm->do_yield_check();
1030       } while (!_cm->has_aborted() && task->has_aborted());
1031     }
1032     task->record_end_time();
1033     guarantee(!task->has_aborted() || _cm->has_aborted(), "invariant");
1034   }
1035 
1036   G1CMConcurrentMarkingTask(G1ConcurrentMark* cm) :
1037       WorkerTask("Concurrent Mark"), _cm(cm) { }
1038 
1039   ~G1CMConcurrentMarkingTask() { }
1040 };
1041 
1042 uint G1ConcurrentMark::calc_active_marking_workers() {
1043   uint result = 0;
1044   if (!UseDynamicNumberOfGCThreads || !FLAG_IS_DEFAULT(ConcGCThreads)) {
1045     result = _max_concurrent_workers;
1046   } else {
1047     result =
1048       WorkerPolicy::calc_default_active_workers(_max_concurrent_workers,
1049                                                 1, /* Minimum workers */
1050                                                 _num_concurrent_workers,
1051                                                 Threads::number_of_non_daemon_threads());
1052     // Don't scale the result down by scale_concurrent_workers() because
1053     // that scaling has already gone into "_max_concurrent_workers".
1054   }
1055   assert(result > 0 && result <= _max_concurrent_workers,
1056          "Calculated number of marking workers must be larger than zero and at most the maximum %u, but is %u",
1057          _max_concurrent_workers, result);
1058   return result;
1059 }
1060 
1061 bool G1ConcurrentMark::has_root_region_scan_aborted() const {
1062   return _root_region_scan_aborted.load_relaxed();
1063 }
1064 
1065 #ifndef PRODUCT
1066 void G1ConcurrentMark::assert_root_region_scan_completed_or_aborted() {
1067   assert(root_regions()->work_completed() || has_root_region_scan_aborted(), "must be");
1068 }
1069 #endif
1070 
1071 void G1ConcurrentMark::scan_root_region(const MemRegion* region, uint worker_id) {
1072 #ifdef ASSERT
1073   HeapWord* last = region->last();
1074   G1HeapRegion* hr = _g1h->heap_region_containing(last);
1075   assert(hr->is_old() || top_at_mark_start(hr) == hr->bottom(),
1076          "Root regions must be old or survivor/eden but region %u is %s", hr->hrm_index(), hr->get_type_str());
1077   assert(top_at_mark_start(hr) == region->start(),
1078          "MemRegion start should be equal to TAMS");
1079 #endif
1080 
1081   G1RootRegionScanClosure cl(_g1h, this, worker_id);
1082 
1083   const uintx interval = PrefetchScanIntervalInBytes;
1084   HeapWord* curr = region->start();
1085   const HeapWord* end = region->end();
1086   while (curr < end) {
1087     Prefetch::read(curr, interval);
1088     oop obj = cast_to_oop(curr);
1089     size_t size = obj->oop_iterate_size(&cl);
1090     assert(size == obj->size(), "sanity");
1091     curr += size;
1092   }
1093 }
1094 
1095 class G1CMRootRegionScanTask : public WorkerTask {
1096   G1ConcurrentMark* _cm;
1097   bool _should_yield;
1098 
1099 public:
1100   G1CMRootRegionScanTask(G1ConcurrentMark* cm, bool should_yield) :
1101     WorkerTask("G1 Root Region Scan"), _cm(cm), _should_yield(should_yield) { }
1102 
1103   void work(uint worker_id) {
1104     SuspendibleThreadSetJoiner sts_join(_should_yield);
1105 
1106     while (true) {
1107       if (_cm->has_root_region_scan_aborted()) {
1108         return;
1109       }
1110       G1CMRootMemRegions* root_regions = _cm->root_regions();
1111       const MemRegion* region = root_regions->claim_next();
1112       if (region == nullptr) {
1113         return;
1114       }
1115       _cm->scan_root_region(region, worker_id);
1116       if (_should_yield) {
1117         SuspendibleThreadSet::yield();
1118         // If we yielded, a GC may have processed all root regions,
1119         // so this loop will naturally exit on the next claim_next() call.
1120         // Same if a Full GC signalled abort of the concurrent mark.
1121       }
1122     }
1123   }
1124 };
1125 
1126 bool G1ConcurrentMark::scan_root_regions(WorkerThreads* workers, bool concurrent) {
1127   // We first check whether there is any work to do as we might have already aborted
1128   // the concurrent cycle, or ran into a GC that did the actual work when we reach here.
1129   // We want to avoid spinning up the worker threads if that happened.
1130   // (Note that due to races reading the abort-flag, we might spin up the threads anyway).
1131   //
1132   // Abort happens if a Full GC occurs right after starting the concurrent cycle or
1133   // a young gc doing the work.
1134   //
1135   // Concurrent gc threads enter an STS when starting the task, so they stop, then
1136   // continue after that safepoint.
1137   //
1138   // Must not use G1CMRootMemRegions::work_completed() here because we need to get a
1139   // consistent view of the value containing the number of remaining regions across the
1140   // usages below. The safepoint/gc may already be running and modifying it
1141   // while this code is still executing.
1142   uint num_remaining = root_regions()->num_remaining_regions();
1143   bool do_scan = num_remaining > 0 && !has_root_region_scan_aborted();
1144   if (do_scan) {
1145     // Assign one worker to each root-region but subject to the max constraint.
1146     // The constraint is also important to avoid accesses beyond the allocated per-worker
1147     // marking helper data structures. We might get passed different WorkerThreads with
1148     // different number of threads (potential worker ids) than helper data structures when
1149     // completing this work during GC.
1150     const uint num_workers = MIN2(num_remaining,
1151                                   _max_concurrent_workers);
1152 
1153     G1CMRootRegionScanTask task(this, concurrent);
1154     log_debug(gc, ergo)("Running %s using %u workers for %u work units.",
1155                         task.name(), num_workers, num_remaining);
1156     workers->run_task(&task, num_workers);
1157   }
1158 
1159   // At the end of this method, we can re-read num_remaining() in the assert: either
1160   // we got non-zero above and we processed all root regions (and it must be zero
1161   // after the worker task synchronization) or it had already been zero. We also
1162   // can't have started another concurrent cycle that could have set it to something else
1163   // while still in the concurrent cycle (if called concurrently).
1164   assert_root_region_scan_completed_or_aborted();
1165 
1166   return do_scan;
1167 }
1168 
1169 void G1ConcurrentMark::scan_root_regions_concurrently() {
1170   assert(Thread::current() == cm_thread(), "must be on Concurrent Mark Thread");
1171   scan_root_regions(_concurrent_workers, true /* concurrent */);
1172 }
1173 
1174 bool G1ConcurrentMark::complete_root_regions_scan_in_safepoint() {
1175   assert_at_safepoint_on_vm_thread();
1176   return scan_root_regions(_g1h->workers(), false /* concurrent */);
1177 }
1178 
1179 void G1ConcurrentMark::add_root_region(G1HeapRegion* r) {
1180   root_regions()->add(top_at_mark_start(r), r->top());
1181 }
1182 
1183 bool G1ConcurrentMark::is_root_region(G1HeapRegion* r) {
1184   return root_regions()->contains(MemRegion(top_at_mark_start(r), r->top()));
1185 }
1186 
1187 void G1ConcurrentMark::abort_root_region_scan() {
1188   assert_not_at_safepoint();
1189 
1190   _root_region_scan_aborted.store_relaxed(true);
1191 }
1192 
1193 void G1ConcurrentMark::abort_root_region_scan_at_safepoint() {
1194   assert_at_safepoint_on_vm_thread();
1195 
1196   _root_region_scan_aborted.store_relaxed(true);
1197 }
1198 
1199 void G1ConcurrentMark::concurrent_cycle_start() {
1200   _gc_timer_cm->register_gc_start();
1201 
1202   _gc_tracer_cm->report_gc_start(GCCause::_no_gc /* first parameter is not used */, _gc_timer_cm->gc_start());
1203 
1204   _g1h->trace_heap_before_gc(_gc_tracer_cm);
1205 }
1206 
1207 uint G1ConcurrentMark::completed_mark_cycles() const {
1208   return _completed_mark_cycles.load_relaxed();
1209 }
1210 
1211 void G1ConcurrentMark::concurrent_cycle_end(bool mark_cycle_completed) {
1212   _g1h->trace_heap_after_gc(_gc_tracer_cm);
1213 
1214   if (mark_cycle_completed) {
1215     _completed_mark_cycles.add_then_fetch(1u, memory_order_relaxed);
1216   }
1217 
1218   if (has_aborted()) {
1219     log_info(gc, marking)("Concurrent Mark Abort");
1220     _gc_tracer_cm->report_concurrent_mode_failure();
1221   }
1222 
1223   _gc_timer_cm->register_gc_end();
1224 
1225   _gc_tracer_cm->report_gc_end(_gc_timer_cm->gc_end(), _gc_timer_cm->time_partitions());
1226 }
1227 
1228 void G1ConcurrentMark::mark_from_roots() {
1229   _restart_for_overflow.store_relaxed(false);
1230 
1231   uint active_workers = calc_active_marking_workers();
1232 
1233   // Setting active workers is not guaranteed since fewer
1234   // worker threads may currently exist and more may not be
1235   // available.
1236   active_workers = _concurrent_workers->set_active_workers(active_workers);
1237   log_info(gc, task)("Concurrent Mark Using %u of %u Workers", active_workers, _concurrent_workers->max_workers());
1238 
1239   _num_concurrent_workers = active_workers;
1240 
1241   // Parallel task terminator is set in "set_concurrency_and_phase()"
1242   set_concurrency_and_phase(active_workers, true /* concurrent */);
1243 
1244   G1CMConcurrentMarkingTask marking_task(this);
1245   _concurrent_workers->run_task(&marking_task);
1246   print_stats();
1247 }
1248 
1249 const char* G1ConcurrentMark::verify_location_string(VerifyLocation location) {
1250   static const char* location_strings[] = { "Remark Before",
1251                                             "Remark After",
1252                                             "Remark Overflow",
1253                                             "Cleanup Before",
1254                                             "Cleanup After" };
1255   return location_strings[static_cast<std::underlying_type_t<VerifyLocation>>(location)];
1256 }
1257 
1258 void G1ConcurrentMark::verify_during_pause(G1HeapVerifier::G1VerifyType type,
1259                                            VerifyLocation location) {
1260   G1HeapVerifier* verifier = _g1h->verifier();
1261 
1262   verifier->verify_region_sets_optional();
1263 
1264   const char* caller = verify_location_string(location);
1265 
1266   if (VerifyDuringGC && G1HeapVerifier::should_verify(type)) {
1267     GCTraceTime(Debug, gc, phases) debug(caller, _gc_timer_cm);
1268 
1269     size_t const BufLen = 512;
1270     char buffer[BufLen];
1271 
1272     jio_snprintf(buffer, BufLen, "During GC (%s)", caller);
1273     verifier->verify(VerifyOption::G1UseConcMarking, buffer);
1274 
1275     // Only check bitmap in Remark, and not at After-Verification because the regions
1276     // already have their TAMS'es reset.
1277     if (location != VerifyLocation::RemarkAfter) {
1278       verifier->verify_bitmap_clear(true /* above_tams_only */);
1279     }
1280   }
1281 }
1282 
1283 class G1ObjectCountIsAliveClosure: public BoolObjectClosure {
1284   G1CollectedHeap* _g1h;
1285 public:
1286   G1ObjectCountIsAliveClosure(G1CollectedHeap* g1h) : _g1h(g1h) {}
1287 
1288   bool do_object_b(oop obj) {
1289     return !_g1h->is_obj_dead(obj);
1290   }
1291 };
1292 
1293 void G1ConcurrentMark::remark() {
1294   assert_at_safepoint_on_vm_thread();
1295 
1296   // If a full collection has happened, we should not continue. However we might
1297   // have ended up here as the Remark VM operation has been scheduled already.
1298   if (has_aborted()) {
1299     return;
1300   }
1301 
1302   G1Policy* policy = _g1h->policy();
1303   policy->record_pause_start_time();
1304 
1305   double start = os::elapsedTime();
1306 
1307   verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyLocation::RemarkBefore);
1308 
1309   {
1310     GCTraceTime(Debug, gc, phases) debug("Finalize Marking", _gc_timer_cm);
1311     finalize_marking();
1312   }
1313 
1314   double mark_work_end = os::elapsedTime();
1315 
1316   bool const mark_finished = !has_overflown();
1317   if (mark_finished) {
1318     weak_refs_work();
1319 
1320     // Unload Klasses, String, Code Cache, etc.
1321     if (ClassUnloadingWithConcurrentMark) {
1322       G1CMIsAliveClosure is_alive(this);
1323       _g1h->unload_classes_and_code("Class Unloading", &is_alive, _gc_timer_cm);
1324     }
1325 
1326     SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
1327     // We're done with marking.
1328     // This is the end of the marking cycle, we're expected all
1329     // threads to have SATB queues with active set to true.
1330     satb_mq_set.set_active_all_threads(false, /* new active value */
1331                                        true /* expected_active */);
1332 
1333     {
1334       GCTraceTime(Debug, gc, phases) debug("Flush Task Caches", _gc_timer_cm);
1335       flush_all_task_caches();
1336     }
1337 
1338     // All marking completed. Check bitmap now as we will start to reset TAMSes
1339     // in parallel below so that we can not do this in the After-Remark verification.
1340     _g1h->verifier()->verify_bitmap_clear(true /* above_tams_only */);
1341 
1342     {
1343       GCTraceTime(Debug, gc, phases) debug("Select For Rebuild and Reclaim Empty Regions", _gc_timer_cm);
1344 
1345       G1UpdateRegionLivenessAndSelectForRebuildTask cl(_g1h, this, _g1h->workers()->active_workers());
1346       uint const num_workers = MIN2(G1UpdateRegionLivenessAndSelectForRebuildTask::desired_num_workers(_g1h->num_committed_regions()),
1347                                     _g1h->workers()->active_workers());
1348       log_debug(gc,ergo)("Running %s using %u workers for %u regions in heap", cl.name(), num_workers, _g1h->num_committed_regions());
1349       _g1h->workers()->run_task(&cl, num_workers);
1350 
1351       log_debug(gc, remset, tracking)("Remembered Set Tracking update regions total %u, selected %u",
1352                                       _g1h->num_committed_regions(), cl.total_selected_for_rebuild());
1353 
1354       _needs_remembered_set_rebuild = (cl.total_selected_for_rebuild() > 0);
1355 
1356       if (_needs_remembered_set_rebuild) {
1357         GrowableArrayCHeap<G1HeapRegion*, mtGC>* selected = cl.sort_and_prune_old_selected();
1358         _g1h->policy()->candidates()->set_candidates_from_marking(selected);
1359       }
1360     }
1361 
1362     if (log_is_enabled(Trace, gc, liveness)) {
1363       G1PrintRegionLivenessInfoClosure cl("Post-Marking");
1364       _g1h->heap_region_iterate(&cl);
1365     }
1366 
1367     // Potentially, some empty-regions have been reclaimed; make this a
1368     // "collection" so that pending allocation can retry before attempting a
1369     // GC pause.
1370     _g1h->increment_total_collections();
1371 
1372     // For Remark Pauses that may have been triggered by PeriodicGCs, we maintain
1373     // resizing based on MinHeapFreeRatio or MaxHeapFreeRatio. If a PeriodicGC is
1374     // triggered, it likely means there are very few regular GCs, making resizing
1375     // based on gc heuristics less effective.
1376     if (_g1h->last_gc_was_periodic()) {
1377       _g1h->resize_heap_after_full_collection(0 /* allocation_word_size */);
1378     }
1379 
1380     compute_new_sizes();
1381 
1382     verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyLocation::RemarkAfter);
1383 
1384     assert(!restart_for_overflow(), "sanity");
1385     // Completely reset the marking state (except bitmaps) since marking completed.
1386     reset_at_marking_complete();
1387 
1388     G1CollectedHeap::finish_codecache_marking_cycle();
1389 
1390     {
1391       GCTraceTime(Debug, gc, phases) debug("Report Object Count", _gc_timer_cm);
1392       G1ObjectCountIsAliveClosure is_alive(_g1h);
1393       _gc_tracer_cm->report_object_count_after_gc(&is_alive, _g1h->workers());
1394     }
1395 
1396     // Successfully completed marking, advance state.
1397     cm_thread()->set_full_cycle_rebuild_and_scrub();
1398   } else {
1399     // We overflowed.  Restart concurrent marking.
1400     _restart_for_overflow.store_relaxed(true);
1401 
1402     verify_during_pause(G1HeapVerifier::G1VerifyRemark, VerifyLocation::RemarkOverflow);
1403 
1404     // Clear the marking state because we will be restarting
1405     // marking due to overflowing the global mark stack.
1406     reset_marking_for_restart();
1407   }
1408 
1409   // Statistics
1410   double now = os::elapsedTime();
1411   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1412   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1413   _remark_times.add((now - start) * 1000.0);
1414 
1415   _g1h->update_perf_counter_cpu_time();
1416 
1417   policy->record_concurrent_mark_remark_end();
1418 
1419   return;
1420 }
1421 
1422 void G1ConcurrentMark::compute_new_sizes() {
1423   MetaspaceGC::compute_new_size();
1424 
1425   // Cleanup will have freed any regions completely full of garbage.
1426   // Update the soft reference policy with the new heap occupancy.
1427   Universe::heap()->update_capacity_and_used_at_gc();
1428 
1429   // We reclaimed old regions so we should calculate the sizes to make
1430   // sure we update the old gen/space data.
1431   _g1h->monitoring_support()->update_sizes();
1432 }
1433 
1434 class G1UpdateRegionsAfterRebuild : public G1HeapRegionClosure {
1435   G1CollectedHeap* _g1h;
1436 
1437 public:
1438   G1UpdateRegionsAfterRebuild(G1CollectedHeap* g1h) : _g1h(g1h) { }
1439 
1440   bool do_heap_region(G1HeapRegion* r) override {
1441     // Update the remset tracking state from updating to complete
1442     // if remembered sets have been rebuilt.
1443     _g1h->policy()->remset_tracker()->update_after_rebuild(r);
1444     return false;
1445   }
1446 };
1447 
1448 void G1ConcurrentMark::cleanup() {
1449   assert_at_safepoint_on_vm_thread();
1450 
1451   // If a full collection has happened, we shouldn't do this.
1452   if (has_aborted()) {
1453     return;
1454   }
1455 
1456   G1Policy* policy = _g1h->policy();
1457   policy->record_pause_start_time();
1458 
1459   double start = os::elapsedTime();
1460 
1461   verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyLocation::CleanupBefore);
1462 
1463   if (needs_remembered_set_rebuild()) {
1464     // Update the remset tracking information as well as marking all regions
1465     // as fully parsable.
1466     GCTraceTime(Debug, gc, phases) debug("Update Remembered Set Tracking After Rebuild", _gc_timer_cm);
1467     G1UpdateRegionsAfterRebuild cl(_g1h);
1468     _g1h->heap_region_iterate(&cl);
1469   } else {
1470     log_debug(gc, phases)("No Remembered Sets to update after rebuild");
1471   }
1472 
1473   verify_during_pause(G1HeapVerifier::G1VerifyCleanup, VerifyLocation::CleanupAfter);
1474 
1475   // Local statistics
1476   _cleanup_times.add((os::elapsedTime() - start) * 1000.0);
1477 
1478   {
1479     GCTraceTime(Debug, gc, phases) debug("Finalize Concurrent Mark Cleanup", _gc_timer_cm);
1480     policy->record_concurrent_mark_cleanup_end(needs_remembered_set_rebuild());
1481   }
1482 
1483   // Advance state.
1484   cm_thread()->set_full_cycle_reset_for_next_cycle();
1485   return;
1486 }
1487 
1488 // 'Keep Alive' oop closure used by both serial parallel reference processing.
1489 // Uses the G1CMTask associated with a worker thread (for serial reference
1490 // processing the G1CMTask for worker 0 is used) to preserve (mark) and
1491 // trace referent objects.
1492 //
1493 // Using the G1CMTask and embedded local queues avoids having the worker
1494 // threads operating on the global mark stack. This reduces the risk
1495 // of overflowing the stack - which we would rather avoid at this late
1496 // state. Also using the tasks' local queues removes the potential
1497 // of the workers interfering with each other that could occur if
1498 // operating on the global stack.
1499 
1500 class G1CMKeepAliveAndDrainClosure : public OopClosure {
1501   G1ConcurrentMark* _cm;
1502   G1CMTask*         _task;
1503   uint              _ref_counter_limit;
1504   uint              _ref_counter;
1505   bool              _is_serial;
1506 public:
1507   G1CMKeepAliveAndDrainClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
1508     _cm(cm), _task(task), _ref_counter_limit(G1RefProcDrainInterval),
1509     _ref_counter(_ref_counter_limit), _is_serial(is_serial) {
1510     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1511   }
1512 
1513   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1514   virtual void do_oop(      oop* p) { do_oop_work(p); }
1515 
1516   template <class T> void do_oop_work(T* p) {
1517     if (_cm->has_overflown()) {
1518       return;
1519     }
1520     if (!_task->deal_with_reference(p)) {
1521       // We did not add anything to the mark bitmap (or mark stack), so there is
1522       // no point trying to drain it.
1523       return;
1524     }
1525     _ref_counter--;
1526 
1527     if (_ref_counter == 0) {
1528       // We have dealt with _ref_counter_limit references, pushing them
1529       // and objects reachable from them on to the local stack (and
1530       // possibly the global stack). Call G1CMTask::do_marking_step() to
1531       // process these entries.
1532       //
1533       // We call G1CMTask::do_marking_step() in a loop, which we'll exit if
1534       // there's nothing more to do (i.e. we're done with the entries that
1535       // were pushed as a result of the G1CMTask::deal_with_reference() calls
1536       // above) or we overflow.
1537       //
1538       // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
1539       // flag while there may still be some work to do. (See the comment at
1540       // the beginning of G1CMTask::do_marking_step() for those conditions -
1541       // one of which is reaching the specified time target.) It is only
1542       // when G1CMTask::do_marking_step() returns without setting the
1543       // has_aborted() flag that the marking step has completed.
1544       do {
1545         double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1546         _task->do_marking_step(mark_step_duration_ms,
1547                                false      /* do_termination */,
1548                                _is_serial);
1549       } while (_task->has_aborted() && !_cm->has_overflown());
1550       _ref_counter = _ref_counter_limit;
1551     }
1552   }
1553 };
1554 
1555 // 'Drain' oop closure used by both serial and parallel reference processing.
1556 // Uses the G1CMTask associated with a given worker thread (for serial
1557 // reference processing the G1CMtask for worker 0 is used). Calls the
1558 // do_marking_step routine, with an unbelievably large timeout value,
1559 // to drain the marking data structures of the remaining entries
1560 // added by the 'keep alive' oop closure above.
1561 
1562 class G1CMDrainMarkingStackClosure : public VoidClosure {
1563   G1ConcurrentMark* _cm;
1564   G1CMTask*         _task;
1565   bool              _is_serial;
1566  public:
1567   G1CMDrainMarkingStackClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
1568     _cm(cm), _task(task), _is_serial(is_serial) {
1569     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1570   }
1571 
1572   void do_void() {
1573     do {
1574       // We call G1CMTask::do_marking_step() to completely drain the local
1575       // and global marking stacks of entries pushed by the 'keep alive'
1576       // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
1577       //
1578       // G1CMTask::do_marking_step() is called in a loop, which we'll exit
1579       // if there's nothing more to do (i.e. we've completely drained the
1580       // entries that were pushed as a result of applying the 'keep alive'
1581       // closure to the entries on the discovered ref lists) or we overflow
1582       // the global marking stack.
1583       //
1584       // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
1585       // flag while there may still be some work to do. (See the comment at
1586       // the beginning of G1CMTask::do_marking_step() for those conditions -
1587       // one of which is reaching the specified time target.) It is only
1588       // when G1CMTask::do_marking_step() returns without setting the
1589       // has_aborted() flag that the marking step has completed.
1590 
1591       _task->do_marking_step(1000000000.0 /* something very large */,
1592                              true         /* do_termination */,
1593                              _is_serial);
1594     } while (_task->has_aborted() && !_cm->has_overflown());
1595   }
1596 };
1597 
1598 class G1CMRefProcProxyTask : public RefProcProxyTask {
1599   G1CollectedHeap& _g1h;
1600   G1ConcurrentMark& _cm;
1601 
1602 public:
1603   G1CMRefProcProxyTask(uint max_workers, G1CollectedHeap& g1h, G1ConcurrentMark &cm)
1604     : RefProcProxyTask("G1CMRefProcProxyTask", max_workers),
1605       _g1h(g1h),
1606       _cm(cm) {}
1607 
1608   void work(uint worker_id) override {
1609     assert(worker_id < _max_workers, "sanity");
1610     G1CMIsAliveClosure is_alive(&_cm);
1611     uint index = (_tm == RefProcThreadModel::Single) ? 0 : worker_id;
1612     G1CMKeepAliveAndDrainClosure keep_alive(&_cm, _cm.task(index), _tm == RefProcThreadModel::Single);
1613     BarrierEnqueueDiscoveredFieldClosure enqueue;
1614     G1CMDrainMarkingStackClosure complete_gc(&_cm, _cm.task(index), _tm == RefProcThreadModel::Single);
1615     _rp_task->rp_work(worker_id, &is_alive, &keep_alive, &enqueue, &complete_gc);
1616   }
1617 
1618   void prepare_run_task_hook() override {
1619     // We need to reset the concurrency level before each
1620     // proxy task execution, so that the termination protocol
1621     // and overflow handling in G1CMTask::do_marking_step() knows
1622     // how many workers to wait for.
1623     _cm.set_concurrency(_queue_count);
1624   }
1625 };
1626 
1627 void G1ConcurrentMark::weak_refs_work() {
1628   ResourceMark rm;
1629 
1630   {
1631     GCTraceTime(Debug, gc, phases) debug("Reference Processing", _gc_timer_cm);
1632 
1633     ReferenceProcessor* rp = _g1h->ref_processor_cm();
1634 
1635     // See the comment in G1CollectedHeap::ref_processing_init()
1636     // about how reference processing currently works in G1.
1637 
1638     assert(_global_mark_stack.is_empty(), "mark stack should be empty");
1639 
1640     // Prefer to grow the stack until the max capacity.
1641     _global_mark_stack.set_should_grow();
1642 
1643     // Parallel processing task executor.
1644     G1CMRefProcProxyTask task(rp->max_num_queues(), *_g1h, *this);
1645     ReferenceProcessorPhaseTimes pt(_gc_timer_cm, rp->max_num_queues());
1646 
1647     // Process the weak references.
1648     const ReferenceProcessorStats& stats = rp->process_discovered_references(task, _g1h->workers(), pt);
1649     _gc_tracer_cm->report_gc_reference_stats(stats);
1650     pt.print_all_references();
1651 
1652     // The do_oop work routines of the keep_alive and drain_marking_stack
1653     // oop closures will set the has_overflown flag if we overflow the
1654     // global marking stack.
1655 
1656     assert(has_overflown() || _global_mark_stack.is_empty(),
1657            "Mark stack should be empty (unless it has overflown)");
1658   }
1659 
1660   if (has_overflown()) {
1661     // We can not trust g1_is_alive and the contents of the heap if the marking stack
1662     // overflowed while processing references. Exit the VM.
1663     fatal("Overflow during reference processing, can not continue. Current mark stack depth: "
1664           "%zu, MarkStackSize: %zu, MarkStackSizeMax: %zu. "
1665           "Please increase MarkStackSize and/or MarkStackSizeMax and restart.",
1666           _global_mark_stack.size(), MarkStackSize, MarkStackSizeMax);
1667     return;
1668   }
1669 
1670   assert(_global_mark_stack.is_empty(), "Marking should have completed");
1671 
1672   {
1673     GCTraceTime(Debug, gc, phases) debug("Weak Processing", _gc_timer_cm);
1674     G1CMIsAliveClosure is_alive(this);
1675     WeakProcessor::weak_oops_do(_g1h->workers(), &is_alive, &do_nothing_cl, 1);
1676   }
1677 }
1678 
1679 class G1PrecleanYieldClosure : public YieldClosure {
1680   G1ConcurrentMark* _cm;
1681 
1682 public:
1683   G1PrecleanYieldClosure(G1ConcurrentMark* cm) : _cm(cm) { }
1684 
1685   virtual bool should_return() {
1686     return _cm->has_aborted();
1687   }
1688 
1689   virtual bool should_return_fine_grain() {
1690     _cm->do_yield_check();
1691     return _cm->has_aborted();
1692   }
1693 };
1694 
1695 void G1ConcurrentMark::preclean() {
1696   assert(G1UseReferencePrecleaning, "Precleaning must be enabled.");
1697 
1698   SuspendibleThreadSetJoiner joiner;
1699 
1700   BarrierEnqueueDiscoveredFieldClosure enqueue;
1701 
1702   set_concurrency_and_phase(1, true);
1703 
1704   G1PrecleanYieldClosure yield_cl(this);
1705 
1706   ReferenceProcessor* rp = _g1h->ref_processor_cm();
1707   // Precleaning is single threaded. Temporarily disable MT discovery.
1708   ReferenceProcessorMTDiscoveryMutator rp_mut_discovery(rp, false);
1709   rp->preclean_discovered_references(rp->is_alive_non_header(),
1710                                      &enqueue,
1711                                      &yield_cl,
1712                                      _gc_timer_cm);
1713 }
1714 
1715 // Closure for marking entries in SATB buffers.
1716 class G1CMSATBBufferClosure : public SATBBufferClosure {
1717 private:
1718   G1CMTask* _task;
1719   G1CollectedHeap* _g1h;
1720 
1721   // This is very similar to G1CMTask::deal_with_reference, but with
1722   // more relaxed requirements for the argument, so this must be more
1723   // circumspect about treating the argument as an object.
1724   void do_entry(void* entry) const {
1725     _task->increment_refs_reached();
1726     oop const obj = cast_to_oop(entry);
1727     _task->make_reference_grey(obj);
1728   }
1729 
1730 public:
1731   G1CMSATBBufferClosure(G1CMTask* task, G1CollectedHeap* g1h)
1732     : _task(task), _g1h(g1h) { }
1733 
1734   virtual void do_buffer(void** buffer, size_t size) {
1735     for (size_t i = 0; i < size; ++i) {
1736       do_entry(buffer[i]);
1737     }
1738   }
1739 };
1740 
1741 class G1RemarkThreadsClosure : public ThreadClosure {
1742   G1SATBMarkQueueSet& _qset;
1743 
1744  public:
1745   G1RemarkThreadsClosure(G1CollectedHeap* g1h, G1CMTask* task) :
1746     _qset(G1BarrierSet::satb_mark_queue_set()) {}
1747 
1748   void do_thread(Thread* thread) {
1749     // Transfer any partial buffer to the qset for completed buffer processing.
1750     _qset.flush_queue(G1ThreadLocalData::satb_mark_queue(thread));
1751   }
1752 };
1753 
1754 class G1CMRemarkTask : public WorkerTask {
1755   // For Threads::possibly_parallel_threads_do
1756   ThreadsClaimTokenScope _threads_claim_token_scope;
1757   G1ConcurrentMark* _cm;
1758 public:
1759   void work(uint worker_id) {
1760     G1CMTask* task = _cm->task(worker_id);
1761     task->record_start_time();
1762     {
1763       ResourceMark rm;
1764 
1765       G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
1766       Threads::possibly_parallel_threads_do(true /* is_par */, &threads_f);
1767     }
1768 
1769     do {
1770       task->do_marking_step(1000000000.0 /* something very large */,
1771                             true         /* do_termination       */,
1772                             false        /* is_serial            */);
1773     } while (task->has_aborted() && !_cm->has_overflown());
1774     // If we overflow, then we do not want to restart. We instead
1775     // want to abort remark and do concurrent marking again.
1776     task->record_end_time();
1777   }
1778 
1779   G1CMRemarkTask(G1ConcurrentMark* cm, uint active_workers) :
1780     WorkerTask("Par Remark"), _threads_claim_token_scope(), _cm(cm) {
1781     _cm->terminator()->reset_for_reuse(active_workers);
1782   }
1783 };
1784 
1785 void G1ConcurrentMark::finalize_marking() {
1786   ResourceMark rm;
1787 
1788   _g1h->ensure_parsability(false);
1789 
1790   // this is remark, so we'll use up all active threads
1791   uint active_workers = _g1h->workers()->active_workers();
1792   set_concurrency_and_phase(active_workers, false /* concurrent */);
1793   // Leave _parallel_marking_threads at it's
1794   // value originally calculated in the G1ConcurrentMark
1795   // constructor and pass values of the active workers
1796   // through the task.
1797 
1798   {
1799     G1CMRemarkTask remarkTask(this, active_workers);
1800     // We will start all available threads, even if we decide that the
1801     // active_workers will be fewer. The extra ones will just bail out
1802     // immediately.
1803     _g1h->workers()->run_task(&remarkTask);
1804   }
1805 
1806   SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
1807   guarantee(has_overflown() ||
1808             satb_mq_set.completed_buffers_num() == 0,
1809             "Invariant: has_overflown = %s, num buffers = %zu",
1810             BOOL_TO_STR(has_overflown()),
1811             satb_mq_set.completed_buffers_num());
1812 
1813   print_stats();
1814 }
1815 
1816 void G1ConcurrentMark::flush_all_task_caches() {
1817   size_t hits = 0;
1818   size_t misses = 0;
1819   for (uint i = 0; i < _max_num_tasks; i++) {
1820     Pair<size_t, size_t> stats = _tasks[i]->flush_mark_stats_cache();
1821     hits += stats.first;
1822     misses += stats.second;
1823   }
1824   size_t sum = hits + misses;
1825   log_debug(gc, stats)("Mark stats cache hits %zu misses %zu ratio %1.3lf",
1826                        hits, misses, percent_of(hits, sum));
1827 }
1828 
1829 void G1ConcurrentMark::clear_bitmap_for_region(G1HeapRegion* hr) {
1830   assert_at_safepoint();
1831   _mark_bitmap.clear_range(MemRegion(hr->bottom(), hr->end()));
1832 }
1833 
1834 G1HeapRegion* G1ConcurrentMark::claim_region(uint worker_id) {
1835   // "Checkpoint" the finger.
1836   HeapWord* local_finger = finger();
1837 
1838   while (local_finger < _heap.end()) {
1839     assert(_g1h->is_in_reserved(local_finger), "invariant");
1840 
1841     G1HeapRegion* curr_region = _g1h->heap_region_containing_or_null(local_finger);
1842     // Make sure that the reads below do not float before loading curr_region.
1843     OrderAccess::loadload();
1844     // Above heap_region_containing may return null as we always scan claim
1845     // until the end of the heap. In this case, just jump to the next region.
1846     HeapWord* end = curr_region != nullptr ? curr_region->end() : local_finger + G1HeapRegion::GrainWords;
1847 
1848     // Is the gap between reading the finger and doing the CAS too long?
1849     HeapWord* res = _finger.compare_exchange(local_finger, end);
1850     if (res == local_finger && curr_region != nullptr) {
1851       // We succeeded.
1852       HeapWord* bottom = curr_region->bottom();
1853       HeapWord* limit = top_at_mark_start(curr_region);
1854 
1855       log_trace(gc, marking)("Claim region %u bottom " PTR_FORMAT " tams " PTR_FORMAT, curr_region->hrm_index(), p2i(curr_region->bottom()), p2i(top_at_mark_start(curr_region)));
1856       // Notice that _finger == end cannot be guaranteed here since,
1857       // someone else might have moved the finger even further.
1858       assert(finger() >= end, "The finger should have moved forward");
1859 
1860       if (limit > bottom) {
1861         return curr_region;
1862       } else {
1863         assert(limit == bottom,
1864                "The region limit should be at bottom");
1865         // We return null and the caller should try calling
1866         // claim_region() again.
1867         return nullptr;
1868       }
1869     } else {
1870       // Read the finger again.
1871       HeapWord* next_finger = finger();
1872       assert(next_finger > local_finger, "The finger should have moved forward " PTR_FORMAT " " PTR_FORMAT, p2i(local_finger), p2i(next_finger));
1873       local_finger = next_finger;
1874     }
1875   }
1876 
1877   return nullptr;
1878 }
1879 
1880 #ifndef PRODUCT
1881 class VerifyNoCSetOops {
1882   G1CollectedHeap* _g1h;
1883   const char* _phase;
1884   int _info;
1885 
1886 public:
1887   VerifyNoCSetOops(const char* phase, int info = -1) :
1888     _g1h(G1CollectedHeap::heap()),
1889     _phase(phase),
1890     _info(info)
1891   { }
1892 
1893   void operator()(G1TaskQueueEntry task_entry) const {
1894     if (task_entry.is_partial_array_state()) {
1895       oop obj = task_entry.to_partial_array_state()->source();
1896       guarantee(_g1h->is_in_reserved(obj), "Partial Array " PTR_FORMAT " must be in heap.", p2i(obj));
1897       return;
1898     }
1899     guarantee(oopDesc::is_oop(task_entry.to_oop()),
1900               "Non-oop " PTR_FORMAT ", phase: %s, info: %d",
1901               p2i(task_entry.to_oop()), _phase, _info);
1902     G1HeapRegion* r = _g1h->heap_region_containing(task_entry.to_oop());
1903     guarantee(!(r->in_collection_set() || r->has_index_in_opt_cset()),
1904               "obj " PTR_FORMAT " from %s (%d) in region %u in (optional) collection set",
1905               p2i(task_entry.to_oop()), _phase, _info, r->hrm_index());
1906   }
1907 };
1908 
1909 void G1ConcurrentMark::verify_no_collection_set_oops() {
1910   assert(SafepointSynchronize::is_at_safepoint() || !is_init_completed(),
1911          "should be at a safepoint or initializing");
1912   if (!is_fully_initialized() || !_g1h->collector_state()->is_in_mark_or_rebuild()) {
1913     return;
1914   }
1915 
1916   // Verify entries on the global mark stack
1917   _global_mark_stack.iterate(VerifyNoCSetOops("Stack"));
1918 
1919   // Verify entries on the task queues
1920   for (uint i = 0; i < _max_num_tasks; ++i) {
1921     G1CMTaskQueue* queue = _task_queues->queue(i);
1922     queue->iterate(VerifyNoCSetOops("Queue", i));
1923   }
1924 
1925   // Verify the global finger
1926   HeapWord* global_finger = finger();
1927   if (global_finger != nullptr && global_finger < _heap.end()) {
1928     // Since we always iterate over all regions, we might get a null G1HeapRegion
1929     // here.
1930     G1HeapRegion* global_hr = _g1h->heap_region_containing_or_null(global_finger);
1931     guarantee(global_hr == nullptr || global_finger == global_hr->bottom(),
1932               "global finger: " PTR_FORMAT " region: " HR_FORMAT,
1933               p2i(global_finger), HR_FORMAT_PARAMS(global_hr));
1934   }
1935 
1936   // Verify the task fingers
1937   assert(_num_concurrent_workers <= _max_num_tasks, "sanity");
1938   for (uint i = 0; i < _num_concurrent_workers; ++i) {
1939     G1CMTask* task = _tasks[i];
1940     HeapWord* task_finger = task->finger();
1941     if (task_finger != nullptr && task_finger < _heap.end()) {
1942       // See above note on the global finger verification.
1943       G1HeapRegion* r = _g1h->heap_region_containing_or_null(task_finger);
1944       guarantee(r == nullptr || task_finger == r->bottom() ||
1945                 !r->in_collection_set() || !r->has_index_in_opt_cset(),
1946                 "task finger: " PTR_FORMAT " region: " HR_FORMAT,
1947                 p2i(task_finger), HR_FORMAT_PARAMS(r));
1948     }
1949   }
1950 }
1951 #endif // PRODUCT
1952 
1953 void G1ConcurrentMark::rebuild_and_scrub() {
1954   if (!needs_remembered_set_rebuild()) {
1955     log_debug(gc, marking)("Skipping Remembered Set Rebuild. No regions selected for rebuild, will only scrub");
1956   }
1957 
1958   G1ConcurrentRebuildAndScrub::rebuild_and_scrub(this, needs_remembered_set_rebuild(), _concurrent_workers);
1959 }
1960 
1961 void G1ConcurrentMark::print_stats() {
1962   if (!log_is_enabled(Debug, gc, stats)) {
1963     return;
1964   }
1965   log_debug(gc, stats)("---------------------------------------------------------------------");
1966   for (size_t i = 0; i < _num_active_tasks; ++i) {
1967     _tasks[i]->print_stats();
1968     log_debug(gc, stats)("---------------------------------------------------------------------");
1969   }
1970 }
1971 
1972 bool G1ConcurrentMark::concurrent_cycle_abort() {
1973   assert_at_safepoint_on_vm_thread();
1974   assert(_g1h->collector_state()->is_in_full_gc(), "must be");
1975 
1976   // If we start the compaction before the CM threads finish
1977   // scanning the root regions we might trip them over as we'll
1978   // be moving objects / updating references. Since the root region
1979   // scan synchronized with the safepoint, just tell it to abort.
1980   // It will notice when the threads start up again later.
1981   abort_root_region_scan_at_safepoint();
1982 
1983   // We haven't started a concurrent cycle no need to do anything; we might have
1984   // aborted the marking because of shutting down though. In this case the marking
1985   // might have already completed the abort (leading to in_progress() below to
1986   // return false), however this still left marking state particularly in the
1987   // shared marking bitmap that must be cleaned up.
1988   // If there are multiple full gcs during shutdown we do this work repeatedly for
1989   // nothing, but this situation should be extremely rare (a full gc after shutdown
1990   // has been signalled is already rare), and this work should be negligible compared
1991   // to actual full gc work.
1992 
1993   if (!is_fully_initialized() || (!cm_thread()->is_in_progress() && !cm_thread()->should_terminate())) {
1994     return false;
1995   }
1996 
1997   reset_marking_for_restart();
1998 
1999   abort_marking_threads();
2000 
2001   SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
2002   satb_mq_set.abandon_partial_marking();
2003   // This can be called either during or outside marking, we'll read
2004   // the expected_active value from the SATB queue set.
2005   satb_mq_set.set_active_all_threads(false, /* new active value */
2006                                      satb_mq_set.is_active() /* expected_active */);
2007   return true;
2008 }
2009 
2010 void G1ConcurrentMark::abort_marking_threads() {
2011   assert_root_region_scan_completed_or_aborted();
2012   _has_aborted.store_relaxed(true);
2013   _first_overflow_barrier_sync.abort();
2014   _second_overflow_barrier_sync.abort();
2015 }
2016 
2017 double G1ConcurrentMark::worker_threads_cpu_time_s() {
2018   class CountCpuTimeThreadClosure : public ThreadClosure {
2019   public:
2020     jlong _total_cpu_time;
2021 
2022     CountCpuTimeThreadClosure() : ThreadClosure(), _total_cpu_time(0) { }
2023 
2024     void do_thread(Thread* t) {
2025       _total_cpu_time += os::thread_cpu_time(t);
2026     }
2027   } cl;
2028 
2029   threads_do(&cl);
2030 
2031   return (double)cl._total_cpu_time / NANOSECS_PER_SEC;
2032 }
2033 
2034 static void print_ms_time_info(const char* prefix, const char* name,
2035                                NumberSeq& ns) {
2036   log_trace(gc, marking)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
2037                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
2038   if (ns.num() > 0) {
2039     log_trace(gc, marking)("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
2040                            prefix, ns.sd(), ns.maximum());
2041   }
2042 }
2043 
2044 void G1ConcurrentMark::print_summary_info() {
2045   Log(gc, marking) log;
2046   if (!log.is_trace()) {
2047     return;
2048   }
2049 
2050   log.trace(" Concurrent marking:");
2051   if (!is_fully_initialized()) {
2052     log.trace("    has not been initialized yet");
2053     return;
2054   }
2055   print_ms_time_info("  ", "remarks", _remark_times);
2056   {
2057     print_ms_time_info("     ", "final marks", _remark_mark_times);
2058     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
2059 
2060   }
2061   print_ms_time_info("  ", "cleanups", _cleanup_times);
2062   log.trace("    Finalize live data total time = %8.2f s (avg = %8.2f ms).",
2063             _cleanup_times.sum() / 1000.0, _cleanup_times.avg());
2064   log.trace("  Total stop_world time = %8.2f s.",
2065             (_remark_times.sum() + _cleanup_times.sum())/1000.0);
2066   log.trace("  Total concurrent time = %8.2f s (%8.2f s marking).",
2067             cm_thread()->total_mark_cpu_time_s(), cm_thread()->worker_threads_cpu_time_s());
2068 }
2069 
2070 void G1ConcurrentMark::threads_do(ThreadClosure* tc) const {
2071   if (is_fully_initialized()) { // they are initialized late
2072     tc->do_thread(_cm_thread);
2073     _concurrent_workers->threads_do(tc);
2074   }
2075 }
2076 
2077 void G1ConcurrentMark::print_on(outputStream* st) const {
2078   st->print_cr("Marking Bits: (CMBitMap*) " PTR_FORMAT, p2i(mark_bitmap()));
2079   _mark_bitmap.print_on(st, " Bits: ");
2080 }
2081 
2082 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) {
2083   ReferenceProcessor* result = g1h->ref_processor_cm();
2084   assert(result != nullptr, "CM reference processor should not be null");
2085   return result;
2086 }
2087 
2088 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
2089                                G1CMTask* task)
2090   : ClaimMetadataVisitingOopIterateClosure(ClassLoaderData::_claim_strong, get_cm_oop_closure_ref_processor(g1h)),
2091     _g1h(g1h), _task(task)
2092 { }
2093 
2094 void G1CMTask::setup_for_region(G1HeapRegion* hr) {
2095   assert(hr != nullptr,
2096         "claim_region() should have filtered out null regions");
2097   _curr_region  = hr;
2098   _finger       = hr->bottom();
2099   update_region_limit();
2100 }
2101 
2102 void G1CMTask::update_region_limit() {
2103   G1HeapRegion* hr = _curr_region;
2104   HeapWord* bottom = hr->bottom();
2105   HeapWord* limit = _cm->top_at_mark_start(hr);
2106 
2107   if (limit == bottom) {
2108     // The region was collected underneath our feet.
2109     // We set the finger to bottom to ensure that the bitmap
2110     // iteration that will follow this will not do anything.
2111     // (this is not a condition that holds when we set the region up,
2112     // as the region is not supposed to be empty in the first place)
2113     _finger = bottom;
2114   } else if (limit >= _region_limit) {
2115     assert(limit >= _finger, "peace of mind");
2116   } else {
2117     assert(limit < _region_limit, "only way to get here");
2118     // This can happen under some pretty unusual circumstances.  An
2119     // evacuation pause empties the region underneath our feet (TAMS
2120     // at bottom). We then do some allocation in the region (TAMS
2121     // stays at bottom), followed by the region being used as a GC
2122     // alloc region (TAMS will move to top() and the objects
2123     // originally below it will be greyed). All objects now marked in
2124     // the region are explicitly greyed, if below the global finger,
2125     // and we do not need in fact to scan anything else. So, we simply
2126     // set _finger to be limit to ensure that the bitmap iteration
2127     // doesn't do anything.
2128     _finger = limit;
2129   }
2130 
2131   _region_limit = limit;
2132 }
2133 
2134 void G1CMTask::giveup_current_region() {
2135   assert(_curr_region != nullptr, "invariant");
2136   clear_region_fields();
2137 }
2138 
2139 void G1CMTask::clear_region_fields() {
2140   // Values for these three fields that indicate that we're not
2141   // holding on to a region.
2142   _curr_region   = nullptr;
2143   _finger        = nullptr;
2144   _region_limit  = nullptr;
2145 }
2146 
2147 void G1CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
2148   if (cm_oop_closure == nullptr) {
2149     assert(_cm_oop_closure != nullptr, "invariant");
2150   } else {
2151     assert(_cm_oop_closure == nullptr, "invariant");
2152   }
2153   _cm_oop_closure = cm_oop_closure;
2154 }
2155 
2156 void G1CMTask::reset(G1CMBitMap* mark_bitmap) {
2157   guarantee(mark_bitmap != nullptr, "invariant");
2158   _mark_bitmap              = mark_bitmap;
2159   clear_region_fields();
2160 
2161   _calls                         = 0;
2162   _elapsed_time_ms               = 0.0;
2163   _termination_time_ms           = 0.0;
2164 
2165   _mark_stats_cache.reset();
2166 }
2167 
2168 void G1CMTask::reset_for_restart() {
2169   clear_region_fields();
2170   _task_queue->set_empty();
2171   TASKQUEUE_STATS_ONLY(_partial_array_splitter.stats()->reset());
2172   TASKQUEUE_STATS_ONLY(_task_queue->stats.reset());
2173 }
2174 
2175 void G1CMTask::register_partial_array_splitter() {
2176 
2177   ::new (&_partial_array_splitter) PartialArraySplitter(_cm->partial_array_state_manager(),
2178                                                         _cm->max_num_tasks());
2179 }
2180 
2181 void G1CMTask::unregister_partial_array_splitter() {
2182   _partial_array_splitter.~PartialArraySplitter();
2183 }
2184 
2185 bool G1CMTask::should_exit_termination() {
2186   if (!regular_clock_call()) {
2187     return true;
2188   }
2189 
2190   // This is called when we are in the termination protocol. We should
2191   // quit if, for some reason, this task wants to abort or the global
2192   // stack is not empty (this means that we can get work from it).
2193   return !_cm->mark_stack_empty() || has_aborted();
2194 }
2195 
2196 void G1CMTask::reached_limit() {
2197   assert(_words_scanned >= _words_scanned_limit ||
2198          _refs_reached >= _refs_reached_limit ,
2199          "shouldn't have been called otherwise");
2200   abort_marking_if_regular_check_fail();
2201 }
2202 
2203 bool G1CMTask::regular_clock_call() {
2204   if (has_aborted()) {
2205     return false;
2206   }
2207 
2208   // First, we need to recalculate the words scanned and refs reached
2209   // limits for the next clock call.
2210   recalculate_limits();
2211 
2212   // During the regular clock call we do the following
2213 
2214   // (1) If an overflow has been flagged, then we abort.
2215   if (_cm->has_overflown()) {
2216     return false;
2217   }
2218 
2219   // If we are not concurrent (i.e. we're doing remark) we don't need
2220   // to check anything else. The other steps are only needed during
2221   // the concurrent marking phase.
2222   if (!_cm->concurrent()) {
2223     return true;
2224   }
2225 
2226   // (2) If marking has been aborted for Full GC, then we also abort.
2227   if (_cm->has_aborted()) {
2228     return false;
2229   }
2230 
2231   // (4) We check whether we should yield. If we have to, then we abort.
2232   if (SuspendibleThreadSet::should_yield()) {
2233     // We should yield. To do this we abort the task. The caller is
2234     // responsible for yielding.
2235     return false;
2236   }
2237 
2238   // (5) We check whether we've reached our time quota. If we have,
2239   // then we abort.
2240   double elapsed_time_ms = (double)(os::current_thread_cpu_time() - _start_cpu_time_ns) / NANOSECS_PER_MILLISEC;
2241   if (elapsed_time_ms > _time_target_ms) {
2242     _has_timed_out = true;
2243     return false;
2244   }
2245 
2246   // (6) Finally, we check whether there are enough completed STAB
2247   // buffers available for processing. If there are, we abort.
2248   SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
2249   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
2250     // we do need to process SATB buffers, we'll abort and restart
2251     // the marking task to do so
2252     return false;
2253   }
2254   return true;
2255 }
2256 
2257 void G1CMTask::recalculate_limits() {
2258   _real_words_scanned_limit = _words_scanned + words_scanned_period;
2259   _words_scanned_limit      = _real_words_scanned_limit;
2260 
2261   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
2262   _refs_reached_limit       = _real_refs_reached_limit;
2263 }
2264 
2265 void G1CMTask::decrease_limits() {
2266   // This is called when we believe that we're going to do an infrequent
2267   // operation which will increase the per byte scanned cost (i.e. move
2268   // entries to/from the global stack). It basically tries to decrease the
2269   // scanning limit so that the clock is called earlier.
2270 
2271   _words_scanned_limit = _real_words_scanned_limit - 3 * words_scanned_period / 4;
2272   _refs_reached_limit  = _real_refs_reached_limit - 3 * refs_reached_period / 4;
2273 }
2274 
2275 void G1CMTask::move_entries_to_global_stack() {
2276   // Local array where we'll store the entries that will be popped
2277   // from the local queue.
2278   G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2279 
2280   size_t n = 0;
2281   G1TaskQueueEntry task_entry;
2282   while (n < G1CMMarkStack::EntriesPerChunk && _task_queue->pop_local(task_entry)) {
2283     buffer[n] = task_entry;
2284     ++n;
2285   }
2286   if (n < G1CMMarkStack::EntriesPerChunk) {
2287     buffer[n] = G1TaskQueueEntry();
2288   }
2289 
2290   if (n > 0) {
2291     if (!_cm->mark_stack_push(buffer)) {
2292       set_has_aborted();
2293     }
2294   }
2295 
2296   // This operation was quite expensive, so decrease the limits.
2297   decrease_limits();
2298 }
2299 
2300 bool G1CMTask::get_entries_from_global_stack() {
2301   // Local array where we'll store the entries that will be popped
2302   // from the global stack.
2303   G1TaskQueueEntry buffer[G1CMMarkStack::EntriesPerChunk];
2304 
2305   if (!_cm->mark_stack_pop(buffer)) {
2306     return false;
2307   }
2308 
2309   // We did actually pop at least one entry.
2310   for (size_t i = 0; i < G1CMMarkStack::EntriesPerChunk; ++i) {
2311     G1TaskQueueEntry task_entry = buffer[i];
2312     if (task_entry.is_null()) {
2313       break;
2314     }
2315     assert(task_entry.is_partial_array_state() || oopDesc::is_oop(task_entry.to_oop()), "Element " PTR_FORMAT " must be an array slice or oop", p2i(task_entry.to_oop()));
2316     bool success = _task_queue->push(task_entry);
2317     // We only call this when the local queue is empty or under a
2318     // given target limit. So, we do not expect this push to fail.
2319     assert(success, "invariant");
2320   }
2321 
2322   // This operation was quite expensive, so decrease the limits
2323   decrease_limits();
2324   return true;
2325 }
2326 
2327 void G1CMTask::drain_local_queue(bool partially) {
2328   if (has_aborted()) {
2329     return;
2330   }
2331 
2332   // Decide what the target size is, depending whether we're going to
2333   // drain it partially (so that other tasks can steal if they run out
2334   // of things to do) or totally (at the very end).
2335   uint target_size;
2336   if (partially) {
2337     target_size = GCDrainStackTargetSize;
2338   } else {
2339     target_size = 0;
2340   }
2341 
2342   if (_task_queue->size() > target_size) {
2343     G1TaskQueueEntry entry;
2344     bool ret = _task_queue->pop_local(entry);
2345     while (ret) {
2346       process_entry(entry, false /* stolen */);
2347       if (_task_queue->size() <= target_size || has_aborted()) {
2348         ret = false;
2349       } else {
2350         ret = _task_queue->pop_local(entry);
2351       }
2352     }
2353   }
2354 }
2355 
2356 size_t G1CMTask::start_partial_array_processing(objArrayOop obj) {
2357   assert(obj->length() >= (int)ObjArrayMarkingStride, "Must be a large array object %d", obj->length());
2358 
2359   // Mark objArray klass metadata
2360   process_klass(obj->klass());
2361 
2362   size_t array_length = obj->length();
2363   size_t initial_chunk_size = _partial_array_splitter.start(_task_queue, obj, nullptr, array_length, ObjArrayMarkingStride);
2364 
2365   process_array_chunk(obj, 0, initial_chunk_size);
2366 
2367   // Include object header size
2368   return objArrayOopDesc::object_size(checked_cast<int>(initial_chunk_size));
2369 }
2370 
2371 size_t G1CMTask::process_partial_array(const G1TaskQueueEntry& task, bool stolen) {
2372   PartialArrayState* state = task.to_partial_array_state();
2373   // Access state before release by claim().
2374   objArrayOop obj = objArrayOop(state->source());
2375 
2376   PartialArraySplitter::Claim claim =
2377     _partial_array_splitter.claim(state, _task_queue, stolen);
2378 
2379   process_array_chunk(obj, claim._start, claim._end);
2380   return heap_word_size((claim._end - claim._start) * heapOopSize);
2381 }
2382 
2383 void G1CMTask::drain_global_stack(bool partially) {
2384   if (has_aborted()) {
2385     return;
2386   }
2387 
2388   // We have a policy to drain the local queue before we attempt to
2389   // drain the global stack.
2390   assert(partially || _task_queue->size() == 0, "invariant");
2391 
2392   // Decide what the target size is, depending whether we're going to
2393   // drain it partially (so that other tasks can steal if they run out
2394   // of things to do) or totally (at the very end).
2395   // Notice that when draining the global mark stack partially, due to the racyness
2396   // of the mark stack size update we might in fact drop below the target. But,
2397   // this is not a problem.
2398   // In case of total draining, we simply process until the global mark stack is
2399   // totally empty, disregarding the size counter.
2400   if (partially) {
2401     size_t const target_size = _cm->partial_mark_stack_size_target();
2402     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
2403       if (get_entries_from_global_stack()) {
2404         drain_local_queue(partially);
2405       }
2406     }
2407   } else {
2408     while (!has_aborted() && get_entries_from_global_stack()) {
2409       drain_local_queue(partially);
2410     }
2411   }
2412 }
2413 
2414 // SATB Queue has several assumptions on whether to call the par or
2415 // non-par versions of the methods. this is why some of the code is
2416 // replicated. We should really get rid of the single-threaded version
2417 // of the code to simplify things.
2418 void G1CMTask::drain_satb_buffers() {
2419   if (has_aborted()) {
2420     return;
2421   }
2422 
2423   // We set this so that the regular clock knows that we're in the
2424   // middle of draining buffers and doesn't set the abort flag when it
2425   // notices that SATB buffers are available for draining. It'd be
2426   // very counter productive if it did that. :-)
2427   _draining_satb_buffers = true;
2428 
2429   G1CMSATBBufferClosure satb_cl(this, _g1h);
2430   SATBMarkQueueSet& satb_mq_set = G1BarrierSet::satb_mark_queue_set();
2431 
2432   // This keeps claiming and applying the closure to completed buffers
2433   // until we run out of buffers or we need to abort.
2434   while (!has_aborted() &&
2435          satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
2436     abort_marking_if_regular_check_fail();
2437   }
2438 
2439   // Can't assert qset is empty here, even if not aborted.  If concurrent,
2440   // some other thread might be adding to the queue.  If not concurrent,
2441   // some other thread might have won the race for the last buffer, but
2442   // has not yet decremented the count.
2443 
2444   _draining_satb_buffers = false;
2445 
2446   // again, this was a potentially expensive operation, decrease the
2447   // limits to get the regular clock call early
2448   decrease_limits();
2449 }
2450 
2451 void G1CMTask::clear_mark_stats_cache(uint region_idx) {
2452   _mark_stats_cache.reset(region_idx);
2453 }
2454 
2455 Pair<size_t, size_t> G1CMTask::flush_mark_stats_cache() {
2456   return _mark_stats_cache.evict_all();
2457 }
2458 
2459 void G1CMTask::print_stats() {
2460   log_debug(gc, stats)("Marking Stats, task = %u, calls = %u", _worker_id, _calls);
2461   log_debug(gc, stats)("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
2462                        _elapsed_time_ms, _termination_time_ms);
2463   log_debug(gc, stats)("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms max = %1.2lfms, total = %1.2lfms",
2464                        _step_times_ms.num(),
2465                        _step_times_ms.avg(),
2466                        _step_times_ms.sd(),
2467                        _step_times_ms.maximum(),
2468                        _step_times_ms.sum());
2469   size_t const hits = _mark_stats_cache.hits();
2470   size_t const misses = _mark_stats_cache.misses();
2471   log_debug(gc, stats)("  Mark Stats Cache: hits %zu misses %zu ratio %.3f",
2472                        hits, misses, percent_of(hits, hits + misses));
2473 }
2474 
2475 bool G1ConcurrentMark::try_stealing(uint worker_id, G1TaskQueueEntry& task_entry) {
2476   return _task_queues->steal(worker_id, task_entry);
2477 }
2478 
2479 void G1CMTask::process_current_region(G1CMBitMapClosure& bitmap_closure) {
2480   if (has_aborted() || _curr_region == nullptr) {
2481     return;
2482   }
2483 
2484   // This means that we're already holding on to a region.
2485   assert(_finger != nullptr, "if region is not null, then the finger "
2486          "should not be null either");
2487 
2488   // We might have restarted this task after an evacuation pause
2489   // which might have evacuated the region we're holding on to
2490   // underneath our feet. Let's read its limit again to make sure
2491   // that we do not iterate over a region of the heap that
2492   // contains garbage (update_region_limit() will also move
2493   // _finger to the start of the region if it is found empty).
2494   update_region_limit();
2495   // We will start from _finger not from the start of the region,
2496   // as we might be restarting this task after aborting half-way
2497   // through scanning this region. In this case, _finger points to
2498   // the address where we last found a marked object. If this is a
2499   // fresh region, _finger points to start().
2500   MemRegion mr = MemRegion(_finger, _region_limit);
2501 
2502   assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
2503          "humongous regions should go around loop once only");
2504 
2505   // Some special cases:
2506   // If the memory region is empty, we can just give up the region.
2507   // If the current region is humongous then we only need to check
2508   // the bitmap for the bit associated with the start of the object,
2509   // scan the object if it's live, and give up the region.
2510   // Otherwise, let's iterate over the bitmap of the part of the region
2511   // that is left.
2512   // If the iteration is successful, give up the region.
2513   if (mr.is_empty()) {
2514     giveup_current_region();
2515     abort_marking_if_regular_check_fail();
2516   } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
2517     if (_mark_bitmap->is_marked(mr.start())) {
2518       // The object is marked - apply the closure
2519       bitmap_closure.do_addr(mr.start());
2520     }
2521     // Even if this task aborted while scanning the humongous object
2522     // we can (and should) give up the current region.
2523     giveup_current_region();
2524     abort_marking_if_regular_check_fail();
2525   } else if (_mark_bitmap->iterate(&bitmap_closure, mr)) {
2526     giveup_current_region();
2527     abort_marking_if_regular_check_fail();
2528   } else {
2529     assert(has_aborted(), "currently the only way to do so");
2530     // The only way to abort the bitmap iteration is to return
2531     // false from the do_bit() method. However, inside the
2532     // do_bit() method we move the _finger to point to the
2533     // object currently being looked at. So, if we bail out, we
2534     // have definitely set _finger to something non-null.
2535     assert(_finger != nullptr, "invariant");
2536 
2537     // Region iteration was actually aborted. So now _finger
2538     // points to the address of the object we last scanned. If we
2539     // leave it there, when we restart this task, we will rescan
2540     // the object. It is easy to avoid this. We move the finger by
2541     // enough to point to the next possible object header.
2542     assert(_finger < _region_limit, "invariant");
2543     HeapWord* const new_finger = _finger + cast_to_oop(_finger)->size();
2544     if (new_finger >= _region_limit) {
2545       giveup_current_region();
2546     } else {
2547       move_finger_to(new_finger);
2548     }
2549   }
2550 }
2551 
2552 void G1CMTask::claim_new_region() {
2553   // Read the note on the claim_region() method on why it might
2554   // return null with potentially more regions available for
2555   // claiming and why we have to check out_of_regions() to determine
2556   // whether we're done or not.
2557   while (!has_aborted() && _curr_region == nullptr && !_cm->out_of_regions()) {
2558     // We are going to try to claim a new region. We should have
2559     // given up on the previous one.
2560     // Separated the asserts so that we know which one fires.
2561     assert(_curr_region  == nullptr, "invariant");
2562     assert(_finger       == nullptr, "invariant");
2563     assert(_region_limit == nullptr, "invariant");
2564     G1HeapRegion* claimed_region = _cm->claim_region(_worker_id);
2565     if (claimed_region != nullptr) {
2566       // Yes, we managed to claim one
2567       setup_for_region(claimed_region);
2568       assert(_curr_region == claimed_region, "invariant");
2569     }
2570     // It is important to call the regular clock here. It might take
2571     // a while to claim a region if, for example, we hit a large
2572     // block of empty regions. So we need to call the regular clock
2573     // method once round the loop to make sure it's called
2574     // frequently enough.
2575     abort_marking_if_regular_check_fail();
2576   }
2577 }
2578 
2579 void G1CMTask::attempt_stealing() {
2580   // We cannot check whether the global stack is empty, since other
2581   // tasks might be pushing objects to it concurrently.
2582   assert(_cm->out_of_regions() && _task_queue->size() == 0,
2583          "only way to reach here");
2584   while (!has_aborted()) {
2585     G1TaskQueueEntry entry;
2586     if (_cm->try_stealing(_worker_id, entry)) {
2587       process_entry(entry, true /* stolen */);
2588 
2589       // And since we're towards the end, let's totally drain the
2590       // local queue and global stack.
2591       drain_local_queue(false);
2592       drain_global_stack(false);
2593     } else {
2594       break;
2595     }
2596   }
2597 }
2598 
2599 void G1CMTask::attempt_termination(bool is_serial) {
2600   // We cannot check whether the global stack is empty, since other
2601   // tasks might be concurrently pushing objects on it.
2602   // Separated the asserts so that we know which one fires.
2603   assert(_cm->out_of_regions(), "only way to reach here");
2604   assert(_task_queue->size() == 0, "only way to reach here");
2605   double termination_start_time_ms = os::elapsedTime() * 1000.0;
2606 
2607   // The G1CMTask class also extends the TerminatorTerminator class,
2608   // hence its should_exit_termination() method will also decide
2609   // whether to exit the termination protocol or not.
2610   bool finished = (is_serial ||
2611                    _cm->terminator()->offer_termination(this));
2612   _termination_time_ms += (os::elapsedTime() * 1000.0 - termination_start_time_ms);
2613 
2614   if (finished) {
2615     // We're all done.
2616 
2617     // We can now guarantee that the global stack is empty, since
2618     // all other tasks have finished. We separated the guarantees so
2619     // that, if a condition is false, we can immediately find out
2620     // which one.
2621     guarantee(_cm->out_of_regions(), "only way to reach here");
2622     guarantee(_cm->mark_stack_empty(), "only way to reach here");
2623     guarantee(_task_queue->size() == 0, "only way to reach here");
2624     guarantee(!_cm->has_overflown(), "only way to reach here");
2625     guarantee(!has_aborted(), "should never happen if termination has completed");
2626   } else {
2627     // Apparently there's more work to do. Let's abort this task. We
2628     // will restart it and hopefully we can find more things to do.
2629     set_has_aborted();
2630   }
2631 }
2632 
2633 void G1CMTask::handle_abort(bool is_serial, double elapsed_time_ms) {
2634   if (_has_timed_out) {
2635     double diff_ms = elapsed_time_ms - _time_target_ms;
2636     // Keep statistics of how well we did with respect to hitting
2637     // our target only if we actually timed out (if we aborted for
2638     // other reasons, then the results might get skewed).
2639     _marking_step_diff_ms.add(diff_ms);
2640   }
2641 
2642   if (!_cm->has_overflown()) {
2643     return;
2644   }
2645 
2646   // This is the interesting one. We aborted because a global
2647   // overflow was raised. This means we have to restart the
2648   // marking phase and start iterating over regions. However, in
2649   // order to do this we have to make sure that all tasks stop
2650   // what they are doing and re-initialize in a safe manner. We
2651   // will achieve this with the use of two barrier sync points.
2652   if (!is_serial) {
2653     // We only need to enter the sync barrier if being called
2654     // from a parallel context
2655     _cm->enter_first_sync_barrier(_worker_id);
2656 
2657     // When we exit this sync barrier we know that all tasks have
2658     // stopped doing marking work. So, it's now safe to
2659     // re-initialize our data structures.
2660   }
2661 
2662   clear_region_fields();
2663   flush_mark_stats_cache();
2664 
2665   if (!is_serial) {
2666     // If we're executing the concurrent phase of marking, reset the marking
2667     // state; otherwise the marking state is reset after reference processing,
2668     // during the remark pause.
2669     // If we reset here as a result of an overflow during the remark we will
2670     // see assertion failures from any subsequent set_concurrency_and_phase()
2671     // calls.
2672     if (_cm->concurrent() && _worker_id == 0) {
2673       // Worker 0 is responsible for clearing the global data structures because
2674       // of an overflow. During STW we should not clear the overflow flag (in
2675       // G1ConcurrentMark::reset_marking_state()) since we rely on it being true when we exit
2676       // method to abort the pause and restart concurrent marking.
2677       _cm->reset_marking_for_restart();
2678 
2679       log_info(gc, marking)("Concurrent Mark reset for overflow");
2680     }
2681 
2682     // ...and enter the second barrier.
2683     _cm->enter_second_sync_barrier(_worker_id);
2684   }
2685 }
2686 
2687 /*****************************************************************************
2688 
2689     The do_marking_step(time_target_ms, ...) method is the building
2690     block of the parallel marking framework. It can be called in parallel
2691     with other invocations of do_marking_step() on different tasks
2692     (but only one per task, obviously) and concurrently with the
2693     mutator threads, or during remark, hence it eliminates the need
2694     for two versions of the code. When called during remark, it will
2695     pick up from where the task left off during the concurrent marking
2696     phase. Interestingly, tasks are also claimable during evacuation
2697     pauses too, since do_marking_step() ensures that it aborts before
2698     it needs to yield.
2699 
2700     The data structures that it uses to do marking work are the
2701     following:
2702 
2703       (1) Marking Bitmap. If there are grey objects that appear only
2704       on the bitmap (this happens either when dealing with an overflow
2705       or when the concurrent start pause has simply marked the roots
2706       and didn't push them on the stack), then tasks claim heap
2707       regions whose bitmap they then scan to find grey objects. A
2708       global finger indicates where the end of the last claimed region
2709       is. A local finger indicates how far into the region a task has
2710       scanned. The two fingers are used to determine how to grey an
2711       object (i.e. whether simply marking it is OK, as it will be
2712       visited by a task in the future, or whether it needs to be also
2713       pushed on a stack).
2714 
2715       (2) Local Queue. The local queue of the task which is accessed
2716       reasonably efficiently by the task. Other tasks can steal from
2717       it when they run out of work. Throughout the marking phase, a
2718       task attempts to keep its local queue short but not totally
2719       empty, so that entries are available for stealing by other
2720       tasks. Only when there is no more work, a task will totally
2721       drain its local queue.
2722 
2723       (3) Global Mark Stack. This handles local queue overflow. During
2724       marking only sets of entries are moved between it and the local
2725       queues, as access to it requires a mutex and more fine-grain
2726       interaction with it which might cause contention. If it
2727       overflows, then the marking phase should restart and iterate
2728       over the bitmap to identify grey objects. Throughout the marking
2729       phase, tasks attempt to keep the global mark stack at a small
2730       length but not totally empty, so that entries are available for
2731       popping by other tasks. Only when there is no more work, tasks
2732       will totally drain the global mark stack.
2733 
2734       (4) SATB Buffer Queue. This is where completed SATB buffers are
2735       made available. Buffers are regularly removed from this queue
2736       and scanned for roots, so that the queue doesn't get too
2737       long. During remark, all completed buffers are processed, as
2738       well as the filled in parts of any uncompleted buffers.
2739 
2740     The do_marking_step() method tries to abort when the time target
2741     has been reached. There are a few other cases when the
2742     do_marking_step() method also aborts:
2743 
2744       (1) When the marking phase has been aborted (after a Full GC).
2745 
2746       (2) When a global overflow (on the global stack) has been
2747       triggered. Before the task aborts, it will actually sync up with
2748       the other tasks to ensure that all the marking data structures
2749       (local queues, stacks, fingers etc.)  are re-initialized so that
2750       when do_marking_step() completes, the marking phase can
2751       immediately restart.
2752 
2753       (3) When enough completed SATB buffers are available. The
2754       do_marking_step() method only tries to drain SATB buffers right
2755       at the beginning. So, if enough buffers are available, the
2756       marking step aborts and the SATB buffers are processed at
2757       the beginning of the next invocation.
2758 
2759       (4) To yield. when we have to yield then we abort and yield
2760       right at the end of do_marking_step(). This saves us from a lot
2761       of hassle as, by yielding we might allow a Full GC. If this
2762       happens then objects will be compacted underneath our feet, the
2763       heap might shrink, etc. We save checking for this by just
2764       aborting and doing the yield right at the end.
2765 
2766     From the above it follows that the do_marking_step() method should
2767     be called in a loop (or, otherwise, regularly) until it completes.
2768 
2769     If a marking step completes without its has_aborted() flag being
2770     true, it means it has completed the current marking phase (and
2771     also all other marking tasks have done so and have all synced up).
2772 
2773     A method called regular_clock_call() is invoked "regularly" (in
2774     sub ms intervals) throughout marking. It is this clock method that
2775     checks all the abort conditions which were mentioned above and
2776     decides when the task should abort. A work-based scheme is used to
2777     trigger this clock method: when the number of object words the
2778     marking phase has scanned or the number of references the marking
2779     phase has visited reach a given limit. Additional invocations to
2780     the method clock have been planted in a few other strategic places
2781     too. The initial reason for the clock method was to avoid calling
2782     cpu time gathering too regularly, as it is quite expensive. So,
2783     once it was in place, it was natural to piggy-back all the other
2784     conditions on it too and not constantly check them throughout the code.
2785 
2786     If do_termination is true then do_marking_step will enter its
2787     termination protocol.
2788 
2789     The value of is_serial must be true when do_marking_step is being
2790     called serially (i.e. by the VMThread) and do_marking_step should
2791     skip any synchronization in the termination and overflow code.
2792     Examples include the serial remark code and the serial reference
2793     processing closures.
2794 
2795     The value of is_serial must be false when do_marking_step is
2796     being called by any of the worker threads.
2797     Examples include the concurrent marking code (CMMarkingTask),
2798     the MT remark code, and the MT reference processing closures.
2799 
2800  *****************************************************************************/
2801 
2802 void G1CMTask::do_marking_step(double time_target_ms,
2803                                bool do_termination,
2804                                bool is_serial) {
2805   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
2806 
2807   _start_cpu_time_ns = os::current_thread_cpu_time();
2808 
2809   // If do_stealing is true then do_marking_step will attempt to
2810   // steal work from the other G1CMTasks. It only makes sense to
2811   // enable stealing when the termination protocol is enabled
2812   // and do_marking_step() is not being called serially.
2813   bool do_stealing = do_termination && !is_serial;
2814 
2815   G1Predictions const& predictor = _g1h->policy()->predictor();
2816   double diff_prediction_ms = predictor.predict_zero_bounded(&_marking_step_diff_ms);
2817   _time_target_ms = time_target_ms - diff_prediction_ms;
2818 
2819   // set up the variables that are used in the work-based scheme to
2820   // call the regular clock method
2821   _words_scanned = 0;
2822   _refs_reached  = 0;
2823   recalculate_limits();
2824 
2825   // clear all flags
2826   clear_has_aborted();
2827   _has_timed_out = false;
2828   _draining_satb_buffers = false;
2829 
2830   ++_calls;
2831 
2832   // Set up the bitmap and oop closures. Anything that uses them is
2833   // eventually called from this method, so it is OK to allocate these
2834   // statically.
2835   G1CMBitMapClosure bitmap_closure(this, _cm);
2836   G1CMOopClosure cm_oop_closure(_g1h, this);
2837   set_cm_oop_closure(&cm_oop_closure);
2838 
2839   if (_cm->has_overflown()) {
2840     // This can happen if the mark stack overflows during a GC pause
2841     // and this task, after a yield point, restarts. We have to abort
2842     // as we need to get into the overflow protocol which happens
2843     // right at the end of this task.
2844     set_has_aborted();
2845   }
2846 
2847   // First drain any available SATB buffers. After this, we will not
2848   // look at SATB buffers before the next invocation of this method.
2849   // If enough completed SATB buffers are queued up, the regular clock
2850   // will abort this task so that it restarts.
2851   drain_satb_buffers();
2852   // ...then partially drain the local queue and the global stack
2853   drain_local_queue(true);
2854   drain_global_stack(true);
2855 
2856   do {
2857     process_current_region(bitmap_closure);
2858     // At this point we have either completed iterating over the
2859     // region we were holding on to, or we have aborted.
2860 
2861     // We then partially drain the local queue and the global stack.
2862     drain_local_queue(true);
2863     drain_global_stack(true);
2864 
2865     claim_new_region();
2866 
2867     assert(has_aborted() || _curr_region != nullptr || _cm->out_of_regions(),
2868            "at this point we should be out of regions");
2869   } while ( _curr_region != nullptr && !has_aborted());
2870 
2871   // We cannot check whether the global stack is empty, since other
2872   // tasks might be pushing objects to it concurrently.
2873   assert(has_aborted() || _cm->out_of_regions(),
2874          "at this point we should be out of regions");
2875   // Try to reduce the number of available SATB buffers so that
2876   // remark has less work to do.
2877   drain_satb_buffers();
2878 
2879   // Since we've done everything else, we can now totally drain the
2880   // local queue and global stack.
2881   drain_local_queue(false);
2882   drain_global_stack(false);
2883 
2884   // Attempt at work stealing from other task's queues.
2885   if (do_stealing && !has_aborted()) {
2886     // We have not aborted. This means that we have finished all that
2887     // we could. Let's try to do some stealing...
2888     attempt_stealing();
2889   }
2890 
2891   // We still haven't aborted. Now, let's try to get into the
2892   // termination protocol.
2893   if (do_termination && !has_aborted()) {
2894     attempt_termination(is_serial);
2895   }
2896 
2897   // Mainly for debugging purposes to make sure that a pointer to the
2898   // closure which was statically allocated in this frame doesn't
2899   // escape it by accident.
2900   set_cm_oop_closure(nullptr);
2901   jlong end_cpu_time_ns = os::current_thread_cpu_time();
2902   double elapsed_time_ms = (double)(end_cpu_time_ns - _start_cpu_time_ns) / NANOSECS_PER_MILLISEC;
2903   // Update the step history.
2904   _step_times_ms.add(elapsed_time_ms);
2905 
2906   if (has_aborted()) {
2907     // The task was aborted for some reason.
2908     handle_abort(is_serial, elapsed_time_ms);
2909   }
2910 }
2911 
2912 G1CMTask::G1CMTask(uint worker_id,
2913                    G1ConcurrentMark* cm,
2914                    G1CMTaskQueue* task_queue,
2915                    G1RegionMarkStats* mark_stats) :
2916   _worker_id(worker_id),
2917   _g1h(G1CollectedHeap::heap()),
2918   _cm(cm),
2919   _mark_bitmap(nullptr),
2920   _task_queue(task_queue),
2921   _partial_array_splitter(_cm->partial_array_state_manager(), _cm->max_num_tasks()),
2922   _mark_stats_cache(mark_stats, G1RegionMarkStatsCache::RegionMarkStatsCacheSize),
2923   _calls(0),
2924   _time_target_ms(0.0),
2925   _start_cpu_time_ns(0),
2926   _cm_oop_closure(nullptr),
2927   _curr_region(nullptr),
2928   _finger(nullptr),
2929   _region_limit(nullptr),
2930   _words_scanned(0),
2931   _words_scanned_limit(0),
2932   _real_words_scanned_limit(0),
2933   _refs_reached(0),
2934   _refs_reached_limit(0),
2935   _real_refs_reached_limit(0),
2936   _has_aborted(false),
2937   _has_timed_out(false),
2938   _draining_satb_buffers(false),
2939   _step_times_ms(),
2940   _elapsed_time_ms(0.0),
2941   _termination_time_ms(0.0),
2942   _marking_step_diff_ms()
2943 {
2944   guarantee(task_queue != nullptr, "invariant");
2945 
2946   _marking_step_diff_ms.add(0.5);
2947 }
2948 
2949 // These are formatting macros that are used below to ensure
2950 // consistent formatting. The *_H_* versions are used to format the
2951 // header for a particular value and they should be kept consistent
2952 // with the corresponding macro. Also note that most of the macros add
2953 // the necessary white space (as a prefix) which makes them a bit
2954 // easier to compose.
2955 
2956 // All the output lines are prefixed with this string to be able to
2957 // identify them easily in a large log file.
2958 #define G1PPRL_LINE_PREFIX            "###"
2959 
2960 #define G1PPRL_ADDR_BASE_FORMAT    " " PTR_FORMAT "-" PTR_FORMAT
2961 #ifdef _LP64
2962 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
2963 #else // _LP64
2964 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
2965 #endif // _LP64
2966 
2967 // For per-region info
2968 #define G1PPRL_TYPE_FORMAT            "   %-4s"
2969 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
2970 #define G1PPRL_STATE_FORMAT           "   %-5s"
2971 #define G1PPRL_STATE_H_FORMAT         "   %5s"
2972 #define G1PPRL_BYTE_FORMAT            "  %9zu"
2973 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
2974 #define G1PPRL_DOUBLE_FORMAT          "%14.1f"
2975 #define G1PPRL_GCEFF_H_FORMAT         "  %14s"
2976 #define G1PPRL_GID_H_FORMAT           "  %9s"
2977 #define G1PPRL_GID_FORMAT             "  " UINT32_FORMAT_W(9)
2978 #define G1PPRL_LEN_FORMAT             "  " UINT32_FORMAT_W(14)
2979 #define G1PPRL_LEN_H_FORMAT           "  %14s"
2980 #define G1PPRL_GID_GCEFF_FORMAT       "  %14.1f"
2981 #define G1PPRL_GID_LIVENESS_FORMAT    "  %9.2f"
2982 
2983 // For summary info
2984 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  " tag ":" G1PPRL_ADDR_BASE_FORMAT
2985 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  " tag ": %zu"
2986 #define G1PPRL_SUM_MB_FORMAT(tag)      "  " tag ": %1.2f MB"
2987 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"
2988 
2989 G1PrintRegionLivenessInfoClosure::G1PrintRegionLivenessInfoClosure(const char* phase_name) :
2990   _total_used_bytes(0),
2991   _total_capacity_bytes(0),
2992   _total_live_bytes(0),
2993   _total_remset_bytes(0),
2994   _total_code_roots_bytes(0)
2995 {
2996   if (!log_is_enabled(Trace, gc, liveness)) {
2997     return;
2998   }
2999 
3000   G1CollectedHeap* g1h = G1CollectedHeap::heap();
3001   MemRegion reserved = g1h->reserved();
3002   double now = os::elapsedTime();
3003 
3004   // Print the header of the output.
3005   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
3006   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP"
3007                           G1PPRL_SUM_ADDR_FORMAT("reserved")
3008                           G1PPRL_SUM_BYTE_FORMAT("region-size"),
3009                           p2i(reserved.start()), p2i(reserved.end()),
3010                           G1HeapRegion::GrainBytes);
3011   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3012   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3013                           G1PPRL_TYPE_H_FORMAT
3014                           G1PPRL_ADDR_BASE_H_FORMAT
3015                           G1PPRL_BYTE_H_FORMAT
3016                           G1PPRL_BYTE_H_FORMAT
3017                           G1PPRL_STATE_H_FORMAT
3018                           G1PPRL_BYTE_H_FORMAT
3019                           G1PPRL_GID_H_FORMAT,
3020                           "type", "address-range",
3021                           "used", "live",
3022                           "state", "code-roots",
3023                           "group-id");
3024   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3025                           G1PPRL_TYPE_H_FORMAT
3026                           G1PPRL_ADDR_BASE_H_FORMAT
3027                           G1PPRL_BYTE_H_FORMAT
3028                           G1PPRL_BYTE_H_FORMAT
3029                           G1PPRL_STATE_H_FORMAT
3030                           G1PPRL_BYTE_H_FORMAT
3031                           G1PPRL_GID_H_FORMAT,
3032                           "", "",
3033                           "(bytes)", "(bytes)",
3034                           "", "(bytes)", "");
3035 }
3036 
3037 bool G1PrintRegionLivenessInfoClosure::do_heap_region(G1HeapRegion* r) {
3038   if (!log_is_enabled(Trace, gc, liveness)) {
3039     return false;
3040   }
3041 
3042   const char* type       = r->get_type_str();
3043   HeapWord* bottom       = r->bottom();
3044   HeapWord* end          = r->end();
3045   size_t capacity_bytes  = r->capacity();
3046   size_t used_bytes      = r->used();
3047   size_t live_bytes      = r->live_bytes();
3048   size_t remset_bytes    = r->rem_set()->mem_size();
3049   size_t code_roots_bytes = r->rem_set()->code_roots_mem_size();
3050   const char* remset_type = r->rem_set()->get_short_state_str();
3051   uint cset_group_id     = r->rem_set()->has_cset_group()
3052                          ? r->rem_set()->cset_group_id()
3053                          : G1CSetCandidateGroup::NoRemSetId;
3054 
3055   _total_used_bytes      += used_bytes;
3056   _total_capacity_bytes  += capacity_bytes;
3057   _total_live_bytes      += live_bytes;
3058   _total_remset_bytes    += remset_bytes;
3059   _total_code_roots_bytes += code_roots_bytes;
3060 
3061   // Print a line for this particular region.
3062   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3063                         G1PPRL_TYPE_FORMAT
3064                         G1PPRL_ADDR_BASE_FORMAT
3065                         G1PPRL_BYTE_FORMAT
3066                         G1PPRL_BYTE_FORMAT
3067                         G1PPRL_STATE_FORMAT
3068                         G1PPRL_BYTE_FORMAT
3069                         G1PPRL_GID_FORMAT,
3070                         type, p2i(bottom), p2i(end),
3071                         used_bytes, live_bytes,
3072                         remset_type, code_roots_bytes,
3073                         cset_group_id);
3074 
3075   return false;
3076 }
3077 
3078 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
3079   if (!log_is_enabled(Trace, gc, liveness)) {
3080     return;
3081   }
3082 
3083   G1CollectedHeap* g1h = G1CollectedHeap::heap();
3084   _total_remset_bytes += g1h->card_set_freelist_pool()->mem_size();
3085   // add static memory usages to remembered set sizes
3086   _total_remset_bytes += G1HeapRegionRemSet::static_mem_size();
3087 
3088   log_cset_candidate_groups();
3089 
3090   // Print the footer of the output.
3091   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3092   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3093                          " SUMMARY"
3094                          G1PPRL_SUM_MB_FORMAT("capacity")
3095                          G1PPRL_SUM_MB_PERC_FORMAT("used")
3096                          G1PPRL_SUM_MB_PERC_FORMAT("live")
3097                          G1PPRL_SUM_MB_FORMAT("remset")
3098                          G1PPRL_SUM_MB_FORMAT("code-roots"),
3099                          bytes_to_mb(_total_capacity_bytes),
3100                          bytes_to_mb(_total_used_bytes),
3101                          percent_of(_total_used_bytes, _total_capacity_bytes),
3102                          bytes_to_mb(_total_live_bytes),
3103                          percent_of(_total_live_bytes, _total_capacity_bytes),
3104                          bytes_to_mb(_total_remset_bytes),
3105                          bytes_to_mb(_total_code_roots_bytes));
3106 }
3107 
3108 void G1PrintRegionLivenessInfoClosure::log_cset_candidate_group_add_total(G1CSetCandidateGroup* group, const char* type) {
3109   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3110                           G1PPRL_GID_FORMAT
3111                           G1PPRL_LEN_FORMAT
3112                           G1PPRL_GID_GCEFF_FORMAT
3113                           G1PPRL_GID_LIVENESS_FORMAT
3114                           G1PPRL_BYTE_FORMAT
3115                           G1PPRL_TYPE_H_FORMAT,
3116                           group->group_id(),
3117                           group->length(),
3118                           group->length() > 0 ? group->gc_efficiency() : 0.0,
3119                           group->length() > 0 ? group->liveness_percent() : 0.0,
3120                           group->card_set()->mem_size(),
3121                           type);
3122   _total_remset_bytes += group->card_set()->mem_size();
3123 }
3124 
3125 void G1PrintRegionLivenessInfoClosure::log_cset_candidate_grouplist(G1CSetCandidateGroupList& gl, const char* type) {
3126   for (G1CSetCandidateGroup* group : gl) {
3127     log_cset_candidate_group_add_total(group, type);
3128   }
3129 }
3130 
3131 void G1PrintRegionLivenessInfoClosure::log_cset_candidate_groups() {
3132   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3133   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" Collection Set Candidate Groups");
3134   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX " Types: Y=Young, M=From Marking Regions, R=Retained Regions");
3135   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3136                           G1PPRL_GID_H_FORMAT
3137                           G1PPRL_LEN_H_FORMAT
3138                           G1PPRL_GCEFF_H_FORMAT
3139                           G1PPRL_BYTE_H_FORMAT
3140                           G1PPRL_BYTE_H_FORMAT
3141                           G1PPRL_TYPE_H_FORMAT,
3142                           "groud-id", "num-regions",
3143                           "gc-eff", "liveness",
3144                           "remset", "type");
3145 
3146   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3147                           G1PPRL_GID_H_FORMAT
3148                           G1PPRL_LEN_H_FORMAT
3149                           G1PPRL_GCEFF_H_FORMAT
3150                           G1PPRL_BYTE_H_FORMAT
3151                           G1PPRL_BYTE_H_FORMAT
3152                           G1PPRL_TYPE_H_FORMAT,
3153                           "", "",
3154                           "(bytes/ms)", "%",
3155                           "(bytes)", "");
3156 
3157   G1CollectedHeap* g1h = G1CollectedHeap::heap();
3158 
3159   log_cset_candidate_group_add_total(g1h->young_regions_cset_group(), "Y");
3160 
3161   G1CollectionSetCandidates* candidates = g1h->policy()->candidates();
3162   log_cset_candidate_grouplist(candidates->from_marking_groups(), "M");
3163   log_cset_candidate_grouplist(candidates->retained_groups(), "R");
3164 }