1 /*
   2  * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
   3  * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
   4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5  *
   6  * This code is free software; you can redistribute it and/or modify it
   7  * under the terms of the GNU General Public License version 2 only, as
   8  * published by the Free Software Foundation.
   9  *
  10  * This code is distributed in the hope that it will be useful, but WITHOUT
  11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13  * version 2 for more details (a copy is included in the LICENSE file that
  14  * accompanied this code).
  15  *
  16  * You should have received a copy of the GNU General Public License version
  17  * 2 along with this work; if not, write to the Free Software Foundation,
  18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19  *
  20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21  * or visit www.oracle.com if you need additional information or have any
  22  * questions.
  23  *
  24  */
  25 
  26 #include "gc/shenandoah/shenandoahAgeCensus.hpp"
  27 #include "gc/shenandoah/shenandoahClosures.inline.hpp"
  28 #include "gc/shenandoah/shenandoahCollectorPolicy.hpp"
  29 #include "gc/shenandoah/shenandoahFreeSet.hpp"
  30 #include "gc/shenandoah/shenandoahGeneration.hpp"
  31 #include "gc/shenandoah/shenandoahGenerationalControlThread.hpp"
  32 #include "gc/shenandoah/shenandoahGenerationalEvacuationTask.hpp"
  33 #include "gc/shenandoah/shenandoahGenerationalHeap.hpp"
  34 #include "gc/shenandoah/shenandoahHeap.inline.hpp"
  35 #include "gc/shenandoah/shenandoahHeapRegion.hpp"
  36 #include "gc/shenandoah/shenandoahHeapRegionClosures.hpp"
  37 #include "gc/shenandoah/shenandoahInitLogger.hpp"
  38 #include "gc/shenandoah/shenandoahMemoryPool.hpp"
  39 #include "gc/shenandoah/shenandoahMonitoringSupport.hpp"
  40 #include "gc/shenandoah/shenandoahOldGeneration.hpp"
  41 #include "gc/shenandoah/shenandoahPhaseTimings.hpp"
  42 #include "gc/shenandoah/shenandoahRegulatorThread.hpp"
  43 #include "gc/shenandoah/shenandoahScanRemembered.inline.hpp"
  44 #include "gc/shenandoah/shenandoahUtils.hpp"
  45 #include "gc/shenandoah/shenandoahWorkerPolicy.hpp"
  46 #include "gc/shenandoah/shenandoahYoungGeneration.hpp"
  47 #include "logging/log.hpp"
  48 #include "utilities/events.hpp"
  49 
  50 
  51 class ShenandoahGenerationalInitLogger : public ShenandoahInitLogger {
  52 public:
  53   static void print() {
  54     ShenandoahGenerationalInitLogger logger;
  55     logger.print_all();
  56   }
  57 protected:
  58   void print_gc_specific() override {
  59     ShenandoahInitLogger::print_gc_specific();
  60 
  61     ShenandoahGenerationalHeap* heap = ShenandoahGenerationalHeap::heap();
  62     log_info(gc, init)("Young Heuristics: %s", heap->young_generation()->heuristics()->name());
  63     log_info(gc, init)("Old Heuristics: %s", heap->old_generation()->heuristics()->name());
  64   }
  65 };
  66 
  67 size_t ShenandoahGenerationalHeap::calculate_min_plab() {
  68   return align_up(PLAB::min_size(), CardTable::card_size_in_words());
  69 }
  70 
  71 size_t ShenandoahGenerationalHeap::calculate_max_plab() {
  72   size_t MaxTLABSizeWords = ShenandoahHeapRegion::max_tlab_size_words();
  73   return align_down(MaxTLABSizeWords, CardTable::card_size_in_words());
  74 }
  75 
  76 // Returns size in bytes
  77 size_t ShenandoahGenerationalHeap::unsafe_max_tlab_alloc() const {
  78   return MIN2(ShenandoahHeapRegion::max_tlab_size_bytes(), young_generation()->available());
  79 }
  80 
// Construct the generational heap. Sub-objects that require a fully
// constructed heap (age census, regulator thread, memory pools) start as
// nullptr and are created in the later initialization phases.
ShenandoahGenerationalHeap::ShenandoahGenerationalHeap(ShenandoahCollectorPolicy* policy) :
  ShenandoahHeap(policy),
  _age_census(nullptr),               // created in post_initialize()
  _min_plab_size(calculate_min_plab()),
  _max_plab_size(calculate_max_plab()),
  _regulator_thread(nullptr),         // created in initialize_controller()
  _young_gen_memory_pool(nullptr),    // created in initialize_serviceability()
  _old_gen_memory_pool(nullptr) {
  // PLAB bounds must be card-aligned; retire_plab() relies on this to register
  // remnant filler objects with the remembered set without locking.
  assert(is_aligned(_min_plab_size, CardTable::card_size_in_words()), "min_plab_size must be aligned");
  assert(is_aligned(_max_plab_size, CardTable::card_size_in_words()), "max_plab_size must be aligned");
}
  92 
// Complete per-generation setup after the base class has built the generations.
void ShenandoahGenerationalHeap::initialize_generations() {
  ShenandoahHeap::initialize_generations();
  _young_generation->post_initialize(this);
  _old_generation->post_initialize(this);
}
  98 
// Late initialization: create the age census used for tenuring decisions
// during evacuation (see evacuate_object()).
void ShenandoahGenerationalHeap::post_initialize() {
  ShenandoahHeap::post_initialize();
  _age_census = new ShenandoahAgeCensus();
}
 103 
// Let the global heuristics and then each generation's heuristics finish setup.
void ShenandoahGenerationalHeap::post_initialize_heuristics() {
  ShenandoahHeap::post_initialize_heuristics();
  _young_generation->post_initialize_heuristics();
  _old_generation->post_initialize_heuristics();
}
 109 
 110 void ShenandoahGenerationalHeap::print_init_logger() const {
 111   ShenandoahGenerationalInitLogger logger;
 112   logger.print_all();
 113 }
 114 
// Create the young and old generations and their heuristics.
void ShenandoahGenerationalHeap::initialize_heuristics() {
  // Initialize global generation and heuristics even in generational mode.
  ShenandoahHeap::initialize_heuristics();

  // Both generations are created before either set of heuristics is
  // initialized; heuristics initialization receives the active GC mode.
  _young_generation = new ShenandoahYoungGeneration(max_workers());
  _old_generation = new ShenandoahOldGeneration(max_workers());
  _young_generation->initialize_heuristics(mode());
  _old_generation->initialize_heuristics(mode());
}
 124 
// Register one memory pool per generation with both the cycle and STW memory
// managers so management/monitoring sees per-generation usage for both kinds
// of collections.
void ShenandoahGenerationalHeap::initialize_serviceability() {
  assert(mode()->is_generational(), "Only for the generational mode");
  _young_gen_memory_pool = new ShenandoahYoungGenMemoryPool(this);
  _old_gen_memory_pool = new ShenandoahOldGenMemoryPool(this);
  cycle_memory_manager()->add_pool(_young_gen_memory_pool);
  cycle_memory_manager()->add_pool(_old_gen_memory_pool);
  stw_memory_manager()->add_pool(_young_gen_memory_pool);
  stw_memory_manager()->add_pool(_old_gen_memory_pool);
}
 134 
 135 GrowableArray<MemoryPool*> ShenandoahGenerationalHeap::memory_pools() {
 136   assert(mode()->is_generational(), "Only for the generational mode");
 137   GrowableArray<MemoryPool*> memory_pools(2);
 138   memory_pools.append(_young_gen_memory_pool);
 139   memory_pools.append(_old_gen_memory_pool);
 140   return memory_pools;
 141 }
 142 
// Create the GC control thread and the regulator thread. The regulator holds
// a reference to the (generational) control thread it steers.
void ShenandoahGenerationalHeap::initialize_controller() {
  auto control_thread = new ShenandoahGenerationalControlThread();
  _control_thread = control_thread;
  _regulator_thread = new ShenandoahRegulatorThread(control_thread);
}
 148 
// Apply the closure to all GC threads, including the regulator thread.
// Skipped entirely during shutdown, when these threads may be terminating.
void ShenandoahGenerationalHeap::gc_threads_do(ThreadClosure* tcl) const {
  if (!shenandoah_policy()->is_at_shutdown()) {
    ShenandoahHeap::gc_threads_do(tcl);
    tcl->do_thread(regulator_thread());
  }
}
 155 
// Stop the base heap's GC threads first, then the regulator thread.
void ShenandoahGenerationalHeap::stop() {
  ShenandoahHeap::stop();
  regulator_thread()->stop();
}
 160 
 161 void ShenandoahGenerationalHeap::start_idle_span() {
 162   young_generation()->heuristics()->start_idle_span();
 163 }
 164 
 165 bool ShenandoahGenerationalHeap::requires_barriers(stackChunkOop obj) const {
 166   if (is_idle()) {
 167     return false;
 168   }
 169 
 170   if (is_concurrent_young_mark_in_progress() && is_in_young(obj) && !marking_context()->allocated_after_mark_start(obj)) {
 171     // We are marking young, this object is in young, and it is below the TAMS
 172     return true;
 173   }
 174 
 175   if (is_in_old(obj)) {
 176     // Card marking barriers are required for objects in the old generation
 177     return true;
 178   }
 179 
 180   if (has_forwarded_objects()) {
 181     // Object may have pointers that need to be updated
 182     return true;
 183   }
 184 
 185   return false;
 186 }
 187 
 188 void ShenandoahGenerationalHeap::evacuate_collection_set(ShenandoahGeneration* generation, bool concurrent) {
 189   ShenandoahRegionIterator regions;
 190   ShenandoahGenerationalEvacuationTask task(this, generation, &regions, concurrent,
 191                                            (ShenandoahGCStateCheckHotpatch ? collection_set()->is_empty() : false) /* only promote regions */);
 192   workers()->run_task(&task);
 193 }
 194 
 195 void ShenandoahGenerationalHeap::promote_regions_in_place(ShenandoahGeneration* generation, bool concurrent) {
 196   ShenandoahRegionIterator regions;
 197   ShenandoahGenerationalEvacuationTask task(this, generation, &regions, concurrent, true /* only promote regions */);
 198   workers()->run_task(&task);
 199 }
 200 
// Evacuate an object out of a collection-set region. Young objects whose
// combined region+mark age is tenurable are first offered to old (promotion);
// on promotion failure they are evacuated within young. Old objects stay in
// old. Returns the forwardee (ours or a competitor's), or the original object
// if this thread has engaged the OOM-during-evac protocol.
oop ShenandoahGenerationalHeap::evacuate_object(oop p, Thread* thread) {
  assert(thread == Thread::current(), "Expected thread parameter to be current thread.");
  if (ShenandoahThreadLocalData::is_oom_during_evac(thread)) {
    // This thread went through the OOM during evac protocol and it is safe to return
    // the forward pointer. It must not attempt to evacuate anymore.
    return ShenandoahBarrierSet::resolve_forwarded(p);
  }

  assert(ShenandoahThreadLocalData::is_evac_allowed(thread), "must be enclosed in oom-evac scope");

  ShenandoahHeapRegion* from_region = heap_region_containing(p);
  assert(!from_region->is_humongous(), "never evacuate humongous objects");

  // Try to keep the object in the same generation
  const ShenandoahAffiliation target_gen = from_region->affiliation();

  if (target_gen == YOUNG_GENERATION) {
    markWord mark = p->mark();
    if (mark.is_marked()) {
      // Already forwarded.
      return ShenandoahBarrierSet::resolve_forwarded(p);
    }

    if (mark.has_displaced_mark_helper()) {
      // We don't want to deal with MT here just to ensure we read the right mark word.
      // Skip the potential promotion attempt for this one.
    } else if (age_census()->is_tenurable(from_region->age() + mark.age())) {
      // If the object is tenurable, try to promote it
      oop result = try_evacuate_object<YOUNG_GENERATION, OLD_GENERATION>(p, thread, from_region->age());

      // If we failed to promote this aged object, we'll fall through to code below and evacuate to young-gen.
      if (result != nullptr) {
        return result;
      }
    }
    return try_evacuate_object<YOUNG_GENERATION, YOUNG_GENERATION>(p, thread, from_region->age());
  }

  assert(target_gen == OLD_GENERATION, "Expected evacuation to old");
  return try_evacuate_object<OLD_GENERATION, OLD_GENERATION>(p, thread, from_region->age());
}
 242 
// try_evacuate_object registers the object and dirties the associated remembered set information when evacuating
// to OLD_GENERATION.
// Returns the winning copy on success (ours or a competing thread's). When no
// memory can be found: a failed promotion (YOUNG -> OLD) returns nullptr so the
// caller can retry within young; otherwise the OOM-during-evac protocol is
// engaged and the resolved forwardee is returned.
template<ShenandoahAffiliation FROM_GENERATION, ShenandoahAffiliation TO_GENERATION>
oop ShenandoahGenerationalHeap::try_evacuate_object(oop p, Thread* thread, uint from_region_age) {
  bool alloc_from_lab = true;
  bool has_plab = false;
  HeapWord* copy = nullptr;
  size_t size = ShenandoahForwarding::size(p);
  constexpr bool is_promotion = (TO_GENERATION == OLD_GENERATION) && (FROM_GENERATION == YOUNG_GENERATION);

#ifdef ASSERT
  if (ShenandoahOOMDuringEvacALot &&
      (os::random() & 1) == 0) { // Simulate OOM every ~2nd slow-path call
    copy = nullptr;
  } else {
#endif
    // First choice: allocate from the thread-local LAB appropriate to the
    // target generation (GCLAB for young, PLAB for old).
    if (UseTLAB) {
      switch (TO_GENERATION) {
        case YOUNG_GENERATION: {
          copy = allocate_from_gclab(thread, size);
          if ((copy == nullptr) && (size < ShenandoahThreadLocalData::gclab_size(thread))) {
            // GCLAB allocation failed because we are bumping up against the limit on young evacuation reserve.  Try resetting
            // the desired GCLAB size and retry GCLAB allocation to avoid cascading of shared memory allocations.
            ShenandoahThreadLocalData::set_gclab_size(thread, PLAB::min_size());
            copy = allocate_from_gclab(thread, size);
            // If we still get nullptr, we'll try a shared allocation below.
          }
          break;
        }
        case OLD_GENERATION: {
          PLAB* plab = ShenandoahThreadLocalData::plab(thread);
          if (plab != nullptr) {
            has_plab = true;
            copy = allocate_from_plab(thread, size, is_promotion);
            if ((copy == nullptr) && (size < ShenandoahThreadLocalData::plab_size(thread)) &&
                ShenandoahThreadLocalData::plab_retries_enabled(thread)) {
              // PLAB allocation failed because we are bumping up against the limit on old evacuation reserve or because
              // the requested object does not fit within the current plab but the plab still has an "abundance" of memory,
              // where abundance is defined as >= ShenGenHeap::plab_min_size().  In the former case, we try shrinking the
              // desired PLAB size to the minimum and retry PLAB allocation to avoid cascading of shared memory allocations.
              // Shrinking the desired PLAB size may allow us to eke out a small PLAB while staying beneath evacuation reserve.
              if (plab->words_remaining() < plab_min_size()) {
                ShenandoahThreadLocalData::set_plab_size(thread, plab_min_size());
                copy = allocate_from_plab(thread, size, is_promotion);
                // If we still get nullptr, we'll try a shared allocation below.
                if (copy == nullptr) {
                  // If retry fails, don't continue to retry until we have success (probably in next GC pass)
                  ShenandoahThreadLocalData::disable_plab_retries(thread);
                }
              }
              // else, copy still equals nullptr.  this causes shared allocation below, preserving this plab for future needs.
            }
          }
          break;
        }
        default: {
          ShouldNotReachHere();
          break;
        }
      }
    }

    if (copy == nullptr) {
      // If we failed to allocate in LAB, we'll try a shared allocation.
      if (!is_promotion || !has_plab || (size > PLAB::min_size())) {
        ShenandoahAllocRequest req = ShenandoahAllocRequest::for_shared_gc(size, TO_GENERATION, is_promotion);
        copy = allocate_memory(req);
        alloc_from_lab = false;
      }
      // else, we leave copy equal to nullptr, signaling a promotion failure below if appropriate.
      // We choose not to promote objects smaller than size_threshold by way of shared allocations as this is too
      // costly.  Instead, we'll simply "evacuate" to young-gen memory (using a GCLAB) and will promote in a future
      // evacuation pass.  This condition is denoted by: is_promotion && has_plab && (size <= size_threshhold).
    }
#ifdef ASSERT
  }
#endif

  if (copy == nullptr) {
    // All allocation attempts failed; unwind according to the evacuation kind.
    if (TO_GENERATION == OLD_GENERATION) {
      if (FROM_GENERATION == YOUNG_GENERATION) {
        // Signal that promotion failed. Will evacuate this old object somewhere in young gen.
        old_generation()->handle_failed_promotion(thread, size);
        return nullptr;
      } else {
        // Remember that evacuation to old gen failed. We'll want to trigger a full gc to recover from this
        // after the evacuation threads have finished.
        old_generation()->handle_failed_evacuation();
      }
    }

    control_thread()->handle_alloc_failure_evac(size);
    oom_evac_handler()->handle_out_of_memory_during_evacuation();
    return ShenandoahBarrierSet::resolve_forwarded(p);
  }

  if (ShenandoahEvacTracking) {
    evac_tracker()->begin_evacuation(thread, size * HeapWordSize, FROM_GENERATION, TO_GENERATION);
  }

  // Copy the object:
  Copy::aligned_disjoint_words(cast_from_oop<HeapWord*>(p), copy, size);
  oop copy_val = cast_to_oop(copy);

  // Update the age of the evacuated object
  if (TO_GENERATION == YOUNG_GENERATION && is_aging_cycle()) {
    increase_object_age(copy_val, from_region_age + 1);
  }

  // Try to install the new forwarding pointer. Only one competing thread wins;
  // losers must roll back or overwrite their speculative copies below.
  oop result = ShenandoahForwarding::try_update_forwardee(p, copy_val);
  if (result == copy_val) {
    // Successfully evacuated. Our copy is now the public one!

    // This is necessary for virtual thread support. This uses the mark word without
    // considering that it may now be a forwarding pointer (and could therefore crash).
    // Secondarily, we do not want to spend cycles relativizing stack chunks for oops
    // that lost the evacuation race (and will therefore not become visible). It is
    // safe to do this on the public copy (this is also done during concurrent mark).
    ContinuationGCSupport::relativize_stack_chunk(copy_val);

    if (ShenandoahEvacTracking) {
      // Record that the evacuation succeeded
      evac_tracker()->end_evacuation(thread, size * HeapWordSize, FROM_GENERATION, TO_GENERATION);
    }

    if (TO_GENERATION == OLD_GENERATION) {
      old_generation()->handle_evacuation(copy, size);
    }
  }  else {
    // Failed to evacuate. We need to deal with the object that is left behind. Since this
    // new allocation is certainly after TAMS, it will be considered live in the next cycle.
    // But if it happens to contain references to evacuated regions, those references would
    // not get updated for this stale copy during this cycle, and we will crash while scanning
    // it the next cycle.
    if (alloc_from_lab) {
      // For LAB allocations, it is enough to rollback the allocation ptr. Either the next
      // object will overwrite this stale copy, or the filler object on LAB retirement will
      // do this.
      switch (TO_GENERATION) {
        case YOUNG_GENERATION: {
          ShenandoahThreadLocalData::gclab(thread)->undo_allocation(copy, size);
          break;
        }
        case OLD_GENERATION: {
          ShenandoahThreadLocalData::plab(thread)->undo_allocation(copy, size);
          if (is_promotion) {
            ShenandoahThreadLocalData::subtract_from_plab_promoted(thread, size * HeapWordSize);
          }
          break;
        }
        default: {
          ShouldNotReachHere();
          break;
        }
      }
    } else {
      // For non-LAB allocations, we have no way to retract the allocation, and
      // have to explicitly overwrite the copy with the filler object. With that overwrite,
      // we have to keep the fwdptr initialized and pointing to our (stale) copy.
      assert(size >= ShenandoahHeap::min_fill_size(), "previously allocated object known to be larger than min_size");
      fill_with_object(copy, size);
    }
  }
  shenandoah_assert_correct(nullptr, result);
  return result;
}
 410 
 411 template oop ShenandoahGenerationalHeap::try_evacuate_object<YOUNG_GENERATION, YOUNG_GENERATION>(oop p, Thread* thread, uint from_region_age);
 412 template oop ShenandoahGenerationalHeap::try_evacuate_object<YOUNG_GENERATION, OLD_GENERATION>(oop p, Thread* thread, uint from_region_age);
 413 template oop ShenandoahGenerationalHeap::try_evacuate_object<OLD_GENERATION, OLD_GENERATION>(oop p, Thread* thread, uint from_region_age);
 414 
// Attempt to allocate size words from the thread's PLAB (old-gen LAB),
// refilling via the slow path when appropriate. Returns nullptr to make the
// caller fall back to a shared allocation. Promotions are additionally
// gated by the thread's allow_plab_promotions() state and are tracked
// against the per-thread promoted-bytes counter.
inline HeapWord* ShenandoahGenerationalHeap::allocate_from_plab(Thread* thread, size_t size, bool is_promotion) {
  assert(UseTLAB, "TLABs should be enabled");

  PLAB* plab = ShenandoahThreadLocalData::plab(thread);
  HeapWord* obj;

  if (plab == nullptr) {
    assert(!thread->is_Java_thread() && !thread->is_Worker_thread(), "Performance: thread should have PLAB: %s", thread->name());
    // No PLABs in this thread, fallback to shared allocation
    return nullptr;
  } else if (is_promotion && !ShenandoahThreadLocalData::allow_plab_promotions(thread)) {
    return nullptr;
  }
  // if plab->word_size() <= 0, thread's plab not yet initialized for this pass, so allow_plab_promotions() is not trustworthy
  obj = plab->allocate(size);
  if ((obj == nullptr) && (plab->words_remaining() < plab_min_size())) {
    // allocate_from_plab_slow will establish allow_plab_promotions(thread) for future invocations
    obj = allocate_from_plab_slow(thread, size, is_promotion);
  }
  // if plab->words_remaining() >= ShenGenHeap::heap()->plab_min_size(), just return nullptr so we can use a shared allocation
  if (obj == nullptr) {
    return nullptr;
  }

  if (is_promotion) {
    ShenandoahThreadLocalData::add_to_plab_promoted(thread, size * HeapWordSize);
  }
  return obj;
}
 444 
// Establish a new PLAB and allocate size HeapWords within it.
// Retires the current PLAB (when it is nearly empty), allocates a fresh
// card-aligned buffer, and serves the request from it. Returns nullptr to
// direct the caller to a shared allocation instead.
HeapWord* ShenandoahGenerationalHeap::allocate_from_plab_slow(Thread* thread, size_t size, bool is_promotion) {
  assert(mode()->is_generational(), "PLABs only relevant to generational GC");

  const size_t plab_min_size = this->plab_min_size();
  // PLABs are aligned to card boundaries to avoid synchronization with concurrent
  // allocations in other PLABs.
  const size_t min_size = (size > plab_min_size)? align_up(size, CardTable::card_size_in_words()): plab_min_size;

  // Figure out size of new PLAB, using value determined at last refill.
  size_t cur_size = ShenandoahThreadLocalData::plab_size(thread);
  if (cur_size == 0) {
    cur_size = plab_min_size;
  }

  // Expand aggressively, doubling at each refill in this epoch, ceiling at plab_max_size()
  const size_t future_size = MIN2(cur_size * 2, plab_max_size());
  // Doubling, starting at a card-multiple, should give us a card-multiple. (Ceiling and floor
  // are card multiples.)
  assert(is_aligned(future_size, CardTable::card_size_in_words()), "Card multiple by construction, future_size: %zu"
          ", card_size: %u, cur_size: %zu, max: %zu",
         future_size, CardTable::card_size_in_words(), cur_size, plab_max_size());

  // Record new heuristic value even if we take any shortcut. This captures
  // the case when moderately-sized objects always take a shortcut. At some point,
  // heuristics should catch up with them.  Note that the requested cur_size may
  // not be honored, but we remember that this is the preferred size.
  log_debug(gc, plab)("Set next PLAB refill size: %zu bytes", future_size * HeapWordSize);
  ShenandoahThreadLocalData::set_plab_size(thread, future_size);

  if (cur_size < size) {
    // The PLAB to be allocated is still not large enough to hold the object. Fall back to shared allocation.
    // This avoids retiring perfectly good PLABs in order to represent a single large object allocation.
    log_debug(gc, plab)("Current PLAB size (%zu) is too small for %zu", cur_size * HeapWordSize, size * HeapWordSize);
    return nullptr;
  }

  // Retire current PLAB, and allocate a new one.
  PLAB* plab = ShenandoahThreadLocalData::plab(thread);
  if (plab->words_remaining() < plab_min_size) {
    // Retire current PLAB. This takes care of any PLAB book-keeping.
    // retire_plab() registers the remnant filler object with the remembered set scanner without a lock.
    // Since PLABs are card-aligned, concurrent registrations in other PLABs don't interfere.
    retire_plab(plab, thread);

    size_t actual_size = 0;
    HeapWord* plab_buf = allocate_new_plab(min_size, cur_size, &actual_size);
    if (plab_buf == nullptr) {
      if (min_size == plab_min_size) {
        // Disable PLAB promotions for this thread because we cannot even allocate a minimal PLAB. This allows us
        // to fail faster on subsequent promotion attempts.
        ShenandoahThreadLocalData::disable_plab_promotions(thread);
      }
      return nullptr;
    } else {
      ShenandoahThreadLocalData::enable_plab_retries(thread);
    }
    // Since the allocated PLAB may have been down-sized for alignment, plab->allocate(size) below may still fail.
    if (ZeroTLAB) {
      // ... and clear it.
      Copy::zero_to_words(plab_buf, actual_size);
    } else {
      // ...and zap just allocated object.
#ifdef ASSERT
      // Skip mangling the space corresponding to the object header to
      // ensure that the returned space is not considered parsable by
      // any concurrent GC thread.
      size_t hdr_size = oopDesc::header_size();
      Copy::fill_to_words(plab_buf + hdr_size, actual_size - hdr_size, badHeapWordVal);
#endif // ASSERT
    }
    assert(is_aligned(actual_size, CardTable::card_size_in_words()), "Align by design");
    plab->set_buf(plab_buf, actual_size);
    if (is_promotion && !ShenandoahThreadLocalData::allow_plab_promotions(thread)) {
      return nullptr;
    }
    return plab->allocate(size);
  } else {
    // If there's still at least min_size() words available within the current plab, don't retire it.  Let's nibble
    // away on this plab as long as we can.  Meanwhile, return nullptr to force this particular allocation request
    // to be satisfied with a shared allocation.  By packing more promotions into the previously allocated PLAB, we
    // reduce the likelihood of evacuation failures, and we reduce the need for downsizing our PLABs.
    return nullptr;
  }
}
 530 
 531 HeapWord* ShenandoahGenerationalHeap::allocate_new_plab(size_t min_size, size_t word_size, size_t* actual_size) {
 532   // Align requested sizes to card-sized multiples.  Align down so that we don't violate max size of TLAB.
 533   assert(is_aligned(min_size, CardTable::card_size_in_words()), "Align by design");
 534   assert(word_size >= min_size, "Requested PLAB is too small");
 535 
 536   ShenandoahAllocRequest req = ShenandoahAllocRequest::for_plab(min_size, word_size);
 537   // Note that allocate_memory() sets a thread-local flag to prohibit further promotions by this thread
 538   // if we are at risk of infringing on the old-gen evacuation budget.
 539   HeapWord* res = allocate_memory(req);
 540   if (res != nullptr) {
 541     *actual_size = req.actual_size();
 542   } else {
 543     *actual_size = 0;
 544   }
 545   assert(is_aligned(res, CardTable::card_size_in_words()), "Align by design");
 546   return res;
 547 }
 548 
// Retire the thread's PLAB: return unspent promotion budget, reset the
// per-thread promotion accounting, fill the unused tail, and register any
// resulting filler object with the remembered-set scanner.
void ShenandoahGenerationalHeap::retire_plab(PLAB* plab, Thread* thread) {
  // We don't enforce limits on plab evacuations.  We let it consume all available old-gen memory in order to reduce
  // probability of an evacuation failure.  We do enforce limits on promotion, to make sure that excessive promotion
  // does not result in an old-gen evacuation failure.  Note that a failed promotion is relatively harmless.  Any
  // object that fails to promote in the current cycle will be eligible for promotion in a subsequent cycle.

  // When the plab was instantiated, its entirety was treated as if the entire buffer was going to be dedicated to
  // promotions.  Now that we are retiring the buffer, we adjust for the reality that the plab is not entirely promotions.
  //  1. Some of the plab may have been dedicated to evacuations.
  //  2. Some of the plab may have been abandoned due to waste (at the end of the plab).
  size_t not_promoted =
          ShenandoahThreadLocalData::get_plab_actual_size(thread) - ShenandoahThreadLocalData::get_plab_promoted(thread);
  ShenandoahThreadLocalData::reset_plab_promoted(thread);
  ShenandoahThreadLocalData::set_plab_actual_size(thread, 0);
  if (not_promoted > 0) {
    log_debug(gc, plab)("Retire PLAB, unexpend unpromoted: %zu", not_promoted * HeapWordSize);
    old_generation()->unexpend_promoted(not_promoted);
  }
  const size_t original_waste = plab->waste();
  HeapWord* const top = plab->top();

  // plab->retire() overwrites unused memory between plab->top() and plab->hard_end() with a dummy object to make memory parsable.
  // It adds the size of this unused memory, in words, to plab->waste().
  plab->retire();
  if (top != nullptr && plab->waste() > original_waste && is_in_old(top)) {
    // If retiring the plab created a filler object, then we need to register it with our card scanner so it can
    // safely walk the region backing the plab.
    log_debug(gc, plab)("retire_plab() is registering remnant of size %zu at " PTR_FORMAT,
                        (plab->waste() - original_waste) * HeapWordSize, p2i(top));
    // No lock is necessary because the PLAB memory is aligned on card boundaries.
    old_generation()->card_scan()->register_object_without_lock(top);
  }
}
 582 
 583 void ShenandoahGenerationalHeap::retire_plab(PLAB* plab) {
 584   Thread* thread = Thread::current();
 585   retire_plab(plab, thread);
 586 }
 587 
 588 // Make sure old-generation is large enough, but no larger than is necessary, to hold mixed evacuations
 589 // and promotions, if we anticipate either. Any deficit is provided by the young generation, subject to
 590 // mutator_xfer_limit, and any surplus is transferred to the young generation.  mutator_xfer_limit is
 591 // the maximum we're able to transfer from young to old.  This is called at the end of GC, as we prepare
 592 // for the idle span that precedes the next GC.
 593 void ShenandoahGenerationalHeap::compute_old_generation_balance(size_t mutator_xfer_limit,
 594                                                                 size_t old_trashed_regions, size_t young_trashed_regions) {
 595   shenandoah_assert_heaplocked();
 596   // We can limit the old reserve to the size of anticipated promotions:
 597   // max_old_reserve is an upper bound on memory evacuated from old and promoted to old,
 598   // clamped by the old generation space available.
 599   //
 600   // Here's the algebra.
 601   // Let SOEP = ShenandoahOldEvacPercent,
 602   //     OE = old evac,
 603   //     YE = young evac, and
 604   //     TE = total evac = OE + YE
 605   // By definition:
 606   //            SOEP/100 = OE/TE
 607   //                     = OE/(OE+YE)
 608   //  => SOEP/(100-SOEP) = OE/((OE+YE)-OE)      // componendo-dividendo: If a/b = c/d, then a/(b-a) = c/(d-c)
 609   //                     = OE/YE
 610   //  =>              OE = YE*SOEP/(100-SOEP)
 611 
 612   // We have to be careful in the event that SOEP is set to 100 by the user.
 613   assert(ShenandoahOldEvacPercent <= 100, "Error");
 614   const size_t region_size_bytes = ShenandoahHeapRegion::region_size_bytes();
 615 
 616   ShenandoahOldGeneration* old_gen = old_generation();
 617   size_t old_capacity = old_gen->max_capacity();
 618   size_t old_usage = old_gen->used(); // includes humongous waste
 619   size_t old_currently_available =
 620     ((old_capacity >= old_usage)? old_capacity - old_usage: 0) + old_trashed_regions * region_size_bytes;
 621 
 622   ShenandoahYoungGeneration* young_gen = young_generation();
 623   size_t young_capacity = young_gen->max_capacity();
 624   size_t young_usage = young_gen->used(); // includes humongous waste
 625   size_t young_available = ((young_capacity >= young_usage)? young_capacity - young_usage: 0);
 626   size_t freeset_available = free_set()->available_locked();
 627   if (young_available > freeset_available) {
 628     young_available = freeset_available;
 629   }
 630   young_available += young_trashed_regions * region_size_bytes;
 631 
 632   // The free set will reserve this amount of memory to hold young evacuations (initialized to the ideal reserve)
 633   size_t young_reserve = (young_generation()->max_capacity() * ShenandoahEvacReserve) / 100;
 634 
 635   // If ShenandoahOldEvacPercent equals 100, max_old_reserve is limited only by mutator_xfer_limit and young_reserve
 636   const size_t bound_on_old_reserve =
 637     ((old_currently_available + mutator_xfer_limit + young_reserve) * ShenandoahOldEvacPercent) / 100;
 638   size_t proposed_max_old = ((ShenandoahOldEvacPercent == 100)?
 639                              bound_on_old_reserve:
 640                              MIN2((young_reserve * ShenandoahOldEvacPercent) / (100 - ShenandoahOldEvacPercent),
 641                                   bound_on_old_reserve));
 642   if (young_reserve > young_available) {
 643     young_reserve = young_available;
 644   }
 645 
 646   // Decide how much old space we should reserve for a mixed collection
 647   size_t proposed_reserve_for_mixed = 0;
 648   const size_t old_fragmented_available =
 649     old_currently_available - (old_generation()->free_unaffiliated_regions() + old_trashed_regions) * region_size_bytes;
 650 
 651   if (old_fragmented_available > proposed_max_old) {
 652     // In this case, the old_fragmented_available is greater than the desired amount of evacuation to old.
 653     // We'll use all of this memory to hold results of old evacuation, and we'll give back to the young generation
 654     // any old regions that are not fragmented.
 655     //
 656     // This scenario may happen after we have promoted many regions in place, and each of these regions had non-zero
 657     // unused memory, so there is now an abundance of old-fragmented available memory, even more than the desired
 658     // percentage for old reserve.  We cannot transfer these fragmented regions back to young.  Instead we make the
 659     // best of the situation by using this fragmented memory for both promotions and evacuations.
 660 
 661     proposed_max_old = old_fragmented_available;
 662   }
 663   // Otherwise: old_fragmented_available <= proposed_max_old. Do not shrink proposed_max_old from the original computation.
 664 
 665   // Though we initially set proposed_reserve_for_promo to equal the entirety of old fragmented available, we have the
 666   // opportunity below to shift some of this memory into the proposed_reserve_for_mixed.
 667   size_t proposed_reserve_for_promo = old_fragmented_available;
 668   const size_t max_old_reserve = proposed_max_old;
 669 
 670   const size_t mixed_candidate_live_memory = old_generation()->unprocessed_collection_candidates_live_memory();
 671   const bool doing_mixed = (mixed_candidate_live_memory > 0);
 672   if (doing_mixed) {
 673     // In the ideal, all of the memory reserved for mixed evacuation would be unfragmented, but we don't enforce
 674     // this.  Note that the initial value of  max_evac_need is conservative because we may not evacuate all of the
 675     // remaining mixed evacuation candidates in a single cycle.
 676     const size_t max_evac_need = (size_t) (mixed_candidate_live_memory * ShenandoahOldEvacWaste);
 677     assert(old_currently_available >= old_generation()->free_unaffiliated_regions() * region_size_bytes,
 678            "Unaffiliated available must be less than total available");
 679 
 680     // We prefer to evacuate all of mixed into unfragmented memory, and will expand old in order to do so, unless
 681     // we already have too much fragmented available memory in old.
 682     proposed_reserve_for_mixed = max_evac_need;
 683     if (proposed_reserve_for_mixed + proposed_reserve_for_promo > max_old_reserve) {
 684       // We're trying to reserve more memory than is available.  So we need to shrink our reserves.
 685       size_t excess_reserves = (proposed_reserve_for_mixed + proposed_reserve_for_promo) - max_old_reserve;
 686       // We need to shrink reserves by excess_reserves.  We prefer to shrink by reducing promotion, giving priority to mixed
 687       // evacuation.  If the promotion reserve is larger than the amount we need to shrink by, do all the shrinkage there.
 688       if (proposed_reserve_for_promo > excess_reserves) {
 689         proposed_reserve_for_promo -= excess_reserves;
 690       } else {
 691         // Otherwise, we'll shrink promotion reserve to zero and we'll shrink the mixed-evac reserve by the remaining excess.
 692         excess_reserves -= proposed_reserve_for_promo;
 693         proposed_reserve_for_promo = 0;
 694         proposed_reserve_for_mixed -= excess_reserves;
 695       }
 696     }
 697   }
 698   assert(proposed_reserve_for_mixed + proposed_reserve_for_promo <= max_old_reserve,
 699          "Reserve for mixed (%zu) plus reserve for promotions (%zu) must be less than maximum old reserve (%zu)",
 700          proposed_reserve_for_mixed, proposed_reserve_for_promo, max_old_reserve);
 701 
 702   // Decide how much additional space we should reserve for promotions from young.  We give priority to mixed evacations
 703   // over promotions.
 704   const size_t promo_load = old_generation()->get_promotion_potential();
 705   const bool doing_promotions = promo_load > 0;
 706 
 707   // promo_load represents the combined total of live memory within regions that have reached tenure age.  The true
 708   // promotion potential is larger than this, because individual objects within regions that have not yet reached tenure
 709   // age may be promotable. On the other hand, some of the objects that we intend to promote in the next GC cycle may
 710   // die before they are next marked.  In the future, the promo_load will include the total size of tenurable objects
 711   // residing in regions that have not yet reached tenure age.
 712 
 713   if (doing_promotions) {
 714     // We are always doing promotions, even when old_generation->get_promotion_potential() returns 0.  As currently implemented,
 715     // get_promotion_potential() only knows the total live memory contained within young-generation regions whose age is
 716     // tenurable. It does not know whether that memory will still be live at the end of the next mark cycle, and it doesn't
 717     // know how much memory is contained within objects whose individual ages are tenurable, which reside in regions with
 718     // non-tenurable age.  We use this, as adjusted by ShenandoahPromoEvacWaste, as an approximation of the total amount of
 719     // memory to be promoted.  In the near future, we expect to implement a change that will allow get_promotion_potential()
 720     // to account also for the total memory contained within individual objects that are tenure-ready even when they do
 721     // not reside in aged regions.  This will represent a conservative over approximation of promotable memory because
 722     // some of these objects may die before the next GC cycle executes.
 723 
 724     // Be careful not to ask for too much promotion reserves. We have observed jtreg test failures under which a greedy
 725     // promotion reserve causes a humongous allocation which is awaiting a full GC to fail (specifically
 726     // gc/TestAllocHumongousFragment.java). This happens if too much of the memory reclaimed by the full GC
 727     // is immediately reserved so that it cannot be allocated by the waiting mutator. It's not clear that this
 728     // particular test is representative of the needs of typical GenShen users.  It is really a test of high frequency
 729     // Full GCs under heap fragmentation stress.
 730 
 731     size_t promo_need = (size_t) (promo_load * ShenandoahPromoEvacWaste);
 732     if (promo_need > proposed_reserve_for_promo) {
 733       const size_t available_for_additional_promotions =
 734         max_old_reserve - (proposed_reserve_for_mixed + proposed_reserve_for_promo);
 735       if (proposed_reserve_for_promo + available_for_additional_promotions >= promo_need) {
 736         proposed_reserve_for_promo = promo_need;
 737       } else {
 738         proposed_reserve_for_promo += available_for_additional_promotions;
 739       }
 740     }
 741   }
 742   // else, leave proposed_reserve_for_promo as is.  By default, it is initialized to represent old_fragmented_available.
 743 
 744   // This is the total old we want to reserve (initialized to the ideal reserve)
 745   size_t proposed_old_reserve = proposed_reserve_for_mixed + proposed_reserve_for_promo;
 746 
 747   // We now check if the old generation is running a surplus or a deficit.
 748   size_t old_region_deficit = 0;
 749   size_t old_region_surplus = 0;
 750 
 751   size_t mutator_region_xfer_limit = mutator_xfer_limit / region_size_bytes;
 752   // align the mutator_xfer_limit on region size
 753   mutator_xfer_limit = mutator_region_xfer_limit * region_size_bytes;
 754 
 755   if (old_currently_available >= proposed_old_reserve) {
 756     // We are running a surplus, so the old region surplus can go to young
 757     const size_t old_surplus = old_currently_available - proposed_old_reserve;
 758     old_region_surplus = old_surplus / region_size_bytes;
 759     const size_t unaffiliated_old_regions = old_generation()->free_unaffiliated_regions() + old_trashed_regions;
 760     old_region_surplus = MIN2(old_region_surplus, unaffiliated_old_regions);
 761     old_generation()->set_region_balance(checked_cast<ssize_t>(old_region_surplus));
 762     old_currently_available -= old_region_surplus * region_size_bytes;
 763     young_available += old_region_surplus * region_size_bytes;
 764   } else if (old_currently_available + mutator_xfer_limit >= proposed_old_reserve) {
 765     // We know that old_currently_available < proposed_old_reserve because above test failed. Expand old_currently_available.
 766     // Mutator's xfer limit is sufficient to satisfy our need: transfer all memory from there.
 767     size_t old_deficit = proposed_old_reserve - old_currently_available;
 768     old_region_deficit = (old_deficit + region_size_bytes - 1) / region_size_bytes;
 769     old_generation()->set_region_balance(0 - checked_cast<ssize_t>(old_region_deficit));
 770     old_currently_available += old_region_deficit * region_size_bytes;
 771     young_available -= old_region_deficit * region_size_bytes;
 772   } else {
 773     // We know that (old_currently_available < proposed_old_reserve) and
 774     //   (old_currently_available + mutator_xfer_limit < proposed_old_reserve) because above tests failed.
 775     // We need to shrink proposed_old_reserves.
 776 
 777     // We could potentially shrink young_reserves in order to further expand proposed_old_reserves.  Let's not bother.  The
 778     // important thing is that we keep a total amount of memory in reserve in preparation for the next GC cycle.  At
 779     // the time we choose the next collection set, we'll have an opportunity to shift some of these young reserves
 780     // into old reserves if that makes sense.
 781 
 782     // Start by taking all of mutator_xfer_limit into old_currently_available.
 783     size_t old_region_deficit = mutator_region_xfer_limit;
 784     old_generation()->set_region_balance(0 - checked_cast<ssize_t>(old_region_deficit));
 785     old_currently_available += old_region_deficit * region_size_bytes;
 786     young_available -= old_region_deficit * region_size_bytes;
 787 
 788     assert(old_currently_available < proposed_old_reserve,
 789            "Old currently available (%zu) must be less than old reserve (%zu)", old_currently_available, proposed_old_reserve);
 790 
 791     // There's not enough memory to satisfy our desire.  Scale back our old-gen intentions.  We prefer to satisfy
 792     // the budget_overrun entirely from the promotion reserve, if that is large enough.  Otherwise, we'll satisfy
 793     // the overrun from a combination of promotion and mixed-evacuation reserves.
 794     size_t budget_overrun = proposed_old_reserve - old_currently_available;
 795     if (proposed_reserve_for_promo > budget_overrun) {
 796       proposed_reserve_for_promo -= budget_overrun;
 797       // Dead code:
 798       //  proposed_old_reserve -= budget_overrun;
 799     } else {
 800       budget_overrun -= proposed_reserve_for_promo;
 801       proposed_reserve_for_promo = 0;
 802       proposed_reserve_for_mixed = (proposed_reserve_for_mixed > budget_overrun)? proposed_reserve_for_mixed - budget_overrun: 0;
 803       // Dead code:
 804       //  Note: proposed_reserve_for_promo is 0 and proposed_reserve_for_mixed may equal 0.
 805       //  proposed_old_reserve = proposed_reserve_for_mixed;
 806     }
 807   }
 808 
 809   assert(old_region_deficit == 0 || old_region_surplus == 0,
 810          "Only surplus (%zu) or deficit (%zu), never both", old_region_surplus, old_region_deficit);
 811   assert(young_reserve + proposed_reserve_for_mixed + proposed_reserve_for_promo <= old_currently_available + young_available,
 812          "Cannot reserve more memory than is available: %zu + %zu + %zu <= %zu + %zu",
 813          young_reserve, proposed_reserve_for_mixed, proposed_reserve_for_promo, old_currently_available, young_available);
 814 
 815   // deficit/surplus adjustments to generation sizes will precede rebuild
 816   young_generation()->set_evacuation_reserve(young_reserve);
 817   old_generation()->set_evacuation_reserve(proposed_reserve_for_mixed);
 818   old_generation()->set_promoted_reserve(proposed_reserve_for_promo);
 819 }
 820 
 821 void ShenandoahGenerationalHeap::coalesce_and_fill_old_regions(bool concurrent) {
 822   class ShenandoahGlobalCoalesceAndFill : public WorkerTask {
 823   private:
 824       ShenandoahPhaseTimings::Phase _phase;
 825       ShenandoahRegionIterator _regions;
 826   public:
 827     explicit ShenandoahGlobalCoalesceAndFill(ShenandoahPhaseTimings::Phase phase) :
 828       WorkerTask("Shenandoah Global Coalesce"),
 829       _phase(phase) {}
 830 
 831     void work(uint worker_id) override {
 832       ShenandoahWorkerTimingsTracker timer(_phase,
 833                                            ShenandoahPhaseTimings::ScanClusters,
 834                                            worker_id, true);
 835       ShenandoahHeapRegion* region;
 836       while ((region = _regions.next()) != nullptr) {
 837         // old region is not in the collection set and was not immediately trashed
 838         if (region->is_old() && region->is_active() && !region->is_humongous()) {
 839           // Reset the coalesce and fill boundary because this is a global collect
 840           // and cannot be preempted by young collects. We want to be sure the entire
 841           // region is coalesced here and does not resume from a previously interrupted
 842           // or completed coalescing.
 843           region->begin_preemptible_coalesce_and_fill();
 844           region->oop_coalesce_and_fill(false);
 845         }
 846       }
 847     }
 848   };
 849 
 850   ShenandoahPhaseTimings::Phase phase = concurrent ?
 851           ShenandoahPhaseTimings::conc_coalesce_and_fill :
 852           ShenandoahPhaseTimings::degen_gc_coalesce_and_fill;
 853 
 854   // This is not cancellable
 855   ShenandoahGlobalCoalesceAndFill coalesce(phase);
 856   workers()->run_task(&coalesce);
 857   old_generation()->set_parsable(true);
 858 }
 859 
// Parallel task that updates all heap references after evacuation, for young, mixed,
// and global collections.  Template parameter CONCURRENT selects between concurrent
// (suspendible, cancellable-yielding) and stop-the-world (degenerated) execution.
// Work proceeds in two phases: a region-at-a-time pass over the region iterator,
// then (for young/mixed cycles) a chunked pass over the remembered set in old regions.
template<bool CONCURRENT>
class ShenandoahGenerationalUpdateHeapRefsTask : public WorkerTask {
private:
  // For update refs, _generation will be young or global. Mixed collections use the young generation.
  ShenandoahGeneration* _generation;
  ShenandoahGenerationalHeap* _heap;
  ShenandoahRegionIterator* _regions;      // shared iterator: workers claim whole regions
  ShenandoahRegionChunkIterator* _work_chunks; // shared iterator: workers claim sub-region chunks for remembered-set work

public:
  ShenandoahGenerationalUpdateHeapRefsTask(ShenandoahGeneration* generation,
                                           ShenandoahRegionIterator* regions,
                                           ShenandoahRegionChunkIterator* work_chunks) :
          WorkerTask("Shenandoah Update References"),
          _generation(generation),
          _heap(ShenandoahGenerationalHeap::heap()),
          _regions(regions),
          _work_chunks(work_chunks)
  {
    const bool old_bitmap_stable = _heap->old_generation()->is_mark_complete();
    log_debug(gc, remset)("Update refs, scan remembered set using bitmap: %s", BOOL_TO_STR(old_bitmap_stable));
  }

  void work(uint worker_id) override {
    if (CONCURRENT) {
      // Concurrent mode: join the suspendible thread set so safepoints can proceed.
      ShenandoahConcurrentWorkerSession worker_session(worker_id);
      ShenandoahSuspendibleThreadSetJoiner stsj;
      do_work<ShenandoahConcUpdateRefsClosure>(worker_id);
    } else {
      ShenandoahParallelWorkerSession worker_session(worker_id);
      do_work<ShenandoahNonConcUpdateRefsClosure>(worker_id);
    }
  }

private:
  // Main per-worker driver: updates references in claimed regions, then (for young
  // cycles) continues with remembered-set chunks.  T is the oop-updating closure type.
  template<class T>
  void do_work(uint worker_id) {
    T cl;

    if (CONCURRENT && (worker_id == 0)) {
      // We ask the first worker to replenish the Mutator free set by moving regions previously reserved to hold the
      // results of evacuation.  These reserves are no longer necessary because evacuation has completed.
      size_t cset_regions = _heap->collection_set()->count();

      // Now that evacuation is done, we can reassign any regions that had been reserved to hold the results of evacuation
      // to the mutator free set.  At the end of GC, we will have cset_regions newly evacuated fully empty regions from
      // which we will be able to replenish the Collector free set and the OldCollector free set in preparation for the
      // next GC cycle.
      _heap->free_set()->move_regions_from_collector_to_mutator(cset_regions);
    }
    // If !CONCURRENT, there's no value in expanding Mutator free set

    ShenandoahHeapRegion* r = _regions->next();
    // We update references for global, mixed, and young collections.
    assert(_generation->is_mark_complete(), "Expected complete marking");
    ShenandoahMarkingContext* const ctx = _heap->marking_context();
    bool is_mixed = _heap->collection_set()->has_old_regions();
    while (r != nullptr) {
      HeapWord* update_watermark = r->get_update_watermark();
      assert(update_watermark >= r->bottom(), "sanity");

      log_debug(gc)("Update refs worker " UINT32_FORMAT ", looking at region %zu", worker_id, r->index());
      if (r->is_active() && !r->is_cset()) {
        if (r->is_young()) {
          _heap->marked_object_oop_iterate(r, &cl, update_watermark);
        } else if (r->is_old()) {
          if (_generation->is_global()) {

            _heap->marked_object_oop_iterate(r, &cl, update_watermark);
          }
          // Otherwise, this is an old region in a young or mixed cycle.  Process it during a second phase, below.
        } else {
          // Because updating of references runs concurrently, it is possible that a FREE inactive region transitions
          // to a non-free active region while this loop is executing.  Whenever this happens, the changing of a region's
          // active status may propagate at a different speed than the changing of the region's affiliation.

          // When we reach this control point, it is because a race has allowed a region's is_active() status to be seen
          // by this thread before the region's affiliation() is seen by this thread.

          // It's ok for this race to occur because the newly transformed region does not have any references to be
          // updated.

          assert(r->get_update_watermark() == r->bottom(),
                 "%s Region %zu is_active but not recognized as YOUNG or OLD so must be newly transitioned from FREE",
                 r->affiliation_name(), r->index());
        }
      }

      // Check for cancellation between regions (and yield to safepoints if CONCURRENT).
      if (_heap->check_cancelled_gc_and_yield(CONCURRENT)) {
        return;
      }

      r = _regions->next();
    }

    if (_generation->is_young()) {
      // Since this is generational and not GLOBAL, we have to process the remembered set.  There's no remembered
      // set processing if not in generational mode or if GLOBAL mode.

      // After this thread has exhausted its traditional update-refs work, it continues with updating refs within
      // remembered set. The remembered set workload is better balanced between threads, so threads that are "behind"
      // can catch up with other threads during this phase, allowing all threads to work more effectively in parallel.
      update_references_in_remembered_set(worker_id, cl, ctx, is_mixed);
    }
  }

  // Second phase (young/mixed cycles only): update references held in old regions by
  // scanning the remembered set in fixed-size chunks claimed from _work_chunks.
  // Chunked assignment balances load better than whole-region claiming.
  template<class T>
  void update_references_in_remembered_set(uint worker_id, T &cl, const ShenandoahMarkingContext* ctx, bool is_mixed) {

    struct ShenandoahRegionChunk assignment;
    ShenandoahScanRemembered* scanner = _heap->old_generation()->card_scan();

    while (!_heap->check_cancelled_gc_and_yield(CONCURRENT) && _work_chunks->next(&assignment)) {
      // Keep grabbing next work chunk to process until finished, or asked to yield
      ShenandoahHeapRegion* r = assignment._r;
      if (r->is_active() && !r->is_cset() && r->is_old()) {
        // Clip the chunk to [bottom + offset, update_watermark).
        HeapWord* start_of_range = r->bottom() + assignment._chunk_offset;
        HeapWord* end_of_range = r->get_update_watermark();
        if (end_of_range > start_of_range + assignment._chunk_size) {
          end_of_range = start_of_range + assignment._chunk_size;
        }

        // Chunk lies entirely above the update watermark: nothing to do.
        if (start_of_range >= end_of_range) {
          continue;
        }

        // Old region in a young cycle or mixed cycle.
        if (is_mixed) {
          if (r->is_humongous()) {
            // Need to examine both dirty and clean cards during mixed evac.
            r->oop_iterate_humongous_slice_all(&cl,start_of_range, assignment._chunk_size);
          } else {
            // Since this is mixed evacuation, old regions that are candidates for collection have not been coalesced
            // and filled.  This will use mark bits to find objects that need to be updated.
            update_references_in_old_region(cl, ctx, scanner, r, start_of_range, end_of_range);
          }
        } else {
          // This is a young evacuation
          size_t cluster_size = CardTable::card_size_in_words() * ShenandoahCardCluster::CardsPerCluster;
          size_t clusters = assignment._chunk_size / cluster_size;
          assert(clusters * cluster_size == assignment._chunk_size, "Chunk assignment must align on cluster boundaries");
          scanner->process_region_slice(r, assignment._chunk_offset, clusters, end_of_range, &cl, true, worker_id);
        }
      }
    }
  }

  // Walk marked objects in [start_of_range, end_of_range) of an old region and apply
  // the updating closure to each.  Uses mark bits below TAMS and object sizes above.
  template<class T>
  void update_references_in_old_region(T &cl, const ShenandoahMarkingContext* ctx, ShenandoahScanRemembered* scanner,
                                    const ShenandoahHeapRegion* r, HeapWord* start_of_range,
                                    HeapWord* end_of_range) const {
    // In case last object in my range spans boundary of my chunk, I may need to scan all the way to top()
    ShenandoahObjectToOopBoundedClosure<T> objs(&cl, start_of_range, r->top());

    // Any object that begins in a previous range is part of a different scanning assignment.  Any object that
    // starts after end_of_range is also not my responsibility.  (Either allocated during evacuation, so does
    // not hold pointers to from-space, or is beyond the range of my assigned work chunk.)

    // Find the first object that begins in my range, if there is one. Note that `p` will be set to `end_of_range`
    // when no live object is found in the range.
    HeapWord* tams = ctx->top_at_mark_start(r);
    HeapWord* p = get_first_object_start_word(ctx, scanner, tams, start_of_range, end_of_range);

    while (p < end_of_range) {
      // p is known to point to the beginning of marked object obj
      oop obj = cast_to_oop(p);
      objs.do_object(obj);
      HeapWord* prev_p = p;
      p += obj->size();
      if (p < tams) {
        p = ctx->get_next_marked_addr(p, tams);
        // If there are no more marked objects before tams, this returns tams.  Note that tams is
        // either >= end_of_range, or tams is the start of an object that is marked.
      }
      assert(p != prev_p, "Lack of forward progress");
    }
  }

  // Locate the first object that starts within [start_of_range, end_of_range), or
  // return end_of_range if there is none.  Above TAMS the mark bitmap is not valid,
  // so the remembered-set crossing map is consulted instead.
  HeapWord* get_first_object_start_word(const ShenandoahMarkingContext* ctx, ShenandoahScanRemembered* scanner, HeapWord* tams,
                                        HeapWord* start_of_range, HeapWord* end_of_range) const {
    HeapWord* p = start_of_range;

    if (p >= tams) {
      // We cannot use ctx->is_marked(obj) to test whether an object begins at this address.  Instead,
      // we need to use the remembered set crossing map to advance p to the first object that starts
      // within the enclosing card.
      size_t card_index = scanner->card_index_for_addr(start_of_range);
      while (true) {
        HeapWord* first_object = scanner->first_object_in_card(card_index);
        if (first_object != nullptr) {
          p = first_object;
          break;
        } else if (scanner->addr_for_card_index(card_index + 1) < end_of_range) {
          card_index++;
        } else {
          // Signal that no object was found in range
          p = end_of_range;
          break;
        }
      }
    } else if (!ctx->is_marked(cast_to_oop(p))) {
      p = ctx->get_next_marked_addr(p, tams);
      // If there are no more marked objects before tams, this returns tams.
      // Note that tams is either >= end_of_range, or tams is the start of an object that is marked.
    }
    return p;
  }
};
1068 
1069 void ShenandoahGenerationalHeap::update_heap_references(ShenandoahGeneration* generation, bool concurrent) {
1070   assert(!is_full_gc_in_progress(), "Only for concurrent and degenerated GC");
1071   const uint nworkers = workers()->active_workers();
1072   ShenandoahRegionChunkIterator work_list(nworkers);
1073   if (concurrent) {
1074     ShenandoahGenerationalUpdateHeapRefsTask<true> task(generation, &_update_refs_iterator, &work_list);
1075     workers()->run_task(&task);
1076   } else {
1077     ShenandoahGenerationalUpdateHeapRefsTask<false> task(generation, &_update_refs_iterator, &work_list);
1078     workers()->run_task(&task);
1079   }
1080 
1081   if (ShenandoahEnableCardStats) {
1082     // Only do this if we are collecting card stats
1083     ShenandoahScanRemembered* card_scan = old_generation()->card_scan();
1084     assert(card_scan != nullptr, "Card table must exist when card stats are enabled");
1085     card_scan->log_card_stats(nworkers, CARD_STAT_UPDATE_REFS);
1086   }
1087 }
1088 
1089 struct ShenandoahCompositeRegionClosure {
1090   template<typename C1, typename C2>
1091   class Closure : public ShenandoahHeapRegionClosure {
1092   private:
1093     C1 &_c1;
1094     C2 &_c2;
1095 
1096   public:
1097     Closure(C1 &c1, C2 &c2) : ShenandoahHeapRegionClosure(), _c1(c1), _c2(c2) {}
1098 
1099     void heap_region_do(ShenandoahHeapRegion* r) override {
1100       _c1.heap_region_do(r);
1101       _c2.heap_region_do(r);
1102     }
1103 
1104     bool is_thread_safe() override {
1105       return _c1.is_thread_safe() && _c2.is_thread_safe();
1106     }
1107   };
1108 
1109   template<typename C1, typename C2>
1110   static Closure<C1, C2> of(C1 &c1, C2 &c2) {
1111     return Closure<C1, C2>(c1, c2);
1112   }
1113 };
1114 
1115 class ShenandoahUpdateRegionAges : public ShenandoahHeapRegionClosure {
1116 private:
1117   ShenandoahMarkingContext* _ctx;
1118 
1119 public:
1120   explicit ShenandoahUpdateRegionAges(ShenandoahMarkingContext* ctx) : _ctx(ctx) { }
1121 
1122   void heap_region_do(ShenandoahHeapRegion* r) override {
1123     // Maintenance of region age must follow evacuation in order to account for
1124     // evacuation allocations within survivor regions.  We consult region age during
1125     // the subsequent evacuation to determine whether certain objects need to
1126     // be promoted.
1127     if (r->is_young() && r->is_active()) {
1128       HeapWord *tams = _ctx->top_at_mark_start(r);
1129       HeapWord *top = r->top();
1130 
1131       // Allocations move the watermark when top moves.  However, compacting
1132       // objects will sometimes lower top beneath the watermark, after which,
1133       // attempts to read the watermark will assert out (watermark should not be
1134       // higher than top).
1135       if (top > tams) {
1136         // There have been allocations in this region since the start of the cycle.
1137         // Any objects new to this region must not assimilate elevated age.
1138         r->reset_age();
1139       } else if (ShenandoahGenerationalHeap::heap()->is_aging_cycle()) {
1140         r->increment_age();
1141       }
1142     }
1143   }
1144 
1145   bool is_thread_safe() override {
1146     return true;
1147   }
1148 };
1149 
1150 void ShenandoahGenerationalHeap::final_update_refs_update_region_states() {
1151   ShenandoahSynchronizePinnedRegionStates pins;
1152   ShenandoahUpdateRegionAges ages(marking_context());
1153   auto cl = ShenandoahCompositeRegionClosure::of(pins, ages);
1154   parallel_heap_region_iterate(&cl);
1155 }
1156 
1157 void ShenandoahGenerationalHeap::complete_degenerated_cycle() {
1158   shenandoah_assert_heaplocked_or_safepoint();
1159   if (!old_generation()->is_parsable()) {
1160     ShenandoahGCPhase phase(ShenandoahPhaseTimings::degen_gc_coalesce_and_fill);
1161     coalesce_and_fill_old_regions(false);
1162   }
1163 
1164   log_info(gc, cset)("Degenerated cycle complete, promotions reserved: %zu, promotions expended: %zu, failed count: %zu, failed bytes: %zu",
1165                      old_generation()->get_promoted_reserve(), old_generation()->get_promoted_expended(),
1166                      old_generation()->get_promotion_failed_count(), old_generation()->get_promotion_failed_words() * HeapWordSize);
1167 }
1168 
1169 void ShenandoahGenerationalHeap::complete_concurrent_cycle() {
1170   if (!old_generation()->is_parsable()) {
1171     // Class unloading may render the card offsets unusable, so we must rebuild them before
1172     // the next remembered set scan. We _could_ let the control thread do this sometime after
1173     // the global cycle has completed and before the next young collection, but under memory
1174     // pressure the control thread may not have the time (that is, because it's running back
1175     // to back GCs). In that scenario, we would have to make the old regions parsable before
1176     // we could start a young collection. This could delay the start of the young cycle and
1177     // throw off the heuristics.
1178     entry_global_coalesce_and_fill();
1179   }
1180 
1181   log_info(gc, cset)("Concurrent cycle complete, promotions reserved: %zu, promotions expended: %zu, failed count: %zu, failed bytes: %zu",
1182                      old_generation()->get_promoted_reserve(), old_generation()->get_promoted_expended(),
1183                      old_generation()->get_promotion_failed_count(), old_generation()->get_promotion_failed_words() * HeapWordSize);
1184 }
1185 
1186 void ShenandoahGenerationalHeap::entry_global_coalesce_and_fill() {
1187   const char* msg = "Coalescing and filling old regions";
1188   ShenandoahConcurrentPhase gc_phase(msg, ShenandoahPhaseTimings::conc_coalesce_and_fill);
1189 
1190   TraceCollectorStats tcs(monitoring_support()->concurrent_collection_counters());
1191   EventMark em("%s", msg);
1192   ShenandoahWorkerScope scope(workers(),
1193                               ShenandoahWorkerPolicy::calc_workers_for_conc_marking(),
1194                               "concurrent coalesce and fill");
1195 
1196   coalesce_and_fill_old_regions(true);
1197 }
1198 
1199 void ShenandoahGenerationalHeap::update_region_ages(ShenandoahMarkingContext* ctx) {
1200   ShenandoahUpdateRegionAges cl(ctx);
1201   parallel_heap_region_iterate(&cl);
1202 }