Udiff src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp

src/hotspot/share/gc/shenandoah/shenandoahHeap.cpp

@@ -1,8 +1,9 @@
  /*
-  * Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
+  * Copyright (c) 2023, 2024, Oracle and/or its affiliates. All rights reserved.
   * Copyright (c) 2013, 2022, Red Hat, Inc. All rights reserved.
+  * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
   * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   *
   * This code is free software; you can redistribute it and/or modify it
   * under the terms of the GNU General Public License version 2 only, as
   * published by the Free Software Foundation.

@@ -29,48 +30,61 @@
  
  #include "gc/shared/classUnloadingContext.hpp"
  #include "gc/shared/gcArguments.hpp"
  #include "gc/shared/gcTimer.hpp"
  #include "gc/shared/gcTraceTime.inline.hpp"
+ #include "gc/shared/gc_globals.hpp"
  #include "gc/shared/locationPrinter.inline.hpp"
  #include "gc/shared/memAllocator.hpp"
  #include "gc/shared/plab.hpp"
  #include "gc/shared/tlab_globals.hpp"
  
+ #include "gc/shenandoah/heuristics/shenandoahOldHeuristics.hpp"
+ #include "gc/shenandoah/heuristics/shenandoahYoungHeuristics.hpp"
+ #include "gc/shenandoah/shenandoahAllocRequest.hpp"
  #include "gc/shenandoah/shenandoahBarrierSet.hpp"
  #include "gc/shenandoah/shenandoahClosures.inline.hpp"
  #include "gc/shenandoah/shenandoahCollectionSet.hpp"
  #include "gc/shenandoah/shenandoahCollectorPolicy.hpp"
  #include "gc/shenandoah/shenandoahConcurrentMark.hpp"
  #include "gc/shenandoah/shenandoahMarkingContext.inline.hpp"
  #include "gc/shenandoah/shenandoahControlThread.hpp"
  #include "gc/shenandoah/shenandoahFreeSet.hpp"
+ #include "gc/shenandoah/shenandoahGenerationalEvacuationTask.hpp"
+ #include "gc/shenandoah/shenandoahGenerationalHeap.hpp"
+ #include "gc/shenandoah/shenandoahGlobalGeneration.hpp"
  #include "gc/shenandoah/shenandoahPhaseTimings.hpp"
  #include "gc/shenandoah/shenandoahHeap.inline.hpp"
+ #include "gc/shenandoah/shenandoahHeapRegionClosures.hpp"
  #include "gc/shenandoah/shenandoahHeapRegion.inline.hpp"
  #include "gc/shenandoah/shenandoahHeapRegionSet.hpp"
  #include "gc/shenandoah/shenandoahInitLogger.hpp"
  #include "gc/shenandoah/shenandoahMarkingContext.inline.hpp"
  #include "gc/shenandoah/shenandoahMemoryPool.hpp"
- #include "gc/shenandoah/shenandoahMetrics.hpp"
  #include "gc/shenandoah/shenandoahMonitoringSupport.hpp"
+ #include "gc/shenandoah/shenandoahOldGeneration.hpp"
  #include "gc/shenandoah/shenandoahOopClosures.inline.hpp"
  #include "gc/shenandoah/shenandoahPacer.inline.hpp"
  #include "gc/shenandoah/shenandoahPadding.hpp"
  #include "gc/shenandoah/shenandoahParallelCleaning.inline.hpp"
  #include "gc/shenandoah/shenandoahReferenceProcessor.hpp"
  #include "gc/shenandoah/shenandoahRootProcessor.inline.hpp"
+ #include "gc/shenandoah/shenandoahScanRemembered.inline.hpp"
  #include "gc/shenandoah/shenandoahSTWMark.hpp"
+ #include "gc/shenandoah/shenandoahUncommitThread.hpp"
  #include "gc/shenandoah/shenandoahUtils.hpp"
  #include "gc/shenandoah/shenandoahVerifier.hpp"
  #include "gc/shenandoah/shenandoahCodeRoots.hpp"
  #include "gc/shenandoah/shenandoahVMOperations.hpp"
  #include "gc/shenandoah/shenandoahWorkGroup.hpp"
  #include "gc/shenandoah/shenandoahWorkerPolicy.hpp"
- #include "gc/shenandoah/mode/shenandoahIUMode.hpp"
+ #include "gc/shenandoah/shenandoahYoungGeneration.hpp"
+ #include "gc/shenandoah/mode/shenandoahGenerationalMode.hpp"
  #include "gc/shenandoah/mode/shenandoahPassiveMode.hpp"
  #include "gc/shenandoah/mode/shenandoahSATBMode.hpp"
+ #include "utilities/globalDefinitions.hpp"
+ 
  #if INCLUDE_JFR
  #include "gc/shenandoah/shenandoahJfrSupport.hpp"
  #endif
  
  #include "classfile/systemDictionary.hpp"

@@ -83,10 +97,11 @@
  #include "runtime/globals.hpp"
  #include "runtime/interfaceSupport.inline.hpp"
  #include "runtime/java.hpp"
  #include "runtime/orderAccess.hpp"
  #include "runtime/safepointMechanism.hpp"
+ #include "runtime/threads.hpp"
  #include "runtime/vmThread.hpp"
  #include "services/mallocTracker.hpp"
  #include "services/memTracker.hpp"
  #include "utilities/events.hpp"
  #include "utilities/powerOfTwo.hpp"

@@ -158,25 +173,21 @@
    _num_regions = ShenandoahHeapRegion::region_count();
    assert(_num_regions == (max_byte_size / reg_size_bytes),
           "Regions should cover entire heap exactly: " SIZE_FORMAT " != " SIZE_FORMAT "/" SIZE_FORMAT,
           _num_regions, max_byte_size, reg_size_bytes);
  
-   // Now we know the number of regions, initialize the heuristics.
-   initialize_heuristics();
- 
    size_t num_committed_regions = init_byte_size / reg_size_bytes;
    num_committed_regions = MIN2(num_committed_regions, _num_regions);
    assert(num_committed_regions <= _num_regions, "sanity");
    _initial_size = num_committed_regions * reg_size_bytes;
  
    size_t num_min_regions = min_byte_size / reg_size_bytes;
    num_min_regions = MIN2(num_min_regions, _num_regions);
    assert(num_min_regions <= _num_regions, "sanity");
    _minimum_size = num_min_regions * reg_size_bytes;
  
-   // Default to max heap size.
-   _soft_max_size = _num_regions * reg_size_bytes;
+   _soft_max_size = SoftMaxHeapSize;
  
    _committed = _initial_size;
  
    size_t heap_page_size   = UseLargePages ? os::large_page_size() : os::vm_page_size();
    size_t bitmap_page_size = UseLargePages ? os::large_page_size() : os::vm_page_size();

@@ -191,10 +202,13 @@
    _heap_region = MemRegion((HeapWord*)heap_rs.base(), heap_rs.size() / HeapWordSize);
    _heap_region_special = heap_rs.special();
  
    assert((((size_t) base()) & ShenandoahHeapRegion::region_size_bytes_mask()) == 0,
           "Misaligned heap: " PTR_FORMAT, p2i(base()));
+   os::trace_page_sizes_for_requested_size("Heap",
+                                           max_byte_size, heap_rs.page_size(), heap_alignment,
+                                           heap_rs.base(), heap_rs.size());
  
  #if SHENANDOAH_OPTIMIZED_MARKTASK
    // The optimized ShenandoahMarkTask takes some bits away from the full object bits.
    // Fail if we ever attempt to address more than we can.
    if ((uintptr_t)heap_rs.end() >= ShenandoahMarkTask::max_addressable()) {

@@ -210,16 +224,38 @@
    if (!_heap_region_special) {
      os::commit_memory_or_exit(sh_rs.base(), _initial_size, heap_alignment, false,
                                "Cannot commit heap memory");
    }
  
+   BarrierSet::set_barrier_set(new ShenandoahBarrierSet(this, _heap_region));
+ 
+   // Now we know the number of regions and heap sizes, initialize the heuristics.
+   initialize_heuristics();
+ 
+   assert(_heap_region.byte_size() == heap_rs.size(), "Need to know reserved size for card table");
+ 
+   //
+   // Worker threads must be initialized after the barrier is configured
+   //
+   _workers = new ShenandoahWorkerThreads("Shenandoah GC Threads", _max_workers);
+   if (_workers == nullptr) {
+     vm_exit_during_initialization("Failed necessary allocation.");
+   } else {
+     _workers->initialize_workers();
+   }
+ 
+   if (ParallelGCThreads > 1) {
+     _safepoint_workers = new ShenandoahWorkerThreads("Safepoint Cleanup Thread", ParallelGCThreads);
+     _safepoint_workers->initialize_workers();
+   }
+ 
    //
    // Reserve and commit memory for bitmap(s)
    //
  
-   _bitmap_size = ShenandoahMarkBitMap::compute_size(heap_rs.size());
-   _bitmap_size = align_up(_bitmap_size, bitmap_page_size);
+   size_t bitmap_size_orig = ShenandoahMarkBitMap::compute_size(heap_rs.size());
+   _bitmap_size = align_up(bitmap_size_orig, bitmap_page_size);
  
    size_t bitmap_bytes_per_region = reg_size_bytes / ShenandoahMarkBitMap::heap_map_factor();
  
    guarantee(bitmap_bytes_per_region != 0,
              "Bitmap bytes per region should not be zero");

@@ -241,26 +277,34 @@
    guarantee(((_bitmap_bytes_per_slice) % bitmap_page_size) == 0,
              "Bitmap slices should be page-granular: bps = " SIZE_FORMAT ", page size = " SIZE_FORMAT,
              _bitmap_bytes_per_slice, bitmap_page_size);
  
    ReservedSpace bitmap(_bitmap_size, bitmap_page_size);
+   os::trace_page_sizes_for_requested_size("Mark Bitmap",
+                                           bitmap_size_orig, bitmap.page_size(), bitmap_page_size,
+                                           bitmap.base(),
+                                           bitmap.size());
    MemTracker::record_virtual_memory_type(bitmap.base(), mtGC);
    _bitmap_region = MemRegion((HeapWord*) bitmap.base(), bitmap.size() / HeapWordSize);
    _bitmap_region_special = bitmap.special();
  
    size_t bitmap_init_commit = _bitmap_bytes_per_slice *
-                               align_up(num_committed_regions, _bitmap_regions_per_slice) / _bitmap_regions_per_slice;
+     align_up(num_committed_regions, _bitmap_regions_per_slice) / _bitmap_regions_per_slice;
    bitmap_init_commit = MIN2(_bitmap_size, bitmap_init_commit);
    if (!_bitmap_region_special) {
      os::commit_memory_or_exit((char *) _bitmap_region.start(), bitmap_init_commit, bitmap_page_size, false,
                                "Cannot commit bitmap memory");
    }
  
-   _marking_context = new ShenandoahMarkingContext(_heap_region, _bitmap_region, _num_regions, _max_workers);
+   _marking_context = new ShenandoahMarkingContext(_heap_region, _bitmap_region, _num_regions);
  
    if (ShenandoahVerify) {
      ReservedSpace verify_bitmap(_bitmap_size, bitmap_page_size);
+     os::trace_page_sizes_for_requested_size("Verify Bitmap",
+                                             bitmap_size_orig, verify_bitmap.page_size(), bitmap_page_size,
+                                             verify_bitmap.base(),
+                                             verify_bitmap.size());
      if (!verify_bitmap.special()) {
        os::commit_memory_or_exit(verify_bitmap.base(), verify_bitmap.size(), bitmap_page_size, false,
                                  "Cannot commit verification bitmap memory");
      }
      MemTracker::record_virtual_memory_type(verify_bitmap.base(), mtGC);

@@ -268,58 +312,74 @@
      _verification_bit_map.initialize(_heap_region, verify_bitmap_region);
      _verifier = new ShenandoahVerifier(this, &_verification_bit_map);
    }
  
    // Reserve aux bitmap for use in object_iterate(). We don't commit it here.
-   ReservedSpace aux_bitmap(_bitmap_size, bitmap_page_size);
+   size_t aux_bitmap_page_size = bitmap_page_size;
+ 
+   ReservedSpace aux_bitmap(_bitmap_size, aux_bitmap_page_size);
+   os::trace_page_sizes_for_requested_size("Aux Bitmap",
+                                           bitmap_size_orig, aux_bitmap.page_size(), aux_bitmap_page_size,
+                                           aux_bitmap.base(), aux_bitmap.size());
    MemTracker::record_virtual_memory_type(aux_bitmap.base(), mtGC);
    _aux_bitmap_region = MemRegion((HeapWord*) aux_bitmap.base(), aux_bitmap.size() / HeapWordSize);
    _aux_bitmap_region_special = aux_bitmap.special();
    _aux_bit_map.initialize(_heap_region, _aux_bitmap_region);
  
    //
    // Create regions and region sets
    //
    size_t region_align = align_up(sizeof(ShenandoahHeapRegion), SHENANDOAH_CACHE_LINE_SIZE);
-   size_t region_storage_size = align_up(region_align * _num_regions, region_page_size);
-   region_storage_size = align_up(region_storage_size, os::vm_allocation_granularity());
+   size_t region_storage_size_orig = region_align * _num_regions;
+   size_t region_storage_size = align_up(region_storage_size_orig,
+                                         MAX2(region_page_size, os::vm_allocation_granularity()));
  
    ReservedSpace region_storage(region_storage_size, region_page_size);
+   os::trace_page_sizes_for_requested_size("Region Storage",
+                                           region_storage_size_orig, region_storage.page_size(), region_page_size,
+                                           region_storage.base(), region_storage.size());
    MemTracker::record_virtual_memory_type(region_storage.base(), mtGC);
    if (!region_storage.special()) {
      os::commit_memory_or_exit(region_storage.base(), region_storage_size, region_page_size, false,
                                "Cannot commit region memory");
    }
  
    // Try to fit the collection set bitmap at lower addresses. This optimizes code generation for cset checks.
    // Go up until a sensible limit (subject to encoding constraints) and try to reserve the space there.
    // If not successful, bite a bullet and allocate at whatever address.
    {
-     size_t cset_align = MAX2<size_t>(os::vm_page_size(), os::vm_allocation_granularity());
-     size_t cset_size = align_up(((size_t) sh_rs.base() + sh_rs.size()) >> ShenandoahHeapRegion::region_size_bytes_shift(), cset_align);
+     const size_t cset_align = MAX2<size_t>(os::vm_page_size(), os::vm_allocation_granularity());
+     const size_t cset_size = align_up(((size_t) sh_rs.base() + sh_rs.size()) >> ShenandoahHeapRegion::region_size_bytes_shift(), cset_align);
+     const size_t cset_page_size = os::vm_page_size();
  
      uintptr_t min = round_up_power_of_2(cset_align);
      uintptr_t max = (1u << 30u);
+     ReservedSpace cset_rs;
  
      for (uintptr_t addr = min; addr <= max; addr <<= 1u) {
        char* req_addr = (char*)addr;
        assert(is_aligned(req_addr, cset_align), "Should be aligned");
-       ReservedSpace cset_rs(cset_size, cset_align, os::vm_page_size(), req_addr);
+       cset_rs = ReservedSpace(cset_size, cset_align, cset_page_size, req_addr);
        if (cset_rs.is_reserved()) {
          assert(cset_rs.base() == req_addr, "Allocated where requested: " PTR_FORMAT ", " PTR_FORMAT, p2i(cset_rs.base()), addr);
          _collection_set = new ShenandoahCollectionSet(this, cset_rs, sh_rs.base());
          break;
        }
      }
  
      if (_collection_set == nullptr) {
-       ReservedSpace cset_rs(cset_size, cset_align, os::vm_page_size());
+       cset_rs = ReservedSpace(cset_size, cset_align, os::vm_page_size());
        _collection_set = new ShenandoahCollectionSet(this, cset_rs, sh_rs.base());
      }
+     os::trace_page_sizes_for_requested_size("Collection Set",
+                                             cset_size, cset_rs.page_size(), cset_page_size,
+                                             cset_rs.base(),
+                                             cset_rs.size());
    }
  
    _regions = NEW_C_HEAP_ARRAY(ShenandoahHeapRegion*, _num_regions, mtGC);
+   _affiliations = NEW_C_HEAP_ARRAY(uint8_t, _num_regions, mtGC);
    _free_set = new ShenandoahFreeSet(this, _num_regions);
  
    {
      ShenandoahHeapLocker locker(lock());

@@ -332,16 +392,20 @@
        assert(is_aligned(r, SHENANDOAH_CACHE_LINE_SIZE), "Sanity");
  
        _marking_context->initialize_top_at_mark_start(r);
        _regions[i] = r;
        assert(!collection_set()->is_in(i), "New region should not be in collection set");
+ 
+       _affiliations[i] = ShenandoahAffiliation::FREE;
      }
  
-     // Initialize to complete
-     _marking_context->mark_complete();
+     size_t young_cset_regions, old_cset_regions;
  
-     _free_set->rebuild();
+     // We are initializing free set.  We ignore cset region tallies.
+     size_t first_old, last_old, num_old;
+     _free_set->prepare_to_rebuild(young_cset_regions, old_cset_regions, first_old, last_old, num_old);
+     _free_set->finish_rebuild(young_cset_regions, old_cset_regions, num_old);
    }
  
    if (AlwaysPreTouch) {
      // For NUMA, it is important to pre-touch the storage under bitmaps with worker threads,
      // before initialize() below zeroes it with initializing thread. For any given region,

@@ -349,20 +413,10 @@
      ShenandoahPushWorkerScope scope(workers(), _max_workers, false);
  
      _pretouch_heap_page_size = heap_page_size;
      _pretouch_bitmap_page_size = bitmap_page_size;
  
- #ifdef LINUX
-     // UseTransparentHugePages would madvise that backing memory can be coalesced into huge
-     // pages. But, the kernel needs to know that every small page is used, in order to coalesce
-     // them into huge one. Therefore, we need to pretouch with smaller pages.
-     if (UseTransparentHugePages) {
-       _pretouch_heap_page_size = (size_t)os::vm_page_size();
-       _pretouch_bitmap_page_size = (size_t)os::vm_page_size();
-     }
- #endif
- 
      // OS memory managers may want to coalesce back-to-back pages. Make their jobs
      // simpler by pre-touching continuous spaces (heap and bitmap) separately.
  
      ShenandoahPretouchBitmapTask bcl(bitmap.base(), _bitmap_size, _pretouch_bitmap_page_size);
      _workers->run_task(&bcl);

@@ -394,29 +448,39 @@
    ShenandoahCodeRoots::initialize();
  
    if (ShenandoahPacing) {
      _pacer = new ShenandoahPacer(this);
      _pacer->setup_for_idle();
-   } else {
-     _pacer = nullptr;
    }
  
-   _control_thread = new ShenandoahControlThread();
+   initialize_controller();
  
-   ShenandoahInitLogger::print();
+   if (ShenandoahUncommit) {
+     _uncommit_thread = new ShenandoahUncommitThread(this);
+   }
+ 
+   print_init_logger();
  
    return JNI_OK;
  }
  
+ void ShenandoahHeap::initialize_controller() {
+   _control_thread = new ShenandoahControlThread(); // Split out of initialize(), which now calls this instead of creating the control thread inline.
+ }
+ 
+ void ShenandoahHeap::print_init_logger() const {
+   ShenandoahInitLogger::print(); // Split out of initialize(), which calls this wrapper as its last step before returning JNI_OK.
+ }
+ 
  void ShenandoahHeap::initialize_mode() {
    if (ShenandoahGCMode != nullptr) {
      if (strcmp(ShenandoahGCMode, "satb") == 0) {
        _gc_mode = new ShenandoahSATBMode();
-     } else if (strcmp(ShenandoahGCMode, "iu") == 0) {
-       _gc_mode = new ShenandoahIUMode();
      } else if (strcmp(ShenandoahGCMode, "passive") == 0) {
        _gc_mode = new ShenandoahPassiveMode();
+     } else if (strcmp(ShenandoahGCMode, "generational") == 0) {
+       _gc_mode = new ShenandoahGenerationalMode();
      } else {
        vm_exit_during_initialization("Unknown -XX:ShenandoahGCMode option");
      }
    } else {
      vm_exit_during_initialization("Unknown -XX:ShenandoahGCMode option (null)");

@@ -433,48 +497,43 @@
                      _gc_mode->name()));
    }
  }
  
  void ShenandoahHeap::initialize_heuristics() {
-   assert(_gc_mode != nullptr, "Must be initialized");
-   _heuristics = _gc_mode->initialize_heuristics();
- 
-   if (_heuristics->is_diagnostic() && !UnlockDiagnosticVMOptions) {
-     vm_exit_during_initialization(
-             err_msg("Heuristics \"%s\" is diagnostic, and must be enabled via -XX:+UnlockDiagnosticVMOptions.",
-                     _heuristics->name()));
-   }
-   if (_heuristics->is_experimental() && !UnlockExperimentalVMOptions) {
-     vm_exit_during_initialization(
-             err_msg("Heuristics \"%s\" is experimental, and must be enabled via -XX:+UnlockExperimentalVMOptions.",
-                     _heuristics->name()));
-   }
+   _global_generation = new ShenandoahGlobalGeneration(mode()->is_generational(), max_workers(), max_capacity()); // The heap-wide generation takes over from the removed _heuristics member as owner of the heuristics.
+   _global_generation->initialize_heuristics(mode()); // NOTE(review): the diagnostic/experimental option checks removed above are presumably performed elsewhere now - confirm.
  }
  
  #ifdef _MSC_VER
  #pragma warning( push )
  #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
  #endif
  
  ShenandoahHeap::ShenandoahHeap(ShenandoahCollectorPolicy* policy) :
    CollectedHeap(),
+   _gc_generation(nullptr),
+   _active_generation(nullptr),
    _initial_size(0),
-   _used(0),
    _committed(0),
-   _bytes_allocated_since_gc_start(0),
-   _max_workers(MAX2(ConcGCThreads, ParallelGCThreads)),
+   _max_workers(MAX3(ConcGCThreads, ParallelGCThreads, 1U)),
    _workers(nullptr),
    _safepoint_workers(nullptr),
    _heap_region_special(false),
    _num_regions(0),
    _regions(nullptr),
-   _update_refs_iterator(this),
+   _affiliations(nullptr),
    _gc_state_changed(false),
+   _gc_no_progress_count(0),
+   _cancel_requested_time(0),
+   _update_refs_iterator(this),
+   _global_generation(nullptr),
    _control_thread(nullptr),
+   _uncommit_thread(nullptr),
+   _young_generation(nullptr),
+   _old_generation(nullptr),
    _shenandoah_policy(policy),
    _gc_mode(nullptr),
-   _heuristics(nullptr),
    _free_set(nullptr),
    _pacer(nullptr),
    _verifier(nullptr),
    _phase_timings(nullptr),
    _monitoring_support(nullptr),

@@ -482,72 +541,28 @@
    _stw_memory_manager("Shenandoah Pauses"),
    _cycle_memory_manager("Shenandoah Cycles"),
    _gc_timer(new ConcurrentGCTimer()),
    _soft_ref_policy(),
    _log_min_obj_alignment_in_bytes(LogMinObjAlignmentInBytes),
-   _ref_processor(new ShenandoahReferenceProcessor(MAX2(_max_workers, 1U))),
    _marking_context(nullptr),
    _bitmap_size(0),
    _bitmap_regions_per_slice(0),
    _bitmap_bytes_per_slice(0),
    _bitmap_region_special(false),
    _aux_bitmap_region_special(false),
    _liveness_cache(nullptr),
    _collection_set(nullptr)
  {
-   // Initialize GC mode early, so we can adjust barrier support
+   // Initialize GC mode early, many subsequent initialization procedures depend on it
    initialize_mode();
-   BarrierSet::set_barrier_set(new ShenandoahBarrierSet(this));
- 
-   _max_workers = MAX2(_max_workers, 1U);
-   _workers = new ShenandoahWorkerThreads("Shenandoah GC Threads", _max_workers);
-   if (_workers == nullptr) {
-     vm_exit_during_initialization("Failed necessary allocation.");
-   } else {
-     _workers->initialize_workers();
-   }
- 
-   if (ParallelGCThreads > 1) {
-     _safepoint_workers = new ShenandoahWorkerThreads("Safepoint Cleanup Thread",
-                                                 ParallelGCThreads);
-     _safepoint_workers->initialize_workers();
-   }
+   _cancelled_gc.set(GCCause::_no_gc);
  }
  
  #ifdef _MSC_VER
  #pragma warning( pop )
  #endif
  
- class ShenandoahResetBitmapTask : public WorkerTask {
- private:
-   ShenandoahRegionIterator _regions;
- 
- public:
-   ShenandoahResetBitmapTask() :
-     WorkerTask("Shenandoah Reset Bitmap") {}
- 
-   void work(uint worker_id) {
-     ShenandoahHeapRegion* region = _regions.next();
-     ShenandoahHeap* heap = ShenandoahHeap::heap();
-     ShenandoahMarkingContext* const ctx = heap->marking_context();
-     while (region != nullptr) {
-       if (heap->is_bitmap_slice_committed(region)) {
-         ctx->clear_bitmap(region);
-       }
-       region = _regions.next();
-     }
-   }
- };
- 
- void ShenandoahHeap::reset_mark_bitmap() {
-   assert_gc_workers(_workers->active_workers());
-   mark_incomplete_marking_context();
- 
-   ShenandoahResetBitmapTask task;
-   _workers->run_task(&task);
- }
- 
  void ShenandoahHeap::print_on(outputStream* st) const {
    st->print_cr("Shenandoah Heap");
    st->print_cr(" " SIZE_FORMAT "%s max, " SIZE_FORMAT "%s soft max, " SIZE_FORMAT "%s committed, " SIZE_FORMAT "%s used",
                 byte_size_in_proper_unit(max_capacity()), proper_unit_for_byte_size(max_capacity()),
                 byte_size_in_proper_unit(soft_max_capacity()), proper_unit_for_byte_size(soft_max_capacity()),

@@ -558,11 +573,16 @@
                 byte_size_in_proper_unit(ShenandoahHeapRegion::region_size_bytes()),
                 proper_unit_for_byte_size(ShenandoahHeapRegion::region_size_bytes()));
  
    st->print("Status: ");
    if (has_forwarded_objects())                 st->print("has forwarded objects, ");
-   if (is_concurrent_mark_in_progress())        st->print("marking, ");
+   if (!mode()->is_generational()) { // Generational mode reports old and young marking separately (else branch).
+     if (is_concurrent_mark_in_progress())      st->print("marking, "); // Trailing space keeps the next status item separated, as in every other entry.
+   } else {
+     if (is_concurrent_old_mark_in_progress())    st->print("old marking, ");
+     if (is_concurrent_young_mark_in_progress())  st->print("young marking, ");
+   }
    if (is_evacuation_in_progress())             st->print("evacuating, ");
    if (is_update_refs_in_progress())            st->print("updating refs, ");
    if (is_degenerated_gc_in_progress())         st->print("degenerated gc, ");
    if (is_full_gc_in_progress())                st->print("full gc, ");
    if (is_full_gc_move_in_progress())           st->print("full gc move, ");

@@ -602,37 +622,45 @@
  
  class ShenandoahInitWorkerGCLABClosure : public ThreadClosure {
  public:
    void do_thread(Thread* thread) {
      assert(thread != nullptr, "Sanity");
-     assert(thread->is_Worker_thread(), "Only worker thread expected");
      ShenandoahThreadLocalData::initialize_gclab(thread);
    }
  };
  
  void ShenandoahHeap::post_initialize() {
    CollectedHeap::post_initialize();
+ 
+   // Schedule periodic task to report on gc thread CPU utilization
+   _mmu_tracker.initialize();
+ 
    MutexLocker ml(Threads_lock);
  
    ShenandoahInitWorkerGCLABClosure init_gclabs;
    _workers->threads_do(&init_gclabs);
  
    // gclab can not be initialized early during VM startup, as it can not determinate its max_size.
    // Now, we will let WorkerThreads to initialize gclab when new worker is created.
    _workers->set_initialize_gclab();
+ 
+   // Note that the safepoint workers may require gclabs if the threads are used to create a heap dump
+   // during a concurrent evacuation phase.
    if (_safepoint_workers != nullptr) {
      _safepoint_workers->threads_do(&init_gclabs);
      _safepoint_workers->set_initialize_gclab();
    }
  
-   _heuristics->initialize();
- 
    JFR_ONLY(ShenandoahJFRSupport::register_jfr_type_serializers());
  }
  
+ ShenandoahHeuristics* ShenandoahHeap::heuristics() {
+   return _global_generation->heuristics(); // Heap-level heuristics are the global generation's; the heap's own _heuristics member was removed.
+ }
+ 
  size_t ShenandoahHeap::used() const {
-   return Atomic::load(&_used);
+   return global_generation()->used(); // Heap usage is read from the global generation; the heap's own _used counter was removed.
  }
  
  size_t ShenandoahHeap::committed() const {
    return Atomic::load(&_committed);
  }

@@ -645,37 +673,88 @@
  void ShenandoahHeap::decrease_committed(size_t bytes) {
    shenandoah_assert_heaplocked_or_safepoint();
    _committed -= bytes;
  }
  
- void ShenandoahHeap::increase_used(size_t bytes) {
-   Atomic::add(&_used, bytes, memory_order_relaxed);
+ // For tracking usage based on allocations, it should be the case that:
+ // * The sum of regions::used == heap::used
+ // * The sum of a generation's regions::used == generation::used
+ // * The sum of a generation's humongous regions::free == generation::humongous_waste
+ // These invariants are checked by the verifier on GC safepoints.
+ //
+ // Additional notes:
+ // * When a mutator's allocation request causes a region to be retired, the
+ //   free memory left in that region is considered waste. It does not contribute
+ //   to the usage, but it _does_ contribute to allocation rate.
+ // * The bottom of a PLAB must be aligned on card size. In some cases this will
+ //   require padding in front of the PLAB (a filler object). Because this padding
+ //   is included in the region's used memory we include the padding in the usage
+ //   accounting as waste.
+ // * Mutator allocations are used to compute an allocation rate. They are also
+ //   reported to the Pacer (see notify_mutator_alloc_words).
+ // * There are three sources of waste:
+ //  1. The padding used to align a PLAB on card size
+ //  2. A region's remaining free memory when it is below the minimum TLAB size and the region is retired
+ //  3. The unused portion of memory in the last region of a humongous object
+ void ShenandoahHeap::increase_used(const ShenandoahAllocRequest& req) {
+   size_t actual_bytes = req.actual_size() * HeapWordSize; // actual_size() and waste() are scaled by HeapWordSize to get bytes
+   size_t wasted_bytes = req.waste() * HeapWordSize;
+   ShenandoahGeneration* generation = generation_for(req.affiliation());
+ 
+   if (req.is_gc_alloc()) {
+     assert(wasted_bytes == 0 || req.type() == ShenandoahAllocRequest::_alloc_plab, "Only PLABs have waste");
+     increase_used(generation, actual_bytes + wasted_bytes); // for GC allocations the waste is counted as used
+   } else {
+     assert(req.is_mutator_alloc(), "Expected mutator alloc here");
+     // padding and actual size both count towards allocation counter
+     generation->increase_allocated(actual_bytes + wasted_bytes);
+ 
+     // only actual size counts toward usage for mutator allocations
+     increase_used(generation, actual_bytes);
+ 
+     // notify pacer of both actual size and waste
+     notify_mutator_alloc_words(req.actual_size(), req.waste());
+ 
+     if (wasted_bytes > 0 && ShenandoahHeapRegion::requires_humongous(req.actual_size())) { // waste source 3: humongous allocation
+       increase_humongous_waste(generation, wasted_bytes);
+     }
+   }
  }
  
- void ShenandoahHeap::set_used(size_t bytes) {
-   Atomic::store(&_used, bytes);
+ void ShenandoahHeap::increase_humongous_waste(ShenandoahGeneration* generation, size_t bytes) {
+   generation->increase_humongous_waste(bytes);
+   if (!generation->is_global()) { // Mirror into the global generation, unless that is the one just updated.
+     global_generation()->increase_humongous_waste(bytes);
+   }
  }
  
- void ShenandoahHeap::decrease_used(size_t bytes) {
-   assert(used() >= bytes, "never decrease heap size by more than we've left");
-   Atomic::sub(&_used, bytes, memory_order_relaxed);
+ void ShenandoahHeap::decrease_humongous_waste(ShenandoahGeneration* generation, size_t bytes) {
+   generation->decrease_humongous_waste(bytes);
+   if (!generation->is_global()) { // Mirror into the global generation, unless that is the one just updated.
+     global_generation()->decrease_humongous_waste(bytes);
+   }
  }
  
- void ShenandoahHeap::increase_allocated(size_t bytes) {
-   Atomic::add(&_bytes_allocated_since_gc_start, bytes, memory_order_relaxed);
+ void ShenandoahHeap::increase_used(ShenandoahGeneration* generation, size_t bytes) {
+   generation->increase_used(bytes);
+   if (!generation->is_global()) { // Mirror into the global generation, unless that is the one just updated.
+     global_generation()->increase_used(bytes);
+   }
  }
  
- void ShenandoahHeap::notify_mutator_alloc_words(size_t words, bool waste) {
-   size_t bytes = words * HeapWordSize;
-   if (!waste) {
-     increase_used(bytes);
+ void ShenandoahHeap::decrease_used(ShenandoahGeneration* generation, size_t bytes) {
+   generation->decrease_used(bytes);
+   if (!generation->is_global()) { // Mirror into the global generation, unless that is the one just updated.
+     global_generation()->decrease_used(bytes);
    }
-   increase_allocated(bytes);
+ }
+ 
+ void ShenandoahHeap::notify_mutator_alloc_words(size_t words, size_t waste) { // 'waste' is now a word count, not a flag; usage/allocation accounting moved to increase_used(req).
    if (ShenandoahPacing) {
      control_thread()->pacing_notify_alloc(words);
-     if (waste) {
-       pacer()->claim_for_alloc(words, true);
+     if (waste > 0) {
+       pacer()->claim_for_alloc<true>(waste); // Claims only the wasted words; the removed code claimed 'words' when its boolean waste flag was set.
      }
    }
  }
  
  size_t ShenandoahHeap::capacity() const {

@@ -708,71 +787,102 @@
  size_t ShenandoahHeap::initial_capacity() const {
    return _initial_size;
  }
  
  bool ShenandoahHeap::is_in(const void* p) const {
-   if (is_in_reserved(p)) {
-     if (is_full_gc_move_in_progress()) {
-       // Full GC move is running, we do not have a consistent region
-       // information yet. But we know the pointer is in heap.
-       return true;
-     }
-     // Now check if we point to a live section in active region.
-     ShenandoahHeapRegion* r = heap_region_containing(p);
-     return (r->is_active() && p < r->top());
-   } else {
+   if (!is_in_reserved(p)) {
      return false;
    }
- }
  
- void ShenandoahHeap::op_uncommit(double shrink_before, size_t shrink_until) {
-   assert (ShenandoahUncommit, "should be enabled");
+   if (is_full_gc_move_in_progress()) {
+     // Full GC move is running, so we do not have consistent region
+     // information yet. But we know the pointer is in the heap.
+     return true;
+   }
+ 
+   // Now check if we point to a live section in an active region.
+   const ShenandoahHeapRegion* r = heap_region_containing(p);
+   if (p >= r->top()) {  // pointers at or above the top of the region are rejected regardless of region state
+     return false;
+   }
  
-   // Application allocates from the beginning of the heap, and GC allocates at
-   // the end of it. It is more efficient to uncommit from the end, so that applications
-   // could enjoy the near committed regions. GC allocations are much less frequent,
-   // and therefore can accept the committing costs.
+   if (r->is_active()) {
+     return true;
+   }
  
-   size_t count = 0;
-   for (size_t i = num_regions(); i > 0; i--) { // care about size_t underflow
-     ShenandoahHeapRegion* r = get_region(i - 1);
-     if (r->is_empty_committed() && (r->empty_time() < shrink_before)) {
-       ShenandoahHeapLocker locker(lock());
-       if (r->is_empty_committed()) {
-         if (committed() < shrink_until + ShenandoahHeapRegion::region_size_bytes()) {
-           break;
-         }
+   // The region is not active. A trash region won't be recycled until after
+   // concurrent weak roots, and mutators are not allowed to allocate from trash
+   // regions during weak roots. Concurrent class unloading may access unmarked
+   // oops in trash regions, so such pointers still count as in the heap.
+   return r->is_trash() && is_concurrent_weak_root_in_progress();
+ }
  
-         r->make_uncommitted();
-         count++;
-       }
-     }
-     SpinPause(); // allow allocators to take the lock
+ void ShenandoahHeap::notify_soft_max_changed() {
+   if (_uncommit_thread != nullptr) {  // forward only if an uncommit thread exists
+     _uncommit_thread->notify_soft_max_changed();
    }
+ }
  
-   if (count > 0) {
-     control_thread()->notify_heap_changed();
+ void ShenandoahHeap::notify_explicit_gc_requested() {
+   if (_uncommit_thread != nullptr) {  // forward only if an uncommit thread exists
+     _uncommit_thread->notify_explicit_gc_requested();
    }
  }
  
+ bool ShenandoahHeap::check_soft_max_changed() {  // returns true only when set_soft_max_capacity() was called with a new value
+   size_t new_soft_max = Atomic::load(&SoftMaxHeapSize);  // atomic read of SoftMaxHeapSize
+   size_t old_soft_max = soft_max_capacity();
+   if (new_soft_max != old_soft_max) {
+     new_soft_max = MAX2(min_capacity(), new_soft_max);  // raise to at least min_capacity()
+     new_soft_max = MIN2(max_capacity(), new_soft_max);  // and cap at max_capacity()
+     if (new_soft_max != old_soft_max) {  // re-check: after clamping the value may equal the current setting again
+       log_info(gc)("Soft Max Heap Size: " SIZE_FORMAT "%s -> " SIZE_FORMAT "%s",
+                    byte_size_in_proper_unit(old_soft_max), proper_unit_for_byte_size(old_soft_max),
+                    byte_size_in_proper_unit(new_soft_max), proper_unit_for_byte_size(new_soft_max)
+       );
+       set_soft_max_capacity(new_soft_max);
+       return true;
+     }
+   }
+   return false;
+ }
+ 
+ void ShenandoahHeap::notify_heap_changed() {
+   // Update monitoring counters when we take a new region. This amortizes the
+   // update costs on the slow path.
+   monitoring_support()->notify_heap_changed();
+   _heap_changed.try_set();  // additionally attempt to set the _heap_changed flag
+ }
+ 
+ void ShenandoahHeap::set_forced_counters_update(bool value) {
+   monitoring_support()->set_forced_counters_update(value);  // thin forwarder to the monitoring support object
+ }
+ 
+ void ShenandoahHeap::handle_force_counters_update() {
+   monitoring_support()->handle_force_counters_update();  // thin forwarder to the monitoring support object
+ }
+ 
  HeapWord* ShenandoahHeap::allocate_from_gclab_slow(Thread* thread, size_t size) {
    // New object should fit the GCLAB size
    size_t min_size = MAX2(size, PLAB::min_size());
  
    // Figure out size of new GCLAB, looking back at heuristics. Expand aggressively.
    size_t new_size = ShenandoahThreadLocalData::gclab_size(thread) * 2;
+ 
    new_size = MIN2(new_size, PLAB::max_size());
    new_size = MAX2(new_size, PLAB::min_size());
  
    // Record new heuristic value even if we take any shortcut. This captures
    // the case when moderately-sized objects always take a shortcut. At some point,
    // heuristics should catch up with them.
+   log_debug(gc, free)("Set new GCLAB size: " SIZE_FORMAT, new_size);
    ShenandoahThreadLocalData::set_gclab_size(thread, new_size);
  
    if (new_size < size) {
      // New size still does not fit the object. Fall back to shared allocation.
      // This avoids retiring perfectly good GCLABs, when we encounter a large object.
+     log_debug(gc, free)("New gclab size (" SIZE_FORMAT ") is too small for " SIZE_FORMAT, new_size, size);
      return nullptr;
    }
  
    // Retire current GCLAB, and allocate a new one.
    PLAB* gclab = ShenandoahThreadLocalData::gclab(thread);

@@ -798,10 +908,11 @@
    }
    gclab->set_buf(gclab_buf, actual_size);
    return gclab->allocate(size);
  }
  
+ // Called from stubs in JIT code or interpreter
  HeapWord* ShenandoahHeap::allocate_new_tlab(size_t min_size,
                                              size_t requested_size,
                                              size_t* actual_size) {
    ShenandoahAllocRequest req = ShenandoahAllocRequest::for_tlab(min_size, requested_size);
    HeapWord* res = allocate_memory(req);

@@ -839,66 +950,141 @@
  
      if (!ShenandoahAllocFailureALot || !should_inject_alloc_failure()) {
        result = allocate_memory_under_lock(req, in_new_region);
      }
  
-     // Allocation failed, block until control thread reacted, then retry allocation.
+     // Check that gc overhead is not exceeded.
      //
-     // It might happen that one of the threads requesting allocation would unblock
-     // way later after GC happened, only to fail the second allocation, because
-     // other threads have already depleted the free storage. In this case, a better
-     // strategy is to try again, as long as GC makes progress (or until at least
-     // one full GC has completed).
-     size_t original_count = shenandoah_policy()->full_gc_count();
-     while (result == nullptr
-         && (_progress_last_gc.is_set() || original_count == shenandoah_policy()->full_gc_count())) {
-       control_thread()->handle_alloc_failure(req);
-       result = allocate_memory_under_lock(req, in_new_region);
+     // Shenandoah will grind along for quite a while allocating one
+     // object at a time using shared (non-tlab) allocations. This check
+     // is testing that the GC overhead limit has not been exceeded.
+     // This will notify the collector to start a cycle, but will raise
+     // an OOME to the mutator if the last Full GCs have not made progress.
+     // gc_no_progress_count is incremented following each degen or full GC that fails to achieve is_good_progress().
+     if ((result == nullptr) && !req.is_lab_alloc() && (get_gc_no_progress_count() > ShenandoahNoProgressThreshold)) {
+       control_thread()->handle_alloc_failure(req, false);
+       req.set_actual_size(0);
+       return nullptr;
+     }
+ 
+     if (result == nullptr) {
+       // Block until control thread reacted, then retry allocation.
+       //
+       // It might happen that one of the threads requesting allocation would unblock
+       // way later after GC happened, only to fail the second allocation, because
+       // other threads have already depleted the free storage. In this case, a better
+       // strategy is to try again, until at least one full GC has completed.
+       //
+       // Stop retrying and return nullptr to cause OOMError exception if our allocation still fails once:
+       //   a) We experienced at least one Full GC (whether or not it had good progress), or
+       //   b) The collector policy reports that we are at shutdown (see should_retry_allocation())
+ 
+       const size_t original_count = shenandoah_policy()->full_gc_count();
+       while (result == nullptr && should_retry_allocation(original_count)) {
+         control_thread()->handle_alloc_failure(req, true);
+         result = allocate_memory_under_lock(req, in_new_region);
+       }
+       if (result != nullptr) {
+         // If our allocation request has been satisfied after it initially failed, we count this as good gc progress
+         notify_gc_progress();
+       }
+       if (log_develop_is_enabled(Debug, gc, alloc)) {
+         ResourceMark rm;
+         log_debug(gc, alloc)("Thread: %s, Result: " PTR_FORMAT ", Request: %s, Size: " SIZE_FORMAT
+                              ", Original: " SIZE_FORMAT ", Latest: " SIZE_FORMAT,
+                              Thread::current()->name(), p2i(result), req.type_string(), req.size(),
+                              original_count, get_gc_no_progress_count());
+       }
      }
    } else {
      assert(req.is_gc_alloc(), "Can only accept GC allocs here");
      result = allocate_memory_under_lock(req, in_new_region);
      // Do not call handle_alloc_failure() here, because we cannot block.
      // The allocation failure would be handled by the LRB slowpath with handle_alloc_failure_evac().
    }
  
    if (in_new_region) {
-     control_thread()->notify_heap_changed();
+     notify_heap_changed();
+   }
+ 
+   if (result == nullptr) {
+     req.set_actual_size(0);
    }
  
+   // This is called regardless of the outcome of the allocation to account
+   // for any waste created by retiring regions with this request.
+   increase_used(req);
+ 
    if (result != nullptr) {
      size_t requested = req.size();
      size_t actual = req.actual_size();
  
      assert (req.is_lab_alloc() || (requested == actual),
              "Only LAB allocations are elastic: %s, requested = " SIZE_FORMAT ", actual = " SIZE_FORMAT,
              ShenandoahAllocRequest::alloc_type_to_string(req.type()), requested, actual);
  
      if (req.is_mutator_alloc()) {
-       notify_mutator_alloc_words(actual, false);
- 
        // If we requested more than we were granted, give the rest back to pacer.
        // This only matters if we are in the same pacing epoch: do not try to unpace
        // over the budget for the other phase.
        if (ShenandoahPacing && (pacer_epoch > 0) && (requested > actual)) {
          pacer()->unpace_for_alloc(pacer_epoch, requested - actual);
        }
-     } else {
-       increase_used(actual*HeapWordSize);
      }
    }
  
    return result;
  }
  
+ inline bool ShenandoahHeap::should_retry_allocation(size_t original_full_gc_count) const {
+   return shenandoah_policy()->full_gc_count() == original_full_gc_count  // full GC count is still the value the caller sampled
+       && !shenandoah_policy()->is_at_shutdown();  // and the policy does not report shutdown
+ }
+ 
  HeapWord* ShenandoahHeap::allocate_memory_under_lock(ShenandoahAllocRequest& req, bool& in_new_region) {
    // If we are dealing with mutator allocation, then we may need to block for safepoint.
    // We cannot block for safepoint for GC allocations, because there is a high chance
    // we are already running at safepoint or from stack watermark machinery, and we cannot
    // block again.
    ShenandoahHeapLocker locker(lock(), req.is_mutator_alloc());
-   return _free_set->allocate(req, in_new_region);
+ 
+   // Make sure the old generation has room for either evacuations or promotions before trying to allocate.
+   if (req.is_old() && !old_generation()->can_allocate(req)) {
+     return nullptr;  // request is refused without consulting the free set
+   }
+ 
+   // If the TLAB request size is greater than available, allocate() will attempt to downsize the request to fit
+   // within available memory.
+   HeapWord* result = _free_set->allocate(req, in_new_region);
+ 
+   // Record the plab configuration for this result and register the object.
+   if (result != nullptr && req.is_old()) {
+     old_generation()->configure_plab_for_current_thread(req);
+     if (req.type() == ShenandoahAllocRequest::_alloc_shared_gc) {  // only shared GC allocations are registered here
+       // Register the newly allocated object while we're holding the global lock since there's no synchronization
+       // built into the implementation of register_object().  There are potential races when multiple independent
+       // threads are allocating objects, some of which might span the same card region.  For example, consider
+       // a card table's memory region within which three objects are being allocated by three different threads:
+       //
+       // objects being "concurrently" allocated:
+       //    [-----a------][-----b-----][--------------c------------------]
+       //            [---- card table memory range --------------]
+       //
+       // Before any objects are allocated, this card's memory range holds no objects.  Note that allocation of object a
+       // wants to set the starts-object, first-start, and last-start attributes of the preceding card region.
+       // Allocation of object b wants to set the starts-object, first-start, and last-start attributes of this card region.
+       // Allocation of object c also wants to set the starts-object, first-start, and last-start attributes of this
+       // card region.
+       //
+       // The thread allocating b and the thread allocating c can "race" in various ways, resulting in confusion, such as
+       // last-start representing object b while first-start represents object c.  This is why we need to require all
+       // register_object() invocations to be "mutually exclusive" with respect to each card's memory range.
+       old_generation()->card_scan()->register_object(result);
+     }
+   }
+ 
+   return result;
  }
  
  HeapWord* ShenandoahHeap::mem_allocate(size_t size,
                                          bool*  gc_overhead_limit_was_exceeded) {
    ShenandoahAllocRequest req = ShenandoahAllocRequest::for_shared(size);

@@ -909,12 +1095,12 @@
                                                               size_t size,
                                                               Metaspace::MetadataType mdtype) {
    MetaWord* result;
  
    // Inform metaspace OOM to GC heuristics if class unloading is possible.
-   if (heuristics()->can_unload_classes()) {
-     ShenandoahHeuristics* h = heuristics();
+   ShenandoahHeuristics* h = global_generation()->heuristics();
+   if (h->can_unload_classes()) {
      h->record_metaspace_oom();
    }
  
    // Expand and retry allocation
    result = loader_data->metaspace_non_null()->expand_and_allocate(size, mdtype);

@@ -1002,15 +1188,223 @@
        }
      }
    }
  };
  
+ class ShenandoahRetireGCLABClosure : public ThreadClosure {  // retires the GCLAB of each visited thread and, in generational mode, its PLAB
+ private:
+   bool const _resize;  // when set, non-zero recorded GCLAB/PLAB sizes are reset to 0 after retiring
+ public:
+   explicit ShenandoahRetireGCLABClosure(bool resize) : _resize(resize) {}
+   void do_thread(Thread* thread) override {
+     PLAB* gclab = ShenandoahThreadLocalData::gclab(thread);
+     assert(gclab != nullptr, "GCLAB should be initialized for %s", thread->name());
+     gclab->retire();
+     if (_resize && ShenandoahThreadLocalData::gclab_size(thread) > 0) {  // skip the write when the size is already 0
+       ShenandoahThreadLocalData::set_gclab_size(thread, 0);
+     }
+ 
+     if (ShenandoahHeap::heap()->mode()->is_generational()) {  // PLAB handling applies to generational mode only
+       PLAB* plab = ShenandoahThreadLocalData::plab(thread);
+       assert(plab != nullptr, "PLAB should be initialized for %s", thread->name());
+ 
+       // There are two reasons to retire all plabs between old-gen evacuation passes.
+       //  1. We need to make the plab memory parsable by remembered-set scanning.
+       //  2. We need to establish a trustworthy UpdateWaterMark value within each old-gen heap region.
+       ShenandoahGenerationalHeap::heap()->retire_plab(plab, thread);
+ 
+       // Re-enable promotions for the next evacuation phase.
+       ShenandoahThreadLocalData::enable_plab_promotions(thread);
+ 
+       // Reset the fill size for the next evacuation phase.
+       if (_resize && ShenandoahThreadLocalData::plab_size(thread) > 0) {
+         ShenandoahThreadLocalData::set_plab_size(thread, 0);
+       }
+     }
+   }
+ };
+ 
+ class ShenandoahGCStatePropagator : public HandshakeClosure {  // stores one fixed gc state value into the thread-local data of each visited thread
+ public:
+   explicit ShenandoahGCStatePropagator(char gc_state) :
+     HandshakeClosure("Shenandoah GC State Change"),
+     _gc_state(gc_state) {}
+ 
+   void do_thread(Thread* thread) override {
+     ShenandoahThreadLocalData::set_gc_state(thread, _gc_state);
+   }
+ private:
+   char _gc_state;  // value captured at construction; the same value is written to every thread
+ };
+ 
+ class ShenandoahPrepareForUpdateRefs : public HandshakeClosure {  // per thread: propagate the gc state, then retire its GCLAB (and PLAB) if it has one
+ public:
+   explicit ShenandoahPrepareForUpdateRefs(char gc_state) :
+     HandshakeClosure("Shenandoah Prepare for Update Refs"),
+     _retire(ResizeTLAB), _propagator(gc_state) {}  // the resize behaviour of the retire closure follows ResizeTLAB
+ 
+   void do_thread(Thread* thread) override {
+     _propagator.do_thread(thread);
+     if (ShenandoahThreadLocalData::gclab(thread) != nullptr) {  // threads without a GCLAB only receive the gc state update
+       _retire.do_thread(thread);
+     }
+   }
+ private:
+   ShenandoahRetireGCLABClosure _retire;
+   ShenandoahGCStatePropagator _propagator;
+ };
+ 
  void ShenandoahHeap::evacuate_collection_set(bool concurrent) {
    ShenandoahEvacuationTask task(this, _collection_set, concurrent);
    workers()->run_task(&task);
  }
  
+ void ShenandoahHeap::concurrent_prepare_for_update_refs() {
+   {
+     // Java threads take this lock while they are being attached and added to the list of threads.
+     // If another thread holds this lock before we update the gc state, it will receive a stale
+     // gc state, but it will have been added to the list of java threads and so will be corrected
+     // by the following handshake.
+     MutexLocker lock(Threads_lock);
+ 
+     // A cancellation at this point means the degenerated cycle must resume from update-refs.
+     set_gc_state_concurrent(EVACUATION, false);
+     set_gc_state_concurrent(WEAK_ROOTS, false);
+     set_gc_state_concurrent(UPDATE_REFS, true);
+   }
+ 
+   // This will propagate the gc state and retire gclabs and plabs for threads that require it.
+   ShenandoahPrepareForUpdateRefs prepare_for_update_refs(_gc_state.raw_value());  // raw gc state is read after the three changes above
+ 
+   // The handshake won't touch worker threads (or control thread, or VM thread), so do those separately.
+   Threads::non_java_threads_do(&prepare_for_update_refs);
+ 
+   // Now retire gclabs and plabs and propagate gc_state for mutator threads.
+   Handshake::execute(&prepare_for_update_refs);
+ 
+   _update_refs_iterator.reset();  // NOTE(review): presumably rewinds the iterator used by the update-refs phase - confirm
+ }
+ 
+ class ShenandoahCompositeHandshakeClosure : public HandshakeClosure {  // runs two closures on each thread: _handshake_1 first, then _handshake_2
+   HandshakeClosure* _handshake_1;
+   HandshakeClosure* _handshake_2;
+   public:
+     ShenandoahCompositeHandshakeClosure(HandshakeClosure* handshake_1, HandshakeClosure* handshake_2) :
+       HandshakeClosure(handshake_2->name()),  // the composite takes the name of the second closure
+       _handshake_1(handshake_1), _handshake_2(handshake_2) {}
+ 
+   void do_thread(Thread* thread) override {
+       _handshake_1->do_thread(thread);
+       _handshake_2->do_thread(thread);
+     }
+ };
+ 
+ void ShenandoahHeap::concurrent_final_roots(HandshakeClosure* handshake_closure) {  // handshake_closure may be nullptr
+   {
+     assert(!is_evacuation_in_progress(), "Should not evacuate for abbreviated or old cycles");
+     MutexLocker lock(Threads_lock);
+     set_gc_state_concurrent(WEAK_ROOTS, false);
+   }
+ 
+   ShenandoahGCStatePropagator propagator(_gc_state.raw_value());  // raw gc state is read after WEAK_ROOTS was cleared above
+   Threads::non_java_threads_do(&propagator);  // non-Java threads are visited directly, outside the handshake
+   if (handshake_closure == nullptr) {
+     Handshake::execute(&propagator);
+   } else {
+     ShenandoahCompositeHandshakeClosure composite(&propagator, handshake_closure);  // propagator runs first, then the supplied closure
+     Handshake::execute(&composite);
+   }
+ }
+ 
+ oop ShenandoahHeap::evacuate_object(oop p, Thread* thread) {
+   assert(thread == Thread::current(), "Expected thread parameter to be current thread.");
+   if (ShenandoahThreadLocalData::is_oom_during_evac(thread)) {
+     // This thread went through the OOM during evac protocol. It is safe to return
+     // the forward pointer. It must not attempt to evacuate any other objects.
+     return ShenandoahBarrierSet::resolve_forwarded(p);
+   }
+ 
+   assert(ShenandoahThreadLocalData::is_evac_allowed(thread), "must be enclosed in oom-evac scope");
+ 
+   ShenandoahHeapRegion* r = heap_region_containing(p);
+   assert(!r->is_humongous(), "never evacuate humongous objects");
+ 
+   ShenandoahAffiliation target_gen = r->affiliation();  // the target is the affiliation of the source region
+   return try_evacuate_object(p, thread, r, target_gen);
+ }
+ 
+ oop ShenandoahHeap::try_evacuate_object(oop p, Thread* thread, ShenandoahHeapRegion* from_region,
+                                                ShenandoahAffiliation target_gen) {
+   assert(target_gen == YOUNG_GENERATION, "Only expect evacuations to young in this mode");
+   assert(from_region->is_young(), "Only expect evacuations from young in this mode");
+   bool alloc_from_lab = true;  // cleared when the copy comes from a shared allocation; selects the undo path after a lost race
+   HeapWord* copy = nullptr;
+   size_t size = p->size();  // presumably in heap words: it is passed to Copy::aligned_disjoint_words below
+ 
+ #ifdef ASSERT
+   if (ShenandoahOOMDuringEvacALot &&
+       (os::random() & 1) == 0) { // Simulate OOM every ~2nd slow-path call
+     copy = nullptr;
+   } else {
+ #endif
+     if (UseTLAB) {
+       copy = allocate_from_gclab(thread, size);
+     }
+     if (copy == nullptr) {
+       // If we failed to allocate in LAB, we'll try a shared allocation.
+       ShenandoahAllocRequest req = ShenandoahAllocRequest::for_shared_gc(size, target_gen);
+       copy = allocate_memory(req);
+       alloc_from_lab = false;
+     }
+ #ifdef ASSERT
+   }
+ #endif
+ 
+   if (copy == nullptr) {  // both allocation attempts failed (or the failure was injected above)
+     control_thread()->handle_alloc_failure_evac(size);
+ 
+     _oom_evac_handler.handle_out_of_memory_during_evacuation();
+ 
+     return ShenandoahBarrierSet::resolve_forwarded(p);
+   }
+ 
+   // Copy the object:
+   Copy::aligned_disjoint_words(cast_from_oop<HeapWord*>(p), copy, size);
+ 
+   // Try to install the new forwarding pointer.
+   oop copy_val = cast_to_oop(copy);
+   oop result = ShenandoahForwarding::try_update_forwardee(p, copy_val);
+   if (result == copy_val) {
+     // Successfully evacuated. Our copy is now the public one!
+     ContinuationGCSupport::relativize_stack_chunk(copy_val);
+     shenandoah_assert_correct(nullptr, copy_val);
+     return copy_val;
+   }  else {
+     // Failed to evacuate. We need to deal with the object that is left behind. Since this
+     // new allocation is certainly after TAMS, it will be considered live in the next cycle.
+     // But if it happens to contain references to evacuated regions, those references would
+     // not get updated for this stale copy during this cycle, and we will crash while scanning
+     // it in the next cycle.
+     if (alloc_from_lab) {
+       // For LAB allocations, it is enough to rollback the allocation ptr. Either the next
+       // object will overwrite this stale copy, or the filler object on LAB retirement will
+       // do this.
+       ShenandoahThreadLocalData::gclab(thread)->undo_allocation(copy, size);
+     } else {
+       // For non-LAB allocations, we have no way to retract the allocation, and
+       // have to explicitly overwrite the copy with the filler object. With that overwrite,
+       // we have to keep the fwdptr initialized and pointing to our (stale) copy.
+       assert(size >= ShenandoahHeap::min_fill_size(), "previously allocated object known to be larger than min_size");
+       fill_with_object(copy, size);
+       shenandoah_assert_correct(nullptr, copy_val);
+       // For non-LAB allocations, the object has already been registered
+     }
+     shenandoah_assert_correct(nullptr, result);
+     return result;  // the forwardee reported by try_update_forwardee, not our copy
+   }
+ }
+ 
  void ShenandoahHeap::trash_cset_regions() {
    ShenandoahHeapLocker locker(lock());
  
    ShenandoahCollectionSet* set = collection_set();
    ShenandoahHeapRegion* r;

@@ -1060,24 +1454,15 @@
    ShenandoahCheckCleanGCLABClosure() {}
    void do_thread(Thread* thread) {
      PLAB* gclab = ShenandoahThreadLocalData::gclab(thread);
      assert(gclab != nullptr, "GCLAB should be initialized for %s", thread->name());
      assert(gclab->words_remaining() == 0, "GCLAB should not need retirement");
-   }
- };
  
- class ShenandoahRetireGCLABClosure : public ThreadClosure {
- private:
-   bool const _resize;
- public:
-   ShenandoahRetireGCLABClosure(bool resize) : _resize(resize) {}
-   void do_thread(Thread* thread) {
-     PLAB* gclab = ShenandoahThreadLocalData::gclab(thread);
-     assert(gclab != nullptr, "GCLAB should be initialized for %s", thread->name());
-     gclab->retire();
-     if (_resize && ShenandoahThreadLocalData::gclab_size(thread) > 0) {
-       ShenandoahThreadLocalData::set_gclab_size(thread, 0);
+     if (ShenandoahHeap::heap()->mode()->is_generational()) {
+       PLAB* plab = ShenandoahThreadLocalData::plab(thread);
+       assert(plab != nullptr, "PLAB should be initialized for %s", thread->name());
+       assert(plab->words_remaining() == 0, "PLAB should not need retirement");
      }
    }
  };
  
  void ShenandoahHeap::labs_make_parsable() {

@@ -1090,10 +1475,14 @@
      tlab.make_parsable();
      cl.do_thread(t);
    }
  
    workers()->threads_do(&cl);
+ 
+   if (safepoint_workers() != nullptr) {
+     safepoint_workers()->threads_do(&cl);
+   }
  }
  
  void ShenandoahHeap::tlabs_retire(bool resize) {
    assert(UseTLAB, "Only call with UseTLAB");
    assert(!resize || ResizeTLAB, "Only call for resize when ResizeTLAB is enabled");

@@ -1125,10 +1514,11 @@
  
    ShenandoahRetireGCLABClosure cl(resize);
    for (JavaThreadIteratorWithHandle jtiwh; JavaThread *t = jtiwh.next(); ) {
      cl.do_thread(t);
    }
+ 
    workers()->threads_do(&cl);
  
    if (safepoint_workers() != nullptr) {
      safepoint_workers()->threads_do(&cl);
    }

@@ -1188,11 +1578,22 @@
      labs_make_parsable();
    }
  }
  
  void ShenandoahHeap::gc_threads_do(ThreadClosure* tcl) const {
-   tcl->do_thread(_control_thread);
+   if (_shenandoah_policy->is_at_shutdown()) {  // visit no GC threads at all when the policy reports shutdown
+     return;
+   }
+ 
+   if (_control_thread != nullptr) {  // the control thread is visited only if it exists
+     tcl->do_thread(_control_thread);
+   }
+ 
+   if (_uncommit_thread != nullptr) {  // likewise for the uncommit thread
+     tcl->do_thread(_uncommit_thread);
+   }
+ 
    workers()->threads_do(tcl);
    if (_safepoint_workers != nullptr) {
      _safepoint_workers->threads_do(tcl);
    }
  }

@@ -1213,10 +1614,52 @@
      ls.cr();
      ls.cr();
    }
  }
  
+ void ShenandoahHeap::set_gc_generation(ShenandoahGeneration* generation) {
+   shenandoah_assert_control_or_vm_thread_at_safepoint();
+   _gc_generation = generation;  // may be nullptr: on_cycle_end() clears the field through this setter
+ }
+ 
+ // Active generation may only be set by the VM thread at a safepoint; it is copied from _gc_generation.
+ void ShenandoahHeap::set_active_generation() {
+   assert(Thread::current()->is_VM_thread(), "Only the VM Thread");
+   assert(SafepointSynchronize::is_at_safepoint(), "Only at a safepoint!");
+   assert(_gc_generation != nullptr, "Will set _active_generation to nullptr");
+   _active_generation = _gc_generation;  // _gc_generation must already have been set, per the assert above
+ }
+ 
+ void ShenandoahHeap::on_cycle_start(GCCause::Cause cause, ShenandoahGeneration* generation) {
+   shenandoah_policy()->record_collection_cause(cause);  // the policy is told about the cause before the checks below
+ 
+   const GCCause::Cause current = gc_cause();
+   assert(current == GCCause::_no_gc, "Over-writing cause: %s, with: %s",
+          GCCause::to_string(current), GCCause::to_string(cause));
+   assert(_gc_generation == nullptr, "Over-writing _gc_generation");
+ 
+   set_gc_cause(cause);  // on_cycle_end() resets both of these in the opposite order
+   set_gc_generation(generation);
+ 
+   generation->heuristics()->record_cycle_start();  // cycle start goes to the heuristics of the given generation only
+ }
+ 
+ void ShenandoahHeap::on_cycle_end(ShenandoahGeneration* generation) {
+   assert(gc_cause() != GCCause::_no_gc, "cause wasn't set");
+   assert(_gc_generation != nullptr, "_gc_generation wasn't set");
+ 
+   generation->heuristics()->record_cycle_end();
+   if (mode()->is_generational() && generation->is_global()) {
+     // If we just completed a GLOBAL GC, claim credit for completion of young-gen and old-gen GC as well.
+     young_generation()->heuristics()->record_cycle_end();
+     old_generation()->heuristics()->record_cycle_end();
+   }
+ 
+   set_gc_generation(nullptr);  // cleared in the reverse order of on_cycle_start(): generation first, then cause
+   set_gc_cause(GCCause::_no_gc);
+ }
+ 
  void ShenandoahHeap::verify(VerifyOption vo) {
    if (ShenandoahSafepoint::is_at_shenandoah_safepoint()) {
      if (ShenandoahVerify) {
        verifier()->verify_generic(vo);
      } else {

@@ -1547,33 +1990,10 @@
    } else {
      heap_region_iterate(blk);
    }
  }
  
- class ShenandoahInitMarkUpdateRegionStateClosure : public ShenandoahHeapRegionClosure {
- private:
-   ShenandoahMarkingContext* const _ctx;
- public:
-   ShenandoahInitMarkUpdateRegionStateClosure() : _ctx(ShenandoahHeap::heap()->marking_context()) {}
- 
-   void heap_region_do(ShenandoahHeapRegion* r) {
-     assert(!r->has_live(), "Region " SIZE_FORMAT " should have no live data", r->index());
-     if (r->is_active()) {
-       // Check if region needs updating its TAMS. We have updated it already during concurrent
-       // reset, so it is very likely we don't need to do another write here.
-       if (_ctx->top_at_mark_start(r) != r->top()) {
-         _ctx->capture_top_at_mark_start(r);
-       }
-     } else {
-       assert(_ctx->top_at_mark_start(r) == r->top(),
-              "Region " SIZE_FORMAT " should already have correct TAMS", r->index());
-     }
-   }
- 
-   bool is_thread_safe() { return true; }
- };
- 
  class ShenandoahRendezvousClosure : public HandshakeClosure {
  public:
    inline ShenandoahRendezvousClosure(const char* name) : HandshakeClosure(name) {}
    inline void do_thread(Thread* thread) {}
  };

@@ -1585,164 +2005,123 @@
  
  void ShenandoahHeap::recycle_trash() {
    free_set()->recycle_trash();
  }
  
- class ShenandoahResetUpdateRegionStateClosure : public ShenandoahHeapRegionClosure {
- private:
-   ShenandoahMarkingContext* const _ctx;
- public:
-   ShenandoahResetUpdateRegionStateClosure() : _ctx(ShenandoahHeap::heap()->marking_context()) {}
- 
-   void heap_region_do(ShenandoahHeapRegion* r) {
-     if (r->is_active()) {
-       // Reset live data and set TAMS optimistically. We would recheck these under the pause
-       // anyway to capture any updates that happened since now.
-       r->clear_live_data();
-       _ctx->capture_top_at_mark_start(r);
-     }
-   }
- 
-   bool is_thread_safe() { return true; }
- };
- 
- void ShenandoahHeap::prepare_gc() {
-   reset_mark_bitmap();
- 
-   ShenandoahResetUpdateRegionStateClosure cl;
-   parallel_heap_region_iterate(&cl);
- }
- 
- class ShenandoahFinalMarkUpdateRegionStateClosure : public ShenandoahHeapRegionClosure {
- private:
-   ShenandoahMarkingContext* const _ctx;
-   ShenandoahHeapLock* const _lock;
- 
- public:
-   ShenandoahFinalMarkUpdateRegionStateClosure() :
-     _ctx(ShenandoahHeap::heap()->complete_marking_context()), _lock(ShenandoahHeap::heap()->lock()) {}
- 
-   void heap_region_do(ShenandoahHeapRegion* r) {
-     if (r->is_active()) {
-       // All allocations past TAMS are implicitly live, adjust the region data.
-       // Bitmaps/TAMS are swapped at this point, so we need to poll complete bitmap.
-       HeapWord *tams = _ctx->top_at_mark_start(r);
-       HeapWord *top = r->top();
-       if (top > tams) {
-         r->increase_live_data_alloc_words(pointer_delta(top, tams));
-       }
- 
-       // We are about to select the collection set, make sure it knows about
-       // current pinning status. Also, this allows trashing more regions that
-       // now have their pinning status dropped.
-       if (r->is_pinned()) {
-         if (r->pin_count() == 0) {
-           ShenandoahHeapLocker locker(_lock);
-           r->make_unpinned();
-         }
-       } else {
-         if (r->pin_count() > 0) {
-           ShenandoahHeapLocker locker(_lock);
-           r->make_pinned();
-         }
-       }
- 
-       // Remember limit for updating refs. It's guaranteed that we get no
-       // from-space-refs written from here on.
-       r->set_update_watermark_at_safepoint(r->top());
-     } else {
-       assert(!r->has_live(), "Region " SIZE_FORMAT " should have no live data", r->index());
-       assert(_ctx->top_at_mark_start(r) == r->top(),
-              "Region " SIZE_FORMAT " should have correct TAMS", r->index());
-     }
-   }
- 
-   bool is_thread_safe() { return true; }
- };
- 
- void ShenandoahHeap::prepare_regions_and_collection_set(bool concurrent) {
-   assert(!is_full_gc_in_progress(), "Only for concurrent and degenerated GC");
-   {
-     ShenandoahGCPhase phase(concurrent ? ShenandoahPhaseTimings::final_update_region_states :
-                                          ShenandoahPhaseTimings::degen_gc_final_update_region_states);
-     ShenandoahFinalMarkUpdateRegionStateClosure cl;
-     parallel_heap_region_iterate(&cl);
- 
-     assert_pinned_region_status();
-   }
- 
-   {
-     ShenandoahGCPhase phase(concurrent ? ShenandoahPhaseTimings::choose_cset :
-                                          ShenandoahPhaseTimings::degen_gc_choose_cset);
-     ShenandoahHeapLocker locker(lock());
-     _collection_set->clear();
-     heuristics()->choose_collection_set(_collection_set);
-   }
- 
-   {
-     ShenandoahGCPhase phase(concurrent ? ShenandoahPhaseTimings::final_rebuild_freeset :
-                                          ShenandoahPhaseTimings::degen_gc_final_rebuild_freeset);
-     ShenandoahHeapLocker locker(lock());
-     _free_set->rebuild();
-   }
- }
- 
  void ShenandoahHeap::do_class_unloading() {
    _unloader.unload();
+   if (mode()->is_generational()) {
+     old_generation()->set_parsable(false);
+   }
  }
  
  void ShenandoahHeap::stw_weak_refs(bool full_gc) {
    // Weak refs processing
    ShenandoahPhaseTimings::Phase phase = full_gc ? ShenandoahPhaseTimings::full_gc_weakrefs
                                                  : ShenandoahPhaseTimings::degen_gc_weakrefs;
    ShenandoahTimingsTracker t(phase);
    ShenandoahGCWorkerPhase worker_phase(phase);
-   ref_processor()->process_references(phase, workers(), false /* concurrent */);
+   shenandoah_assert_generations_reconciled();
+   gc_generation()->ref_processor()->process_references(phase, workers(), false /* concurrent */);
  }
  
- void ShenandoahHeap::prepare_update_heap_references(bool concurrent) {
+ void ShenandoahHeap::prepare_update_heap_references() {
    assert(ShenandoahSafepoint::is_at_shenandoah_safepoint(), "must be at safepoint");
  
    // Evacuation is over, no GCLABs are needed anymore. GCLABs are under URWM, so we need to
    // make them parsable for update code to work correctly. Plus, we can compute new sizes
    // for future GCLABs here.
    if (UseTLAB) {
-     ShenandoahGCPhase phase(concurrent ?
-                             ShenandoahPhaseTimings::init_update_refs_manage_gclabs :
-                             ShenandoahPhaseTimings::degen_gc_init_update_refs_manage_gclabs);
+     ShenandoahGCPhase phase(ShenandoahPhaseTimings::degen_gc_init_update_refs_manage_gclabs);
      gclabs_retire(ResizeTLAB);
    }
  
    _update_refs_iterator.reset();
  }
  
- void ShenandoahHeap::propagate_gc_state_to_java_threads() {
+ void ShenandoahHeap::propagate_gc_state_to_all_threads() {
    assert(ShenandoahSafepoint::is_at_shenandoah_safepoint(), "Must be at Shenandoah safepoint");
    if (_gc_state_changed) {
+     ShenandoahGCStatePropagator propagator(_gc_state.raw_value());
+     Threads::threads_do(&propagator);
      _gc_state_changed = false;
-     char state = gc_state();
-     for (JavaThreadIteratorWithHandle jtiwh; JavaThread *t = jtiwh.next(); ) {
-       ShenandoahThreadLocalData::set_gc_state(t, state);
-     }
    }
  }
  
- void ShenandoahHeap::set_gc_state(uint mask, bool value) {
+ void ShenandoahHeap::set_gc_state_at_safepoint(uint mask, bool value) {
    assert(ShenandoahSafepoint::is_at_shenandoah_safepoint(), "Must be at Shenandoah safepoint");
    _gc_state.set_cond(mask, value);
    _gc_state_changed = true;
  }
  
- void ShenandoahHeap::set_concurrent_mark_in_progress(bool in_progress) {
-   assert(!has_forwarded_objects(), "Not expected before/after mark phase");
-   set_gc_state(MARKING, in_progress);
-   ShenandoahBarrierSet::satb_mark_queue_set().set_active_all_threads(in_progress, !in_progress);
+ void ShenandoahHeap::set_gc_state_concurrent(uint mask, bool value) {
+   // Holding the Threads_lock here ensures that any thread created after we change the gc
+   // state will have the correct state. It also prevents attaching threads from seeing
+   // an inconsistent state. See ShenandoahBarrierSet::on_thread_attach for reference. Established
+   // threads will use their thread local copy of the gc state (changed by a handshake, or on a
+   // safepoint).
+   assert(Threads_lock->is_locked(), "Must hold thread lock for concurrent gc state change");
+   _gc_state.set_cond(mask, value);
+ }
+ 
+ void ShenandoahHeap::set_concurrent_young_mark_in_progress(bool in_progress) {
+   uint mask;
+   assert(!has_forwarded_objects(), "Young marking is not concurrent with evacuation");
+   if (!in_progress && is_concurrent_old_mark_in_progress()) {
+     assert(mode()->is_generational(), "Only generational GC has old marking");
+     assert(_gc_state.is_set(MARKING), "concurrent_old_marking_in_progress implies MARKING");
+     // If old-marking is in progress when we turn off YOUNG_MARKING, leave MARKING (and OLD_MARKING) on
+     mask = YOUNG_MARKING;
+   } else {
+     mask = MARKING | YOUNG_MARKING;
+   }
+   set_gc_state_at_safepoint(mask, in_progress);
+   manage_satb_barrier(in_progress);
+ }
+ 
+ void ShenandoahHeap::set_concurrent_old_mark_in_progress(bool in_progress) {
+ #ifdef ASSERT
+   // has_forwarded_objects() iff UPDATE_REFS or EVACUATION
+   bool has_forwarded = has_forwarded_objects();
+   bool updating_or_evacuating = _gc_state.is_set(UPDATE_REFS | EVACUATION);
+   bool evacuating = _gc_state.is_set(EVACUATION);
+   assert ((has_forwarded == updating_or_evacuating) || (evacuating && !has_forwarded && collection_set()->is_empty()),
+           "Updating or evacuating iff has forwarded objects, or if evacuation phase is promoting in place without forwarding");
+ #endif
+   if (!in_progress && is_concurrent_young_mark_in_progress()) {
+     // If young-marking is in progress when we turn off OLD_MARKING, leave MARKING (and YOUNG_MARKING) on
+     assert(_gc_state.is_set(MARKING), "concurrent_young_marking_in_progress implies MARKING");
+     set_gc_state_at_safepoint(OLD_MARKING, in_progress);
+   } else {
+     set_gc_state_at_safepoint(MARKING | OLD_MARKING, in_progress);
+   }
+   manage_satb_barrier(in_progress);
+ }
+ 
+ bool ShenandoahHeap::is_prepare_for_old_mark_in_progress() const {
+   return old_generation()->is_preparing_for_mark();
+ }
+ 
+ void ShenandoahHeap::manage_satb_barrier(bool active) {
+   if (is_concurrent_mark_in_progress()) {
+     // Ignore request to deactivate barrier while concurrent mark is in progress.
+     // Do not attempt to re-activate the barrier if it is already active.
+     if (active && !ShenandoahBarrierSet::satb_mark_queue_set().is_active()) {
+       ShenandoahBarrierSet::satb_mark_queue_set().set_active_all_threads(active, !active);
+     }
+   } else {
+     // No concurrent marking is in progress so honor request to deactivate,
+     // but only if the barrier is already active.
+     if (!active && ShenandoahBarrierSet::satb_mark_queue_set().is_active()) {
+       ShenandoahBarrierSet::satb_mark_queue_set().set_active_all_threads(active, !active);
+     }
+   }
  }
  
  void ShenandoahHeap::set_evacuation_in_progress(bool in_progress) {
    assert(ShenandoahSafepoint::is_at_shenandoah_safepoint(), "Only call this at safepoint");
-   set_gc_state(EVACUATION, in_progress);
+   set_gc_state_at_safepoint(EVACUATION, in_progress);
  }
  
  void ShenandoahHeap::set_concurrent_strong_root_in_progress(bool in_progress) {
    if (in_progress) {
      _concurrent_strong_root_in_progress.set();

@@ -1750,32 +2129,46 @@
      _concurrent_strong_root_in_progress.unset();
    }
  }
  
  void ShenandoahHeap::set_concurrent_weak_root_in_progress(bool cond) {
-   set_gc_state(WEAK_ROOTS, cond);
+   set_gc_state_at_safepoint(WEAK_ROOTS, cond);
  }
  
  GCTracer* ShenandoahHeap::tracer() {
    return shenandoah_policy()->tracer();
  }
  
  size_t ShenandoahHeap::tlab_used(Thread* thread) const {
    return _free_set->used();
  }
  
- bool ShenandoahHeap::try_cancel_gc() {
-   jbyte prev = _cancelled_gc.cmpxchg(CANCELLED, CANCELLABLE);
-   return prev == CANCELLABLE;
+ bool ShenandoahHeap::try_cancel_gc(GCCause::Cause cause) {
+   const GCCause::Cause prev = _cancelled_gc.xchg(cause);
+   return prev == GCCause::_no_gc || prev == GCCause::_shenandoah_concurrent_gc;
+ }
+ 
+ void ShenandoahHeap::cancel_concurrent_mark() {
+   if (mode()->is_generational()) {
+     young_generation()->cancel_marking();
+     old_generation()->cancel_marking();
+   }
+ 
+   global_generation()->cancel_marking();
+ 
+   ShenandoahBarrierSet::satb_mark_queue_set().abandon_partial_marking();
  }
  
- void ShenandoahHeap::cancel_gc(GCCause::Cause cause) {
-   if (try_cancel_gc()) {
+ bool ShenandoahHeap::cancel_gc(GCCause::Cause cause) {
+   if (try_cancel_gc(cause)) {
      FormatBuffer<> msg("Cancelling GC: %s", GCCause::to_string(cause));
-     log_info(gc)("%s", msg.buffer());
+     log_info(gc,thread)("%s", msg.buffer());
      Events::log(Thread::current(), "%s", msg.buffer());
+     _cancel_requested_time = os::elapsedTime();
+     return true;
    }
+   return false;
  }
  
  uint ShenandoahHeap::max_workers() {
    return _max_workers;
  }

@@ -1784,20 +2177,20 @@
    // The shutdown sequence should be able to terminate when GC is running.
  
    // Step 0. Notify policy to disable event recording.
    _shenandoah_policy->record_shutdown();
  
-   // Step 1. Notify control thread that we are in shutdown.
-   // Note that we cannot do that with stop(), because stop() is blocking and waits for the actual shutdown.
-   // Doing stop() here would wait for the normal GC cycle to complete, never falling through to cancel below.
-   control_thread()->prepare_for_graceful_shutdown();
- 
-   // Step 2. Notify GC workers that we are cancelling GC.
-   cancel_gc(GCCause::_shenandoah_stop_vm);
+   // Step 1. Stop reporting on gc thread cpu utilization
+   mmu_tracker()->stop();
  
-   // Step 3. Wait until GC worker exits normally.
+   // Step 2. Wait until GC worker exits normally (this will cancel any ongoing GC).
    control_thread()->stop();
+ 
+   // Step 3. Shut down uncommit thread.
+   if (_uncommit_thread != nullptr) {
+     _uncommit_thread->stop();
+   }
  }
  
  void ShenandoahHeap::stw_unload_classes(bool full_gc) {
    if (!unload_classes()) return;
    ClassUnloadingContext ctx(_workers->active_workers(),

@@ -1833,11 +2226,11 @@
    // Resize and verify metaspace
    MetaspaceGC::compute_new_size();
    DEBUG_ONLY(MetaspaceUtils::verify();)
  }
  
- // Weak roots are either pre-evacuated (final mark) or updated (final updaterefs),
+ // Weak roots are either pre-evacuated (final mark) or updated (final update refs),
  // so they should not have forwarded oops.
  // However, we do need to "null" dead oops in the roots, if can not be done
  // in concurrent cycles.
  void ShenandoahHeap::stw_process_weak_roots(bool full_gc) {
    uint num_workers = _workers->active_workers();

@@ -1877,11 +2270,11 @@
    stw_process_weak_roots(full_gc);
    stw_unload_classes(full_gc);
  }
  
  void ShenandoahHeap::set_has_forwarded_objects(bool cond) {
-   set_gc_state(HAS_FORWARDED, cond);
+   set_gc_state_at_safepoint(HAS_FORWARDED, cond);
  }
  
  void ShenandoahHeap::set_unload_classes(bool uc) {
    _unload_classes.set_cond(uc);
  }

@@ -1894,16 +2287,17 @@
    ShenandoahHeap* heap = ShenandoahHeap::heap();
    assert(heap->collection_set() != nullptr, "Sanity");
    return (address) heap->collection_set()->biased_map_address();
  }
  
- size_t ShenandoahHeap::bytes_allocated_since_gc_start() {
-   return Atomic::load(&_bytes_allocated_since_gc_start);
- }
- 
  void ShenandoahHeap::reset_bytes_allocated_since_gc_start() {
-   Atomic::store(&_bytes_allocated_since_gc_start, (size_t)0);
+   if (mode()->is_generational()) {
+     young_generation()->reset_bytes_allocated_since_gc_start();
+     old_generation()->reset_bytes_allocated_since_gc_start();
+   }
+ 
+   global_generation()->reset_bytes_allocated_since_gc_start();
  }
  
  void ShenandoahHeap::set_degenerated_gc_in_progress(bool in_progress) {
    _degenerated_gc_in_progress.set_cond(in_progress);
  }

@@ -1916,11 +2310,11 @@
    assert (is_full_gc_in_progress(), "should be");
    _full_gc_move_in_progress.set_cond(in_progress);
  }
  
  void ShenandoahHeap::set_update_refs_in_progress(bool in_progress) {
-   set_gc_state(UPDATEREFS, in_progress);
+   set_gc_state_at_safepoint(UPDATE_REFS, in_progress);
  }
  
  void ShenandoahHeap::register_nmethod(nmethod* nm) {
    ShenandoahCodeRoots::register_nmethod(nm);
  }

@@ -1963,12 +2357,15 @@
  
  #ifdef ASSERT
  void ShenandoahHeap::assert_pinned_region_status() {
    for (size_t i = 0; i < num_regions(); i++) {
      ShenandoahHeapRegion* r = get_region(i);
-     assert((r->is_pinned() && r->pin_count() > 0) || (!r->is_pinned() && r->pin_count() == 0),
-            "Region " SIZE_FORMAT " pinning status is inconsistent", i);
+     shenandoah_assert_generations_reconciled();
+     if (gc_generation()->contains(r)) {
+       assert((r->is_pinned() && r->pin_count() > 0) || (!r->is_pinned() && r->pin_count() == 0),
+              "Region " SIZE_FORMAT " pinning status is inconsistent", i);
+     }
    }
  }
  #endif
  
  ConcurrentGCTimer* ShenandoahHeap::gc_timer() const {

@@ -1996,23 +2393,17 @@
  #ifdef ASSERT
  void ShenandoahHeap::assert_gc_workers(uint nworkers) {
    assert(nworkers > 0 && nworkers <= max_workers(), "Sanity");
  
    if (ShenandoahSafepoint::is_at_shenandoah_safepoint()) {
-     if (UseDynamicNumberOfGCThreads) {
-       assert(nworkers <= ParallelGCThreads, "Cannot use more than it has");
-     } else {
-       // Use ParallelGCThreads inside safepoints
-       assert(nworkers == ParallelGCThreads, "Use ParallelGCThreads within safepoints");
-     }
+     // Use ParallelGCThreads inside safepoints
+     assert(nworkers == ParallelGCThreads, "Use ParallelGCThreads (%u) within safepoint, not %u",
+            ParallelGCThreads, nworkers);
    } else {
-     if (UseDynamicNumberOfGCThreads) {
-       assert(nworkers <= ConcGCThreads, "Cannot use more than it has");
-     } else {
-       // Use ConcGCThreads outside safepoints
-       assert(nworkers == ConcGCThreads, "Use ConcGCThreads outside safepoints");
-     }
+     // Use ConcGCThreads outside safepoints
+     assert(nworkers == ConcGCThreads, "Use ConcGCThreads (%u) outside safepoints, %u",
+            ConcGCThreads, nworkers);
    }
  }
  #endif
  
  ShenandoahVerifier* ShenandoahHeap::verifier() {

@@ -2025,41 +2416,52 @@
  class ShenandoahUpdateHeapRefsTask : public WorkerTask {
  private:
    ShenandoahHeap* _heap;
    ShenandoahRegionIterator* _regions;
  public:
-   ShenandoahUpdateHeapRefsTask(ShenandoahRegionIterator* regions) :
+   explicit ShenandoahUpdateHeapRefsTask(ShenandoahRegionIterator* regions) :
      WorkerTask("Shenandoah Update References"),
      _heap(ShenandoahHeap::heap()),
      _regions(regions) {
    }
  
    void work(uint worker_id) {
      if (CONCURRENT) {
        ShenandoahConcurrentWorkerSession worker_session(worker_id);
        ShenandoahSuspendibleThreadSetJoiner stsj;
-       do_work<ShenandoahConcUpdateRefsClosure>();
+       do_work<ShenandoahConcUpdateRefsClosure>(worker_id);
      } else {
        ShenandoahParallelWorkerSession worker_session(worker_id);
-       do_work<ShenandoahSTWUpdateRefsClosure>();
+       do_work<ShenandoahSTWUpdateRefsClosure>(worker_id);
      }
    }
  
  private:
    template<class T>
-   void do_work() {
+   void do_work(uint worker_id) {
+     if (CONCURRENT && (worker_id == 0)) {
+       // We ask the first worker to replenish the Mutator free set by moving regions previously reserved to hold the
+       // results of evacuation.  These reserves are no longer necessary because evacuation has completed.
+       size_t cset_regions = _heap->collection_set()->count();
+ 
+       // Now that evacuation is done, we can reassign any regions that had been reserved to hold the results of evacuation
+       // to the mutator free set.  At the end of GC, we will have cset_regions newly evacuated fully empty regions from
+       // which we will be able to replenish the Collector free set and the OldCollector free set in preparation for the
+       // next GC cycle.
+       _heap->free_set()->move_regions_from_collector_to_mutator(cset_regions);
+     }
+     // If !CONCURRENT, there's no value in expanding Mutator free set
      T cl;
      ShenandoahHeapRegion* r = _regions->next();
-     ShenandoahMarkingContext* const ctx = _heap->complete_marking_context();
      while (r != nullptr) {
        HeapWord* update_watermark = r->get_update_watermark();
        assert (update_watermark >= r->bottom(), "sanity");
        if (r->is_active() && !r->is_cset()) {
          _heap->marked_object_oop_iterate(r, &cl, update_watermark);
-       }
-       if (ShenandoahPacing) {
-         _heap->pacer()->report_updaterefs(pointer_delta(update_watermark, r->bottom()));
+         if (ShenandoahPacing) {
+           _heap->pacer()->report_update_refs(pointer_delta(update_watermark, r->bottom()));
+         }
        }
        if (_heap->check_cancelled_gc_and_yield(CONCURRENT)) {
          return;
        }
        r = _regions->next();

@@ -2077,50 +2479,20 @@
      ShenandoahUpdateHeapRefsTask<false> task(&_update_refs_iterator);
      workers()->run_task(&task);
    }
  }
  
- 
- class ShenandoahFinalUpdateRefsUpdateRegionStateClosure : public ShenandoahHeapRegionClosure {
- private:
-   ShenandoahHeapLock* const _lock;
- 
- public:
-   ShenandoahFinalUpdateRefsUpdateRegionStateClosure() : _lock(ShenandoahHeap::heap()->lock()) {}
- 
-   void heap_region_do(ShenandoahHeapRegion* r) {
-     // Drop unnecessary "pinned" state from regions that does not have CP marks
-     // anymore, as this would allow trashing them.
- 
-     if (r->is_active()) {
-       if (r->is_pinned()) {
-         if (r->pin_count() == 0) {
-           ShenandoahHeapLocker locker(_lock);
-           r->make_unpinned();
-         }
-       } else {
-         if (r->pin_count() > 0) {
-           ShenandoahHeapLocker locker(_lock);
-           r->make_pinned();
-         }
-       }
-     }
-   }
- 
-   bool is_thread_safe() { return true; }
- };
- 
  void ShenandoahHeap::update_heap_region_states(bool concurrent) {
    assert(SafepointSynchronize::is_at_safepoint(), "Must be at a safepoint");
    assert(!is_full_gc_in_progress(), "Only for concurrent and degenerated GC");
  
    {
      ShenandoahGCPhase phase(concurrent ?
                              ShenandoahPhaseTimings::final_update_refs_update_region_states :
                              ShenandoahPhaseTimings::degen_gc_final_update_refs_update_region_states);
-     ShenandoahFinalUpdateRefsUpdateRegionStateClosure cl;
-     parallel_heap_region_iterate(&cl);
+ 
+     final_update_refs_update_region_states();
  
      assert_pinned_region_status();
    }
  
    {

@@ -2129,17 +2501,58 @@
                              ShenandoahPhaseTimings::degen_gc_final_update_refs_trash_cset);
      trash_cset_regions();
    }
  }
  
+ void ShenandoahHeap::final_update_refs_update_region_states() {
+   ShenandoahSynchronizePinnedRegionStates cl;
+   parallel_heap_region_iterate(&cl);
+ }
+ 
  void ShenandoahHeap::rebuild_free_set(bool concurrent) {
-   {
-     ShenandoahGCPhase phase(concurrent ?
-                             ShenandoahPhaseTimings::final_update_refs_rebuild_freeset :
-                             ShenandoahPhaseTimings::degen_gc_final_update_refs_rebuild_freeset);
-     ShenandoahHeapLocker locker(lock());
-     _free_set->rebuild();
+   ShenandoahGCPhase phase(concurrent ?
+                           ShenandoahPhaseTimings::final_update_refs_rebuild_freeset :
+                           ShenandoahPhaseTimings::degen_gc_final_update_refs_rebuild_freeset);
+   ShenandoahHeapLocker locker(lock());
+   size_t young_cset_regions, old_cset_regions;
+   size_t first_old_region, last_old_region, old_region_count;
+   _free_set->prepare_to_rebuild(young_cset_regions, old_cset_regions, first_old_region, last_old_region, old_region_count);
+   // If there are no old regions, first_old_region will be greater than last_old_region
+   assert((first_old_region > last_old_region) ||
+          ((last_old_region + 1 - first_old_region >= old_region_count) &&
+           get_region(first_old_region)->is_old() && get_region(last_old_region)->is_old()),
+          "sanity: old_region_count: " SIZE_FORMAT ", first_old_region: " SIZE_FORMAT ", last_old_region: " SIZE_FORMAT,
+          old_region_count, first_old_region, last_old_region);
+ 
+   if (mode()->is_generational()) {
+ #ifdef ASSERT
+     if (ShenandoahVerify) {
+       verifier()->verify_before_rebuilding_free_set();
+     }
+ #endif
+ 
+     // The computation of bytes_of_allocation_runway_before_gc_trigger is quite conservative so consider all of this
+     // available for transfer to old. Note that transfer of humongous regions does not impact available.
+     ShenandoahGenerationalHeap* gen_heap = ShenandoahGenerationalHeap::heap();
+     size_t allocation_runway = gen_heap->young_generation()->heuristics()->bytes_of_allocation_runway_before_gc_trigger(young_cset_regions);
+     gen_heap->compute_old_generation_balance(allocation_runway, old_cset_regions);
+ 
+     // Total old_available may have been expanded to hold anticipated promotions.  We trigger if the fragmented available
+     // memory represents more than 16 regions worth of data.  Note that fragmentation may increase when we promote regular
+     // regions in place when many of these regular regions have an abundant amount of available memory within them.  Fragmentation
+     // will decrease as promote-by-copy consumes the available memory within these partially consumed regions.
+     //
+     // We consider old-gen to have excessive fragmentation if more than 12.5% of old-gen is free memory that resides
+     // within partially consumed regions of memory.
+   }
+   // Rebuild free set based on adjusted generation sizes.
+   _free_set->finish_rebuild(young_cset_regions, old_cset_regions, old_region_count);
+ 
+   if (mode()->is_generational()) {
+     ShenandoahGenerationalHeap* gen_heap = ShenandoahGenerationalHeap::heap();
+     ShenandoahOldGeneration* old_gen = gen_heap->old_generation();
+     old_gen->heuristics()->evaluate_triggers(first_old_region, last_old_region, old_region_count, num_regions());
    }
  }
  
  void ShenandoahHeap::print_extended_on(outputStream *st) const {
    print_on(st);

@@ -2201,11 +2614,11 @@
      return true;
    }
  
    if (is_bitmap_slice_committed(r, true)) {
      // Some other region from the group is still committed, meaning the bitmap
-     // slice is should stay committed, exit right away.
+     // slice should stay committed, exit right away.
      return true;
    }
  
    // Uncommit the bitmap slice:
    size_t slice = r->index() / _bitmap_regions_per_slice;

@@ -2215,26 +2628,39 @@
      return false;
    }
    return true;
  }
  
+ void ShenandoahHeap::forbid_uncommit() {
+   if (_uncommit_thread != nullptr) {
+     _uncommit_thread->forbid_uncommit();
+   }
+ }
+ 
+ void ShenandoahHeap::allow_uncommit() {
+   if (_uncommit_thread != nullptr) {
+     _uncommit_thread->allow_uncommit();
+   }
+ }
+ 
+ #ifdef ASSERT
+ bool ShenandoahHeap::is_uncommit_in_progress() {
+   if (_uncommit_thread != nullptr) {
+     return _uncommit_thread->is_uncommit_in_progress();
+   }
+   return false;
+ }
+ #endif
+ 
  void ShenandoahHeap::safepoint_synchronize_begin() {
    SuspendibleThreadSet::synchronize();
  }
  
  void ShenandoahHeap::safepoint_synchronize_end() {
    SuspendibleThreadSet::desynchronize();
  }
  
- void ShenandoahHeap::entry_uncommit(double shrink_before, size_t shrink_until) {
-   static const char *msg = "Concurrent uncommit";
-   ShenandoahConcurrentPhase gc_phase(msg, ShenandoahPhaseTimings::conc_uncommit, true /* log_heap_usage */);
-   EventMark em("%s", msg);
- 
-   op_uncommit(shrink_before, shrink_until);
- }
- 
  void ShenandoahHeap::try_inject_alloc_failure() {
    if (ShenandoahAllocFailureALot && !cancelled_gc() && ((os::random() % 1000) > 950)) {
      _inject_alloc_failure.set();
      os::naked_short_sleep(1);
      if (cancelled_gc()) {

@@ -2265,11 +2691,11 @@
    memory_pools.append(_memory_pool);
    return memory_pools;
  }
  
  MemoryUsage ShenandoahHeap::memory_usage() {
-   return _memory_pool->get_memory_usage();
+   return MemoryUsage(_initial_size, used(), committed(), max_capacity());
  }
  
  ShenandoahRegionIterator::ShenandoahRegionIterator() :
    _heap(ShenandoahHeap::heap()),
    _index(0) {}

@@ -2288,10 +2714,18 @@
  
  char ShenandoahHeap::gc_state() const {
    return _gc_state.raw_value();
  }
  
+ bool ShenandoahHeap::is_gc_state(GCState state) const {
+   // If the global gc state has been changed, but hasn't yet been propagated to all threads, then
+   // the global gc state is the correct value. Once the gc state has been synchronized with all threads,
+   // _gc_state_changed will be toggled to false and we need to use the thread local state.
+   return _gc_state_changed ? _gc_state.is_set(state) : ShenandoahThreadLocalData::is_gc_state(state);
+ }
+ 
+ 
  ShenandoahLiveData* ShenandoahHeap::get_liveness_cache(uint worker_id) {
  #ifdef ASSERT
    assert(_liveness_cache != nullptr, "sanity");
    assert(worker_id < _max_workers, "sanity");
    for (uint i = 0; i < num_regions(); i++) {

@@ -2330,5 +2764,27 @@
      return true;
    }
  
    return false;
  }
+ 
+ ShenandoahGeneration* ShenandoahHeap::generation_for(ShenandoahAffiliation affiliation) const {
+   if (!mode()->is_generational()) {
+     return global_generation();
+   } else if (affiliation == YOUNG_GENERATION) {
+     return young_generation();
+   } else if (affiliation == OLD_GENERATION) {
+     return old_generation();
+   }
+ 
+   ShouldNotReachHere();
+   return nullptr;
+ }
+ 
+ void ShenandoahHeap::log_heap_status(const char* msg) const {
+   if (mode()->is_generational()) {
+     young_generation()->log_status(msg);
+     old_generation()->log_status(msg);
+   } else {
+     global_generation()->log_status(msg);
+   }
+ }

< prev index next >