/*
 * Copyright (c) 2018, 2019, Red Hat, Inc. All rights reserved.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"

#include "gc_implementation/shenandoah/shenandoahFreeSet.hpp"
#include "gc_implementation/shenandoah/shenandoahHeap.inline.hpp"
#include "gc_implementation/shenandoah/shenandoahPacer.hpp"
#include "gc_implementation/shenandoah/shenandoahPhaseTimings.hpp"
#include "runtime/mutexLocker.hpp"

/*
 * In a normal concurrent cycle, we have to pace the application to let GC finish.
 *
 * Here, we do not know how large the collection set would be, or what the relative
 * performance of each stage in the concurrent cycle is, and so we have to make some
 * assumptions.
 *
 * For concurrent mark, there is no clear notion of progress. The moderately accurate
 * and easy-to-get metric is the amount of live objects the mark has encountered. But
 * that does not directly correlate with the used heap, because the heap might be fully
 * dead or fully alive. We cannot assume either of the extremes: we would either allow
 * the application to run out of memory if we assume the heap is fully dead but it is not,
 * or, conversely, we would pacify the application excessively if we assume the heap is
 * fully alive but it is not. So we need to guesstimate the expected value for heap
 * liveness. The best way to do this is to record and use the past history.
 *
 * For concurrent evac and update-refs, we are walking the heap per-region, and so the
 * notion of progress is clear: we get reported the "used" size from the processed regions
 * and use the global heap-used as the baseline.
 *
 * The allocatable space when GC is running is "free" at the start of the phase, but the
 * accounted budget is based on "used". So we need to adjust the tax knowing that.
 */
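
// Worked example with illustrative numbers (not measured defaults): if the expected live
// data is 300M, free space is 1000M, and ShenandoahPacingCycleSlack is 10, then
// non_taxable = 100M, taxable = 900M, and the base tax rate is 300/900 = 0.33x before the
// ShenandoahPacingSurcharge multiplier. An allocation of N words is then charged roughly
// N * 0.33 words against the pacing budget, which GC progress replenishes.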

void ShenandoahPacer::setup_for_mark() {
  assert(ShenandoahPacing, "Only be here when pacing is enabled");

  size_t live = update_and_get_progress_history();
  size_t free = _heap->free_set()->available();

  size_t non_taxable = free * ShenandoahPacingCycleSlack / 100;
  size_t taxable = free - non_taxable;

  double tax = 1.0 * live / taxable; // base tax for available free space
  tax *= 1;                          // mark can succeed with immediate garbage, claim all available space
  tax *= ShenandoahPacingSurcharge;  // additional surcharge to help unclutter heap

  restart_with(non_taxable, tax);

  log_info(gc, ergo)("Pacer for Mark. Expected Live: " SIZE_FORMAT "%s, Free: " SIZE_FORMAT "%s, "
                     "Non-Taxable: " SIZE_FORMAT "%s, Alloc Tax Rate: %.1fx",
                     byte_size_in_proper_unit(live),        proper_unit_for_byte_size(live),
                     byte_size_in_proper_unit(free),        proper_unit_for_byte_size(free),
                     byte_size_in_proper_unit(non_taxable), proper_unit_for_byte_size(non_taxable),
                     tax);
}

void ShenandoahPacer::setup_for_evac() {
  assert(ShenandoahPacing, "Only be here when pacing is enabled");

  size_t used = _heap->collection_set()->used();
  size_t free = _heap->free_set()->available();

  size_t non_taxable = free * ShenandoahPacingCycleSlack / 100;
  size_t taxable = free - non_taxable;

  double tax = 1.0 * used / taxable; // base tax for available free space
  tax *= 2;                          // evac is followed by update-refs, claim 1/2 of remaining free
  tax = MAX2<double>(1, tax);        // never allocate more than GC processes during the phase
  tax *= ShenandoahPacingSurcharge;  // additional surcharge to help unclutter heap

  restart_with(non_taxable, tax);

  log_info(gc, ergo)("Pacer for Evacuation. Used CSet: " SIZE_FORMAT "%s, Free: " SIZE_FORMAT "%s, "
                     "Non-Taxable: " SIZE_FORMAT "%s, Alloc Tax Rate: %.1fx",
                     byte_size_in_proper_unit(used),        proper_unit_for_byte_size(used),
                     byte_size_in_proper_unit(free),        proper_unit_for_byte_size(free),
                     byte_size_in_proper_unit(non_taxable), proper_unit_for_byte_size(non_taxable),
                     tax);
}

void ShenandoahPacer::setup_for_updaterefs() {
  assert(ShenandoahPacing, "Only be here when pacing is enabled");

  size_t used = _heap->used();
  size_t free = _heap->free_set()->available();

  size_t non_taxable = free * ShenandoahPacingCycleSlack / 100;
  size_t taxable = free - non_taxable;

  double tax = 1.0 * used / taxable; // base tax for available free space
  tax *= 1;                          // update-refs is the last phase, claim the remaining free
  tax = MAX2<double>(1, tax);        // never allocate more than GC processes during the phase
  tax *= ShenandoahPacingSurcharge;  // additional surcharge to help unclutter heap

  restart_with(non_taxable, tax);

  log_info(gc, ergo)("Pacer for Update Refs. Used: " SIZE_FORMAT "%s, Free: " SIZE_FORMAT "%s, "
                     "Non-Taxable: " SIZE_FORMAT "%s, Alloc Tax Rate: %.1fx",
                     byte_size_in_proper_unit(used),        proper_unit_for_byte_size(used),
                     byte_size_in_proper_unit(free),        proper_unit_for_byte_size(free),
                     byte_size_in_proper_unit(non_taxable), proper_unit_for_byte_size(non_taxable),
                     tax);
}

/*
 * In the idle phase, we have to pace the application to let the control thread react
 * and start GC.
 *
 * Here, we rendezvous with the concurrent thread that adds up the budget as it acknowledges
 * it has seen recent allocations. This naturally paces the allocations if the control thread
 * is not catching up. To bootstrap this feedback cycle, we need to start with some initial
 * budget for the application to allocate against.
 */
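
// For example (illustrative values): with a 4G max heap and ShenandoahPacingIdleSlack = 2,
// the initial idle budget is about 82M (4096M * 2 / 100), taxed at 1.0x.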

void ShenandoahPacer::setup_for_idle() {
  assert(ShenandoahPacing, "Only be here when pacing is enabled");

  size_t initial = _heap->max_capacity() / 100 * ShenandoahPacingIdleSlack;
  double tax = 1;

  restart_with(initial, tax);

  log_info(gc, ergo)("Pacer for Idle. Initial: " SIZE_FORMAT "%s, Alloc Tax Rate: %.1fx",
                     byte_size_in_proper_unit(initial), proper_unit_for_byte_size(initial),
                     tax);
}

/*
 * There is no useful notion of progress for these operations. To avoid stalling
 * the allocators unnecessarily, allow them to run unimpeded.
 */

void ShenandoahPacer::setup_for_preclean() {
  assert(ShenandoahPacing, "Only be here when pacing is enabled");

  size_t initial = _heap->max_capacity();
  restart_with(initial, 1.0);

  log_info(gc, ergo)("Pacer for Precleaning. Non-Taxable: " SIZE_FORMAT "%s",
                     byte_size_in_proper_unit(initial), proper_unit_for_byte_size(initial));
}

void ShenandoahPacer::setup_for_reset() {
  assert(ShenandoahPacing, "Only be here when pacing is enabled");

  size_t initial = _heap->max_capacity();
  restart_with(initial, 1.0);

  log_info(gc, ergo)("Pacer for Reset. Non-Taxable: " SIZE_FORMAT "%s",
                     byte_size_in_proper_unit(initial), proper_unit_for_byte_size(initial));
}

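// Fold the mark progress recorded since the last cycle into the history, reset the
// progress counter, and return the historical average (in bytes) as the expected
// liveness for the next cycle. On the very first call, report 10% of max capacity
// as the prior estimate.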
size_t ShenandoahPacer::update_and_get_progress_history() {
  if (_progress == -1) {
    // First initialization, report some prior
    Atomic::store_ptr((intptr_t)PACING_PROGRESS_ZERO, &_progress);
    return (size_t) (_heap->max_capacity() * 0.1);
  } else {
    // Record history, and return historical data
    _progress_history->add(_progress);
    Atomic::store_ptr((intptr_t)PACING_PROGRESS_ZERO, &_progress);
    return (size_t) (_progress_history->avg() * HeapWordSize);
  }
}

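// Start a new pacing epoch for the upcoming phase: publish the initial budget (in heap
// words, derived from the non-taxable bytes and the tax rate), publish the new tax rate,
// bump the epoch so that claims from the previous phase become stale, and request that
// stalled waiters be notified.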
void ShenandoahPacer::restart_with(jlong non_taxable_bytes, jdouble tax_rate) {
  STATIC_ASSERT(sizeof(size_t) <= sizeof(intptr_t));
  {
    intptr_t initial = (size_t) (non_taxable_bytes * tax_rate) >> LogHeapWordSize;
    intptr_t cur;
    do {
      cur = OrderAccess::load_acquire(&_budget);
    } while (Atomic::cmpxchg_ptr(initial, &_budget, cur) != cur);
  }

  OrderAccess::release_store(&_tax_rate, tax_rate);

  {
    intptr_t cur, val;
    do {
      cur = OrderAccess::load_acquire(&_epoch);
      val = cur + 1;
    } while (Atomic::cmpxchg_ptr(val, &_epoch, cur) != cur);
  }

  // Shake up stalled waiters after budget update.
  _need_notify_waiters.try_set();
}

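// Try to claim pacing budget for an allocation of "words" heap words. The charge is the
// allocation size scaled by the current tax rate. Returns false if the remaining budget
// cannot cover the charge, unless "force" is set, in which case the budget may go
// negative and the claim always succeeds.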
bool ShenandoahPacer::claim_for_alloc(size_t words, bool force) {
  assert(ShenandoahPacing, "Only be here when pacing is enabled");

  intptr_t tax = MAX2<intptr_t>(1, (intptr_t)(words * OrderAccess::load_acquire(&_tax_rate)));

  intptr_t cur = 0;
  intptr_t new_val = 0;
  do {
    cur = OrderAccess::load_acquire(&_budget);
    if (cur < tax && !force) {
      // Progress depleted, alas.
      return false;
    }
    new_val = cur - tax;
  } while (Atomic::cmpxchg_ptr(new_val, &_budget, cur) != cur);
  return true;
}

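// Return previously claimed pacing budget, typically when the paced allocation did not
// actually happen. Claims carrying a stale epoch are ignored, because the budget has
// already been reset for a new phase.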
void ShenandoahPacer::unpace_for_alloc(intptr_t epoch, size_t words) {
  assert(ShenandoahPacing, "Only be here when pacing is enabled");

  if (_epoch != epoch) {
    // Stale ticket, no need to unpace.
    return;
  }

  size_t tax = MAX2<size_t>(1, words * OrderAccess::load_acquire(&_tax_rate));
  add_budget(tax);
}

intptr_t ShenandoahPacer::epoch() {
  return OrderAccess::load_acquire(&_epoch);
}

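// Pace the current thread for an allocation of "words" heap words: take the budget if it
// is available, otherwise force the claim and stall the thread until either GC progress
// replenishes the budget or ShenandoahPacingMaxDelay expires.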
void ShenandoahPacer::pace_for_alloc(size_t words) {
  assert(ShenandoahPacing, "Only be here when pacing is enabled");

  // Fast path: try to allocate right away
  bool claimed = claim_for_alloc(words, false);
  if (claimed) {
    return;
  }

  // Forcefully claim the budget: it may go negative at this point, and
  // GC should replenish for this and subsequent allocations. After this claim,
  // we would wait a bit until our claim is matched by additional progress,
  // or the time budget depletes.
  claimed = claim_for_alloc(words, true);
  assert(claimed, "Should always succeed");

  // Threads that are attaching should not block at all: they are not
  // fully initialized yet. Blocking them would be awkward.
  // This is probably the path that allocates the thread oop itself.
  if (JavaThread::current()->is_attaching_via_jni()) {
    return;
  }

  double start = os::elapsedTime();

  size_t max_ms = ShenandoahPacingMaxDelay;
  size_t total_ms = 0;

  while (true) {
    // We could instead assist GC, but this would suffice for now.
    size_t cur_ms = (max_ms > total_ms) ? (max_ms - total_ms) : 1;
    wait(cur_ms);

    double end = os::elapsedTime();
    total_ms = (size_t)((end - start) * 1000);

    if (total_ms > max_ms || OrderAccess::load_ptr_acquire(&_budget) >= 0) {
      // Exiting if either:
      //  a) Spent local time budget to wait for enough GC progress.
      //     Breaking out and allocating anyway, which may mean we outpace GC,
      //     and start Degenerated GC cycle.
      //  b) The budget had been replenished, which means our claim is satisfied.
      JavaThread::current()->add_paced_time(end - start);
      break;
    }
  }
}

void ShenandoahPacer::wait(size_t time_ms) {
  // Perform timed wait. It works like sleep(), except without modifying
  // the thread's interruptible status. MonitorLockerEx also checks for safepoints.
  assert(time_ms > 0, "Should not call this with zero argument, as it would stall until notify");
  assert(time_ms <= LONG_MAX, "Sanity");
  MonitorLockerEx locker(_wait_monitor);
  _wait_monitor->wait(!Mutex::_no_safepoint_check_flag, (long)time_ms);
}

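// Wake up threads stalled in wait(), if a budget update has requested a notification
// since the last time; otherwise avoid taking the monitor at all.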
void ShenandoahPacer::notify_waiters() {
  if (_need_notify_waiters.try_unset()) {
    MonitorLockerEx locker(_wait_monitor);
    _wait_monitor->notify_all();
  }
}

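// Sum the pacing delays accrued by all Java threads and record the total as the pacing
// phase time for the current GC cycle.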
void ShenandoahPacer::flush_stats_to_cycle() {
  MutexLocker lock(Threads_lock);

  double sum = 0;
  for (JavaThread* t = Threads::first(); t != NULL; t = t->next()) {
    sum += t->paced_time();
  }
  ShenandoahHeap::heap()->phase_timings()->record_phase_time(ShenandoahPhaseTimings::pacing, sum);
}

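// Print the pacing delays accrued since the last report: a per-thread breakdown for the
// threads that were actually paced, followed by the total and averages. Per-thread
// counters are reset as a side effect.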
void ShenandoahPacer::print_cycle_on(outputStream* out) {
  MutexLocker lock(Threads_lock);

  double now = os::elapsedTime();
  double total = now - _last_time;
  _last_time = now;

  out->cr();
  out->print_cr("Allocation pacing accrued:");

  size_t threads_total = 0;
  size_t threads_nz = 0;
  double sum = 0;
  for (JavaThread* t = Threads::first(); t != NULL; t = t->next()) {
    double d = t->paced_time();
    if (d > 0) {
      threads_nz++;
      sum += d;
      out->print_cr("  %5.0f of %5.0f ms (%5.1f%%): %s",
              d * 1000, total * 1000, d/total*100, t->name());
    }
    threads_total++;
    t->reset_paced_time();
  }
  out->print_cr("  %5.0f of %5.0f ms (%5.1f%%): <total>",
          sum * 1000, total * 1000, sum/total*100);

  if (threads_total > 0) {
    out->print_cr("  %5.0f of %5.0f ms (%5.1f%%): <average total>",
            sum / threads_total * 1000, total * 1000, sum / threads_total / total * 100);
  }
  if (threads_nz > 0) {
    out->print_cr("  %5.0f of %5.0f ms (%5.1f%%): <average non-zero>",
            sum / threads_nz * 1000, total * 1000, sum / threads_nz / total * 100);
  }
  out->cr();
}