1 /* 2 * Copyright (c) 2018, 2019, Red Hat, Inc. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #include "precompiled.hpp" 26 27 #include "gc/shenandoah/heuristics/shenandoahAdaptiveHeuristics.hpp" 28 #include "gc/shenandoah/shenandoahCollectionSet.hpp" 29 #include "gc/shenandoah/shenandoahFreeSet.hpp" 30 #include "gc/shenandoah/shenandoahGeneration.hpp" 31 #include "gc/shenandoah/shenandoahHeapRegion.inline.hpp" 32 #include "gc/shenandoah/shenandoahYoungGeneration.hpp" 33 #include "logging/log.hpp" 34 #include "logging/logTag.hpp" 35 #include "utilities/quickSort.hpp" 36 37 // These constants are used to adjust the margin of error for the moving 38 // average of the allocation rate and cycle time. The units are standard 39 // deviations. 40 const double ShenandoahAdaptiveHeuristics::FULL_PENALTY_SD = 0.2; 41 const double ShenandoahAdaptiveHeuristics::DEGENERATE_PENALTY_SD = 0.1; 42 43 // These are used to decide if we want to make any adjustments at all 44 // at the end of a successful concurrent cycle. 45 const double ShenandoahAdaptiveHeuristics::LOWEST_EXPECTED_AVAILABLE_AT_END = -0.5; 46 const double ShenandoahAdaptiveHeuristics::HIGHEST_EXPECTED_AVAILABLE_AT_END = 0.5; 47 48 // These values are the confidence interval expressed as standard deviations. 49 // At the minimum confidence level, there is a 25% chance that the true value of 50 // the estimate (average cycle time or allocation rate) is not more than 51 // MINIMUM_CONFIDENCE standard deviations away from our estimate. Similarly, the 52 // MAXIMUM_CONFIDENCE interval here means there is a one in a thousand chance 53 // that the true value of our estimate is outside the interval. These are used 54 // as bounds on the adjustments applied at the outcome of a GC cycle. 55 const double ShenandoahAdaptiveHeuristics::MINIMUM_CONFIDENCE = 0.319; // 25% 56 const double ShenandoahAdaptiveHeuristics::MAXIMUM_CONFIDENCE = 3.291; // 99.9% 57 58 const uint ShenandoahAdaptiveHeuristics::MINIMUM_RESIZE_INTERVAL = 10; 59 60 ShenandoahAdaptiveHeuristics::ShenandoahAdaptiveHeuristics(ShenandoahGeneration* generation) : 61 ShenandoahHeuristics(generation), 62 _margin_of_error_sd(ShenandoahAdaptiveInitialConfidence), 63 _spike_threshold_sd(ShenandoahAdaptiveInitialSpikeThreshold), 64 _last_trigger(OTHER), 65 _available(Moving_Average_Samples, ShenandoahAdaptiveDecayFactor) { } 66 67 ShenandoahAdaptiveHeuristics::~ShenandoahAdaptiveHeuristics() {} 68 69 void ShenandoahAdaptiveHeuristics::choose_collection_set_from_regiondata(ShenandoahCollectionSet* cset, 70 RegionData* data, size_t size, 71 size_t actual_free) { 72 size_t garbage_threshold = ShenandoahHeapRegion::region_size_bytes() * ShenandoahGarbageThreshold / 100; 73 size_t ignore_threshold = ShenandoahHeapRegion::region_size_bytes() * ShenandoahIgnoreGarbageThreshold / 100; 74 ShenandoahHeap* heap = ShenandoahHeap::heap(); 75 76 // The logic for cset selection in adaptive is as follows: 77 // 78 // 1. We cannot get cset larger than available free space. Otherwise we guarantee OOME 79 // during evacuation, and thus guarantee full GC. In practice, we also want to let 80 // application to allocate something. This is why we limit CSet to some fraction of 81 // available space. In non-overloaded heap, max_cset would contain all plausible candidates 82 // over garbage threshold. 83 // 84 // 2. We should not get cset too low so that free threshold would not be met right 85 // after the cycle. Otherwise we get back-to-back cycles for no reason if heap is 86 // too fragmented. In non-overloaded non-fragmented heap min_garbage would be around zero. 87 // 88 // Therefore, we start by sorting the regions by garbage. Then we unconditionally add the best candidates 89 // before we meet min_garbage. Then we add all candidates that fit with a garbage threshold before 90 // we hit max_cset. When max_cset is hit, we terminate the cset selection. Note that in this scheme, 91 // ShenandoahGarbageThreshold is the soft threshold which would be ignored until min_garbage is hit. 92 93 // In generational mode, the sort order within the data array is not strictly descending amounts of garbage. In 94 // particular, regions that have reached tenure age will be sorted into this array before younger regions that contain 95 // more garbage. This represents one of the reasons why we keep looking at regions even after we decide, for example, 96 // to exclude one of the regions because it might require evacuation of too much live data. 97 bool is_generational = heap->mode()->is_generational(); 98 bool is_global = (_generation->generation_mode() == GLOBAL); 99 size_t capacity = heap->young_generation()->max_capacity(); 100 101 // cur_young_garbage represents the amount of memory to be reclaimed from young-gen. In the case that live objects 102 // are known to be promoted out of young-gen, we count this as cur_young_garbage because this memory is reclaimed 103 // from young-gen and becomes available to serve future young-gen allocation requests. 104 size_t cur_young_garbage = 0; 105 106 // Better select garbage-first regions 107 QuickSort::sort<RegionData>(data, (int)size, compare_by_garbage, false); 108 109 if (is_generational) { 110 if (is_global) { 111 size_t max_young_cset = (size_t) (heap->get_young_evac_reserve() / ShenandoahEvacWaste); 112 size_t young_cur_cset = 0; 113 size_t max_old_cset = (size_t) (heap->get_old_evac_reserve() / ShenandoahEvacWaste); 114 size_t old_cur_cset = 0; 115 size_t free_target = (capacity * ShenandoahMinFreeThreshold) / 100 + max_young_cset; 116 size_t min_garbage = (free_target > actual_free) ? (free_target - actual_free) : 0; 117 118 log_info(gc, ergo)("Adaptive CSet Selection for GLOBAL. Max Young Evacuation: " SIZE_FORMAT 119 "%s, Max Old Evacuation: " SIZE_FORMAT "%s, Actual Free: " SIZE_FORMAT "%s.", 120 byte_size_in_proper_unit(max_young_cset), proper_unit_for_byte_size(max_young_cset), 121 byte_size_in_proper_unit(max_old_cset), proper_unit_for_byte_size(max_old_cset), 122 byte_size_in_proper_unit(actual_free), proper_unit_for_byte_size(actual_free)); 123 124 for (size_t idx = 0; idx < size; idx++) { 125 ShenandoahHeapRegion* r = data[idx]._region; 126 bool add_region = false; 127 if (r->is_old()) { 128 size_t new_cset = old_cur_cset + r->get_live_data_bytes(); 129 if ((new_cset <= max_old_cset) && (r->garbage() > garbage_threshold)) { 130 add_region = true; 131 old_cur_cset = new_cset; 132 } 133 } else if (cset->is_preselected(r->index())) { 134 assert(r->age() >= InitialTenuringThreshold, "Preselected regions must have tenure age"); 135 // Entire region will be promoted, This region does not impact young-gen or old-gen evacuation reserve. 136 // This region has been pre-selected and its impact on promotion reserve is already accounted for. 137 add_region = true; 138 // r->used() is r->garbage() + r->get_live_data_bytes() 139 // Since all live data in this region is being evacuated from young-gen, it is as if this memory 140 // is garbage insofar as young-gen is concerned. Counting this as garbage reduces the need to 141 // reclaim highly utilized young-gen regions just for the sake of finding min_garbage to reclaim 142 // within youn-gen memory. 143 cur_young_garbage += r->used(); 144 } else if (r->age() < InitialTenuringThreshold) { 145 size_t new_cset = young_cur_cset + r->get_live_data_bytes(); 146 size_t region_garbage = r->garbage(); 147 size_t new_garbage = cur_young_garbage + region_garbage; 148 bool add_regardless = (region_garbage > ignore_threshold) && (new_garbage < min_garbage); 149 if ((new_cset <= max_young_cset) && (add_regardless || (region_garbage > garbage_threshold))) { 150 add_region = true; 151 young_cur_cset = new_cset; 152 cur_young_garbage = new_garbage; 153 } 154 } 155 // Note that we do not add aged regions if they were not pre-selected. The reason they were not preselected 156 // is because there is not sufficient room in old-gen to hold their to-be-promoted live objects. 157 158 if (add_region) { 159 cset->add_region(r); 160 } 161 } 162 } else { 163 // This is young-gen collection or a mixed evacuation. If this is mixed evacuation, the old-gen candidate regions 164 // have already been added. 165 size_t max_cset = (size_t) (heap->get_young_evac_reserve() / ShenandoahEvacWaste); 166 size_t cur_cset = 0; 167 size_t free_target = (capacity * ShenandoahMinFreeThreshold) / 100 + max_cset; 168 size_t min_garbage = (free_target > actual_free) ? (free_target - actual_free) : 0; 169 170 log_info(gc, ergo)("Adaptive CSet Selection for YOUNG. Max Evacuation: " SIZE_FORMAT "%s, Actual Free: " SIZE_FORMAT "%s.", 171 byte_size_in_proper_unit(max_cset), proper_unit_for_byte_size(max_cset), 172 byte_size_in_proper_unit(actual_free), proper_unit_for_byte_size(actual_free)); 173 174 for (size_t idx = 0; idx < size; idx++) { 175 ShenandoahHeapRegion* r = data[idx]._region; 176 bool add_region = false; 177 178 if (!r->is_old()) { 179 if (cset->is_preselected(r->index())) { 180 assert(r->age() >= InitialTenuringThreshold, "Preselected regions must have tenure age"); 181 // Entire region will be promoted, This region does not impact young-gen evacuation reserve. Memory has already 182 // been set aside to hold evacuation results as advance_promotion_reserve. 183 add_region = true; 184 // Since all live data in this region is being evacuated from young-gen, it is as if this memory 185 // is garbage insofar as young-gen is concerned. Counting this as garbage reduces the need to 186 // reclaim highly utilized young-gen regions just for the sake of finding min_garbage to reclaim 187 // within youn-gen memory 188 cur_young_garbage += r->get_live_data_bytes(); 189 } else if (r->age() < InitialTenuringThreshold) { 190 size_t new_cset = cur_cset + r->get_live_data_bytes(); 191 size_t region_garbage = r->garbage(); 192 size_t new_garbage = cur_young_garbage + region_garbage; 193 bool add_regardless = (region_garbage > ignore_threshold) && (new_garbage < min_garbage); 194 if ((new_cset <= max_cset) && (add_regardless || (region_garbage > garbage_threshold))) { 195 add_region = true; 196 cur_cset = new_cset; 197 cur_young_garbage = new_garbage; 198 } 199 } 200 // Note that we do not add aged regions if they were not pre-selected. The reason they were not preselected 201 // is because there is not sufficient room in old-gen to hold their to-be-promoted live objects. 202 203 if (add_region) { 204 cset->add_region(r); 205 } 206 } 207 } 208 } 209 } else { 210 // Traditional Shenandoah (non-generational) 211 size_t capacity = ShenandoahHeap::heap()->soft_max_capacity(); 212 size_t max_cset = (size_t)((1.0 * capacity / 100 * ShenandoahEvacReserve) / ShenandoahEvacWaste); 213 size_t free_target = (capacity * ShenandoahMinFreeThreshold) / 100 + max_cset; 214 size_t min_garbage = (free_target > actual_free) ? (free_target - actual_free) : 0; 215 216 log_info(gc, ergo)("Adaptive CSet Selection. Max Evacuation: " SIZE_FORMAT "%s, Actual Free: " SIZE_FORMAT "%s.", 217 byte_size_in_proper_unit(max_cset), proper_unit_for_byte_size(max_cset), 218 byte_size_in_proper_unit(actual_free), proper_unit_for_byte_size(actual_free)); 219 220 size_t cur_cset = 0; 221 size_t cur_garbage = 0; 222 223 for (size_t idx = 0; idx < size; idx++) { 224 ShenandoahHeapRegion* r = data[idx]._region; 225 size_t new_cset = cur_cset + r->get_live_data_bytes(); 226 size_t region_garbage = r->garbage(); 227 size_t new_garbage = cur_garbage + region_garbage; 228 bool add_regardless = (region_garbage > ignore_threshold) && (new_garbage < min_garbage); 229 if ((new_cset <= max_cset) && (add_regardless || (region_garbage > garbage_threshold))) { 230 cset->add_region(r); 231 cur_cset = new_cset; 232 cur_garbage = new_garbage; 233 } 234 } 235 } 236 } 237 238 void ShenandoahAdaptiveHeuristics::record_cycle_start() { 239 ShenandoahHeuristics::record_cycle_start(); 240 _allocation_rate.allocation_counter_reset(); 241 ++_cycles_since_last_resize; 242 } 243 244 void ShenandoahAdaptiveHeuristics::record_success_concurrent(bool abbreviated) { 245 ShenandoahHeuristics::record_success_concurrent(abbreviated); 246 247 size_t available = MIN2(_generation->available(), ShenandoahHeap::heap()->free_set()->available()); 248 249 double z_score = 0.0; 250 double available_sd = _available.sd(); 251 if (available_sd > 0) { 252 double available_avg = _available.avg(); 253 z_score = (double(available) - available_avg) / available_sd; 254 log_debug(gc, ergo)("%s Available: " SIZE_FORMAT " %sB, z-score=%.3f. Average available: %.1f %sB +/- %.1f %sB.", 255 _generation->name(), 256 byte_size_in_proper_unit(available), proper_unit_for_byte_size(available), z_score, 257 byte_size_in_proper_unit(available_avg), proper_unit_for_byte_size(available_avg), 258 byte_size_in_proper_unit(available_sd), proper_unit_for_byte_size(available_sd)); 259 } 260 261 _available.add(double(available)); 262 263 // In the case when a concurrent GC cycle completes successfully but with an 264 // unusually small amount of available memory we will adjust our trigger 265 // parameters so that they are more likely to initiate a new cycle. 266 // Conversely, when a GC cycle results in an above average amount of available 267 // memory, we will adjust the trigger parameters to be less likely to initiate 268 // a GC cycle. 269 // 270 // The z-score we've computed is in no way statistically related to the 271 // trigger parameters, but it has the nice property that worse z-scores for 272 // available memory indicate making larger adjustments to the trigger 273 // parameters. It also results in fewer adjustments as the application 274 // stabilizes. 275 // 276 // In order to avoid making endless and likely unnecessary adjustments to the 277 // trigger parameters, the change in available memory (with respect to the 278 // average) at the end of a cycle must be beyond these threshold values. 279 if (z_score < LOWEST_EXPECTED_AVAILABLE_AT_END || 280 z_score > HIGHEST_EXPECTED_AVAILABLE_AT_END) { 281 // The sign is flipped because a negative z-score indicates that the 282 // available memory at the end of the cycle is below average. Positive 283 // adjustments make the triggers more sensitive (i.e., more likely to fire). 284 // The z-score also gives us a measure of just how far below normal. This 285 // property allows us to adjust the trigger parameters proportionally. 286 // 287 // The `100` here is used to attenuate the size of our adjustments. This 288 // number was chosen empirically. It also means the adjustments at the end of 289 // a concurrent cycle are an order of magnitude smaller than the adjustments 290 // made for a degenerated or full GC cycle (which themselves were also 291 // chosen empirically). 292 adjust_last_trigger_parameters(z_score / -100); 293 } 294 } 295 296 void ShenandoahAdaptiveHeuristics::record_success_degenerated() { 297 ShenandoahHeuristics::record_success_degenerated(); 298 // Adjust both trigger's parameters in the case of a degenerated GC because 299 // either of them should have triggered earlier to avoid this case. 300 adjust_margin_of_error(DEGENERATE_PENALTY_SD); 301 adjust_spike_threshold(DEGENERATE_PENALTY_SD); 302 } 303 304 void ShenandoahAdaptiveHeuristics::record_success_full() { 305 ShenandoahHeuristics::record_success_full(); 306 // Adjust both trigger's parameters in the case of a full GC because 307 // either of them should have triggered earlier to avoid this case. 308 adjust_margin_of_error(FULL_PENALTY_SD); 309 adjust_spike_threshold(FULL_PENALTY_SD); 310 } 311 312 static double saturate(double value, double min, double max) { 313 return MAX2(MIN2(value, max), min); 314 } 315 316 bool ShenandoahAdaptiveHeuristics::should_start_gc() { 317 size_t max_capacity = _generation->max_capacity(); 318 size_t capacity = _generation->soft_max_capacity(); 319 size_t available = _generation->available(); 320 size_t allocated = _generation->bytes_allocated_since_gc_start(); 321 322 log_debug(gc)("should_start_gc (%s)? available: " SIZE_FORMAT ", soft_max_capacity: " SIZE_FORMAT 323 ", max_capacity: " SIZE_FORMAT ", allocated: " SIZE_FORMAT, 324 _generation->name(), available, capacity, max_capacity, allocated); 325 326 // The collector reserve may eat into what the mutator is allowed to use. Make sure we are looking 327 // at what is available to the mutator when deciding whether to start a GC. 328 size_t usable = ShenandoahHeap::heap()->free_set()->available(); 329 if (usable < available) { 330 log_debug(gc)("Usable (" SIZE_FORMAT "%s) is less than available (" SIZE_FORMAT "%s)", 331 byte_size_in_proper_unit(usable), proper_unit_for_byte_size(usable), 332 byte_size_in_proper_unit(available), proper_unit_for_byte_size(available)); 333 available = usable; 334 } 335 336 // Allocation spikes are a characteristic of both the application ahd the JVM configuration. On the JVM command line, 337 // the application developer may want to supply a hint of the nature of spikes that are inherent in the application 338 // workload, and this information would normally be independent of heap size (not a percentage thereof). On the 339 // other hand, some allocation spikes are correlated with JVM configuration. For example, there are allocation 340 // spikes at the starts of concurrent marking and evacuation to refresh all local allocation buffers. The nature 341 // of these spikes is determined by LAB min and max sizes and numbers of threads, but also on frequency of GC passes, 342 // and on "periodic" behavior of these threads If GC frequency is much higher than the periodic trigger for mutator 343 // threads, then many of the mutator threads may be able to "sit out" of most GC passes. Though the thread's stack 344 // must be scanned, the thread does not need to refresh its LABs if it sits idle throughout the duration of the GC 345 // pass. The best prediction for this aspect of spikes in allocation patterns is probably recent past history. 346 // TODO: and dive deeper into _gc_time_penalties as this may also need to be corrected 347 348 // Check if allocation headroom is still okay. This also factors in: 349 // 1. Some space to absorb allocation spikes (ShenandoahAllocSpikeFactor) 350 // 2. Accumulated penalties from Degenerated and Full GC 351 size_t allocation_headroom = available; 352 size_t spike_headroom = capacity / 100 * ShenandoahAllocSpikeFactor; 353 size_t penalties = capacity / 100 * _gc_time_penalties; 354 355 allocation_headroom -= MIN2(allocation_headroom, penalties); 356 allocation_headroom -= MIN2(allocation_headroom, spike_headroom); 357 358 // Track allocation rate even if we decide to start a cycle for other reasons. 359 double rate = _allocation_rate.sample(allocated); 360 _last_trigger = OTHER; 361 362 size_t min_threshold = min_free_threshold(); 363 364 if (allocation_headroom < min_threshold) { 365 log_info(gc)("Trigger (%s): Free (" SIZE_FORMAT "%s) is below minimum threshold (" SIZE_FORMAT "%s)", 366 _generation->name(), 367 byte_size_in_proper_unit(allocation_headroom), proper_unit_for_byte_size(allocation_headroom), 368 byte_size_in_proper_unit(min_threshold), proper_unit_for_byte_size(min_threshold)); 369 return resize_and_evaluate(); 370 } 371 372 // Check if we need to learn a bit about the application 373 const size_t max_learn = ShenandoahLearningSteps; 374 if (_gc_times_learned < max_learn) { 375 size_t init_threshold = capacity / 100 * ShenandoahInitFreeThreshold; 376 if (allocation_headroom < init_threshold) { 377 log_info(gc)("Trigger (%s): Learning " SIZE_FORMAT " of " SIZE_FORMAT ". Free (" SIZE_FORMAT "%s) is below initial threshold (" SIZE_FORMAT "%s)", 378 _generation->name(), _gc_times_learned + 1, max_learn, 379 byte_size_in_proper_unit(allocation_headroom), proper_unit_for_byte_size(allocation_headroom), 380 byte_size_in_proper_unit(init_threshold), proper_unit_for_byte_size(init_threshold)); 381 return true; 382 } 383 } 384 385 // Rationale: 386 // The idea is that there is an average allocation rate and there are occasional abnormal bursts (or spikes) of 387 // allocations that exceed the average allocation rate. What do these spikes look like? 388 // 389 // 1. At certain phase changes, we may discard large amounts of data and replace it with large numbers of newly 390 // allocated objects. This "spike" looks more like a phase change. We were in steady state at M bytes/sec 391 // allocation rate and now we're in a "reinitialization phase" that looks like N bytes/sec. We need the "spike" 392 // accomodation to give us enough runway to recalibrate our "average allocation rate". 393 // 394 // 2. The typical workload changes. "Suddenly", our typical workload of N TPS increases to N+delta TPS. This means 395 // our average allocation rate needs to be adjusted. Once again, we need the "spike" accomodation to give us 396 // enough runway to recalibrate our "average allocation rate". 397 // 398 // 3. Though there is an "average" allocation rate, a given workload's demand for allocation may be very bursty. We 399 // allocate a bunch of LABs during the 5 ms that follow completion of a GC, then we perform no more allocations for 400 // the next 150 ms. It seems we want the "spike" to represent the maximum divergence from average within the 401 // period of time between consecutive evaluation of the should_start_gc() service. Here's the thinking: 402 // 403 // a) Between now and the next time I ask whether should_start_gc(), we might experience a spike representing 404 // the anticipated burst of allocations. If that would put us over budget, then we should start GC immediately. 405 // b) Between now and the anticipated depletion of allocation pool, there may be two or more bursts of allocations. 406 // If there are more than one of these bursts, we can "approximate" that these will be separated by spans of 407 // time with very little or no allocations so the "average" allocation rate should be a suitable approximation 408 // of how this will behave. 409 // 410 // For cases 1 and 2, we need to "quickly" recalibrate the average allocation rate whenever we detect a change 411 // in operation mode. We want some way to decide that the average rate has changed. Make average allocation rate 412 // computations an independent effort. 413 414 415 // TODO: Account for inherent delays in responding to GC triggers 416 // 1. It has been observed that delays of 200 ms or greater are common between the moment we return true from should_start_gc() 417 // and the moment at which we begin execution of the concurrent reset phase. Add this time into the calculation of 418 // avg_cycle_time below. (What is "this time"? Perhaps we should remember recent history of this delay for the 419 // running workload and use the maximum delay recently seen for "this time".) 420 // 2. The frequency of inquiries to should_start_gc() is adaptive, ranging between ShenandoahControlIntervalMin and 421 // ShenandoahControlIntervalMax. The current control interval (or the max control interval) should also be added into 422 // the calculation of avg_cycle_time below. 423 424 double avg_cycle_time = _gc_cycle_time_history->davg() + (_margin_of_error_sd * _gc_cycle_time_history->dsd()); 425 426 size_t last_live_memory = get_last_live_memory(); 427 size_t penultimate_live_memory = get_penultimate_live_memory(); 428 double original_cycle_time = avg_cycle_time; 429 if ((penultimate_live_memory < last_live_memory) && (penultimate_live_memory != 0)) { 430 // If the live-memory size is growing, our estimates of cycle time are based on lighter workload, so adjust. 431 // TODO: Be more precise about how to scale when live memory is growing. Existing code is a very rough approximation 432 // tuned with very limited workload observations. 433 avg_cycle_time = (avg_cycle_time * 2 * last_live_memory) / penultimate_live_memory; 434 } else { 435 int degen_cycles = degenerated_cycles_in_a_row(); 436 if (degen_cycles > 0) { 437 // If we've degenerated recently, we might be waiting too long between triggers so adjust trigger forward. 438 // TODO: Be more precise about how to scale when we've experienced recent degenerated GC. Existing code is a very 439 // rough approximation tuned with very limited workload observations. 440 avg_cycle_time += degen_cycles * avg_cycle_time; 441 } 442 } 443 444 double avg_alloc_rate = _allocation_rate.upper_bound(_margin_of_error_sd); 445 log_debug(gc)("%s: average GC time: %.2f ms, allocation rate: %.0f %s/s", 446 _generation->name(), avg_cycle_time * 1000, byte_size_in_proper_unit(avg_alloc_rate), proper_unit_for_byte_size(avg_alloc_rate)); 447 448 if (avg_cycle_time > allocation_headroom / avg_alloc_rate) { 449 if (avg_cycle_time > original_cycle_time) { 450 log_debug(gc)("%s: average GC time adjusted from: %.2f ms to %.2f ms because upward trend in live memory retention", 451 _generation->name(), original_cycle_time, avg_cycle_time); 452 } 453 454 log_info(gc)("Trigger (%s): Average GC time (%.2f ms) is above the time for average allocation rate (%.0f %sB/s) to deplete free headroom (" SIZE_FORMAT "%s) (margin of error = %.2f)", 455 _generation->name(), avg_cycle_time * 1000, 456 byte_size_in_proper_unit(avg_alloc_rate), proper_unit_for_byte_size(avg_alloc_rate), 457 byte_size_in_proper_unit(allocation_headroom), proper_unit_for_byte_size(allocation_headroom), 458 _margin_of_error_sd); 459 460 log_info(gc, ergo)("Free headroom: " SIZE_FORMAT "%s (free) - " SIZE_FORMAT "%s (spike) - " SIZE_FORMAT "%s (penalties) = " SIZE_FORMAT "%s", 461 byte_size_in_proper_unit(available), proper_unit_for_byte_size(available), 462 byte_size_in_proper_unit(spike_headroom), proper_unit_for_byte_size(spike_headroom), 463 byte_size_in_proper_unit(penalties), proper_unit_for_byte_size(penalties), 464 byte_size_in_proper_unit(allocation_headroom), proper_unit_for_byte_size(allocation_headroom)); 465 466 _last_trigger = RATE; 467 return resize_and_evaluate(); 468 } 469 470 bool is_spiking = _allocation_rate.is_spiking(rate, _spike_threshold_sd); 471 if (is_spiking && avg_cycle_time > allocation_headroom / rate) { 472 log_info(gc)("Trigger (%s): Average GC time (%.2f ms) is above the time for instantaneous allocation rate (%.0f %sB/s) to deplete free headroom (" SIZE_FORMAT "%s) (spike threshold = %.2f)", 473 _generation->name(), avg_cycle_time * 1000, 474 byte_size_in_proper_unit(rate), proper_unit_for_byte_size(rate), 475 byte_size_in_proper_unit(allocation_headroom), proper_unit_for_byte_size(allocation_headroom), 476 477 _spike_threshold_sd); 478 _last_trigger = SPIKE; 479 return resize_and_evaluate(); 480 } 481 482 return ShenandoahHeuristics::should_start_gc(); 483 } 484 485 bool ShenandoahAdaptiveHeuristics::resize_and_evaluate() { 486 ShenandoahHeap* heap = ShenandoahHeap::heap(); 487 if (!heap->mode()->is_generational()) { 488 // We only attempt to resize the generations in generational mode. 489 return true; 490 } 491 492 if (_cycles_since_last_resize <= MINIMUM_RESIZE_INTERVAL) { 493 log_info(gc, ergo)("Not resizing %s for another " UINT32_FORMAT " cycles.", 494 _generation->name(), _cycles_since_last_resize); 495 return true; 496 } 497 498 if (!heap->generation_sizer()->transfer_capacity(_generation)) { 499 // We could not enlarge our generation, so we must start a gc cycle. 500 log_info(gc, ergo)("Could not increase size of %s, begin gc cycle.", _generation->name()); 501 return true; 502 } 503 504 log_info(gc)("Increased size of %s generation, re-evaluate trigger criteria", _generation->name()); 505 return should_start_gc(); 506 } 507 508 void ShenandoahAdaptiveHeuristics::adjust_last_trigger_parameters(double amount) { 509 switch (_last_trigger) { 510 case RATE: 511 adjust_margin_of_error(amount); 512 break; 513 case SPIKE: 514 adjust_spike_threshold(amount); 515 break; 516 case OTHER: 517 // nothing to adjust here. 518 break; 519 default: 520 ShouldNotReachHere(); 521 } 522 } 523 524 void ShenandoahAdaptiveHeuristics::adjust_margin_of_error(double amount) { 525 _margin_of_error_sd = saturate(_margin_of_error_sd + amount, MINIMUM_CONFIDENCE, MAXIMUM_CONFIDENCE); 526 log_debug(gc, ergo)("Margin of error now %.2f", _margin_of_error_sd); 527 } 528 529 void ShenandoahAdaptiveHeuristics::adjust_spike_threshold(double amount) { 530 _spike_threshold_sd = saturate(_spike_threshold_sd - amount, MINIMUM_CONFIDENCE, MAXIMUM_CONFIDENCE); 531 log_debug(gc, ergo)("Spike threshold now: %.2f", _spike_threshold_sd); 532 } 533 534 ShenandoahAllocationRate::ShenandoahAllocationRate() : 535 _last_sample_time(os::elapsedTime()), 536 _last_sample_value(0), 537 _interval_sec(1.0 / ShenandoahAdaptiveSampleFrequencyHz), 538 _rate(int(ShenandoahAdaptiveSampleSizeSeconds * ShenandoahAdaptiveSampleFrequencyHz), ShenandoahAdaptiveDecayFactor), 539 _rate_avg(int(ShenandoahAdaptiveSampleSizeSeconds * ShenandoahAdaptiveSampleFrequencyHz), ShenandoahAdaptiveDecayFactor) { 540 } 541 542 double ShenandoahAllocationRate::sample(size_t allocated) { 543 double now = os::elapsedTime(); 544 double rate = 0.0; 545 if (now - _last_sample_time > _interval_sec) { 546 if (allocated >= _last_sample_value) { 547 rate = instantaneous_rate(now, allocated); 548 _rate.add(rate); 549 _rate_avg.add(_rate.avg()); 550 } 551 552 _last_sample_time = now; 553 _last_sample_value = allocated; 554 } 555 return rate; 556 } 557 558 double ShenandoahAllocationRate::upper_bound(double sds) const { 559 // Here we are using the standard deviation of the computed running 560 // average, rather than the standard deviation of the samples that went 561 // into the moving average. This is a much more stable value and is tied 562 // to the actual statistic in use (moving average over samples of averages). 563 return _rate.davg() + (sds * _rate_avg.dsd()); 564 } 565 566 void ShenandoahAllocationRate::allocation_counter_reset() { 567 _last_sample_time = os::elapsedTime(); 568 _last_sample_value = 0; 569 } 570 571 bool ShenandoahAllocationRate::is_spiking(double rate, double threshold) const { 572 if (rate <= 0.0) { 573 return false; 574 } 575 576 double sd = _rate.sd(); 577 if (sd > 0) { 578 // There is a small chance that that rate has already been sampled, but it 579 // seems not to matter in practice. 580 double z_score = (rate - _rate.avg()) / sd; 581 if (z_score > threshold) { 582 return true; 583 } 584 } 585 return false; 586 } 587 588 double ShenandoahAllocationRate::instantaneous_rate(size_t allocated) const { 589 return instantaneous_rate(os::elapsedTime(), allocated); 590 } 591 592 double ShenandoahAllocationRate::instantaneous_rate(double time, size_t allocated) const { 593 size_t last_value = _last_sample_value; 594 double last_time = _last_sample_time; 595 size_t allocation_delta = (allocated > last_value) ? (allocated - last_value) : 0; 596 double time_delta_sec = time - last_time; 597 return (time_delta_sec > 0) ? (allocation_delta / time_delta_sec) : 0; 598 }