/*
 * Copyright (c) 2018, 2019, Red Hat, Inc. All rights reserved.
 * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"

#include "gc/shenandoah/heuristics/shenandoahAdaptiveHeuristics.hpp"
#include "gc/shenandoah/shenandoahCollectionSet.hpp"
#include "gc/shenandoah/shenandoahFreeSet.hpp"
#include "gc/shenandoah/shenandoahHeap.inline.hpp"
#include "gc/shenandoah/shenandoahGeneration.hpp"
#include "gc/shenandoah/shenandoahHeapRegion.inline.hpp"
#include "gc/shenandoah/shenandoahOldGeneration.hpp"
#include "gc/shenandoah/shenandoahYoungGeneration.hpp"
#include "logging/log.hpp"
#include "logging/logTag.hpp"
#include "utilities/quickSort.hpp"

// These constants are used to adjust the margin of error for the moving
// average of the allocation rate and cycle time. The units are standard
// deviations.
const double ShenandoahAdaptiveHeuristics::FULL_PENALTY_SD = 0.2;
const double ShenandoahAdaptiveHeuristics::DEGENERATE_PENALTY_SD = 0.1;

// These are used to decide if we want to make any adjustments at all
// at the end of a successful concurrent cycle.
const double ShenandoahAdaptiveHeuristics::LOWEST_EXPECTED_AVAILABLE_AT_END = -0.5;
const double ShenandoahAdaptiveHeuristics::HIGHEST_EXPECTED_AVAILABLE_AT_END = 0.5;

// These values are the confidence interval expressed as standard deviations.
// At the minimum confidence level, there is a 25% chance that the true value of
// the estimate (average cycle time or allocation rate) is not more than
// MINIMUM_CONFIDENCE standard deviations away from our estimate. Similarly, the
// MAXIMUM_CONFIDENCE interval here means there is a one in a thousand chance
// that the true value of our estimate is outside the interval. These are used
// as bounds on the adjustments applied at the outcome of a GC cycle.
const double ShenandoahAdaptiveHeuristics::MINIMUM_CONFIDENCE = 0.319; // 25%
const double ShenandoahAdaptiveHeuristics::MAXIMUM_CONFIDENCE = 3.291; // 99.9%

ShenandoahAdaptiveHeuristics::ShenandoahAdaptiveHeuristics(ShenandoahGeneration* generation) :
  ShenandoahHeuristics(generation),
  _margin_of_error_sd(ShenandoahAdaptiveInitialConfidence),
  _spike_threshold_sd(ShenandoahAdaptiveInitialSpikeThreshold),
  _last_trigger(OTHER),
  _available(Moving_Average_Samples, ShenandoahAdaptiveDecayFactor) { }

ShenandoahAdaptiveHeuristics::~ShenandoahAdaptiveHeuristics() {}

void ShenandoahAdaptiveHeuristics::choose_collection_set_from_regiondata(ShenandoahCollectionSet* cset,
                                                                         RegionData* data, size_t size,
                                                                         size_t actual_free) {
  size_t garbage_threshold = ShenandoahHeapRegion::region_size_bytes() * ShenandoahGarbageThreshold / 100;
  size_t ignore_threshold = ShenandoahHeapRegion::region_size_bytes() * ShenandoahIgnoreGarbageThreshold / 100;
  ShenandoahHeap* heap = ShenandoahHeap::heap();

  // The logic for cset selection in adaptive is as follows:
  //
  // 1. We cannot get cset larger than available free space. Otherwise we guarantee OOME
  //    during evacuation, and thus guarantee full GC. In practice, we also want to let
  //    the application allocate something. This is why we limit CSet to some fraction of
  //    available space. In non-overloaded heap, max_cset would contain all plausible candidates
  //    over garbage threshold.
  //
  // 2. We should not get the cset so low that the free threshold is not met right
  //    after the cycle. Otherwise we get back-to-back cycles for no reason if heap is
  //    too fragmented. In non-overloaded non-fragmented heap min_garbage would be around zero.
  //
  // Therefore, we start by sorting the regions by garbage. Then we unconditionally add the best candidates
  // before we meet min_garbage. Then we add all candidates that fit with a garbage threshold before
  // we hit max_cset. When max_cset is hit, we terminate the cset selection. Note that in this scheme,
  // ShenandoahGarbageThreshold is the soft threshold which would be ignored until min_garbage is hit.

  // In generational mode, the sort order within the data array is not strictly descending amounts of garbage. In
  // particular, regions that have reached tenure age will be sorted into this array before younger regions that contain
  // more garbage. This represents one of the reasons why we keep looking at regions even after we decide, for example,
  // to exclude one of the regions because it might require evacuation of too much live data.
  // TODO: Split this into separate methods for clarity.
  bool is_generational = heap->mode()->is_generational();
  bool is_global = _generation->is_global();
  size_t capacity = heap->young_generation()->max_capacity();

  // cur_young_garbage represents the amount of memory to be reclaimed from young-gen. In the case that live objects
  // are known to be promoted out of young-gen, we count this as cur_young_garbage because this memory is reclaimed
  // from young-gen and becomes available to serve future young-gen allocation requests.
  size_t cur_young_garbage = 0;

  // Better select garbage-first regions
  QuickSort::sort<RegionData>(data, (int)size, compare_by_garbage, false);

  if (is_generational) {
    for (size_t idx = 0; idx < size; idx++) {
      ShenandoahHeapRegion* r = data[idx]._region;
      if (cset->is_preselected(r->index())) {
        assert(r->age() >= InitialTenuringThreshold, "Preselected regions must have tenure age");
        // The entire region will be promoted. This region does not impact young-gen or old-gen evacuation reserve.
        // This region has been pre-selected and its impact on promotion reserve is already accounted for.

        // r->used() is r->garbage() + r->get_live_data_bytes()
        // Since all live data in this region is being evacuated from young-gen, it is as if this memory
        // is garbage insofar as young-gen is concerned. Counting this as garbage reduces the need to
        // reclaim highly utilized young-gen regions just for the sake of finding min_garbage to reclaim
        // within young-gen memory.

        cur_young_garbage += r->garbage();
        cset->add_region(r);
      }
    }
    if (is_global) {
      size_t max_young_cset = (size_t) (heap->get_young_evac_reserve() / ShenandoahEvacWaste);
      size_t young_cur_cset = 0;
      size_t max_old_cset = (size_t) (heap->get_old_evac_reserve() / ShenandoahOldEvacWaste);
      size_t old_cur_cset = 0;
      size_t free_target = (capacity * ShenandoahMinFreeThreshold) / 100 + max_young_cset;
      size_t min_garbage = (free_target > actual_free) ? (free_target - actual_free) : 0;

      log_info(gc, ergo)("Adaptive CSet Selection for GLOBAL. Max Young Evacuation: " SIZE_FORMAT
                         "%s, Max Old Evacuation: " SIZE_FORMAT "%s, Actual Free: " SIZE_FORMAT "%s.",
                         byte_size_in_proper_unit(max_young_cset), proper_unit_for_byte_size(max_young_cset),
                         byte_size_in_proper_unit(max_old_cset), proper_unit_for_byte_size(max_old_cset),
                         byte_size_in_proper_unit(actual_free), proper_unit_for_byte_size(actual_free));

      for (size_t idx = 0; idx < size; idx++) {
        ShenandoahHeapRegion* r = data[idx]._region;
        if (cset->is_preselected(r->index())) {
          continue;
        }
        bool add_region = false;
        if (r->is_old()) {
          size_t new_cset = old_cur_cset + r->get_live_data_bytes();
          if ((new_cset <= max_old_cset) && (r->garbage() > garbage_threshold)) {
            add_region = true;
            old_cur_cset = new_cset;
          }
        } else if (r->age() < InitialTenuringThreshold) {
          size_t new_cset = young_cur_cset + r->get_live_data_bytes();
          size_t region_garbage = r->garbage();
          size_t new_garbage = cur_young_garbage + region_garbage;
          bool add_regardless = (region_garbage > ignore_threshold) && (new_garbage < min_garbage);
          if ((new_cset <= max_young_cset) && (add_regardless || (region_garbage > garbage_threshold))) {
            add_region = true;
            young_cur_cset = new_cset;
            cur_young_garbage = new_garbage;
          }
        }
        // Note that we do not add aged regions if they were not pre-selected. The reason they were not preselected
        // is because there is not sufficient room in old-gen to hold their to-be-promoted live objects.

        if (add_region) {
          cset->add_region(r);
        }
      }
    } else {
      // This is young-gen collection or a mixed evacuation. If this is a mixed evacuation, the old-gen candidate regions
      // have already been added.
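      // max_cset bounds the live bytes we may add to the collection set: evacuating that much is expected to
      // consume up to max_cset * ShenandoahEvacWaste bytes of the young evacuation reserve. min_garbage is the
      // shortfall between free_target and actual_free; until that much garbage has been gathered, regions whose
      // garbage exceeds the ignore threshold are taken even if they fall short of ShenandoahGarbageThreshold.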
      size_t max_cset = (size_t) (heap->get_young_evac_reserve() / ShenandoahEvacWaste);
      size_t cur_cset = 0;
      size_t free_target = (capacity * ShenandoahMinFreeThreshold) / 100 + max_cset;
      size_t min_garbage = (free_target > actual_free) ? (free_target - actual_free) : 0;

      log_info(gc, ergo)("Adaptive CSet Selection for YOUNG. Max Evacuation: " SIZE_FORMAT "%s, Actual Free: " SIZE_FORMAT "%s.",
                         byte_size_in_proper_unit(max_cset), proper_unit_for_byte_size(max_cset),
                         byte_size_in_proper_unit(actual_free), proper_unit_for_byte_size(actual_free));

      for (size_t idx = 0; idx < size; idx++) {
        ShenandoahHeapRegion* r = data[idx]._region;
        if (cset->is_preselected(r->index())) {
          continue;
        }
        if (r->age() < InitialTenuringThreshold) {
          size_t new_cset = cur_cset + r->get_live_data_bytes();
          size_t region_garbage = r->garbage();
          size_t new_garbage = cur_young_garbage + region_garbage;
          bool add_regardless = (region_garbage > ignore_threshold) && (new_garbage < min_garbage);
          assert(r->is_young(), "Only young candidates expected in the data array");
          if ((new_cset <= max_cset) && (add_regardless || (region_garbage > garbage_threshold))) {
            cur_cset = new_cset;
            cur_young_garbage = new_garbage;
            cset->add_region(r);
          }
        }
        // Note that we do not add aged regions if they were not pre-selected. The reason they were not preselected
        // is because there is not sufficient room in old-gen to hold their to-be-promoted live objects or because
        // they are to be promoted in place.
      }
    }
  } else {
    // Traditional Shenandoah (non-generational)
    size_t capacity = ShenandoahHeap::heap()->max_capacity();
    size_t max_cset = (size_t)((1.0 * capacity / 100 * ShenandoahEvacReserve) / ShenandoahEvacWaste);
    size_t free_target = (capacity * ShenandoahMinFreeThreshold) / 100 + max_cset;
    size_t min_garbage = (free_target > actual_free) ? (free_target - actual_free) : 0;

    log_info(gc, ergo)("Adaptive CSet Selection. Target Free: " SIZE_FORMAT "%s, Actual Free: "
                       SIZE_FORMAT "%s, Max Evacuation: " SIZE_FORMAT "%s, Min Garbage: " SIZE_FORMAT "%s",
                       byte_size_in_proper_unit(free_target), proper_unit_for_byte_size(free_target),
                       byte_size_in_proper_unit(actual_free), proper_unit_for_byte_size(actual_free),
                       byte_size_in_proper_unit(max_cset), proper_unit_for_byte_size(max_cset),
                       byte_size_in_proper_unit(min_garbage), proper_unit_for_byte_size(min_garbage));

    size_t cur_cset = 0;
    size_t cur_garbage = 0;

    for (size_t idx = 0; idx < size; idx++) {
      ShenandoahHeapRegion* r = data[idx]._region;

      size_t new_cset = cur_cset + r->get_live_data_bytes();
      size_t new_garbage = cur_garbage + r->garbage();

      if (new_cset > max_cset) {
        break;
      }

      if ((new_garbage < min_garbage) || (r->garbage() > garbage_threshold)) {
        cset->add_region(r);
        cur_cset = new_cset;
        cur_garbage = new_garbage;
      }
    }
  }

  size_t collected_old = cset->get_old_bytes_reserved_for_evacuation();
  size_t collected_promoted = cset->get_young_bytes_to_be_promoted();
  size_t collected_young = cset->get_young_bytes_reserved_for_evacuation();

  log_info(gc, ergo)("Chosen CSet evacuates young: " SIZE_FORMAT "%s (of which at least: " SIZE_FORMAT "%s are to be promoted), "
                     "old: " SIZE_FORMAT "%s",
                     byte_size_in_proper_unit(collected_young), proper_unit_for_byte_size(collected_young),
                     byte_size_in_proper_unit(collected_promoted), proper_unit_for_byte_size(collected_promoted),
                     byte_size_in_proper_unit(collected_old), proper_unit_for_byte_size(collected_old));
}

void ShenandoahAdaptiveHeuristics::record_cycle_start() {
  ShenandoahHeuristics::record_cycle_start();
  _allocation_rate.allocation_counter_reset();
}

void ShenandoahAdaptiveHeuristics::record_success_concurrent(bool abbreviated) {
  ShenandoahHeuristics::record_success_concurrent(abbreviated);

  size_t available = MIN2(_generation->available(), ShenandoahHeap::heap()->free_set()->available());

  double z_score = 0.0;
  double available_sd = _available.sd();
  if (available_sd > 0) {
    double available_avg = _available.avg();
    z_score = (double(available) - available_avg) / available_sd;
    log_debug(gc, ergo)("%s Available: " SIZE_FORMAT " %sB, z-score=%.3f. Average available: %.1f %sB +/- %.1f %sB.",
                        _generation->name(),
                        byte_size_in_proper_unit(available), proper_unit_for_byte_size(available),
                        z_score,
                        byte_size_in_proper_unit(available_avg), proper_unit_for_byte_size(available_avg),
                        byte_size_in_proper_unit(available_sd), proper_unit_for_byte_size(available_sd));
  }

  _available.add(double(available));

  // In the case when a concurrent GC cycle completes successfully but with an
  // unusually small amount of available memory, we will adjust our trigger
  // parameters so that they are more likely to initiate a new cycle.
  // Conversely, when a GC cycle results in an above average amount of available
  // memory, we will adjust the trigger parameters to be less likely to initiate
  // a GC cycle.
  //
  // The z-score we've computed is in no way statistically related to the
  // trigger parameters, but it has the nice property that worse z-scores for
  // available memory call for larger adjustments to the trigger
  // parameters. It also results in fewer adjustments as the application
  // stabilizes.
  //
  // In order to avoid making endless and likely unnecessary adjustments to the
  // trigger parameters, the change in available memory (with respect to the
  // average) at the end of a cycle must be beyond these threshold values.
  if (z_score < LOWEST_EXPECTED_AVAILABLE_AT_END ||
      z_score > HIGHEST_EXPECTED_AVAILABLE_AT_END) {
    // The sign is flipped because a negative z-score indicates that the
    // available memory at the end of the cycle is below average. Positive
    // adjustments make the triggers more sensitive (i.e., more likely to fire).
    // The z-score also gives us a measure of just how far below normal. This
    // property allows us to adjust the trigger parameters proportionally.
    //
    // The `100` here is used to attenuate the size of our adjustments. This
    // number was chosen empirically. It also means the adjustments at the end of
    // a concurrent cycle are an order of magnitude smaller than the adjustments
    // made for a degenerated or full GC cycle (which themselves were also
    // chosen empirically).
    adjust_last_trigger_parameters(z_score / -100);
  }
}

void ShenandoahAdaptiveHeuristics::record_success_degenerated() {
  ShenandoahHeuristics::record_success_degenerated();
  // Adjust both triggers' parameters in the case of a degenerated GC because
  // either of them should have triggered earlier to avoid this case.
  adjust_margin_of_error(DEGENERATE_PENALTY_SD);
  adjust_spike_threshold(DEGENERATE_PENALTY_SD);
}

void ShenandoahAdaptiveHeuristics::record_success_full() {
  ShenandoahHeuristics::record_success_full();
  // Adjust both triggers' parameters in the case of a full GC because
  // either of them should have triggered earlier to avoid this case.
  adjust_margin_of_error(FULL_PENALTY_SD);
  adjust_spike_threshold(FULL_PENALTY_SD);
}

// Clamp value to the inclusive range [min, max].
static double saturate(double value, double min, double max) {
  return MAX2(MIN2(value, max), min);
}

// Return a conservative estimate of how much memory can be allocated before we need to start GC. The estimate is based
// on memory that is currently available within young generation plus all of the memory that will be added to the young
// generation at the end of the current cycle (as represented by young_regions_to_be_reclaimed) and on the anticipated
// amount of time required to perform a GC.
size_t ShenandoahAdaptiveHeuristics::bytes_of_allocation_runway_before_gc_trigger(size_t young_regions_to_be_reclaimed) {
  assert(_generation->is_young(), "Only meaningful for young-gen heuristic");

  size_t max_capacity = _generation->max_capacity();
  size_t capacity = _generation->soft_max_capacity();
  size_t usage = _generation->used();
  size_t available = (capacity > usage)? capacity - usage: 0;
  size_t allocated = _generation->bytes_allocated_since_gc_start();

  size_t available_young_collected = ShenandoahHeap::heap()->collection_set()->get_young_available_bytes_collected();
  size_t anticipated_available =
    available + young_regions_to_be_reclaimed * ShenandoahHeapRegion::region_size_bytes() - available_young_collected;
  size_t allocation_headroom = anticipated_available;
  size_t spike_headroom = capacity * ShenandoahAllocSpikeFactor / 100;
  size_t penalties = capacity * _gc_time_penalties / 100;

  double rate = _allocation_rate.sample(allocated);

  // At what value of available would the avg and spike triggers occur?
  // if allocation_headroom < avg_cycle_time * avg_alloc_rate, then we experience avg trigger
  // if allocation_headroom < avg_cycle_time * rate, then we experience spike trigger if is_spiking
  //
  // allocation_headroom =
  //   0, if penalties > available or if penalties + spike_headroom > available
  //   available - penalties - spike_headroom, otherwise
  //
  // so we trigger if available - penalties - spike_headroom < avg_cycle_time * avg_alloc_rate, which is to say
  //   available < avg_cycle_time * avg_alloc_rate + penalties + spike_headroom
  // or if available < penalties + spike_headroom
  //
  // since avg_cycle_time * avg_alloc_rate > 0, the first test is sufficient to test both conditions
  //
  // thus, evac_slack_avg is MAX2(0, available - (avg_cycle_time * avg_alloc_rate + penalties + spike_headroom))
  //
  // similarly, evac_slack_spiking is MAX2(0, available - (avg_cycle_time * rate + penalties + spike_headroom))
  // but evac_slack_spiking is only relevant if is_spiking, as defined below.

  double avg_cycle_time = _gc_cycle_time_history->davg() + (_margin_of_error_sd * _gc_cycle_time_history->dsd());

  // TODO: Consider making conservative adjustments to avg_cycle_time, such as: (avg_cycle_time *= 2) in cases where
  // we expect a longer-than-normal GC duration. This includes mixed evacuations, evacuations that perform promotion
  // including promotion in place, and OLD GC bootstrap cycles. It has been observed that these cycles sometimes
  // require twice or more the duration of "normal" GC cycles. We have experimented with this approach. While it
  // does appear to reduce the frequency of degenerated cycles due to late triggers, it also has the effect of reducing
  // evacuation slack so that there is less memory available to be transferred to OLD. The result is that we
  // throttle promotion and it takes too long to move old objects out of the young generation.

  double avg_alloc_rate = _allocation_rate.upper_bound(_margin_of_error_sd);
  size_t evac_slack_avg;
  if (anticipated_available > avg_cycle_time * avg_alloc_rate + penalties + spike_headroom) {
    evac_slack_avg = anticipated_available - (avg_cycle_time * avg_alloc_rate + penalties + spike_headroom);
  } else {
    // we have no slack because it's already time to trigger
    evac_slack_avg = 0;
  }

  bool is_spiking = _allocation_rate.is_spiking(rate, _spike_threshold_sd);
  size_t evac_slack_spiking;
  if (is_spiking) {
    if (anticipated_available > avg_cycle_time * rate + penalties + spike_headroom) {
      evac_slack_spiking = anticipated_available - (avg_cycle_time * rate + penalties + spike_headroom);
    } else {
      // we have no slack because it's already time to trigger
      evac_slack_spiking = 0;
    }
  } else {
    evac_slack_spiking = evac_slack_avg;
  }

  size_t threshold = min_free_threshold();
  size_t evac_min_threshold = (anticipated_available > threshold)? anticipated_available - threshold: 0;
  return MIN3(evac_slack_spiking, evac_slack_avg, evac_min_threshold);
}

bool ShenandoahAdaptiveHeuristics::should_start_gc() {
  size_t capacity = _generation->soft_max_capacity();
  size_t available = _generation->soft_available();
  size_t allocated = _generation->bytes_allocated_since_gc_start();

  log_debug(gc)("should_start_gc (%s)? available: " SIZE_FORMAT ", soft_max_capacity: " SIZE_FORMAT
                ", allocated: " SIZE_FORMAT,
                _generation->name(), available, capacity, allocated);

  // The collector reserve may eat into what the mutator is allowed to use. Make sure we are looking
  // at what is available to the mutator when deciding whether to start a GC.
  size_t usable = ShenandoahHeap::heap()->free_set()->available();
  if (usable < available) {
    log_debug(gc)("Usable (" SIZE_FORMAT "%s) is less than available (" SIZE_FORMAT "%s)",
                  byte_size_in_proper_unit(usable), proper_unit_for_byte_size(usable),
                  byte_size_in_proper_unit(available), proper_unit_for_byte_size(available));
    available = usable;
  }

  // Track allocation rate even if we decide to start a cycle for other reasons.
  double rate = _allocation_rate.sample(allocated);
  _last_trigger = OTHER;

  // OLD generation is maintained to be as small as possible. Depletion-of-free-pool triggers do not apply to old generation.
  if (!_generation->is_old()) {
    size_t min_threshold = min_free_threshold();
    if (available < min_threshold) {
      log_info(gc)("Trigger (%s): Free (" SIZE_FORMAT "%s) is below minimum threshold (" SIZE_FORMAT "%s)",
                   _generation->name(),
                   byte_size_in_proper_unit(available), proper_unit_for_byte_size(available),
                   byte_size_in_proper_unit(min_threshold), proper_unit_for_byte_size(min_threshold));
      return true;
    }

    // Check if we need to learn a bit about the application
    const size_t max_learn = ShenandoahLearningSteps;
    if (_gc_times_learned < max_learn) {
      size_t init_threshold = capacity / 100 * ShenandoahInitFreeThreshold;
      if (available < init_threshold) {
        log_info(gc)("Trigger (%s): Learning " SIZE_FORMAT " of " SIZE_FORMAT ". Free ("
                     SIZE_FORMAT "%s) is below initial threshold (" SIZE_FORMAT "%s)",
                     _generation->name(), _gc_times_learned + 1, max_learn,
                     byte_size_in_proper_unit(available), proper_unit_for_byte_size(available),
                     byte_size_in_proper_unit(init_threshold), proper_unit_for_byte_size(init_threshold));
        return true;
      }
    }

    // Rationale:
    // The idea is that there is an average allocation rate and there are occasional abnormal bursts (or spikes) of
    // allocations that exceed the average allocation rate. What do these spikes look like?
    //
    // 1. At certain phase changes, we may discard large amounts of data and replace it with large numbers of newly
    //    allocated objects. This "spike" looks more like a phase change. We were in steady state at M bytes/sec
    //    allocation rate and now we're in a "reinitialization phase" that looks like N bytes/sec. We need the "spike"
    //    accommodation to give us enough runway to recalibrate our "average allocation rate".
    //
    // 2. The typical workload changes. "Suddenly", our typical workload of N TPS increases to N+delta TPS. This means
    //    our average allocation rate needs to be adjusted. Once again, we need the "spike" accommodation to give us
    //    enough runway to recalibrate our "average allocation rate".
    //
    // 3. Though there is an "average" allocation rate, a given workload's demand for allocation may be very bursty. We
    //    allocate a bunch of LABs during the 5 ms that follow completion of a GC, then we perform no more allocations for
    //    the next 150 ms. It seems we want the "spike" to represent the maximum divergence from average within the
    //    period of time between consecutive evaluations of the should_start_gc() service. Here's the thinking:
    //
    //    a) Between now and the next time I ask whether should_start_gc(), we might experience a spike representing
    //       the anticipated burst of allocations. If that would put us over budget, then we should start GC immediately.
    //    b) Between now and the anticipated depletion of allocation pool, there may be two or more bursts of allocations.
    //       If there is more than one of these bursts, we can "approximate" that these will be separated by spans of
    //       time with very little or no allocations, so the "average" allocation rate should be a suitable approximation
    //       of how this will behave.
    //
    //    For cases 1 and 2, we need to "quickly" recalibrate the average allocation rate whenever we detect a change
    //    in operation mode. We want some way to decide that the average rate has changed. Make average allocation rate
    //    computations an independent effort.

    // Check if allocation headroom is still okay. This also factors in:
    //   1. Some space to absorb allocation spikes (ShenandoahAllocSpikeFactor)
    //   2. Accumulated penalties from Degenerated and Full GC

    size_t allocation_headroom = available;
    size_t spike_headroom = capacity / 100 * ShenandoahAllocSpikeFactor;
    size_t penalties = capacity / 100 * _gc_time_penalties;

    allocation_headroom -= MIN2(allocation_headroom, penalties);
    allocation_headroom -= MIN2(allocation_headroom, spike_headroom);

    double avg_cycle_time = _gc_cycle_time_history->davg() + (_margin_of_error_sd * _gc_cycle_time_history->dsd());
    double avg_alloc_rate = _allocation_rate.upper_bound(_margin_of_error_sd);
    log_debug(gc)("%s: average GC time: %.2f ms, allocation rate: %.0f %s/s",
                  _generation->name(),
                  avg_cycle_time * 1000, byte_size_in_proper_unit(avg_alloc_rate), proper_unit_for_byte_size(avg_alloc_rate));

    if (avg_cycle_time > allocation_headroom / avg_alloc_rate) {

      log_info(gc)("Trigger (%s): Average GC time (%.2f ms) is above the time for average allocation rate (%.0f %sB/s)"
                   " to deplete free headroom (" SIZE_FORMAT "%s) (margin of error = %.2f)",
                   _generation->name(), avg_cycle_time * 1000,
                   byte_size_in_proper_unit(avg_alloc_rate), proper_unit_for_byte_size(avg_alloc_rate),
                   byte_size_in_proper_unit(allocation_headroom), proper_unit_for_byte_size(allocation_headroom),
                   _margin_of_error_sd);

      log_info(gc, ergo)("Free headroom: " SIZE_FORMAT "%s (free) - " SIZE_FORMAT "%s (spike) - "
                         SIZE_FORMAT "%s (penalties) = " SIZE_FORMAT "%s",
                         byte_size_in_proper_unit(available), proper_unit_for_byte_size(available),
                         byte_size_in_proper_unit(spike_headroom), proper_unit_for_byte_size(spike_headroom),
                         byte_size_in_proper_unit(penalties), proper_unit_for_byte_size(penalties),
                         byte_size_in_proper_unit(allocation_headroom), proper_unit_for_byte_size(allocation_headroom));

      _last_trigger = RATE;
      return true;
    }

    bool is_spiking = _allocation_rate.is_spiking(rate, _spike_threshold_sd);
    if (is_spiking && avg_cycle_time > allocation_headroom / rate) {
      log_info(gc)("Trigger (%s): Average GC time (%.2f ms) is above the time for instantaneous allocation rate (%.0f %sB/s)"
                   " to deplete free headroom (" SIZE_FORMAT "%s) (spike threshold = %.2f)",
                   _generation->name(), avg_cycle_time * 1000,
                   byte_size_in_proper_unit(rate), proper_unit_for_byte_size(rate),
                   byte_size_in_proper_unit(allocation_headroom), proper_unit_for_byte_size(allocation_headroom),
                   _spike_threshold_sd);
      _last_trigger = SPIKE;
      return true;
    }

    ShenandoahHeap* heap = ShenandoahHeap::heap();
    if (heap->mode()->is_generational()) {
      // Get through promotions and mixed evacuations as quickly as possible. These cycles sometimes require significantly
      // more time than traditional young-generation cycles so start them up as soon as possible. This is a "mitigation"
      // for the reality that old-gen and young-gen activities are not truly "concurrent". If there is old-gen work to
      // be done, we start up the young-gen GC threads so they can do some of this old-gen work. As implemented, promotion
      // gets priority over old-gen marking.

      size_t promo_potential = heap->get_promotion_potential();
      size_t promo_in_place_potential = heap->get_promotion_in_place_potential();
      ShenandoahOldHeuristics* old_heuristics = (ShenandoahOldHeuristics*) heap->old_generation()->heuristics();
      size_t mixed_candidates = old_heuristics->unprocessed_old_collection_candidates();
      if (promo_potential > 0) {
        // Detect unsigned arithmetic underflow
        assert(promo_potential < heap->capacity(), "Sanity");
        log_info(gc)("Trigger (%s): expedite promotion of " SIZE_FORMAT "%s",
                     _generation->name(), byte_size_in_proper_unit(promo_potential), proper_unit_for_byte_size(promo_potential));
        return true;
      } else if (promo_in_place_potential > 0) {
        // Detect unsigned arithmetic underflow
        assert(promo_in_place_potential < heap->capacity(), "Sanity");
        log_info(gc)("Trigger (%s): expedite promotion in place of " SIZE_FORMAT "%s", _generation->name(),
                     byte_size_in_proper_unit(promo_in_place_potential),
                     proper_unit_for_byte_size(promo_in_place_potential));
        return true;
      } else if (mixed_candidates > 0) {
        // We need to run young GC in order to open up some free heap regions so we can finish mixed evacuations.
        log_info(gc)("Trigger (%s): expedite mixed evacuation of " SIZE_FORMAT " regions",
                     _generation->name(), mixed_candidates);
        return true;
      }
    }
  }
  return ShenandoahHeuristics::should_start_gc();
}

void ShenandoahAdaptiveHeuristics::adjust_last_trigger_parameters(double amount) {
  switch (_last_trigger) {
    case RATE:
      adjust_margin_of_error(amount);
      break;
    case SPIKE:
      adjust_spike_threshold(amount);
      break;
    case OTHER:
      // nothing to adjust here.
      break;
    default:
      ShouldNotReachHere();
  }
}

void ShenandoahAdaptiveHeuristics::adjust_margin_of_error(double amount) {
  _margin_of_error_sd = saturate(_margin_of_error_sd + amount, MINIMUM_CONFIDENCE, MAXIMUM_CONFIDENCE);
  log_debug(gc, ergo)("Margin of error now %.2f", _margin_of_error_sd);
}

void ShenandoahAdaptiveHeuristics::adjust_spike_threshold(double amount) {
  _spike_threshold_sd = saturate(_spike_threshold_sd - amount, MINIMUM_CONFIDENCE, MAXIMUM_CONFIDENCE);
  log_debug(gc, ergo)("Spike threshold now: %.2f", _spike_threshold_sd);
}

ShenandoahAllocationRate::ShenandoahAllocationRate() :
  _last_sample_time(os::elapsedTime()),
  _last_sample_value(0),
  _interval_sec(1.0 / ShenandoahAdaptiveSampleFrequencyHz),
  _rate(int(ShenandoahAdaptiveSampleSizeSeconds * ShenandoahAdaptiveSampleFrequencyHz), ShenandoahAdaptiveDecayFactor),
  _rate_avg(int(ShenandoahAdaptiveSampleSizeSeconds * ShenandoahAdaptiveSampleFrequencyHz), ShenandoahAdaptiveDecayFactor) {
}

// Sample the allocation counter at most once per sampling interval. Returns the instantaneous allocation rate
// (bytes/sec) observed since the previous sample, or 0.0 if it is too soon to take another sample.
double ShenandoahAllocationRate::sample(size_t allocated) {
  double now = os::elapsedTime();
  double rate = 0.0;
  if (now - _last_sample_time > _interval_sec) {
    if (allocated >= _last_sample_value) {
      rate = instantaneous_rate(now, allocated);
      _rate.add(rate);
      _rate_avg.add(_rate.avg());
    }

    _last_sample_time = now;
    _last_sample_value = allocated;
  }
  return rate;
}

double ShenandoahAllocationRate::upper_bound(double sds) const {
  // Here we are using the standard deviation of the computed running
  // average, rather than the standard deviation of the samples that went
  // into the moving average. This is a much more stable value and is tied
  // to the actual statistic in use (moving average over samples of averages).
  return _rate.davg() + (sds * _rate_avg.dsd());
}

void ShenandoahAllocationRate::allocation_counter_reset() {
  _last_sample_time = os::elapsedTime();
  _last_sample_value = 0;
}

bool ShenandoahAllocationRate::is_spiking(double rate, double threshold) const {
  if (rate <= 0.0) {
    return false;
  }

  double sd = _rate.sd();
  if (sd > 0) {
    // There is a small chance that the rate has already been sampled, but it
    // seems not to matter in practice.
    double z_score = (rate - _rate.avg()) / sd;
    if (z_score > threshold) {
      return true;
    }
  }
  return false;
}

// Bytes allocated since the previous sample, divided by the elapsed time; returns 0 if no time has elapsed.
double ShenandoahAllocationRate::instantaneous_rate(double time, size_t allocated) const {
  size_t last_value = _last_sample_value;
  double last_time = _last_sample_time;
  size_t allocation_delta = (allocated > last_value) ? (allocated - last_value) : 0;
  double time_delta_sec = time - last_time;
  return (time_delta_sec > 0) ? (allocation_delta / time_delta_sec) : 0;
}