1 /* 2 * Copyright (c) 2018, 2019, Red Hat, Inc. All rights reserved. 3 * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved. 4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 5 * 6 * This code is free software; you can redistribute it and/or modify it 7 * under the terms of the GNU General Public License version 2 only, as 8 * published by the Free Software Foundation. 9 * 10 * This code is distributed in the hope that it will be useful, but WITHOUT 11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 13 * version 2 for more details (a copy is included in the LICENSE file that 14 * accompanied this code). 15 * 16 * You should have received a copy of the GNU General Public License version 17 * 2 along with this work; if not, write to the Free Software Foundation, 18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 19 * 20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 21 * or visit www.oracle.com if you need additional information or have any 22 * questions. 23 * 24 */ 25 26 #include "precompiled.hpp" 27 28 #include "gc/shared/gcCause.hpp" 29 #include "gc/shenandoah/heuristics/shenandoahHeuristics.hpp" 30 #include "gc/shenandoah/heuristics/shenandoahSpaceInfo.hpp" 31 #include "gc/shenandoah/heuristics/shenandoahAdaptiveHeuristics.hpp" 32 #include "gc/shenandoah/shenandoahCollectionSet.hpp" 33 #include "gc/shenandoah/shenandoahCollectorPolicy.hpp" 34 #include "gc/shenandoah/shenandoahFreeSet.hpp" 35 #include "gc/shenandoah/shenandoahHeap.inline.hpp" 36 #include "gc/shenandoah/shenandoahHeapRegion.inline.hpp" 37 #include "logging/log.hpp" 38 #include "logging/logTag.hpp" 39 #include "runtime/globals.hpp" 40 #include "utilities/quickSort.hpp" 41 42 // These constants are used to adjust the margin of error for the moving 43 // average of the allocation rate and cycle time. 
// The units are standard deviations.
//
// FULL_PENALTY_SD is the amount passed to adjust_margin_of_error() and
// adjust_spike_threshold() after a full GC (see record_success_full()).
// DEGENERATE_PENALTY_SD is the amount passed to the same two functions after
// a degenerated GC (see record_success_degenerated()).
const double ShenandoahAdaptiveHeuristics::FULL_PENALTY_SD = 0.2;
const double ShenandoahAdaptiveHeuristics::DEGENERATE_PENALTY_SD = 0.1;

// These are used to decide if we want to make any adjustments at all
// at the end of a successful concurrent cycle. They are compared against the
// z-score of the memory available at the end of the cycle: only a z-score
// below LOWEST_EXPECTED_AVAILABLE_AT_END or above
// HIGHEST_EXPECTED_AVAILABLE_AT_END leads to an adjustment (see
// record_success_concurrent()).
const double ShenandoahAdaptiveHeuristics::LOWEST_EXPECTED_AVAILABLE_AT_END = -0.5;
const double ShenandoahAdaptiveHeuristics::HIGHEST_EXPECTED_AVAILABLE_AT_END = 0.5;

// These values are the confidence interval expressed as standard deviations.
// At the minimum confidence level, there is a 25% chance that the true value of
// the estimate (average cycle time or allocation rate) is not more than
// MINIMUM_CONFIDENCE standard deviations away from our estimate. Similarly, the
// MAXIMUM_CONFIDENCE interval here means there is a one in a thousand chance
// that the true value of our estimate is outside the interval. These are used
// as bounds on the adjustments applied at the outcome of a GC cycle.
60 const double ShenandoahAdaptiveHeuristics::MINIMUM_CONFIDENCE = 0.319; // 25% 61 const double ShenandoahAdaptiveHeuristics::MAXIMUM_CONFIDENCE = 3.291; // 99.9% 62 63 ShenandoahAdaptiveHeuristics::ShenandoahAdaptiveHeuristics(ShenandoahSpaceInfo* space_info) : 64 ShenandoahHeuristics(space_info), 65 _margin_of_error_sd(ShenandoahAdaptiveInitialConfidence), 66 _spike_threshold_sd(ShenandoahAdaptiveInitialSpikeThreshold), 67 _last_trigger(OTHER), 68 _available(Moving_Average_Samples, ShenandoahAdaptiveDecayFactor) { } 69 70 ShenandoahAdaptiveHeuristics::~ShenandoahAdaptiveHeuristics() {} 71 72 void ShenandoahAdaptiveHeuristics::choose_collection_set_from_regiondata(ShenandoahCollectionSet* cset, 73 RegionData* data, size_t size, 74 size_t actual_free) { 75 size_t garbage_threshold = ShenandoahHeapRegion::region_size_bytes() * ShenandoahGarbageThreshold / 100; 76 77 // The logic for cset selection in adaptive is as follows: 78 // 79 // 1. We cannot get cset larger than available free space. Otherwise we guarantee OOME 80 // during evacuation, and thus guarantee full GC. In practice, we also want to let 81 // application to allocate something. This is why we limit CSet to some fraction of 82 // available space. In non-overloaded heap, max_cset would contain all plausible candidates 83 // over garbage threshold. 84 // 85 // 2. We should not get cset too low so that free threshold would not be met right 86 // after the cycle. Otherwise we get back-to-back cycles for no reason if heap is 87 // too fragmented. In non-overloaded non-fragmented heap min_garbage would be around zero. 88 // 89 // Therefore, we start by sorting the regions by garbage. Then we unconditionally add the best candidates 90 // before we meet min_garbage. Then we add all candidates that fit with a garbage threshold before 91 // we hit max_cset. When max_cset is hit, we terminate the cset selection. 
Note that in this scheme, 92 // ShenandoahGarbageThreshold is the soft threshold which would be ignored until min_garbage is hit. 93 94 size_t capacity = _space_info->soft_max_capacity(); 95 size_t max_cset = (size_t)((1.0 * capacity / 100 * ShenandoahEvacReserve) / ShenandoahEvacWaste); 96 size_t free_target = (capacity / 100 * ShenandoahMinFreeThreshold) + max_cset; 97 size_t min_garbage = (free_target > actual_free ? (free_target - actual_free) : 0); 98 99 log_info(gc, ergo)("Adaptive CSet Selection. Target Free: " SIZE_FORMAT "%s, Actual Free: " 100 SIZE_FORMAT "%s, Max Evacuation: " SIZE_FORMAT "%s, Min Garbage: " SIZE_FORMAT "%s", 101 byte_size_in_proper_unit(free_target), proper_unit_for_byte_size(free_target), 102 byte_size_in_proper_unit(actual_free), proper_unit_for_byte_size(actual_free), 103 byte_size_in_proper_unit(max_cset), proper_unit_for_byte_size(max_cset), 104 byte_size_in_proper_unit(min_garbage), proper_unit_for_byte_size(min_garbage)); 105 106 // Better select garbage-first regions 107 QuickSort::sort<RegionData>(data, (int)size, compare_by_garbage, false); 108 109 size_t cur_cset = 0; 110 size_t cur_garbage = 0; 111 112 for (size_t idx = 0; idx < size; idx++) { 113 ShenandoahHeapRegion* r = data[idx].get_region(); 114 115 size_t new_cset = cur_cset + r->get_live_data_bytes(); 116 size_t new_garbage = cur_garbage + r->garbage(); 117 118 if (new_cset > max_cset) { 119 break; 120 } 121 122 if ((new_garbage < min_garbage) || (r->garbage() > garbage_threshold)) { 123 cset->add_region(r); 124 cur_cset = new_cset; 125 cur_garbage = new_garbage; 126 } 127 } 128 } 129 130 void ShenandoahAdaptiveHeuristics::record_cycle_start() { 131 ShenandoahHeuristics::record_cycle_start(); 132 _allocation_rate.allocation_counter_reset(); 133 } 134 135 void ShenandoahAdaptiveHeuristics::record_success_concurrent() { 136 ShenandoahHeuristics::record_success_concurrent(); 137 138 size_t available = _space_info->available(); 139 140 double z_score = 0.0; 141 double 
available_sd = _available.sd(); 142 if (available_sd > 0) { 143 double available_avg = _available.avg(); 144 z_score = (double(available) - available_avg) / available_sd; 145 log_debug(gc, ergo)("Available: " SIZE_FORMAT " %sB, z-score=%.3f. Average available: %.1f %sB +/- %.1f %sB.", 146 byte_size_in_proper_unit(available), proper_unit_for_byte_size(available), 147 z_score, 148 byte_size_in_proper_unit(available_avg), proper_unit_for_byte_size(available_avg), 149 byte_size_in_proper_unit(available_sd), proper_unit_for_byte_size(available_sd)); 150 } 151 152 _available.add(double(available)); 153 154 // In the case when a concurrent GC cycle completes successfully but with an 155 // unusually small amount of available memory we will adjust our trigger 156 // parameters so that they are more likely to initiate a new cycle. 157 // Conversely, when a GC cycle results in an above average amount of available 158 // memory, we will adjust the trigger parameters to be less likely to initiate 159 // a GC cycle. 160 // 161 // The z-score we've computed is in no way statistically related to the 162 // trigger parameters, but it has the nice property that worse z-scores for 163 // available memory indicate making larger adjustments to the trigger 164 // parameters. It also results in fewer adjustments as the application 165 // stabilizes. 166 // 167 // In order to avoid making endless and likely unnecessary adjustments to the 168 // trigger parameters, the change in available memory (with respect to the 169 // average) at the end of a cycle must be beyond these threshold values. 170 if (z_score < LOWEST_EXPECTED_AVAILABLE_AT_END || 171 z_score > HIGHEST_EXPECTED_AVAILABLE_AT_END) { 172 // The sign is flipped because a negative z-score indicates that the 173 // available memory at the end of the cycle is below average. Positive 174 // adjustments make the triggers more sensitive (i.e., more likely to fire). 
175 // The z-score also gives us a measure of just how far below normal. This 176 // property allows us to adjust the trigger parameters proportionally. 177 // 178 // The `100` here is used to attenuate the size of our adjustments. This 179 // number was chosen empirically. It also means the adjustments at the end of 180 // a concurrent cycle are an order of magnitude smaller than the adjustments 181 // made for a degenerated or full GC cycle (which themselves were also 182 // chosen empirically). 183 adjust_last_trigger_parameters(z_score / -100); 184 } 185 } 186 187 void ShenandoahAdaptiveHeuristics::record_success_degenerated() { 188 ShenandoahHeuristics::record_success_degenerated(); 189 // Adjust both trigger's parameters in the case of a degenerated GC because 190 // either of them should have triggered earlier to avoid this case. 191 adjust_margin_of_error(DEGENERATE_PENALTY_SD); 192 adjust_spike_threshold(DEGENERATE_PENALTY_SD); 193 } 194 195 void ShenandoahAdaptiveHeuristics::record_success_full() { 196 ShenandoahHeuristics::record_success_full(); 197 // Adjust both trigger's parameters in the case of a full GC because 198 // either of them should have triggered earlier to avoid this case. 199 adjust_margin_of_error(FULL_PENALTY_SD); 200 adjust_spike_threshold(FULL_PENALTY_SD); 201 } 202 203 static double saturate(double value, double min, double max) { 204 return MAX2(MIN2(value, max), min); 205 } 206 207 // Rationale: 208 // The idea is that there is an average allocation rate and there are occasional abnormal bursts (or spikes) of 209 // allocations that exceed the average allocation rate. What do these spikes look like? 210 // 211 // 1. At certain phase changes, we may discard large amounts of data and replace it with large numbers of newly 212 // allocated objects. This "spike" looks more like a phase change. We were in steady state at M bytes/sec 213 // allocation rate and now we're in a "reinitialization phase" that looks like N bytes/sec. 
We need the "spike" 214 // accommodation to give us enough runway to recalibrate our "average allocation rate". 215 // 216 // 2. The typical workload changes. "Suddenly", our typical workload of N TPS increases to N+delta TPS. This means 217 // our average allocation rate needs to be adjusted. Once again, we need the "spike" accomodation to give us 218 // enough runway to recalibrate our "average allocation rate". 219 // 220 // 3. Though there is an "average" allocation rate, a given workload's demand for allocation may be very bursty. We 221 // allocate a bunch of LABs during the 5 ms that follow completion of a GC, then we perform no more allocations for 222 // the next 150 ms. It seems we want the "spike" to represent the maximum divergence from average within the 223 // period of time between consecutive evaluation of the should_start_gc() service. Here's the thinking: 224 // 225 // a) Between now and the next time I ask whether should_start_gc(), we might experience a spike representing 226 // the anticipated burst of allocations. If that would put us over budget, then we should start GC immediately. 227 // b) Between now and the anticipated depletion of allocation pool, there may be two or more bursts of allocations. 228 // If there are more than one of these bursts, we can "approximate" that these will be separated by spans of 229 // time with very little or no allocations so the "average" allocation rate should be a suitable approximation 230 // of how this will behave. 231 // 232 // For cases 1 and 2, we need to "quickly" recalibrate the average allocation rate whenever we detect a change 233 // in operation mode. We want some way to decide that the average rate has changed, while keeping average 234 // allocation rate computation independent. 
235 bool ShenandoahAdaptiveHeuristics::should_start_gc() { 236 size_t capacity = _space_info->soft_max_capacity(); 237 size_t available = _space_info->soft_available(); 238 size_t allocated = _space_info->bytes_allocated_since_gc_start(); 239 240 log_debug(gc)("should_start_gc? available: " SIZE_FORMAT ", soft_max_capacity: " SIZE_FORMAT 241 ", allocated: " SIZE_FORMAT, available, capacity, allocated); 242 243 if (_start_gc_is_pending) { 244 log_trigger("GC start is already pending"); 245 return true; 246 } 247 248 // Track allocation rate even if we decide to start a cycle for other reasons. 249 double rate = _allocation_rate.sample(allocated); 250 _last_trigger = OTHER; 251 252 size_t min_threshold = min_free_threshold(); 253 if (available < min_threshold) { 254 log_trigger("Free (" SIZE_FORMAT "%s) is below minimum threshold (" SIZE_FORMAT "%s)", 255 byte_size_in_proper_unit(available), proper_unit_for_byte_size(available), 256 byte_size_in_proper_unit(min_threshold), proper_unit_for_byte_size(min_threshold)); 257 accept_trigger_with_type(OTHER); 258 return true; 259 } 260 261 // Check if we need to learn a bit about the application 262 const size_t max_learn = ShenandoahLearningSteps; 263 if (_gc_times_learned < max_learn) { 264 size_t init_threshold = capacity / 100 * ShenandoahInitFreeThreshold; 265 if (available < init_threshold) { 266 log_trigger("Learning " SIZE_FORMAT " of " SIZE_FORMAT ". Free (" SIZE_FORMAT "%s) is below initial threshold (" SIZE_FORMAT "%s)", 267 _gc_times_learned + 1, max_learn, 268 byte_size_in_proper_unit(available), proper_unit_for_byte_size(available), 269 byte_size_in_proper_unit(init_threshold), proper_unit_for_byte_size(init_threshold)); 270 accept_trigger_with_type(OTHER); 271 return true; 272 } 273 } 274 // Check if allocation headroom is still okay. This also factors in: 275 // 1. Some space to absorb allocation spikes (ShenandoahAllocSpikeFactor) 276 // 2. 
Accumulated penalties from Degenerated and Full GC 277 size_t allocation_headroom = available; 278 279 size_t spike_headroom = capacity / 100 * ShenandoahAllocSpikeFactor; 280 size_t penalties = capacity / 100 * _gc_time_penalties; 281 282 allocation_headroom -= MIN2(allocation_headroom, spike_headroom); 283 allocation_headroom -= MIN2(allocation_headroom, penalties); 284 285 double avg_cycle_time = _gc_cycle_time_history->davg() + (_margin_of_error_sd * _gc_cycle_time_history->dsd()); 286 double avg_alloc_rate = _allocation_rate.upper_bound(_margin_of_error_sd); 287 log_debug(gc)("average GC time: %.2f ms, allocation rate: %.0f %s/s", 288 avg_cycle_time * 1000, byte_size_in_proper_unit(avg_alloc_rate), proper_unit_for_byte_size(avg_alloc_rate)); 289 if (avg_cycle_time * avg_alloc_rate > allocation_headroom) { 290 log_trigger("Average GC time (%.2f ms) is above the time for average allocation rate (%.0f %sB/s)" 291 " to deplete free headroom (" SIZE_FORMAT "%s) (margin of error = %.2f)", 292 avg_cycle_time * 1000, 293 byte_size_in_proper_unit(avg_alloc_rate), proper_unit_for_byte_size(avg_alloc_rate), 294 byte_size_in_proper_unit(allocation_headroom), proper_unit_for_byte_size(allocation_headroom), 295 _margin_of_error_sd); 296 log_info(gc, ergo)("Free headroom: " SIZE_FORMAT "%s (free) - " SIZE_FORMAT "%s (spike) - " SIZE_FORMAT "%s (penalties) = " SIZE_FORMAT "%s", 297 byte_size_in_proper_unit(available), proper_unit_for_byte_size(available), 298 byte_size_in_proper_unit(spike_headroom), proper_unit_for_byte_size(spike_headroom), 299 byte_size_in_proper_unit(penalties), proper_unit_for_byte_size(penalties), 300 byte_size_in_proper_unit(allocation_headroom), proper_unit_for_byte_size(allocation_headroom)); 301 accept_trigger_with_type(RATE); 302 return true; 303 } 304 305 bool is_spiking = _allocation_rate.is_spiking(rate, _spike_threshold_sd); 306 if (is_spiking && avg_cycle_time > allocation_headroom / rate) { 307 log_trigger("Average GC time (%.2f ms) is above 
the time for instantaneous allocation rate (%.0f %sB/s) to deplete free headroom (" SIZE_FORMAT "%s) (spike threshold = %.2f)", 308 avg_cycle_time * 1000, 309 byte_size_in_proper_unit(rate), proper_unit_for_byte_size(rate), 310 byte_size_in_proper_unit(allocation_headroom), proper_unit_for_byte_size(allocation_headroom), 311 _spike_threshold_sd); 312 accept_trigger_with_type(SPIKE); 313 return true; 314 } 315 316 if (ShenandoahHeuristics::should_start_gc()) { 317 _start_gc_is_pending = true; 318 return true; 319 } else { 320 return false; 321 } 322 } 323 324 void ShenandoahAdaptiveHeuristics::adjust_last_trigger_parameters(double amount) { 325 switch (_last_trigger) { 326 case RATE: 327 adjust_margin_of_error(amount); 328 break; 329 case SPIKE: 330 adjust_spike_threshold(amount); 331 break; 332 case OTHER: 333 // nothing to adjust here. 334 break; 335 default: 336 ShouldNotReachHere(); 337 } 338 } 339 340 void ShenandoahAdaptiveHeuristics::adjust_margin_of_error(double amount) { 341 _margin_of_error_sd = saturate(_margin_of_error_sd + amount, MINIMUM_CONFIDENCE, MAXIMUM_CONFIDENCE); 342 log_debug(gc, ergo)("Margin of error now %.2f", _margin_of_error_sd); 343 } 344 345 void ShenandoahAdaptiveHeuristics::adjust_spike_threshold(double amount) { 346 _spike_threshold_sd = saturate(_spike_threshold_sd - amount, MINIMUM_CONFIDENCE, MAXIMUM_CONFIDENCE); 347 log_debug(gc, ergo)("Spike threshold now: %.2f", _spike_threshold_sd); 348 } 349 350 size_t ShenandoahAdaptiveHeuristics::min_free_threshold() { 351 // Note that soft_max_capacity() / 100 * min_free_threshold is smaller than max_capacity() / 100 * min_free_threshold. 352 // We want to behave conservatively here, so use max_capacity(). By returning a larger value, we cause the GC to 353 // trigger when the remaining amount of free shrinks below the larger threshold. 
354 return _space_info->max_capacity() / 100 * ShenandoahMinFreeThreshold; 355 } 356 357 ShenandoahAllocationRate::ShenandoahAllocationRate() : 358 _last_sample_time(os::elapsedTime()), 359 _last_sample_value(0), 360 _interval_sec(1.0 / ShenandoahAdaptiveSampleFrequencyHz), 361 _rate(int(ShenandoahAdaptiveSampleSizeSeconds * ShenandoahAdaptiveSampleFrequencyHz), ShenandoahAdaptiveDecayFactor), 362 _rate_avg(int(ShenandoahAdaptiveSampleSizeSeconds * ShenandoahAdaptiveSampleFrequencyHz), ShenandoahAdaptiveDecayFactor) { 363 } 364 365 double ShenandoahAllocationRate::sample(size_t allocated) { 366 double now = os::elapsedTime(); 367 double rate = 0.0; 368 if (now - _last_sample_time > _interval_sec) { 369 if (allocated >= _last_sample_value) { 370 rate = instantaneous_rate(now, allocated); 371 _rate.add(rate); 372 _rate_avg.add(_rate.avg()); 373 } 374 375 _last_sample_time = now; 376 _last_sample_value = allocated; 377 } 378 return rate; 379 } 380 381 double ShenandoahAllocationRate::upper_bound(double sds) const { 382 // Here we are using the standard deviation of the computed running 383 // average, rather than the standard deviation of the samples that went 384 // into the moving average. This is a much more stable value and is tied 385 // to the actual statistic in use (moving average over samples of averages). 386 return _rate.davg() + (sds * _rate_avg.dsd()); 387 } 388 389 void ShenandoahAllocationRate::allocation_counter_reset() { 390 _last_sample_time = os::elapsedTime(); 391 _last_sample_value = 0; 392 } 393 394 bool ShenandoahAllocationRate::is_spiking(double rate, double threshold) const { 395 if (rate <= 0.0) { 396 return false; 397 } 398 399 double sd = _rate.sd(); 400 if (sd > 0) { 401 // There is a small chance that that rate has already been sampled, but it 402 // seems not to matter in practice. 
403 double z_score = (rate - _rate.avg()) / sd; 404 if (z_score > threshold) { 405 return true; 406 } 407 } 408 return false; 409 } 410 411 double ShenandoahAllocationRate::instantaneous_rate(double time, size_t allocated) const { 412 size_t last_value = _last_sample_value; 413 double last_time = _last_sample_time; 414 size_t allocation_delta = (allocated > last_value) ? (allocated - last_value) : 0; 415 double time_delta_sec = time - last_time; 416 return (time_delta_sec > 0) ? (allocation_delta / time_delta_sec) : 0; 417 }