1 /*
2 * Copyright (c) 2018, 2019, Red Hat, Inc. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "precompiled.hpp"
26
27 #include "gc/shenandoah/heuristics/shenandoahAdaptiveHeuristics.hpp"
28 #include "gc/shenandoah/shenandoahCollectionSet.hpp"
29 #include "gc/shenandoah/shenandoahFreeSet.hpp"
30 #include "gc/shenandoah/shenandoahHeap.inline.hpp"
31 #include "gc/shenandoah/shenandoahHeapRegion.inline.hpp"
32 #include "logging/log.hpp"
33 #include "logging/logTag.hpp"
34 #include "utilities/quickSort.hpp"
35
36 // These constants are used to adjust the margin of error for the moving
37 // average of the allocation rate and cycle time. The units are standard
38 // deviations.
39 const double ShenandoahAdaptiveHeuristics::FULL_PENALTY_SD = 0.2;
40 const double ShenandoahAdaptiveHeuristics::DEGENERATE_PENALTY_SD = 0.1;
41
42 // These are used to decide if we want to make any adjustments at all
43 // at the end of a successful concurrent cycle.
44 const double ShenandoahAdaptiveHeuristics::LOWEST_EXPECTED_AVAILABLE_AT_END = -0.5;
45 const double ShenandoahAdaptiveHeuristics::HIGHEST_EXPECTED_AVAILABLE_AT_END = 0.5;
46
47 // These values are the confidence interval expressed as standard deviations.
48 // At the minimum confidence level, there is a 25% chance that the true value of
49 // the estimate (average cycle time or allocation rate) is not more than
50 // MINIMUM_CONFIDENCE standard deviations away from our estimate. Similarly, the
51 // MAXIMUM_CONFIDENCE interval here means there is a one in a thousand chance
52 // that the true value of our estimate is outside the interval. These are used
53 // as bounds on the adjustments applied at the outcome of a GC cycle.
54 const double ShenandoahAdaptiveHeuristics::MINIMUM_CONFIDENCE = 0.319; // 25%
55 const double ShenandoahAdaptiveHeuristics::MAXIMUM_CONFIDENCE = 3.291; // 99.9%
56
57 ShenandoahAdaptiveHeuristics::ShenandoahAdaptiveHeuristics() :
58 ShenandoahHeuristics(),
59 _margin_of_error_sd(ShenandoahAdaptiveInitialConfidence),
60 _spike_threshold_sd(ShenandoahAdaptiveInitialSpikeThreshold),
61 _last_trigger(OTHER) { }
62
63 ShenandoahAdaptiveHeuristics::~ShenandoahAdaptiveHeuristics() {}
64
65 void ShenandoahAdaptiveHeuristics::choose_collection_set_from_regiondata(ShenandoahCollectionSet* cset,
66 RegionData* data, size_t size,
67 size_t actual_free) {
68 size_t garbage_threshold = ShenandoahHeapRegion::region_size_bytes() * ShenandoahGarbageThreshold / 100;
69
70 // The logic for cset selection in adaptive is as follows:
71 //
72 // 1. We cannot get cset larger than available free space. Otherwise we guarantee OOME
73 // during evacuation, and thus guarantee full GC. In practice, we also want to let
74 // application to allocate something. This is why we limit CSet to some fraction of
75 // available space. In non-overloaded heap, max_cset would contain all plausible candidates
76 // over garbage threshold.
77 //
78 // 2. We should not get cset too low so that free threshold would not be met right
79 // after the cycle. Otherwise we get back-to-back cycles for no reason if heap is
80 // too fragmented. In non-overloaded non-fragmented heap min_garbage would be around zero.
81 //
82 // Therefore, we start by sorting the regions by garbage. Then we unconditionally add the best candidates
83 // before we meet min_garbage. Then we add all candidates that fit with a garbage threshold before
84 // we hit max_cset. When max_cset is hit, we terminate the cset selection. Note that in this scheme,
85 // ShenandoahGarbageThreshold is the soft threshold which would be ignored until min_garbage is hit.
86
87 size_t capacity = ShenandoahHeap::heap()->soft_max_capacity();
88 size_t max_cset = (size_t)((1.0 * capacity / 100 * ShenandoahEvacReserve) / ShenandoahEvacWaste);
89 size_t free_target = (capacity / 100 * ShenandoahMinFreeThreshold) + max_cset;
90 size_t min_garbage = (free_target > actual_free ? (free_target - actual_free) : 0);
91
92 log_info(gc, ergo)("Adaptive CSet Selection. Target Free: " SIZE_FORMAT "%s, Actual Free: "
93 SIZE_FORMAT "%s, Max CSet: " SIZE_FORMAT "%s, Min Garbage: " SIZE_FORMAT "%s",
94 byte_size_in_proper_unit(free_target), proper_unit_for_byte_size(free_target),
95 byte_size_in_proper_unit(actual_free), proper_unit_for_byte_size(actual_free),
96 byte_size_in_proper_unit(max_cset), proper_unit_for_byte_size(max_cset),
97 byte_size_in_proper_unit(min_garbage), proper_unit_for_byte_size(min_garbage));
98
99 // Better select garbage-first regions
100 QuickSort::sort<RegionData>(data, (int)size, compare_by_garbage, false);
101
102 size_t cur_cset = 0;
103 size_t cur_garbage = 0;
104
105 for (size_t idx = 0; idx < size; idx++) {
106 ShenandoahHeapRegion* r = data[idx]._region;
107
108 size_t new_cset = cur_cset + r->get_live_data_bytes();
109 size_t new_garbage = cur_garbage + r->garbage();
110
111 if (new_cset > max_cset) {
112 break;
113 }
114
115 if ((new_garbage < min_garbage) || (r->garbage() > garbage_threshold)) {
116 cset->add_region(r);
117 cur_cset = new_cset;
118 cur_garbage = new_garbage;
119 }
120 }
121 }
122
123 void ShenandoahAdaptiveHeuristics::record_cycle_start() {
124 ShenandoahHeuristics::record_cycle_start();
125 _allocation_rate.allocation_counter_reset();
126 }
127
128 void ShenandoahAdaptiveHeuristics::record_success_concurrent() {
129 ShenandoahHeuristics::record_success_concurrent();
130
131 size_t available = ShenandoahHeap::heap()->free_set()->available();
132
133 _available.add(available);
134 double z_score = 0.0;
135 if (_available.sd() > 0) {
136 z_score = (available - _available.avg()) / _available.sd();
137 }
138
139 log_debug(gc, ergo)("Available: " SIZE_FORMAT " %sB, z-score=%.3f. Average available: %.1f %sB +/- %.1f %sB.",
140 byte_size_in_proper_unit(available), proper_unit_for_byte_size(available),
141 z_score,
142 byte_size_in_proper_unit(_available.avg()), proper_unit_for_byte_size(_available.avg()),
143 byte_size_in_proper_unit(_available.sd()), proper_unit_for_byte_size(_available.sd()));
144
145 // In the case when a concurrent GC cycle completes successfully but with an
146 // unusually small amount of available memory we will adjust our trigger
147 // parameters so that they are more likely to initiate a new cycle.
148 // Conversely, when a GC cycle results in an above average amount of available
149 // memory, we will adjust the trigger parameters to be less likely to initiate
150 // a GC cycle.
151 //
152 // The z-score we've computed is in no way statistically related to the
153 // trigger parameters, but it has the nice property that worse z-scores for
154 // available memory indicate making larger adjustments to the trigger
155 // parameters. It also results in fewer adjustments as the application
156 // stabilizes.
157 //
158 // In order to avoid making endless and likely unnecessary adjustments to the
159 // trigger parameters, the change in available memory (with respect to the
160 // average) at the end of a cycle must be beyond these threshold values.
161 if (z_score < LOWEST_EXPECTED_AVAILABLE_AT_END ||
162 z_score > HIGHEST_EXPECTED_AVAILABLE_AT_END) {
163 // The sign is flipped because a negative z-score indicates that the
178 void ShenandoahAdaptiveHeuristics::record_success_degenerated() {
179 ShenandoahHeuristics::record_success_degenerated();
180 // Adjust both trigger's parameters in the case of a degenerated GC because
181 // either of them should have triggered earlier to avoid this case.
182 adjust_margin_of_error(DEGENERATE_PENALTY_SD);
183 adjust_spike_threshold(DEGENERATE_PENALTY_SD);
184 }
185
186 void ShenandoahAdaptiveHeuristics::record_success_full() {
187 ShenandoahHeuristics::record_success_full();
188 // Adjust both trigger's parameters in the case of a full GC because
189 // either of them should have triggered earlier to avoid this case.
190 adjust_margin_of_error(FULL_PENALTY_SD);
191 adjust_spike_threshold(FULL_PENALTY_SD);
192 }
193
194 static double saturate(double value, double min, double max) {
195 return MAX2(MIN2(value, max), min);
196 }
197
198 bool ShenandoahAdaptiveHeuristics::should_start_gc() {
199 ShenandoahHeap* heap = ShenandoahHeap::heap();
200 size_t max_capacity = heap->max_capacity();
201 size_t capacity = heap->soft_max_capacity();
202 size_t available = heap->free_set()->available();
203 size_t allocated = heap->bytes_allocated_since_gc_start();
204
205 // Make sure the code below treats available without the soft tail.
206 size_t soft_tail = max_capacity - capacity;
207 available = (available > soft_tail) ? (available - soft_tail) : 0;
208
209 // Track allocation rate even if we decide to start a cycle for other reasons.
210 double rate = _allocation_rate.sample(allocated);
211 _last_trigger = OTHER;
212
213 size_t min_threshold = capacity / 100 * ShenandoahMinFreeThreshold;
214 if (available < min_threshold) {
215 log_info(gc)("Trigger: Free (" SIZE_FORMAT "%s) is below minimum threshold (" SIZE_FORMAT "%s)",
216 byte_size_in_proper_unit(available), proper_unit_for_byte_size(available),
217 byte_size_in_proper_unit(min_threshold), proper_unit_for_byte_size(min_threshold));
218 return true;
219 }
220
221 const size_t max_learn = ShenandoahLearningSteps;
222 if (_gc_times_learned < max_learn) {
223 size_t init_threshold = capacity / 100 * ShenandoahInitFreeThreshold;
224 if (available < init_threshold) {
225 log_info(gc)("Trigger: Learning " SIZE_FORMAT " of " SIZE_FORMAT ". Free (" SIZE_FORMAT "%s) is below initial threshold (" SIZE_FORMAT "%s)",
226 _gc_times_learned + 1, max_learn,
227 byte_size_in_proper_unit(available), proper_unit_for_byte_size(available),
228 byte_size_in_proper_unit(init_threshold), proper_unit_for_byte_size(init_threshold));
229 return true;
230 }
231 }
232
233 // Check if allocation headroom is still okay. This also factors in:
234 // 1. Some space to absorb allocation spikes
235 // 2. Accumulated penalties from Degenerated and Full GC
236 size_t allocation_headroom = available;
237
238 size_t spike_headroom = capacity / 100 * ShenandoahAllocSpikeFactor;
239 size_t penalties = capacity / 100 * _gc_time_penalties;
240
241 allocation_headroom -= MIN2(allocation_headroom, spike_headroom);
242 allocation_headroom -= MIN2(allocation_headroom, penalties);
243
244 double avg_cycle_time = _gc_time_history->davg() + (_margin_of_error_sd * _gc_time_history->dsd());
245 double avg_alloc_rate = _allocation_rate.upper_bound(_margin_of_error_sd);
246 if (avg_cycle_time > allocation_headroom / avg_alloc_rate) {
247 log_info(gc)("Trigger: Average GC time (%.2f ms) is above the time for average allocation rate (%.0f %sB/s) to deplete free headroom (" SIZE_FORMAT "%s) (margin of error = %.2f)",
248 avg_cycle_time * 1000,
249 byte_size_in_proper_unit(avg_alloc_rate), proper_unit_for_byte_size(avg_alloc_rate),
250 byte_size_in_proper_unit(allocation_headroom), proper_unit_for_byte_size(allocation_headroom),
251 _margin_of_error_sd);
252
253 log_info(gc, ergo)("Free headroom: " SIZE_FORMAT "%s (free) - " SIZE_FORMAT "%s (spike) - " SIZE_FORMAT "%s (penalties) = " SIZE_FORMAT "%s",
254 byte_size_in_proper_unit(available), proper_unit_for_byte_size(available),
255 byte_size_in_proper_unit(spike_headroom), proper_unit_for_byte_size(spike_headroom),
256 byte_size_in_proper_unit(penalties), proper_unit_for_byte_size(penalties),
257 byte_size_in_proper_unit(allocation_headroom), proper_unit_for_byte_size(allocation_headroom));
258
259 _last_trigger = RATE;
260 return true;
261 }
262
263 bool is_spiking = _allocation_rate.is_spiking(rate, _spike_threshold_sd);
264 if (is_spiking && avg_cycle_time > allocation_headroom / rate) {
265 log_info(gc)("Trigger: Average GC time (%.2f ms) is above the time for instantaneous allocation rate (%.0f %sB/s) to deplete free headroom (" SIZE_FORMAT "%s) (spike threshold = %.2f)",
266 avg_cycle_time * 1000,
267 byte_size_in_proper_unit(rate), proper_unit_for_byte_size(rate),
268 byte_size_in_proper_unit(allocation_headroom), proper_unit_for_byte_size(allocation_headroom),
269 _spike_threshold_sd);
270 _last_trigger = SPIKE;
271 return true;
272 }
273
274 return ShenandoahHeuristics::should_start_gc();
275 }
276
277 void ShenandoahAdaptiveHeuristics::adjust_last_trigger_parameters(double amount) {
278 switch (_last_trigger) {
279 case RATE:
280 adjust_margin_of_error(amount);
281 break;
282 case SPIKE:
283 adjust_spike_threshold(amount);
284 break;
285 case OTHER:
286 // nothing to adjust here.
287 break;
288 default:
289 ShouldNotReachHere();
290 }
291 }
292
293 void ShenandoahAdaptiveHeuristics::adjust_margin_of_error(double amount) {
337 _last_sample_value = 0;
338 }
339
340 bool ShenandoahAllocationRate::is_spiking(double rate, double threshold) const {
341 if (rate <= 0.0) {
342 return false;
343 }
344
345 double sd = _rate.sd();
346 if (sd > 0) {
347 // There is a small chance that that rate has already been sampled, but it
348 // seems not to matter in practice.
349 double z_score = (rate - _rate.avg()) / sd;
350 if (z_score > threshold) {
351 return true;
352 }
353 }
354 return false;
355 }
356
357 double ShenandoahAllocationRate::instantaneous_rate(size_t allocated) const {
358 return instantaneous_rate(os::elapsedTime(), allocated);
359 }
360
361 double ShenandoahAllocationRate::instantaneous_rate(double time, size_t allocated) const {
362 size_t last_value = _last_sample_value;
363 double last_time = _last_sample_time;
364 size_t allocation_delta = (allocated > last_value) ? (allocated - last_value) : 0;
365 double time_delta_sec = time - last_time;
366 return (time_delta_sec > 0) ? (allocation_delta / time_delta_sec) : 0;
367 }
|
1 /*
2 * Copyright (c) 2018, 2019, Red Hat, Inc. All rights reserved.
3 * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 *
6 * This code is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 only, as
8 * published by the Free Software Foundation.
9 *
10 * This code is distributed in the hope that it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 * version 2 for more details (a copy is included in the LICENSE file that
14 * accompanied this code).
15 *
16 * You should have received a copy of the GNU General Public License version
17 * 2 along with this work; if not, write to the Free Software Foundation,
18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19 *
20 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
21 * or visit www.oracle.com if you need additional information or have any
22 * questions.
23 *
24 */
25
26 #include "precompiled.hpp"
27
28 #include "gc/shenandoah/heuristics/shenandoahAdaptiveHeuristics.hpp"
29 #include "gc/shenandoah/shenandoahCollectionSet.hpp"
30 #include "gc/shenandoah/shenandoahFreeSet.hpp"
31 #include "gc/shenandoah/shenandoahHeap.inline.hpp"
32 #include "gc/shenandoah/shenandoahGeneration.hpp"
33 #include "gc/shenandoah/shenandoahHeapRegion.inline.hpp"
34 #include "gc/shenandoah/shenandoahOldGeneration.hpp"
35 #include "gc/shenandoah/shenandoahYoungGeneration.hpp"
36 #include "logging/log.hpp"
37 #include "logging/logTag.hpp"
38 #include "utilities/quickSort.hpp"
39
40 // These constants are used to adjust the margin of error for the moving
41 // average of the allocation rate and cycle time. The units are standard
42 // deviations.
43 const double ShenandoahAdaptiveHeuristics::FULL_PENALTY_SD = 0.2;
44 const double ShenandoahAdaptiveHeuristics::DEGENERATE_PENALTY_SD = 0.1;
45
46 // These are used to decide if we want to make any adjustments at all
47 // at the end of a successful concurrent cycle.
48 const double ShenandoahAdaptiveHeuristics::LOWEST_EXPECTED_AVAILABLE_AT_END = -0.5;
49 const double ShenandoahAdaptiveHeuristics::HIGHEST_EXPECTED_AVAILABLE_AT_END = 0.5;
50
51 // These values are the confidence interval expressed as standard deviations.
52 // At the minimum confidence level, there is a 25% chance that the true value of
53 // the estimate (average cycle time or allocation rate) is not more than
54 // MINIMUM_CONFIDENCE standard deviations away from our estimate. Similarly, the
55 // MAXIMUM_CONFIDENCE interval here means there is a one in a thousand chance
56 // that the true value of our estimate is outside the interval. These are used
57 // as bounds on the adjustments applied at the outcome of a GC cycle.
58 const double ShenandoahAdaptiveHeuristics::MINIMUM_CONFIDENCE = 0.319; // 25%
59 const double ShenandoahAdaptiveHeuristics::MAXIMUM_CONFIDENCE = 3.291; // 99.9%
60
61 ShenandoahAdaptiveHeuristics::ShenandoahAdaptiveHeuristics(ShenandoahGeneration* generation) :
62 ShenandoahHeuristics(generation),
63 _margin_of_error_sd(ShenandoahAdaptiveInitialConfidence),
64 _spike_threshold_sd(ShenandoahAdaptiveInitialSpikeThreshold),
65 _last_trigger(OTHER),
66 _available(Moving_Average_Samples, ShenandoahAdaptiveDecayFactor) { }
67
68 ShenandoahAdaptiveHeuristics::~ShenandoahAdaptiveHeuristics() {}
69
70 void ShenandoahAdaptiveHeuristics::choose_collection_set_from_regiondata(ShenandoahCollectionSet* cset,
71 RegionData* data, size_t size,
72 size_t actual_free) {
73 size_t garbage_threshold = ShenandoahHeapRegion::region_size_bytes() * ShenandoahGarbageThreshold / 100;
74 size_t ignore_threshold = ShenandoahHeapRegion::region_size_bytes() * ShenandoahIgnoreGarbageThreshold / 100;
75 ShenandoahHeap* heap = ShenandoahHeap::heap();
76
77 // The logic for cset selection in adaptive is as follows:
78 //
79 // 1. We cannot get cset larger than available free space. Otherwise we guarantee OOME
80 // during evacuation, and thus guarantee full GC. In practice, we also want to let
81 // application to allocate something. This is why we limit CSet to some fraction of
82 // available space. In non-overloaded heap, max_cset would contain all plausible candidates
83 // over garbage threshold.
84 //
85 // 2. We should not get cset too low so that free threshold would not be met right
86 // after the cycle. Otherwise we get back-to-back cycles for no reason if heap is
87 // too fragmented. In non-overloaded non-fragmented heap min_garbage would be around zero.
88 //
89 // Therefore, we start by sorting the regions by garbage. Then we unconditionally add the best candidates
90 // before we meet min_garbage. Then we add all candidates that fit with a garbage threshold before
91 // we hit max_cset. When max_cset is hit, we terminate the cset selection. Note that in this scheme,
92 // ShenandoahGarbageThreshold is the soft threshold which would be ignored until min_garbage is hit.
93
94 // In generational mode, the sort order within the data array is not strictly descending amounts of garbage. In
95 // particular, regions that have reached tenure age will be sorted into this array before younger regions that contain
96 // more garbage. This represents one of the reasons why we keep looking at regions even after we decide, for example,
97 // to exclude one of the regions because it might require evacuation of too much live data.
98 // TODO: Split it in the separate methods for clarity.
99 bool is_generational = heap->mode()->is_generational();
100 bool is_global = _generation->is_global();
101 size_t capacity = heap->young_generation()->max_capacity();
102
103 // cur_young_garbage represents the amount of memory to be reclaimed from young-gen. In the case that live objects
104 // are known to be promoted out of young-gen, we count this as cur_young_garbage because this memory is reclaimed
105 // from young-gen and becomes available to serve future young-gen allocation requests.
106 size_t cur_young_garbage = 0;
107
108 // Better select garbage-first regions
109 QuickSort::sort<RegionData>(data, (int)size, compare_by_garbage, false);
110
111 if (is_generational) {
112 for (size_t idx = 0; idx < size; idx++) {
113 ShenandoahHeapRegion* r = data[idx]._region;
114 if (cset->is_preselected(r->index())) {
115 assert(r->age() >= InitialTenuringThreshold, "Preselected regions must have tenure age");
116 // Entire region will be promoted, This region does not impact young-gen or old-gen evacuation reserve.
117 // This region has been pre-selected and its impact on promotion reserve is already accounted for.
118
119 // r->used() is r->garbage() + r->get_live_data_bytes()
120 // Since all live data in this region is being evacuated from young-gen, it is as if this memory
121 // is garbage insofar as young-gen is concerned. Counting this as garbage reduces the need to
122 // reclaim highly utilized young-gen regions just for the sake of finding min_garbage to reclaim
123 // within youn-gen memory.
124
125 cur_young_garbage += r->garbage();
126 cset->add_region(r);
127 }
128 }
129 if (is_global) {
130 size_t max_young_cset = (size_t) (heap->get_young_evac_reserve() / ShenandoahEvacWaste);
131 size_t young_cur_cset = 0;
132 size_t max_old_cset = (size_t) (heap->get_old_evac_reserve() / ShenandoahOldEvacWaste);
133 size_t old_cur_cset = 0;
134 size_t free_target = (capacity * ShenandoahMinFreeThreshold) / 100 + max_young_cset;
135 size_t min_garbage = (free_target > actual_free) ? (free_target - actual_free) : 0;
136
137 log_info(gc, ergo)("Adaptive CSet Selection for GLOBAL. Max Young Evacuation: " SIZE_FORMAT
138 "%s, Max Old Evacuation: " SIZE_FORMAT "%s, Actual Free: " SIZE_FORMAT "%s.",
139 byte_size_in_proper_unit(max_young_cset), proper_unit_for_byte_size(max_young_cset),
140 byte_size_in_proper_unit(max_old_cset), proper_unit_for_byte_size(max_old_cset),
141 byte_size_in_proper_unit(actual_free), proper_unit_for_byte_size(actual_free));
142
143 for (size_t idx = 0; idx < size; idx++) {
144 ShenandoahHeapRegion* r = data[idx]._region;
145 if (cset->is_preselected(r->index())) {
146 continue;
147 }
148 bool add_region = false;
149 if (r->is_old()) {
150 size_t new_cset = old_cur_cset + r->get_live_data_bytes();
151 if ((new_cset <= max_old_cset) && (r->garbage() > garbage_threshold)) {
152 add_region = true;
153 old_cur_cset = new_cset;
154 }
155 } else if (r->age() < InitialTenuringThreshold) {
156 size_t new_cset = young_cur_cset + r->get_live_data_bytes();
157 size_t region_garbage = r->garbage();
158 size_t new_garbage = cur_young_garbage + region_garbage;
159 bool add_regardless = (region_garbage > ignore_threshold) && (new_garbage < min_garbage);
160 if ((new_cset <= max_young_cset) && (add_regardless || (region_garbage > garbage_threshold))) {
161 add_region = true;
162 young_cur_cset = new_cset;
163 cur_young_garbage = new_garbage;
164 }
165 }
166 // Note that we do not add aged regions if they were not pre-selected. The reason they were not preselected
167 // is because there is not sufficient room in old-gen to hold their to-be-promoted live objects.
168
169 if (add_region) {
170 cset->add_region(r);
171 }
172 }
173 } else {
174 // This is young-gen collection or a mixed evacuation. If this is mixed evacuation, the old-gen candidate regions
175 // have already been added.
176 size_t max_cset = (size_t) (heap->get_young_evac_reserve() / ShenandoahEvacWaste);
177 size_t cur_cset = 0;
178 size_t free_target = (capacity * ShenandoahMinFreeThreshold) / 100 + max_cset;
179 size_t min_garbage = (free_target > actual_free) ? (free_target - actual_free) : 0;
180
181 log_info(gc, ergo)("Adaptive CSet Selection for YOUNG. Max Evacuation: " SIZE_FORMAT "%s, Actual Free: " SIZE_FORMAT "%s.",
182 byte_size_in_proper_unit(max_cset), proper_unit_for_byte_size(max_cset),
183 byte_size_in_proper_unit(actual_free), proper_unit_for_byte_size(actual_free));
184
185 for (size_t idx = 0; idx < size; idx++) {
186 ShenandoahHeapRegion* r = data[idx]._region;
187 if (cset->is_preselected(r->index())) {
188 continue;
189 }
190 if (r->age() < InitialTenuringThreshold) {
191 size_t new_cset = cur_cset + r->get_live_data_bytes();
192 size_t region_garbage = r->garbage();
193 size_t new_garbage = cur_young_garbage + region_garbage;
194 bool add_regardless = (region_garbage > ignore_threshold) && (new_garbage < min_garbage);
195 assert(r->is_young(), "Only young candidates expected in the data array");
196 if ((new_cset <= max_cset) && (add_regardless || (region_garbage > garbage_threshold))) {
197 cur_cset = new_cset;
198 cur_young_garbage = new_garbage;
199 cset->add_region(r);
200 }
201 }
202 // Note that we do not add aged regions if they were not pre-selected. The reason they were not preselected
203 // is because there is not sufficient room in old-gen to hold their to-be-promoted live objects or because
204 // they are to be promoted in place.
205 }
206 }
207 } else {
208 // Traditional Shenandoah (non-generational)
209 size_t capacity = ShenandoahHeap::heap()->max_capacity();
210 size_t max_cset = (size_t)((1.0 * capacity / 100 * ShenandoahEvacReserve) / ShenandoahEvacWaste);
211 size_t free_target = (capacity * ShenandoahMinFreeThreshold) / 100 + max_cset;
212 size_t min_garbage = (free_target > actual_free) ? (free_target - actual_free) : 0;
213
214 log_info(gc, ergo)("Adaptive CSet Selection. Target Free: " SIZE_FORMAT "%s, Actual Free: "
215 SIZE_FORMAT "%s, Max Evacuation: " SIZE_FORMAT "%s, Min Garbage: " SIZE_FORMAT "%s",
216 byte_size_in_proper_unit(free_target), proper_unit_for_byte_size(free_target),
217 byte_size_in_proper_unit(actual_free), proper_unit_for_byte_size(actual_free),
218 byte_size_in_proper_unit(max_cset), proper_unit_for_byte_size(max_cset),
219 byte_size_in_proper_unit(min_garbage), proper_unit_for_byte_size(min_garbage));
220
221 size_t cur_cset = 0;
222 size_t cur_garbage = 0;
223
224 for (size_t idx = 0; idx < size; idx++) {
225 ShenandoahHeapRegion* r = data[idx]._region;
226
227 size_t new_cset = cur_cset + r->get_live_data_bytes();
228 size_t new_garbage = cur_garbage + r->garbage();
229
230 if (new_cset > max_cset) {
231 break;
232 }
233
234 if ((new_garbage < min_garbage) || (r->garbage() > garbage_threshold)) {
235 cset->add_region(r);
236 cur_cset = new_cset;
237 cur_garbage = new_garbage;
238 }
239 }
240 }
241
242 size_t collected_old = cset->get_old_bytes_reserved_for_evacuation();
243 size_t collected_promoted = cset->get_young_bytes_to_be_promoted();
244 size_t collected_young = cset->get_young_bytes_reserved_for_evacuation();
245
246 log_info(gc, ergo)("Chosen CSet evacuates young: " SIZE_FORMAT "%s (of which at least: " SIZE_FORMAT "%s are to be promoted), "
247 "old: " SIZE_FORMAT "%s",
248 byte_size_in_proper_unit(collected_young), proper_unit_for_byte_size(collected_young),
249 byte_size_in_proper_unit(collected_promoted), proper_unit_for_byte_size(collected_promoted),
250 byte_size_in_proper_unit(collected_old), proper_unit_for_byte_size(collected_old));
251 }
252
253 void ShenandoahAdaptiveHeuristics::record_cycle_start() {
254 ShenandoahHeuristics::record_cycle_start();
255 _allocation_rate.allocation_counter_reset();
256 }
257
258 void ShenandoahAdaptiveHeuristics::record_success_concurrent(bool abbreviated) {
259 ShenandoahHeuristics::record_success_concurrent(abbreviated);
260
261 size_t available = MIN2(_generation->available(), ShenandoahHeap::heap()->free_set()->available());
262
263 double z_score = 0.0;
264 double available_sd = _available.sd();
265 if (available_sd > 0) {
266 double available_avg = _available.avg();
267 z_score = (double(available) - available_avg) / available_sd;
268 log_debug(gc, ergo)("%s Available: " SIZE_FORMAT " %sB, z-score=%.3f. Average available: %.1f %sB +/- %.1f %sB.",
269 _generation->name(),
270 byte_size_in_proper_unit(available), proper_unit_for_byte_size(available),
271 z_score,
272 byte_size_in_proper_unit(available_avg), proper_unit_for_byte_size(available_avg),
273 byte_size_in_proper_unit(available_sd), proper_unit_for_byte_size(available_sd));
274 }
275
276 _available.add(double(available));
277
278 // In the case when a concurrent GC cycle completes successfully but with an
279 // unusually small amount of available memory we will adjust our trigger
280 // parameters so that they are more likely to initiate a new cycle.
281 // Conversely, when a GC cycle results in an above average amount of available
282 // memory, we will adjust the trigger parameters to be less likely to initiate
283 // a GC cycle.
284 //
285 // The z-score we've computed is in no way statistically related to the
286 // trigger parameters, but it has the nice property that worse z-scores for
287 // available memory indicate making larger adjustments to the trigger
288 // parameters. It also results in fewer adjustments as the application
289 // stabilizes.
290 //
291 // In order to avoid making endless and likely unnecessary adjustments to the
292 // trigger parameters, the change in available memory (with respect to the
293 // average) at the end of a cycle must be beyond these threshold values.
294 if (z_score < LOWEST_EXPECTED_AVAILABLE_AT_END ||
295 z_score > HIGHEST_EXPECTED_AVAILABLE_AT_END) {
296 // The sign is flipped because a negative z-score indicates that the
311 void ShenandoahAdaptiveHeuristics::record_success_degenerated() {
312 ShenandoahHeuristics::record_success_degenerated();
313 // Adjust both trigger's parameters in the case of a degenerated GC because
314 // either of them should have triggered earlier to avoid this case.
315 adjust_margin_of_error(DEGENERATE_PENALTY_SD);
316 adjust_spike_threshold(DEGENERATE_PENALTY_SD);
317 }
318
319 void ShenandoahAdaptiveHeuristics::record_success_full() {
320 ShenandoahHeuristics::record_success_full();
321 // Adjust both trigger's parameters in the case of a full GC because
322 // either of them should have triggered earlier to avoid this case.
323 adjust_margin_of_error(FULL_PENALTY_SD);
324 adjust_spike_threshold(FULL_PENALTY_SD);
325 }
326
327 static double saturate(double value, double min, double max) {
328 return MAX2(MIN2(value, max), min);
329 }
330
331 // Return a conservative estimate of how much memory can be allocated before we need to start GC. The estimate is based
332 // on memory that is currently available within young generation plus all of the memory that will be added to the young
333 // generation at the end of the current cycle (as represented by young_regions_to_be_reclaimed) and on the anticipated
334 // amount of time required to perform a GC.
335 size_t ShenandoahAdaptiveHeuristics::bytes_of_allocation_runway_before_gc_trigger(size_t young_regions_to_be_reclaimed) {
336 assert(_generation->is_young(), "Only meaningful for young-gen heuristic");
337
338 size_t max_capacity = _generation->max_capacity();
339 size_t capacity = _generation->soft_max_capacity();
340 size_t usage = _generation->used();
341 size_t available = (capacity > usage)? capacity - usage: 0;
342 size_t allocated = _generation->bytes_allocated_since_gc_start();
343
344 size_t available_young_collected = ShenandoahHeap::heap()->collection_set()->get_young_available_bytes_collected();
345 size_t anticipated_available =
346 available + young_regions_to_be_reclaimed * ShenandoahHeapRegion::region_size_bytes() - available_young_collected;
347 size_t allocation_headroom = anticipated_available;
348 size_t spike_headroom = capacity * ShenandoahAllocSpikeFactor / 100;
349 size_t penalties = capacity * _gc_time_penalties / 100;
350
351 double rate = _allocation_rate.sample(allocated);
352
353 // At what value of available, would avg and spike triggers occur?
354 // if allocation_headroom < avg_cycle_time * avg_alloc_rate, then we experience avg trigger
355 // if allocation_headroom < avg_cycle_time * rate, then we experience spike trigger if is_spiking
356 //
357 // allocation_headroom =
358 // 0, if penalties > available or if penalties + spike_headroom > available
359 // available - penalties - spike_headroom, otherwise
360 //
361 // so we trigger if available - penalties - spike_headroom < avg_cycle_time * avg_alloc_rate, which is to say
362 // available < avg_cycle_time * avg_alloc_rate + penalties + spike_headroom
363 // or if available < penalties + spike_headroom
364 //
365 // since avg_cycle_time * avg_alloc_rate > 0, the first test is sufficient to test both conditions
366 //
367 // thus, evac_slack_avg is MIN2(0, available - avg_cycle_time * avg_alloc_rate + penalties + spike_headroom)
368 //
369 // similarly, evac_slack_spiking is MIN2(0, available - avg_cycle_time * rate + penalties + spike_headroom)
370 // but evac_slack_spiking is only relevant if is_spiking, as defined below.
371
372 double avg_cycle_time = _gc_cycle_time_history->davg() + (_margin_of_error_sd * _gc_cycle_time_history->dsd());
373
374 // TODO: Consider making conservative adjustments to avg_cycle_time, such as: (avg_cycle_time *= 2) in cases where
375 // we expect a longer-than-normal GC duration. This includes mixed evacuations, evacuation that perform promotion
376 // including promotion in place, and OLD GC bootstrap cycles. It has been observed that these cycles sometimes
377 // require twice or more the duration of "normal" GC cycles. We have experimented with this approach. While it
378 // does appear to reduce the frequency of degenerated cycles due to late triggers, it also has the effect of reducing
379 // evacuation slack so that there is less memory available to be transferred to OLD. The result is that we
380 // throttle promotion and it takes too long to move old objects out of the young generation.
381
382 double avg_alloc_rate = _allocation_rate.upper_bound(_margin_of_error_sd);
383 size_t evac_slack_avg;
384 if (anticipated_available > avg_cycle_time * avg_alloc_rate + penalties + spike_headroom) {
385 evac_slack_avg = anticipated_available - (avg_cycle_time * avg_alloc_rate + penalties + spike_headroom);
386 } else {
387 // we have no slack because it's already time to trigger
388 evac_slack_avg = 0;
389 }
390
391 bool is_spiking = _allocation_rate.is_spiking(rate, _spike_threshold_sd);
392 size_t evac_slack_spiking;
393 if (is_spiking) {
394 if (anticipated_available > avg_cycle_time * rate + penalties + spike_headroom) {
395 evac_slack_spiking = anticipated_available - (avg_cycle_time * rate + penalties + spike_headroom);
396 } else {
397 // we have no slack because it's already time to trigger
398 evac_slack_spiking = 0;
399 }
400 } else {
401 evac_slack_spiking = evac_slack_avg;
402 }
403
404 size_t threshold = min_free_threshold();
405 size_t evac_min_threshold = (anticipated_available > threshold)? anticipated_available - threshold: 0;
406 return MIN3(evac_slack_spiking, evac_slack_avg, evac_min_threshold);
407 }
408
409 bool ShenandoahAdaptiveHeuristics::should_start_gc() {
410 size_t capacity = _generation->soft_max_capacity();
411 size_t available = _generation->soft_available();
412 size_t allocated = _generation->bytes_allocated_since_gc_start();
413
414 log_debug(gc)("should_start_gc (%s)? available: " SIZE_FORMAT ", soft_max_capacity: " SIZE_FORMAT
415 ", allocated: " SIZE_FORMAT,
416 _generation->name(), available, capacity, allocated);
417
418 // The collector reserve may eat into what the mutator is allowed to use. Make sure we are looking
419 // at what is available to the mutator when deciding whether to start a GC.
420 size_t usable = ShenandoahHeap::heap()->free_set()->available();
421 if (usable < available) {
422 log_debug(gc)("Usable (" SIZE_FORMAT "%s) is less than available (" SIZE_FORMAT "%s)",
423 byte_size_in_proper_unit(usable), proper_unit_for_byte_size(usable),
424 byte_size_in_proper_unit(available), proper_unit_for_byte_size(available));
425 available = usable;
426 }
427
428 // Track allocation rate even if we decide to start a cycle for other reasons.
429 double rate = _allocation_rate.sample(allocated);
430 _last_trigger = OTHER;
431
432 // OLD generation is maintained to be as small as possible. Depletion-of-free-pool triggers do not apply to old generation.
433 if (!_generation->is_old()) {
434 size_t min_threshold = min_free_threshold();
435 if (available < min_threshold) {
436 log_info(gc)("Trigger (%s): Free (" SIZE_FORMAT "%s) is below minimum threshold (" SIZE_FORMAT "%s)",
437 _generation->name(),
438 byte_size_in_proper_unit(available), proper_unit_for_byte_size(available),
439 byte_size_in_proper_unit(min_threshold), proper_unit_for_byte_size(min_threshold));
440 return true;
441 }
442
443 // Check if we need to learn a bit about the application
444 const size_t max_learn = ShenandoahLearningSteps;
445 if (_gc_times_learned < max_learn) {
446 size_t init_threshold = capacity / 100 * ShenandoahInitFreeThreshold;
447 if (available < init_threshold) {
448 log_info(gc)("Trigger (%s): Learning " SIZE_FORMAT " of " SIZE_FORMAT ". Free ("
449 SIZE_FORMAT "%s) is below initial threshold (" SIZE_FORMAT "%s)",
450 _generation->name(), _gc_times_learned + 1, max_learn,
451 byte_size_in_proper_unit(available), proper_unit_for_byte_size(available),
452 byte_size_in_proper_unit(init_threshold), proper_unit_for_byte_size(init_threshold));
453 return true;
454 }
455 }
456
457 // Rationale:
458 // The idea is that there is an average allocation rate and there are occasional abnormal bursts (or spikes) of
459 // allocations that exceed the average allocation rate. What do these spikes look like?
460 //
461 // 1. At certain phase changes, we may discard large amounts of data and replace it with large numbers of newly
462 // allocated objects. This "spike" looks more like a phase change. We were in steady state at M bytes/sec
463 // allocation rate and now we're in a "reinitialization phase" that looks like N bytes/sec. We need the "spike"
464 // accomodation to give us enough runway to recalibrate our "average allocation rate".
465 //
466 // 2. The typical workload changes. "Suddenly", our typical workload of N TPS increases to N+delta TPS. This means
467 // our average allocation rate needs to be adjusted. Once again, we need the "spike" accomodation to give us
468 // enough runway to recalibrate our "average allocation rate".
469 //
470 // 3. Though there is an "average" allocation rate, a given workload's demand for allocation may be very bursty. We
471 // allocate a bunch of LABs during the 5 ms that follow completion of a GC, then we perform no more allocations for
472 // the next 150 ms. It seems we want the "spike" to represent the maximum divergence from average within the
473 // period of time between consecutive evaluation of the should_start_gc() service. Here's the thinking:
474 //
475 // a) Between now and the next time I ask whether should_start_gc(), we might experience a spike representing
476 // the anticipated burst of allocations. If that would put us over budget, then we should start GC immediately.
477 // b) Between now and the anticipated depletion of allocation pool, there may be two or more bursts of allocations.
478 // If there are more than one of these bursts, we can "approximate" that these will be separated by spans of
479 // time with very little or no allocations so the "average" allocation rate should be a suitable approximation
480 // of how this will behave.
481 //
482 // For cases 1 and 2, we need to "quickly" recalibrate the average allocation rate whenever we detect a change
483 // in operation mode. We want some way to decide that the average rate has changed. Make average allocation rate
484 // computations an independent effort.
485
486
487 // Check if allocation headroom is still okay. This also factors in:
488 // 1. Some space to absorb allocation spikes (ShenandoahAllocSpikeFactor)
489 // 2. Accumulated penalties from Degenerated and Full GC
490
491 size_t allocation_headroom = available;
492 size_t spike_headroom = capacity / 100 * ShenandoahAllocSpikeFactor;
493 size_t penalties = capacity / 100 * _gc_time_penalties;
494
495 allocation_headroom -= MIN2(allocation_headroom, penalties);
496 allocation_headroom -= MIN2(allocation_headroom, spike_headroom);
497
498 double avg_cycle_time = _gc_cycle_time_history->davg() + (_margin_of_error_sd * _gc_cycle_time_history->dsd());
499 double avg_alloc_rate = _allocation_rate.upper_bound(_margin_of_error_sd);
500 log_debug(gc)("%s: average GC time: %.2f ms, allocation rate: %.0f %s/s",
501 _generation->name(),
502 avg_cycle_time * 1000, byte_size_in_proper_unit(avg_alloc_rate), proper_unit_for_byte_size(avg_alloc_rate));
503
504 if (avg_cycle_time > allocation_headroom / avg_alloc_rate) {
505
506 log_info(gc)("Trigger (%s): Average GC time (%.2f ms) is above the time for average allocation rate (%.0f %sB/s)"
507 " to deplete free headroom (" SIZE_FORMAT "%s) (margin of error = %.2f)",
508 _generation->name(), avg_cycle_time * 1000,
509 byte_size_in_proper_unit(avg_alloc_rate), proper_unit_for_byte_size(avg_alloc_rate),
510 byte_size_in_proper_unit(allocation_headroom), proper_unit_for_byte_size(allocation_headroom),
511 _margin_of_error_sd);
512
513 log_info(gc, ergo)("Free headroom: " SIZE_FORMAT "%s (free) - " SIZE_FORMAT "%s (spike) - "
514 SIZE_FORMAT "%s (penalties) = " SIZE_FORMAT "%s",
515 byte_size_in_proper_unit(available), proper_unit_for_byte_size(available),
516 byte_size_in_proper_unit(spike_headroom), proper_unit_for_byte_size(spike_headroom),
517 byte_size_in_proper_unit(penalties), proper_unit_for_byte_size(penalties),
518 byte_size_in_proper_unit(allocation_headroom), proper_unit_for_byte_size(allocation_headroom));
519
520 _last_trigger = RATE;
521 return true;
522 }
523
524 bool is_spiking = _allocation_rate.is_spiking(rate, _spike_threshold_sd);
525 if (is_spiking && avg_cycle_time > allocation_headroom / rate) {
526 log_info(gc)("Trigger (%s): Average GC time (%.2f ms) is above the time for instantaneous allocation rate (%.0f %sB/s)"
527 " to deplete free headroom (" SIZE_FORMAT "%s) (spike threshold = %.2f)",
528 _generation->name(), avg_cycle_time * 1000,
529 byte_size_in_proper_unit(rate), proper_unit_for_byte_size(rate),
530 byte_size_in_proper_unit(allocation_headroom), proper_unit_for_byte_size(allocation_headroom),
531 _spike_threshold_sd);
532 _last_trigger = SPIKE;
533 return true;
534 }
535
536 ShenandoahHeap* heap = ShenandoahHeap::heap();
537 if (heap->mode()->is_generational()) {
538 // Get through promotions and mixed evacuations as quickly as possible. These cycles sometimes require significantly
539 // more time than traditional young-generation cycles so start them up as soon as possible. This is a "mitigation"
540 // for the reality that old-gen and young-gen activities are not truly "concurrent". If there is old-gen work to
541 // be done, we start up the young-gen GC threads so they can do some of this old-gen work. As implemented, promotion
542 // gets priority over old-gen marking.
543
544 size_t promo_potential = heap->get_promotion_potential();
545 size_t promo_in_place_potential = heap->get_promotion_in_place_potential();
546 ShenandoahOldHeuristics* old_heuristics = (ShenandoahOldHeuristics*) heap->old_generation()->heuristics();
547 size_t mixed_candidates = old_heuristics->unprocessed_old_collection_candidates();
548 if (promo_potential > 0) {
549 // Detect unsigned arithmetic underflow
550 assert(promo_potential < heap->capacity(), "Sanity");
551 log_info(gc)("Trigger (%s): expedite promotion of " SIZE_FORMAT "%s",
552 _generation->name(), byte_size_in_proper_unit(promo_potential), proper_unit_for_byte_size(promo_potential));
553 return true;
554 } else if (promo_in_place_potential > 0) {
555 // Detect unsigned arithmetic underflow
556 assert(promo_in_place_potential < heap->capacity(), "Sanity");
557 log_info(gc)("Trigger (%s): expedite promotion in place of " SIZE_FORMAT "%s", _generation->name(),
558 byte_size_in_proper_unit(promo_in_place_potential),
559 proper_unit_for_byte_size(promo_in_place_potential));
560 return true;
561 } else if (mixed_candidates > 0) {
562 // We need to run young GC in order to open up some free heap regions so we can finish mixed evacuations.
563 log_info(gc)("Trigger (%s): expedite mixed evacuation of " SIZE_FORMAT " regions",
564 _generation->name(), mixed_candidates);
565 return true;
566 }
567 }
568 }
569 return ShenandoahHeuristics::should_start_gc();
570 }
571
572 void ShenandoahAdaptiveHeuristics::adjust_last_trigger_parameters(double amount) {
573 switch (_last_trigger) {
574 case RATE:
575 adjust_margin_of_error(amount);
576 break;
577 case SPIKE:
578 adjust_spike_threshold(amount);
579 break;
580 case OTHER:
581 // nothing to adjust here.
582 break;
583 default:
584 ShouldNotReachHere();
585 }
586 }
587
588 void ShenandoahAdaptiveHeuristics::adjust_margin_of_error(double amount) {
632 _last_sample_value = 0;
633 }
634
635 bool ShenandoahAllocationRate::is_spiking(double rate, double threshold) const {
636 if (rate <= 0.0) {
637 return false;
638 }
639
640 double sd = _rate.sd();
641 if (sd > 0) {
642 // There is a small chance that that rate has already been sampled, but it
643 // seems not to matter in practice.
644 double z_score = (rate - _rate.avg()) / sd;
645 if (z_score > threshold) {
646 return true;
647 }
648 }
649 return false;
650 }
651
652 double ShenandoahAllocationRate::instantaneous_rate(double time, size_t allocated) const {
653 size_t last_value = _last_sample_value;
654 double last_time = _last_sample_time;
655 size_t allocation_delta = (allocated > last_value) ? (allocated - last_value) : 0;
656 double time_delta_sec = time - last_time;
657 return (time_delta_sec > 0) ? (allocation_delta / time_delta_sec) : 0;
658 }
|