10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "precompiled.hpp"
26
27 #include "gc/shenandoah/heuristics/shenandoahAdaptiveHeuristics.hpp"
28 #include "gc/shenandoah/shenandoahCollectionSet.hpp"
29 #include "gc/shenandoah/shenandoahFreeSet.hpp"
30 #include "gc/shenandoah/shenandoahHeap.inline.hpp"
31 #include "gc/shenandoah/shenandoahHeapRegion.inline.hpp"
32 #include "logging/log.hpp"
33 #include "logging/logTag.hpp"
34 #include "utilities/quickSort.hpp"
35
36 // These constants are used to adjust the margin of error for the moving
37 // average of the allocation rate and cycle time. The units are standard
38 // deviations.
39 const double ShenandoahAdaptiveHeuristics::FULL_PENALTY_SD = 0.2;
40 const double ShenandoahAdaptiveHeuristics::DEGENERATE_PENALTY_SD = 0.1;
41
42 // These are used to decide if we want to make any adjustments at all
43 // at the end of a successful concurrent cycle.
44 const double ShenandoahAdaptiveHeuristics::LOWEST_EXPECTED_AVAILABLE_AT_END = -0.5;
45 const double ShenandoahAdaptiveHeuristics::HIGHEST_EXPECTED_AVAILABLE_AT_END = 0.5;
46
47 // These values are the confidence interval expressed as standard deviations.
48 // At the minimum confidence level, there is a 25% chance that the true value of
49 // the estimate (average cycle time or allocation rate) is not more than
50 // MINIMUM_CONFIDENCE standard deviations away from our estimate. Similarly, the
51 // MAXIMUM_CONFIDENCE interval here means there is a one in a thousand chance
52 // that the true value of our estimate is outside the interval. These are used
53 // as bounds on the adjustments applied at the outcome of a GC cycle.
54 const double ShenandoahAdaptiveHeuristics::MINIMUM_CONFIDENCE = 0.319; // 25%
55 const double ShenandoahAdaptiveHeuristics::MAXIMUM_CONFIDENCE = 3.291; // 99.9%
56
57 ShenandoahAdaptiveHeuristics::ShenandoahAdaptiveHeuristics() :
58 ShenandoahHeuristics(),
59 _margin_of_error_sd(ShenandoahAdaptiveInitialConfidence),
60 _spike_threshold_sd(ShenandoahAdaptiveInitialSpikeThreshold),
61 _last_trigger(OTHER) { }
62
63 ShenandoahAdaptiveHeuristics::~ShenandoahAdaptiveHeuristics() {}
64
65 void ShenandoahAdaptiveHeuristics::choose_collection_set_from_regiondata(ShenandoahCollectionSet* cset,
66 RegionData* data, size_t size,
67 size_t actual_free) {
68 size_t garbage_threshold = ShenandoahHeapRegion::region_size_bytes() * ShenandoahGarbageThreshold / 100;
69
70 // The logic for cset selection in adaptive is as follows:
71 //
72 // 1. We cannot get cset larger than available free space. Otherwise we guarantee OOME
73 // during evacuation, and thus guarantee full GC. In practice, we also want to let
74 // application to allocate something. This is why we limit CSet to some fraction of
75 // available space. In non-overloaded heap, max_cset would contain all plausible candidates
76 // over garbage threshold.
77 //
78 // 2. We should not get cset too low so that free threshold would not be met right
79 // after the cycle. Otherwise we get back-to-back cycles for no reason if heap is
80 // too fragmented. In non-overloaded non-fragmented heap min_garbage would be around zero.
81 //
82 // Therefore, we start by sorting the regions by garbage. Then we unconditionally add the best candidates
83 // before we meet min_garbage. Then we add all candidates that fit with a garbage threshold before
84 // we hit max_cset. When max_cset is hit, we terminate the cset selection. Note that in this scheme,
85 // ShenandoahGarbageThreshold is the soft threshold which would be ignored until min_garbage is hit.
86
87 size_t capacity = ShenandoahHeap::heap()->soft_max_capacity();
88 size_t max_cset = (size_t)((1.0 * capacity / 100 * ShenandoahEvacReserve) / ShenandoahEvacWaste);
89 size_t free_target = (capacity / 100 * ShenandoahMinFreeThreshold) + max_cset;
90 size_t min_garbage = (free_target > actual_free ? (free_target - actual_free) : 0);
91
92 log_info(gc, ergo)("Adaptive CSet Selection. Target Free: " SIZE_FORMAT "%s, Actual Free: "
93 SIZE_FORMAT "%s, Max CSet: " SIZE_FORMAT "%s, Min Garbage: " SIZE_FORMAT "%s",
94 byte_size_in_proper_unit(free_target), proper_unit_for_byte_size(free_target),
95 byte_size_in_proper_unit(actual_free), proper_unit_for_byte_size(actual_free),
96 byte_size_in_proper_unit(max_cset), proper_unit_for_byte_size(max_cset),
97 byte_size_in_proper_unit(min_garbage), proper_unit_for_byte_size(min_garbage));
98
99 // Better select garbage-first regions
100 QuickSort::sort<RegionData>(data, (int)size, compare_by_garbage, false);
101
102 size_t cur_cset = 0;
103 size_t cur_garbage = 0;
104
105 for (size_t idx = 0; idx < size; idx++) {
106 ShenandoahHeapRegion* r = data[idx]._region;
107
108 size_t new_cset = cur_cset + r->get_live_data_bytes();
109 size_t new_garbage = cur_garbage + r->garbage();
110
111 if (new_cset > max_cset) {
112 break;
113 }
114
115 if ((new_garbage < min_garbage) || (r->garbage() > garbage_threshold)) {
116 cset->add_region(r);
117 cur_cset = new_cset;
118 cur_garbage = new_garbage;
119 }
120 }
121 }
122
123 void ShenandoahAdaptiveHeuristics::record_cycle_start() {
124 ShenandoahHeuristics::record_cycle_start();
125 _allocation_rate.allocation_counter_reset();
126 }
127
128 void ShenandoahAdaptiveHeuristics::record_success_concurrent() {
129 ShenandoahHeuristics::record_success_concurrent();
130
131 size_t available = ShenandoahHeap::heap()->free_set()->available();
132
133 _available.add(available);
134 double z_score = 0.0;
135 if (_available.sd() > 0) {
136 z_score = (available - _available.avg()) / _available.sd();
137 }
138
139 log_debug(gc, ergo)("Available: " SIZE_FORMAT " %sB, z-score=%.3f. Average available: %.1f %sB +/- %.1f %sB.",
140 byte_size_in_proper_unit(available), proper_unit_for_byte_size(available),
141 z_score,
142 byte_size_in_proper_unit(_available.avg()), proper_unit_for_byte_size(_available.avg()),
143 byte_size_in_proper_unit(_available.sd()), proper_unit_for_byte_size(_available.sd()));
144
145 // In the case when a concurrent GC cycle completes successfully but with an
146 // unusually small amount of available memory we will adjust our trigger
147 // parameters so that they are more likely to initiate a new cycle.
148 // Conversely, when a GC cycle results in an above average amount of available
149 // memory, we will adjust the trigger parameters to be less likely to initiate
150 // a GC cycle.
151 //
152 // The z-score we've computed is in no way statistically related to the
153 // trigger parameters, but it has the nice property that worse z-scores for
154 // available memory indicate making larger adjustments to the trigger
155 // parameters. It also results in fewer adjustments as the application
156 // stabilizes.
157 //
158 // In order to avoid making endless and likely unnecessary adjustments to the
159 // trigger parameters, the change in available memory (with respect to the
160 // average) at the end of a cycle must be beyond these threshold values.
161 if (z_score < LOWEST_EXPECTED_AVAILABLE_AT_END ||
162 z_score > HIGHEST_EXPECTED_AVAILABLE_AT_END) {
163 // The sign is flipped because a negative z-score indicates that the
179 ShenandoahHeuristics::record_success_degenerated();
180 // Adjust both trigger's parameters in the case of a degenerated GC because
181 // either of them should have triggered earlier to avoid this case.
182 adjust_margin_of_error(DEGENERATE_PENALTY_SD);
183 adjust_spike_threshold(DEGENERATE_PENALTY_SD);
184 }
185
186 void ShenandoahAdaptiveHeuristics::record_success_full() {
187 ShenandoahHeuristics::record_success_full();
188 // Adjust both trigger's parameters in the case of a full GC because
189 // either of them should have triggered earlier to avoid this case.
190 adjust_margin_of_error(FULL_PENALTY_SD);
191 adjust_spike_threshold(FULL_PENALTY_SD);
192 }
193
194 static double saturate(double value, double min, double max) {
195 return MAX2(MIN2(value, max), min);
196 }
197
198 bool ShenandoahAdaptiveHeuristics::should_start_gc() {
199 ShenandoahHeap* heap = ShenandoahHeap::heap();
200 size_t max_capacity = heap->max_capacity();
201 size_t capacity = heap->soft_max_capacity();
202 size_t available = heap->free_set()->available();
203 size_t allocated = heap->bytes_allocated_since_gc_start();
204
205 // Make sure the code below treats available without the soft tail.
206 size_t soft_tail = max_capacity - capacity;
207 available = (available > soft_tail) ? (available - soft_tail) : 0;
208
209 // Track allocation rate even if we decide to start a cycle for other reasons.
210 double rate = _allocation_rate.sample(allocated);
211 _last_trigger = OTHER;
212
213 size_t min_threshold = capacity / 100 * ShenandoahMinFreeThreshold;
214 if (available < min_threshold) {
215 log_info(gc)("Trigger: Free (" SIZE_FORMAT "%s) is below minimum threshold (" SIZE_FORMAT "%s)",
216 byte_size_in_proper_unit(available), proper_unit_for_byte_size(available),
217 byte_size_in_proper_unit(min_threshold), proper_unit_for_byte_size(min_threshold));
218 return true;
219 }
220
221 const size_t max_learn = ShenandoahLearningSteps;
222 if (_gc_times_learned < max_learn) {
223 size_t init_threshold = capacity / 100 * ShenandoahInitFreeThreshold;
224 if (available < init_threshold) {
225 log_info(gc)("Trigger: Learning " SIZE_FORMAT " of " SIZE_FORMAT ". Free (" SIZE_FORMAT "%s) is below initial threshold (" SIZE_FORMAT "%s)",
226 _gc_times_learned + 1, max_learn,
227 byte_size_in_proper_unit(available), proper_unit_for_byte_size(available),
228 byte_size_in_proper_unit(init_threshold), proper_unit_for_byte_size(init_threshold));
229 return true;
230 }
231 }
232
233 // Check if allocation headroom is still okay. This also factors in:
234 // 1. Some space to absorb allocation spikes
235 // 2. Accumulated penalties from Degenerated and Full GC
236 size_t allocation_headroom = available;
237
238 size_t spike_headroom = capacity / 100 * ShenandoahAllocSpikeFactor;
239 size_t penalties = capacity / 100 * _gc_time_penalties;
240
241 allocation_headroom -= MIN2(allocation_headroom, spike_headroom);
242 allocation_headroom -= MIN2(allocation_headroom, penalties);
243
244 double avg_cycle_time = _gc_time_history->davg() + (_margin_of_error_sd * _gc_time_history->dsd());
245 double avg_alloc_rate = _allocation_rate.upper_bound(_margin_of_error_sd);
246 if (avg_cycle_time > allocation_headroom / avg_alloc_rate) {
247 log_info(gc)("Trigger: Average GC time (%.2f ms) is above the time for average allocation rate (%.0f %sB/s) to deplete free headroom (" SIZE_FORMAT "%s) (margin of error = %.2f)",
248 avg_cycle_time * 1000,
249 byte_size_in_proper_unit(avg_alloc_rate), proper_unit_for_byte_size(avg_alloc_rate),
250 byte_size_in_proper_unit(allocation_headroom), proper_unit_for_byte_size(allocation_headroom),
251 _margin_of_error_sd);
252
253 log_info(gc, ergo)("Free headroom: " SIZE_FORMAT "%s (free) - " SIZE_FORMAT "%s (spike) - " SIZE_FORMAT "%s (penalties) = " SIZE_FORMAT "%s",
254 byte_size_in_proper_unit(available), proper_unit_for_byte_size(available),
255 byte_size_in_proper_unit(spike_headroom), proper_unit_for_byte_size(spike_headroom),
256 byte_size_in_proper_unit(penalties), proper_unit_for_byte_size(penalties),
257 byte_size_in_proper_unit(allocation_headroom), proper_unit_for_byte_size(allocation_headroom));
258
259 _last_trigger = RATE;
260 return true;
261 }
262
263 bool is_spiking = _allocation_rate.is_spiking(rate, _spike_threshold_sd);
264 if (is_spiking && avg_cycle_time > allocation_headroom / rate) {
265 log_info(gc)("Trigger: Average GC time (%.2f ms) is above the time for instantaneous allocation rate (%.0f %sB/s) to deplete free headroom (" SIZE_FORMAT "%s) (spike threshold = %.2f)",
266 avg_cycle_time * 1000,
267 byte_size_in_proper_unit(rate), proper_unit_for_byte_size(rate),
268 byte_size_in_proper_unit(allocation_headroom), proper_unit_for_byte_size(allocation_headroom),
269 _spike_threshold_sd);
270 _last_trigger = SPIKE;
271 return true;
272 }
273
274 return ShenandoahHeuristics::should_start_gc();
275 }
276
277 void ShenandoahAdaptiveHeuristics::adjust_last_trigger_parameters(double amount) {
278 switch (_last_trigger) {
279 case RATE:
280 adjust_margin_of_error(amount);
281 break;
282 case SPIKE:
283 adjust_spike_threshold(amount);
284 break;
285 case OTHER:
286 // nothing to adjust here.
287 break;
288 default:
289 ShouldNotReachHere();
290 }
291 }
292
293 void ShenandoahAdaptiveHeuristics::adjust_margin_of_error(double amount) {
294 _margin_of_error_sd = saturate(_margin_of_error_sd + amount, MINIMUM_CONFIDENCE, MAXIMUM_CONFIDENCE);
295 log_debug(gc, ergo)("Margin of error now %.2f", _margin_of_error_sd);
296 }
|
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "precompiled.hpp"
26
27 #include "gc/shenandoah/heuristics/shenandoahAdaptiveHeuristics.hpp"
28 #include "gc/shenandoah/shenandoahCollectionSet.hpp"
29 #include "gc/shenandoah/shenandoahFreeSet.hpp"
30 #include "gc/shenandoah/shenandoahGeneration.hpp"
31 #include "gc/shenandoah/shenandoahHeapRegion.inline.hpp"
32 #include "gc/shenandoah/shenandoahYoungGeneration.hpp"
33 #include "logging/log.hpp"
34 #include "logging/logTag.hpp"
35 #include "utilities/quickSort.hpp"
36
37 // These constants are used to adjust the margin of error for the moving
38 // average of the allocation rate and cycle time. The units are standard
39 // deviations.
40 const double ShenandoahAdaptiveHeuristics::FULL_PENALTY_SD = 0.2;
41 const double ShenandoahAdaptiveHeuristics::DEGENERATE_PENALTY_SD = 0.1;
42
43 // These are used to decide if we want to make any adjustments at all
44 // at the end of a successful concurrent cycle.
45 const double ShenandoahAdaptiveHeuristics::LOWEST_EXPECTED_AVAILABLE_AT_END = -0.5;
46 const double ShenandoahAdaptiveHeuristics::HIGHEST_EXPECTED_AVAILABLE_AT_END = 0.5;
47
48 // These values are the confidence interval expressed as standard deviations.
49 // At the minimum confidence level, there is a 25% chance that the true value of
50 // the estimate (average cycle time or allocation rate) is not more than
51 // MINIMUM_CONFIDENCE standard deviations away from our estimate. Similarly, the
52 // MAXIMUM_CONFIDENCE interval here means there is a one in a thousand chance
53 // that the true value of our estimate is outside the interval. These are used
54 // as bounds on the adjustments applied at the outcome of a GC cycle.
55 const double ShenandoahAdaptiveHeuristics::MINIMUM_CONFIDENCE = 0.319; // 25%
56 const double ShenandoahAdaptiveHeuristics::MAXIMUM_CONFIDENCE = 3.291; // 99.9%
57
58 const uint ShenandoahAdaptiveHeuristics::MINIMUM_RESIZE_INTERVAL = 10;
59
60 ShenandoahAdaptiveHeuristics::ShenandoahAdaptiveHeuristics(ShenandoahGeneration* generation) :
61 ShenandoahHeuristics(generation),
62 _margin_of_error_sd(ShenandoahAdaptiveInitialConfidence),
63 _spike_threshold_sd(ShenandoahAdaptiveInitialSpikeThreshold),
64 _last_trigger(OTHER),
65 _available(Moving_Average_Samples, ShenandoahAdaptiveDecayFactor) { }
66
67 ShenandoahAdaptiveHeuristics::~ShenandoahAdaptiveHeuristics() {}
68
69 void ShenandoahAdaptiveHeuristics::choose_collection_set_from_regiondata(ShenandoahCollectionSet* cset,
70 RegionData* data, size_t size,
71 size_t actual_free) {
72 size_t garbage_threshold = ShenandoahHeapRegion::region_size_bytes() * ShenandoahGarbageThreshold / 100;
73 size_t ignore_threshold = ShenandoahHeapRegion::region_size_bytes() * ShenandoahIgnoreGarbageThreshold / 100;
74 ShenandoahHeap* heap = ShenandoahHeap::heap();
75
76 // The logic for cset selection in adaptive is as follows:
77 //
78 // 1. We cannot get cset larger than available free space. Otherwise we guarantee OOME
79 // during evacuation, and thus guarantee full GC. In practice, we also want to let
80 // application to allocate something. This is why we limit CSet to some fraction of
81 // available space. In non-overloaded heap, max_cset would contain all plausible candidates
82 // over garbage threshold.
83 //
84 // 2. We should not get cset too low so that free threshold would not be met right
85 // after the cycle. Otherwise we get back-to-back cycles for no reason if heap is
86 // too fragmented. In non-overloaded non-fragmented heap min_garbage would be around zero.
87 //
88 // Therefore, we start by sorting the regions by garbage. Then we unconditionally add the best candidates
89 // before we meet min_garbage. Then we add all candidates that fit with a garbage threshold before
90 // we hit max_cset. When max_cset is hit, we terminate the cset selection. Note that in this scheme,
91 // ShenandoahGarbageThreshold is the soft threshold which would be ignored until min_garbage is hit.
92
93 // In generational mode, the sort order within the data array is not strictly descending amounts of garbage. In
94 // particular, regions that have reached tenure age will be sorted into this array before younger regions that contain
95 // more garbage. This represents one of the reasons why we keep looking at regions even after we decide, for example,
96 // to exclude one of the regions because it might require evacuation of too much live data.
97 bool is_generational = heap->mode()->is_generational();
98 bool is_global = (_generation->generation_mode() == GLOBAL);
99 size_t capacity = heap->young_generation()->max_capacity();
100
101 // cur_young_garbage represents the amount of memory to be reclaimed from young-gen. In the case that live objects
102 // are known to be promoted out of young-gen, we count this as cur_young_garbage because this memory is reclaimed
103 // from young-gen and becomes available to serve future young-gen allocation requests.
104 size_t cur_young_garbage = 0;
105
106 // Better select garbage-first regions
107 QuickSort::sort<RegionData>(data, (int)size, compare_by_garbage, false);
108
109 if (is_generational) {
110 if (is_global) {
111 size_t max_young_cset = (size_t) (heap->get_young_evac_reserve() / ShenandoahEvacWaste);
112 size_t young_cur_cset = 0;
113 size_t max_old_cset = (size_t) (heap->get_old_evac_reserve() / ShenandoahEvacWaste);
114 size_t old_cur_cset = 0;
115 size_t free_target = (capacity * ShenandoahMinFreeThreshold) / 100 + max_young_cset;
116 size_t min_garbage = (free_target > actual_free) ? (free_target - actual_free) : 0;
117
118 log_info(gc, ergo)("Adaptive CSet Selection for GLOBAL. Max Young Evacuation: " SIZE_FORMAT
119 "%s, Max Old Evacuation: " SIZE_FORMAT "%s, Actual Free: " SIZE_FORMAT "%s.",
120 byte_size_in_proper_unit(max_young_cset), proper_unit_for_byte_size(max_young_cset),
121 byte_size_in_proper_unit(max_old_cset), proper_unit_for_byte_size(max_old_cset),
122 byte_size_in_proper_unit(actual_free), proper_unit_for_byte_size(actual_free));
123
124 for (size_t idx = 0; idx < size; idx++) {
125 ShenandoahHeapRegion* r = data[idx]._region;
126 bool add_region = false;
127 if (r->is_old()) {
128 size_t new_cset = old_cur_cset + r->get_live_data_bytes();
129 if ((new_cset <= max_old_cset) && (r->garbage() > garbage_threshold)) {
130 add_region = true;
131 old_cur_cset = new_cset;
132 }
133 } else if (cset->is_preselected(r->index())) {
134 assert(r->age() >= InitialTenuringThreshold, "Preselected regions must have tenure age");
135 // Entire region will be promoted, This region does not impact young-gen or old-gen evacuation reserve.
136 // This region has been pre-selected and its impact on promotion reserve is already accounted for.
137 add_region = true;
138 // r->used() is r->garbage() + r->get_live_data_bytes()
139 // Since all live data in this region is being evacuated from young-gen, it is as if this memory
140 // is garbage insofar as young-gen is concerned. Counting this as garbage reduces the need to
141 // reclaim highly utilized young-gen regions just for the sake of finding min_garbage to reclaim
142 // within youn-gen memory.
143 cur_young_garbage += r->used();
144 } else if (r->age() < InitialTenuringThreshold) {
145 size_t new_cset = young_cur_cset + r->get_live_data_bytes();
146 size_t region_garbage = r->garbage();
147 size_t new_garbage = cur_young_garbage + region_garbage;
148 bool add_regardless = (region_garbage > ignore_threshold) && (new_garbage < min_garbage);
149 if ((new_cset <= max_young_cset) && (add_regardless || (region_garbage > garbage_threshold))) {
150 add_region = true;
151 young_cur_cset = new_cset;
152 cur_young_garbage = new_garbage;
153 }
154 }
155 // Note that we do not add aged regions if they were not pre-selected. The reason they were not preselected
156 // is because there is not sufficient room in old-gen to hold their to-be-promoted live objects.
157
158 if (add_region) {
159 cset->add_region(r);
160 }
161 }
162 } else {
163 // This is young-gen collection or a mixed evacuation. If this is mixed evacuation, the old-gen candidate regions
164 // have already been added.
165 size_t max_cset = (size_t) (heap->get_young_evac_reserve() / ShenandoahEvacWaste);
166 size_t cur_cset = 0;
167 size_t free_target = (capacity * ShenandoahMinFreeThreshold) / 100 + max_cset;
168 size_t min_garbage = (free_target > actual_free) ? (free_target - actual_free) : 0;
169
170 log_info(gc, ergo)("Adaptive CSet Selection for YOUNG. Max Evacuation: " SIZE_FORMAT "%s, Actual Free: " SIZE_FORMAT "%s.",
171 byte_size_in_proper_unit(max_cset), proper_unit_for_byte_size(max_cset),
172 byte_size_in_proper_unit(actual_free), proper_unit_for_byte_size(actual_free));
173
174 for (size_t idx = 0; idx < size; idx++) {
175 ShenandoahHeapRegion* r = data[idx]._region;
176 bool add_region = false;
177
178 if (!r->is_old()) {
179 if (cset->is_preselected(r->index())) {
180 assert(r->age() >= InitialTenuringThreshold, "Preselected regions must have tenure age");
181 // Entire region will be promoted, This region does not impact young-gen evacuation reserve. Memory has already
182 // been set aside to hold evacuation results as advance_promotion_reserve.
183 add_region = true;
184 // Since all live data in this region is being evacuated from young-gen, it is as if this memory
185 // is garbage insofar as young-gen is concerned. Counting this as garbage reduces the need to
186 // reclaim highly utilized young-gen regions just for the sake of finding min_garbage to reclaim
187 // within youn-gen memory
188 cur_young_garbage += r->get_live_data_bytes();
189 } else if (r->age() < InitialTenuringThreshold) {
190 size_t new_cset = cur_cset + r->get_live_data_bytes();
191 size_t region_garbage = r->garbage();
192 size_t new_garbage = cur_young_garbage + region_garbage;
193 bool add_regardless = (region_garbage > ignore_threshold) && (new_garbage < min_garbage);
194 if ((new_cset <= max_cset) && (add_regardless || (region_garbage > garbage_threshold))) {
195 add_region = true;
196 cur_cset = new_cset;
197 cur_young_garbage = new_garbage;
198 }
199 }
200 // Note that we do not add aged regions if they were not pre-selected. The reason they were not preselected
201 // is because there is not sufficient room in old-gen to hold their to-be-promoted live objects.
202
203 if (add_region) {
204 cset->add_region(r);
205 }
206 }
207 }
208 }
209 } else {
210 // Traditional Shenandoah (non-generational)
211 size_t capacity = ShenandoahHeap::heap()->soft_max_capacity();
212 size_t max_cset = (size_t)((1.0 * capacity / 100 * ShenandoahEvacReserve) / ShenandoahEvacWaste);
213 size_t free_target = (capacity * ShenandoahMinFreeThreshold) / 100 + max_cset;
214 size_t min_garbage = (free_target > actual_free) ? (free_target - actual_free) : 0;
215
216 log_info(gc, ergo)("Adaptive CSet Selection. Max Evacuation: " SIZE_FORMAT "%s, Actual Free: " SIZE_FORMAT "%s.",
217 byte_size_in_proper_unit(max_cset), proper_unit_for_byte_size(max_cset),
218 byte_size_in_proper_unit(actual_free), proper_unit_for_byte_size(actual_free));
219
220 size_t cur_cset = 0;
221 size_t cur_garbage = 0;
222
223 for (size_t idx = 0; idx < size; idx++) {
224 ShenandoahHeapRegion* r = data[idx]._region;
225 size_t new_cset = cur_cset + r->get_live_data_bytes();
226 size_t region_garbage = r->garbage();
227 size_t new_garbage = cur_garbage + region_garbage;
228 bool add_regardless = (region_garbage > ignore_threshold) && (new_garbage < min_garbage);
229 if ((new_cset <= max_cset) && (add_regardless || (region_garbage > garbage_threshold))) {
230 cset->add_region(r);
231 cur_cset = new_cset;
232 cur_garbage = new_garbage;
233 }
234 }
235 }
236 }
237
238 void ShenandoahAdaptiveHeuristics::record_cycle_start() {
239 ShenandoahHeuristics::record_cycle_start();
240 _allocation_rate.allocation_counter_reset();
241 ++_cycles_since_last_resize;
242 }
243
244 void ShenandoahAdaptiveHeuristics::record_success_concurrent(bool abbreviated) {
245 ShenandoahHeuristics::record_success_concurrent(abbreviated);
246
247 size_t available = MIN2(_generation->available(), ShenandoahHeap::heap()->free_set()->available());
248
249 double z_score = 0.0;
250 double available_sd = _available.sd();
251 if (available_sd > 0) {
252 double available_avg = _available.avg();
253 z_score = (double(available) - available_avg) / available_sd;
254 log_debug(gc, ergo)("%s Available: " SIZE_FORMAT " %sB, z-score=%.3f. Average available: %.1f %sB +/- %.1f %sB.",
255 _generation->name(),
256 byte_size_in_proper_unit(available), proper_unit_for_byte_size(available), z_score,
257 byte_size_in_proper_unit(available_avg), proper_unit_for_byte_size(available_avg),
258 byte_size_in_proper_unit(available_sd), proper_unit_for_byte_size(available_sd));
259 }
260
261 _available.add(double(available));
262
263 // In the case when a concurrent GC cycle completes successfully but with an
264 // unusually small amount of available memory we will adjust our trigger
265 // parameters so that they are more likely to initiate a new cycle.
266 // Conversely, when a GC cycle results in an above average amount of available
267 // memory, we will adjust the trigger parameters to be less likely to initiate
268 // a GC cycle.
269 //
270 // The z-score we've computed is in no way statistically related to the
271 // trigger parameters, but it has the nice property that worse z-scores for
272 // available memory indicate making larger adjustments to the trigger
273 // parameters. It also results in fewer adjustments as the application
274 // stabilizes.
275 //
276 // In order to avoid making endless and likely unnecessary adjustments to the
277 // trigger parameters, the change in available memory (with respect to the
278 // average) at the end of a cycle must be beyond these threshold values.
279 if (z_score < LOWEST_EXPECTED_AVAILABLE_AT_END ||
280 z_score > HIGHEST_EXPECTED_AVAILABLE_AT_END) {
281 // The sign is flipped because a negative z-score indicates that the
297 ShenandoahHeuristics::record_success_degenerated();
298 // Adjust both trigger's parameters in the case of a degenerated GC because
299 // either of them should have triggered earlier to avoid this case.
300 adjust_margin_of_error(DEGENERATE_PENALTY_SD);
301 adjust_spike_threshold(DEGENERATE_PENALTY_SD);
302 }
303
304 void ShenandoahAdaptiveHeuristics::record_success_full() {
305 ShenandoahHeuristics::record_success_full();
306 // Adjust both trigger's parameters in the case of a full GC because
307 // either of them should have triggered earlier to avoid this case.
308 adjust_margin_of_error(FULL_PENALTY_SD);
309 adjust_spike_threshold(FULL_PENALTY_SD);
310 }
311
312 static double saturate(double value, double min, double max) {
313 return MAX2(MIN2(value, max), min);
314 }
315
316 bool ShenandoahAdaptiveHeuristics::should_start_gc() {
317 size_t max_capacity = _generation->max_capacity();
318 size_t capacity = _generation->soft_max_capacity();
319 size_t available = _generation->available();
320 size_t allocated = _generation->bytes_allocated_since_gc_start();
321
322 log_debug(gc)("should_start_gc (%s)? available: " SIZE_FORMAT ", soft_max_capacity: " SIZE_FORMAT
323 ", max_capacity: " SIZE_FORMAT ", allocated: " SIZE_FORMAT,
324 _generation->name(), available, capacity, max_capacity, allocated);
325
326 // The collector reserve may eat into what the mutator is allowed to use. Make sure we are looking
327 // at what is available to the mutator when deciding whether to start a GC.
328 size_t usable = ShenandoahHeap::heap()->free_set()->available();
329 if (usable < available) {
330 log_debug(gc)("Usable (" SIZE_FORMAT "%s) is less than available (" SIZE_FORMAT "%s)",
331 byte_size_in_proper_unit(usable), proper_unit_for_byte_size(usable),
332 byte_size_in_proper_unit(available), proper_unit_for_byte_size(available));
333 available = usable;
334 }
335
336 // Allocation spikes are a characteristic of both the application ahd the JVM configuration. On the JVM command line,
337 // the application developer may want to supply a hint of the nature of spikes that are inherent in the application
338 // workload, and this information would normally be independent of heap size (not a percentage thereof). On the
339 // other hand, some allocation spikes are correlated with JVM configuration. For example, there are allocation
340 // spikes at the starts of concurrent marking and evacuation to refresh all local allocation buffers. The nature
341 // of these spikes is determined by LAB min and max sizes and numbers of threads, but also on frequency of GC passes,
342 // and on "periodic" behavior of these threads If GC frequency is much higher than the periodic trigger for mutator
343 // threads, then many of the mutator threads may be able to "sit out" of most GC passes. Though the thread's stack
344 // must be scanned, the thread does not need to refresh its LABs if it sits idle throughout the duration of the GC
345 // pass. The best prediction for this aspect of spikes in allocation patterns is probably recent past history.
346 // TODO: and dive deeper into _gc_time_penalties as this may also need to be corrected
347
348 // Check if allocation headroom is still okay. This also factors in:
349 // 1. Some space to absorb allocation spikes (ShenandoahAllocSpikeFactor)
350 // 2. Accumulated penalties from Degenerated and Full GC
351 size_t allocation_headroom = available;
352 size_t spike_headroom = capacity / 100 * ShenandoahAllocSpikeFactor;
353 size_t penalties = capacity / 100 * _gc_time_penalties;
354
355 allocation_headroom -= MIN2(allocation_headroom, penalties);
356 allocation_headroom -= MIN2(allocation_headroom, spike_headroom);
357
358 // Track allocation rate even if we decide to start a cycle for other reasons.
359 double rate = _allocation_rate.sample(allocated);
360 _last_trigger = OTHER;
361
362 size_t min_threshold = min_free_threshold();
363
364 if (allocation_headroom < min_threshold) {
365 log_info(gc)("Trigger (%s): Free (" SIZE_FORMAT "%s) is below minimum threshold (" SIZE_FORMAT "%s)",
366 _generation->name(),
367 byte_size_in_proper_unit(allocation_headroom), proper_unit_for_byte_size(allocation_headroom),
368 byte_size_in_proper_unit(min_threshold), proper_unit_for_byte_size(min_threshold));
369 return resize_and_evaluate();
370 }
371
372 // Check if we need to learn a bit about the application
373 const size_t max_learn = ShenandoahLearningSteps;
374 if (_gc_times_learned < max_learn) {
375 size_t init_threshold = capacity / 100 * ShenandoahInitFreeThreshold;
376 if (allocation_headroom < init_threshold) {
377 log_info(gc)("Trigger (%s): Learning " SIZE_FORMAT " of " SIZE_FORMAT ". Free (" SIZE_FORMAT "%s) is below initial threshold (" SIZE_FORMAT "%s)",
378 _generation->name(), _gc_times_learned + 1, max_learn,
379 byte_size_in_proper_unit(allocation_headroom), proper_unit_for_byte_size(allocation_headroom),
380 byte_size_in_proper_unit(init_threshold), proper_unit_for_byte_size(init_threshold));
381 return true;
382 }
383 }
384
385 // Rationale:
386 // The idea is that there is an average allocation rate and there are occasional abnormal bursts (or spikes) of
387 // allocations that exceed the average allocation rate. What do these spikes look like?
388 //
389 // 1. At certain phase changes, we may discard large amounts of data and replace it with large numbers of newly
390 // allocated objects. This "spike" looks more like a phase change. We were in steady state at M bytes/sec
391 // allocation rate and now we're in a "reinitialization phase" that looks like N bytes/sec. We need the "spike"
392 // accomodation to give us enough runway to recalibrate our "average allocation rate".
393 //
394 // 2. The typical workload changes. "Suddenly", our typical workload of N TPS increases to N+delta TPS. This means
395 // our average allocation rate needs to be adjusted. Once again, we need the "spike" accomodation to give us
396 // enough runway to recalibrate our "average allocation rate".
397 //
398 // 3. Though there is an "average" allocation rate, a given workload's demand for allocation may be very bursty. We
399 // allocate a bunch of LABs during the 5 ms that follow completion of a GC, then we perform no more allocations for
400 // the next 150 ms. It seems we want the "spike" to represent the maximum divergence from average within the
401 // period of time between consecutive evaluation of the should_start_gc() service. Here's the thinking:
402 //
403 // a) Between now and the next time I ask whether should_start_gc(), we might experience a spike representing
404 // the anticipated burst of allocations. If that would put us over budget, then we should start GC immediately.
405 // b) Between now and the anticipated depletion of allocation pool, there may be two or more bursts of allocations.
406 // If there are more than one of these bursts, we can "approximate" that these will be separated by spans of
407 // time with very little or no allocations so the "average" allocation rate should be a suitable approximation
408 // of how this will behave.
409 //
410 // For cases 1 and 2, we need to "quickly" recalibrate the average allocation rate whenever we detect a change
411 // in operation mode. We want some way to decide that the average rate has changed. Make average allocation rate
412 // computations an independent effort.
413
414
415 // TODO: Account for inherent delays in responding to GC triggers
416 // 1. It has been observed that delays of 200 ms or greater are common between the moment we return true from should_start_gc()
417 // and the moment at which we begin execution of the concurrent reset phase. Add this time into the calculation of
418 // avg_cycle_time below. (What is "this time"? Perhaps we should remember recent history of this delay for the
419 // running workload and use the maximum delay recently seen for "this time".)
420 // 2. The frequency of inquiries to should_start_gc() is adaptive, ranging between ShenandoahControlIntervalMin and
421 // ShenandoahControlIntervalMax. The current control interval (or the max control interval) should also be added into
422 // the calculation of avg_cycle_time below.
423
424 double avg_cycle_time = _gc_cycle_time_history->davg() + (_margin_of_error_sd * _gc_cycle_time_history->dsd());
425
426 size_t last_live_memory = get_last_live_memory();
427 size_t penultimate_live_memory = get_penultimate_live_memory();
428 double original_cycle_time = avg_cycle_time;
429 if ((penultimate_live_memory < last_live_memory) && (penultimate_live_memory != 0)) {
430 // If the live-memory size is growing, our estimates of cycle time are based on lighter workload, so adjust.
431 // TODO: Be more precise about how to scale when live memory is growing. Existing code is a very rough approximation
432 // tuned with very limited workload observations.
433 avg_cycle_time = (avg_cycle_time * 2 * last_live_memory) / penultimate_live_memory;
434 } else {
435 int degen_cycles = degenerated_cycles_in_a_row();
436 if (degen_cycles > 0) {
437 // If we've degenerated recently, we might be waiting too long between triggers so adjust trigger forward.
438 // TODO: Be more precise about how to scale when we've experienced recent degenerated GC. Existing code is a very
439 // rough approximation tuned with very limited workload observations.
440 avg_cycle_time += degen_cycles * avg_cycle_time;
441 }
442 }
443
444 double avg_alloc_rate = _allocation_rate.upper_bound(_margin_of_error_sd);
445 log_debug(gc)("%s: average GC time: %.2f ms, allocation rate: %.0f %s/s",
446 _generation->name(), avg_cycle_time * 1000, byte_size_in_proper_unit(avg_alloc_rate), proper_unit_for_byte_size(avg_alloc_rate));
447
448 if (avg_cycle_time > allocation_headroom / avg_alloc_rate) {
449 if (avg_cycle_time > original_cycle_time) {
450 log_debug(gc)("%s: average GC time adjusted from: %.2f ms to %.2f ms because upward trend in live memory retention",
451 _generation->name(), original_cycle_time, avg_cycle_time);
452 }
453
454 log_info(gc)("Trigger (%s): Average GC time (%.2f ms) is above the time for average allocation rate (%.0f %sB/s) to deplete free headroom (" SIZE_FORMAT "%s) (margin of error = %.2f)",
455 _generation->name(), avg_cycle_time * 1000,
456 byte_size_in_proper_unit(avg_alloc_rate), proper_unit_for_byte_size(avg_alloc_rate),
457 byte_size_in_proper_unit(allocation_headroom), proper_unit_for_byte_size(allocation_headroom),
458 _margin_of_error_sd);
459
460 log_info(gc, ergo)("Free headroom: " SIZE_FORMAT "%s (free) - " SIZE_FORMAT "%s (spike) - " SIZE_FORMAT "%s (penalties) = " SIZE_FORMAT "%s",
461 byte_size_in_proper_unit(available), proper_unit_for_byte_size(available),
462 byte_size_in_proper_unit(spike_headroom), proper_unit_for_byte_size(spike_headroom),
463 byte_size_in_proper_unit(penalties), proper_unit_for_byte_size(penalties),
464 byte_size_in_proper_unit(allocation_headroom), proper_unit_for_byte_size(allocation_headroom));
465
466 _last_trigger = RATE;
467 return resize_and_evaluate();
468 }
469
470 bool is_spiking = _allocation_rate.is_spiking(rate, _spike_threshold_sd);
471 if (is_spiking && avg_cycle_time > allocation_headroom / rate) {
472 log_info(gc)("Trigger (%s): Average GC time (%.2f ms) is above the time for instantaneous allocation rate (%.0f %sB/s) to deplete free headroom (" SIZE_FORMAT "%s) (spike threshold = %.2f)",
473 _generation->name(), avg_cycle_time * 1000,
474 byte_size_in_proper_unit(rate), proper_unit_for_byte_size(rate),
475 byte_size_in_proper_unit(allocation_headroom), proper_unit_for_byte_size(allocation_headroom),
476
477 _spike_threshold_sd);
478 _last_trigger = SPIKE;
479 return resize_and_evaluate();
480 }
481
482 return ShenandoahHeuristics::should_start_gc();
483 }
484
485 bool ShenandoahAdaptiveHeuristics::resize_and_evaluate() {
486 ShenandoahHeap* heap = ShenandoahHeap::heap();
487 if (!heap->mode()->is_generational()) {
488 // We only attempt to resize the generations in generational mode.
489 return true;
490 }
491
492 if (_cycles_since_last_resize <= MINIMUM_RESIZE_INTERVAL) {
493 log_info(gc, ergo)("Not resizing %s for another " UINT32_FORMAT " cycles.",
494 _generation->name(), _cycles_since_last_resize);
495 return true;
496 }
497
498 if (!heap->generation_sizer()->transfer_capacity(_generation)) {
499 // We could not enlarge our generation, so we must start a gc cycle.
500 log_info(gc, ergo)("Could not increase size of %s, begin gc cycle.", _generation->name());
501 return true;
502 }
503
504 log_info(gc)("Increased size of %s generation, re-evaluate trigger criteria", _generation->name());
505 return should_start_gc();
506 }
507
508 void ShenandoahAdaptiveHeuristics::adjust_last_trigger_parameters(double amount) {
509 switch (_last_trigger) {
510 case RATE:
511 adjust_margin_of_error(amount);
512 break;
513 case SPIKE:
514 adjust_spike_threshold(amount);
515 break;
516 case OTHER:
517 // nothing to adjust here.
518 break;
519 default:
520 ShouldNotReachHere();
521 }
522 }
523
524 void ShenandoahAdaptiveHeuristics::adjust_margin_of_error(double amount) {
525 _margin_of_error_sd = saturate(_margin_of_error_sd + amount, MINIMUM_CONFIDENCE, MAXIMUM_CONFIDENCE);
526 log_debug(gc, ergo)("Margin of error now %.2f", _margin_of_error_sd);
527 }
|