/*
 * Copyright (c) 2021, Amazon.com, Inc. or its affiliates.  All rights reserved.
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */


#include "precompiled.hpp"

#include "gc/shenandoah/shenandoahHeap.inline.hpp"
#include "gc/shenandoah/shenandoahOopClosures.inline.hpp"
#include "gc/shenandoah/shenandoahReferenceProcessor.hpp"
#include "gc/shenandoah/shenandoahScanRemembered.inline.hpp"
#include "logging/log.hpp"

ShenandoahDirectCardMarkRememberedSet::ShenandoahDirectCardMarkRememberedSet(ShenandoahCardTable* card_table, size_t total_card_count) {
  _heap = ShenandoahHeap::heap();
  _card_table = card_table;
  _total_card_count = total_card_count;
  _cluster_count = total_card_count / ShenandoahCardCluster<ShenandoahDirectCardMarkRememberedSet>::CardsPerCluster;
  _card_shift = CardTable::card_shift();

  _byte_map = _card_table->byte_for_index(0);

  _whole_heap_base = _card_table->addr_for(_byte_map);
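  // _byte_map_base is the biased card-table base: because (heap base >> _card_shift) is subtracted
  // from _byte_map below, _byte_map_base[addr >> _card_shift] addresses the card entry covering heap
  // address addr without first subtracting the heap base.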
  _byte_map_base = _byte_map - (uintptr_t(_whole_heap_base) >> _card_shift);

  assert(total_card_count % ShenandoahCardCluster<ShenandoahDirectCardMarkRememberedSet>::CardsPerCluster == 0, "Invalid card count.");
  assert(total_card_count > 0, "Card count cannot be zero.");
}

ShenandoahScanRememberedTask::ShenandoahScanRememberedTask(ShenandoahObjToScanQueueSet* queue_set,
                                                           ShenandoahObjToScanQueueSet* old_queue_set,
                                                           ShenandoahReferenceProcessor* rp,
                                                           ShenandoahRegionChunkIterator* work_list, bool is_concurrent) :
  WorkerTask("Scan Remembered Set"),
  _queue_set(queue_set), _old_queue_set(old_queue_set), _rp(rp), _work_list(work_list), _is_concurrent(is_concurrent) {}

void ShenandoahScanRememberedTask::work(uint worker_id) {
  if (_is_concurrent) {
    // This sets up a thread local reference to the worker_id which is needed by the weak reference processor.
    ShenandoahConcurrentWorkerSession worker_session(worker_id);
    ShenandoahSuspendibleThreadSetJoiner stsj(ShenandoahSuspendibleWorkers);
    do_work(worker_id);
  } else {
    // This sets up a thread local reference to the worker_id which is needed by the weak reference processor.
    ShenandoahParallelWorkerSession worker_session(worker_id);
    do_work(worker_id);
  }
}

void ShenandoahScanRememberedTask::do_work(uint worker_id) {
  ShenandoahWorkerTimingsTracker x(ShenandoahPhaseTimings::init_scan_rset, ShenandoahPhaseTimings::ScanClusters, worker_id);

  ShenandoahObjToScanQueue* q = _queue_set->queue(worker_id);
  ShenandoahObjToScanQueue* old = _old_queue_set == NULL ? NULL : _old_queue_set->queue(worker_id);
  ShenandoahMarkRefsClosure<YOUNG> cl(q, _rp, old);
  ShenandoahHeap* heap = ShenandoahHeap::heap();
  RememberedScanner* scanner = heap->card_scan();

  // Set up the thread-local mark closure used by the Shenandoah reference processor.
  _rp->set_mark_closure(worker_id, &cl);
  struct ShenandoahRegionChunk assignment;
  while (_work_list->next(&assignment)) {
    ShenandoahHeapRegion* region = assignment._r;
    log_debug(gc)("ShenandoahScanRememberedTask::do_work(%u), processing slice of region "
                  SIZE_FORMAT " at offset " SIZE_FORMAT ", size: " SIZE_FORMAT,
                  worker_id, region->index(), assignment._chunk_offset, assignment._chunk_size);
    if (region->affiliation() == OLD_GENERATION) {
      size_t cluster_size =
        CardTable::card_size_in_words() * ShenandoahCardCluster<ShenandoahDirectCardMarkRememberedSet>::CardsPerCluster;
      size_t clusters = assignment._chunk_size / cluster_size;
      assert(clusters * cluster_size == assignment._chunk_size, "Chunk assignments must align on cluster boundaries");
      HeapWord* end_of_range = region->bottom() + assignment._chunk_offset + assignment._chunk_size;

      // During concurrent mark, region->top() equals TAMS with respect to the current young-gen pass.
      if (end_of_range > region->top()) {
        end_of_range = region->top();
      }
      scanner->process_region_slice(region, assignment._chunk_offset, clusters, end_of_range, &cl, false, _is_concurrent, worker_id);
    }
#ifdef ENABLE_REMEMBERED_SET_CANCELLATION
    // This check is currently disabled to avoid crashes that occur
    // when we try to cancel remembered set scanning; it should be re-enabled
    // after the issues are fixed, as it would allow more prompt cancellation and
    // transition to degenerated / full GCs. Note that work that has been assigned/
    // claimed above must be completed before we return here upon cancellation.
    if (heap->check_cancelled_gc_and_yield(_is_concurrent)) {
      return;
    }
#endif
  }
}

size_t ShenandoahRegionChunkIterator::calc_regular_group_size() {
  // The group size is calculated from the number of regions.  Suppose the entire heap size is N.  The first group processes
  // N/2 of total heap size.  The second group processes N/4 of total heap size.  The third group processes N/8 of total heap
  // size, and so on.  Note that N/2 + N/4 + N/8 + N/16 + ...  sums to N if expanded to infinite terms.
  //
  // The normal group size is the number of regions / 2.
  //
  // In the case that region_size_words is greater than _maximum_chunk_size_words, the first group's size is
  // larger than the normal group size because each chunk in the group will be smaller than the region size.
  //
  // The last group also has more than the normal number of entries because it finishes the total scanning effort.  The chunk
  // sizes are different for each group.  The intention is that the first group processes roughly half of the heap, the second
  // processes half of what remains (a quarter of the heap), the third half of what then remains (an eighth), and so on.  The
  // smallest chunk size is represented by _smallest_chunk_size_words.  We do not divide work any smaller than this.
  //
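  // Illustrative example (numbers assumed here for the sake of the sketch, not taken from any particular
  // configuration): with 1024 regions and region_size_words <= _maximum_chunk_size_words (so no first-group
  // rebalance), the regular group size is 512 chunks.  Group 0 then holds 512 region-sized chunks (half the
  // heap), group 1 holds 512 half-region chunks (a quarter of the heap), group 2 holds 512 quarter-region
  // chunks (an eighth), and so on, until either the chunk size reaches _smallest_chunk_size_words or
  // _maximum_groups groups have been created; any remainder is folded into the last group.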

  size_t group_size = _heap->num_regions() / 2;
  return group_size;
}

size_t ShenandoahRegionChunkIterator::calc_first_group_chunk_size_b4_rebalance() {
  size_t words_in_first_chunk = ShenandoahHeapRegion::region_size_words();
  return words_in_first_chunk;
}

size_t ShenandoahRegionChunkIterator::calc_num_groups() {
  size_t total_heap_size = _heap->num_regions() * ShenandoahHeapRegion::region_size_words();
  size_t num_groups = 0;
  size_t cumulative_group_span = 0;
  size_t current_group_span = _first_group_chunk_size_b4_rebalance * _regular_group_size;
  size_t smallest_group_span = smallest_chunk_size_words() * _regular_group_size;
  while ((num_groups < _maximum_groups) && (cumulative_group_span + current_group_span <= total_heap_size)) {
    num_groups++;
    cumulative_group_span += current_group_span;
    if (current_group_span <= smallest_group_span) {
      break;
    } else {
      current_group_span /= 2;    // Each group spans half of what the preceding group spanned.
    }
  }
  // Loop post conditions:
  //   num_groups <= _maximum_groups
  //   cumulative_group_span is the memory spanned by num_groups
  //   current_group_span is the span of the last fully populated group (assuming the loop iterates at least once)
  //   each of num_groups is fully populated with _regular_group_size chunks in each
  // Non post conditions:
  //   cumulative_group_span may be less than total_heap_size for one or more of the following reasons:
  //   a) The number of regions remaining to be spanned is smaller than a complete group, or
  //   b) We have filled up all groups through _maximum_groups and still have not spanned all regions
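  //
  // Worked example (illustrative numbers only): if total_heap_size were 1000 span-units and the loop had
  // accumulated groups spanning 512 + 256 + 128 = 896 units before stopping, 104 units would remain.  If a
  // new group may still be added (we are below _maximum_groups and above the minimum chunk size), the
  // remainder becomes one final, not-full group whose chunk size is half that of the previous group;
  // otherwise the remainder is folded into the most recently created group as extra chunks.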

  if (cumulative_group_span < total_heap_size) {
    // We've got more regions to span
    if ((num_groups < _maximum_groups) && (current_group_span > smallest_group_span)) {
      num_groups++;             // Place all remaining regions into a new not-full group (chunk_size half that of previous group)
    }
    // Else we are unable to create a new group because we've exceeded the number of allowed groups or have reached the
    // minimum chunk size.

    // Any remaining regions will be treated as if they are part of the most recently created group.  This group will
    // have more than _regular_group_size chunks within it.
  }
  return num_groups;
}

size_t ShenandoahRegionChunkIterator::calc_total_chunks() {
  size_t region_size_words = ShenandoahHeapRegion::region_size_words();
  size_t unspanned_heap_size = _heap->num_regions() * region_size_words;
  size_t num_chunks = 0;
  size_t cumulative_group_span = 0;
  size_t current_group_span = _first_group_chunk_size_b4_rebalance * _regular_group_size;
  size_t smallest_group_span = smallest_chunk_size_words() * _regular_group_size;

  // The first group gets special handling because the first chunk size can be no larger than _maximum_chunk_size_words
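  // Illustrative sketch (numbers assumed): if region_size_words were 4 * _maximum_chunk_size_words, the first
  // group's chunks would be capped at _maximum_chunk_size_words, so spanning the first group's memory takes
  // 4 times as many chunks; the loop below also folds in what would otherwise have been the next smaller
  // groups (half-region and quarter-region chunks) until the effective chunk size drops below
  // _maximum_chunk_size_words.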
  if (region_size_words > _maximum_chunk_size_words) {
    // In the case that we shrink the first group's chunk size, certain other groups will also be subsumed within the first group
    size_t effective_chunk_size = _first_group_chunk_size_b4_rebalance;
    while (effective_chunk_size >= _maximum_chunk_size_words) {
      num_chunks += current_group_span / _maximum_chunk_size_words;
      unspanned_heap_size -= current_group_span;
      effective_chunk_size /= 2;
      current_group_span /= 2;
    }
  } else {
    num_chunks = _regular_group_size;
    unspanned_heap_size -= current_group_span;
    current_group_span /= 2;
  }
  size_t spanned_groups = 1;
  while (unspanned_heap_size > 0) {
    if (current_group_span <= unspanned_heap_size) {
      unspanned_heap_size -= current_group_span;
      num_chunks += _regular_group_size;
      spanned_groups++;

      // _num_groups is the number of groups required to span the configured heap size.  We are not allowed
      // to change the number of groups.  The last group is responsible for spanning all chunks not spanned
      // by previously processed groups.
      if (spanned_groups >= _num_groups) {
        // The last group has more than _regular_group_size entries.
        size_t chunk_span = current_group_span / _regular_group_size;
        size_t extra_chunks = unspanned_heap_size / chunk_span;
        assert (extra_chunks * chunk_span == unspanned_heap_size, "Chunks must precisely span regions");
        num_chunks += extra_chunks;
        return num_chunks;
      } else if (current_group_span <= smallest_group_span) {
        // We cannot introduce new groups because we've reached the lower bound on group size.  So this last
        // group may hold extra chunks.
        size_t chunk_span = smallest_chunk_size_words();
        size_t extra_chunks = unspanned_heap_size / chunk_span;
        assert (extra_chunks * chunk_span == unspanned_heap_size, "Chunks must precisely span regions");
        num_chunks += extra_chunks;
        return num_chunks;
      } else {
        current_group_span /= 2;
      }
    } else {
      // This last group has fewer than _regular_group_size entries.
      size_t chunk_span = current_group_span / _regular_group_size;
      size_t last_group_size = unspanned_heap_size / chunk_span;
      assert (last_group_size * chunk_span == unspanned_heap_size, "Chunks must precisely span regions");
      num_chunks += last_group_size;
      return num_chunks;
    }
  }
  return num_chunks;
}

ShenandoahRegionChunkIterator::ShenandoahRegionChunkIterator(size_t worker_count) :
    ShenandoahRegionChunkIterator(ShenandoahHeap::heap(), worker_count)
{
}

ShenandoahRegionChunkIterator::ShenandoahRegionChunkIterator(ShenandoahHeap* heap, size_t worker_count) :
    _heap(heap),
    _regular_group_size(calc_regular_group_size()),
    _first_group_chunk_size_b4_rebalance(calc_first_group_chunk_size_b4_rebalance()),
    _num_groups(calc_num_groups()),
    _total_chunks(calc_total_chunks()),
    _index(0)
{
#ifdef ASSERT
  size_t expected_chunk_size_words = _clusters_in_smallest_chunk * CardTable::card_size_in_words() * ShenandoahCardCluster<ShenandoahDirectCardMarkRememberedSet>::CardsPerCluster;
  assert(smallest_chunk_size_words() == expected_chunk_size_words, "_smallest_chunk_size (" SIZE_FORMAT ") is not valid because it does not equal (" SIZE_FORMAT ")",
         smallest_chunk_size_words(), expected_chunk_size_words);
#endif
  assert(_num_groups <= _maximum_groups,
         "The number of remembered set scanning groups must be less than or equal to maximum groups");
  assert(smallest_chunk_size_words() << (_maximum_groups - 1) == _maximum_chunk_size_words,
         "Maximum number of groups needs to span maximum chunk size to smallest chunk size");

  size_t words_in_region = ShenandoahHeapRegion::region_size_words();
  _region_index[0] = 0;
  _group_offset[0] = 0;
  if (words_in_region > _maximum_chunk_size_words) {
    // In the case that we shrink the first group's chunk size, certain other groups will also be subsumed within the first group
    size_t num_chunks = 0;
    size_t effective_chunk_size = _first_group_chunk_size_b4_rebalance;
    size_t current_group_span = effective_chunk_size * _regular_group_size;
    while (effective_chunk_size >= _maximum_chunk_size_words) {
      num_chunks += current_group_span / _maximum_chunk_size_words;
      effective_chunk_size /= 2;
      current_group_span /= 2;
    }
    _group_entries[0] = num_chunks;
    _group_chunk_size[0] = _maximum_chunk_size_words;
  } else {
    _group_entries[0] = _regular_group_size;
    _group_chunk_size[0] = _first_group_chunk_size_b4_rebalance;
  }

  size_t previous_group_span = _group_entries[0] * _group_chunk_size[0];
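  // For each subsequent group: halve the chunk size, record the region and in-region word offset at which
  // the group begins, and accumulate the chunk count (_group_entries[] holds cumulative counts, not
  // per-group counts).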
  for (size_t i = 1; i < _num_groups; i++) {
    size_t previous_group_entries = (i == 1)? _group_entries[0]: (_group_entries[i-1] - _group_entries[i-2]);
    _group_chunk_size[i] = _group_chunk_size[i-1] / 2;
    size_t chunks_in_group = _regular_group_size;
    size_t this_group_span = _group_chunk_size[i] * chunks_in_group;
    size_t total_span_of_groups = previous_group_span + this_group_span;
    _region_index[i] = previous_group_span / words_in_region;
    _group_offset[i] = previous_group_span % words_in_region;
    _group_entries[i] = _group_entries[i-1] + _regular_group_size;
    previous_group_span = total_span_of_groups;
  }
  if (_group_entries[_num_groups-1] < _total_chunks) {
    assert((_total_chunks - _group_entries[_num_groups-1]) * _group_chunk_size[_num_groups-1] + previous_group_span ==
           heap->num_regions() * words_in_region, "Total region chunks (" SIZE_FORMAT
           ") do not span total heap regions (" SIZE_FORMAT ")", _total_chunks, _heap->num_regions());
    previous_group_span += (_total_chunks - _group_entries[_num_groups-1]) * _group_chunk_size[_num_groups-1];
    _group_entries[_num_groups-1] = _total_chunks;
  }
  assert(previous_group_span == heap->num_regions() * words_in_region, "Total region chunks (" SIZE_FORMAT
         ") do not span total heap regions (" SIZE_FORMAT "): " SIZE_FORMAT " does not equal " SIZE_FORMAT,
         _total_chunks, _heap->num_regions(), previous_group_span, heap->num_regions() * words_in_region);

  // Not necessary, but keeps things tidy
  for (size_t i = _num_groups; i < _maximum_groups; i++) {
    _region_index[i] = 0;
    _group_offset[i] = 0;
    _group_entries[i] = _group_entries[i-1];
    _group_chunk_size[i] = 0;
  }
}

void ShenandoahRegionChunkIterator::reset() {
  _index = 0;
}