1 /*
  2  * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
  3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  4  *
  5  * This code is free software; you can redistribute it and/or modify it
  6  * under the terms of the GNU General Public License version 2 only, as
  7  * published by the Free Software Foundation.
  8  *
  9  * This code is distributed in the hope that it will be useful, but WITHOUT
 10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 12  * version 2 for more details (a copy is included in the LICENSE file that
 13  * accompanied this code).
 14  *
 15  * You should have received a copy of the GNU General Public License version
 16  * 2 along with this work; if not, write to the Free Software Foundation,
 17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 18  *
 19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 20  * or visit www.oracle.com if you need additional information or have any
 21  * questions.
 22  *
 23  */
 24 
 25 
 26 #include "precompiled.hpp"
 27 
 28 #include "gc/shenandoah/shenandoahHeap.inline.hpp"
 29 #include "gc/shenandoah/shenandoahOopClosures.inline.hpp"
 30 #include "gc/shenandoah/shenandoahReferenceProcessor.hpp"
 31 #include "gc/shenandoah/shenandoahScanRemembered.inline.hpp"
 32 #include "logging/log.hpp"
 33 
 34 ShenandoahDirectCardMarkRememberedSet::ShenandoahDirectCardMarkRememberedSet(ShenandoahCardTable* card_table, size_t total_card_count) :
 35   LogCardValsPerIntPtr(log2i_exact(sizeof(intptr_t)) - log2i_exact(sizeof(CardValue))),
 36   LogCardSizeInWords(log2i_exact(CardTable::card_size_in_words())) {
 37 
 38   // Paranoid assert for LogCardsPerIntPtr calculation above
 39   assert(sizeof(intptr_t) > sizeof(CardValue), "LogsCardValsPerIntPtr would underflow");
 40 
 41   _heap = ShenandoahHeap::heap();
 42   _card_table = card_table;
 43   _total_card_count = total_card_count;
 44   _cluster_count = total_card_count / ShenandoahCardCluster<ShenandoahDirectCardMarkRememberedSet>::CardsPerCluster;
 45   _card_shift = CardTable::card_shift();
 46 
 47   _byte_map = _card_table->byte_for_index(0);
 48 
 49   _whole_heap_base = _card_table->addr_for(_byte_map);
 50   _byte_map_base = _byte_map - (uintptr_t(_whole_heap_base) >> _card_shift);
 51 
 52   assert(total_card_count % ShenandoahCardCluster<ShenandoahDirectCardMarkRememberedSet>::CardsPerCluster == 0, "Invalid card count.");
 53   assert(total_card_count > 0, "Card count cannot be zero.");
 54 }
 55 
 56 // Merge any dirty values from write table into the read table, while leaving
 57 // the write table unchanged.
 58 void ShenandoahDirectCardMarkRememberedSet::merge_write_table(HeapWord* start, size_t word_count) {
 59   size_t start_index = card_index_for_addr(start);
 60 #ifdef ASSERT
 61   // avoid querying card_index_for_addr() for an address past end of heap
 62   size_t end_index = card_index_for_addr(start + word_count - 1) + 1;
 63 #endif
 64   assert(start_index % ((size_t)1 << LogCardValsPerIntPtr) == 0, "Expected a multiple of CardValsPerIntPtr");
 65   assert(end_index % ((size_t)1 << LogCardValsPerIntPtr) == 0, "Expected a multiple of CardValsPerIntPtr");
 66 
 67   // We'll access in groups of intptr_t worth of card entries
 68   intptr_t* const read_table  = (intptr_t*) &(_card_table->read_byte_map())[start_index];
 69   intptr_t* const write_table = (intptr_t*) &(_card_table->write_byte_map())[start_index];
 70 
 71   // Avoid division, use shift instead
 72   assert(word_count % ((size_t)1 << (LogCardSizeInWords + LogCardValsPerIntPtr)) == 0, "Expected a multiple of CardSizeInWords*CardValsPerIntPtr");
 73   size_t const num = word_count >> (LogCardSizeInWords + LogCardValsPerIntPtr);
 74 
 75   for (size_t i = 0; i < num; i++) {
 76     read_table[i] &= write_table[i];
 77   }
 78 }
 79 
 80 // Destructively copy the write table to the read table, and clean the write table.
 81 void ShenandoahDirectCardMarkRememberedSet::reset_remset(HeapWord* start, size_t word_count) {
 82   size_t start_index = card_index_for_addr(start);
 83 #ifdef ASSERT
 84   // avoid querying card_index_for_addr() for an address past end of heap
 85   size_t end_index = card_index_for_addr(start + word_count - 1) + 1;
 86 #endif
 87   assert(start_index % ((size_t)1 << LogCardValsPerIntPtr) == 0, "Expected a multiple of CardValsPerIntPtr");
 88   assert(end_index % ((size_t)1 << LogCardValsPerIntPtr) == 0, "Expected a multiple of CardValsPerIntPtr");
 89 
 90   // We'll access in groups of intptr_t worth of card entries
 91   intptr_t* const read_table  = (intptr_t*) &(_card_table->read_byte_map())[start_index];
 92   intptr_t* const write_table = (intptr_t*) &(_card_table->write_byte_map())[start_index];
 93 
 94   // Avoid division, use shift instead
 95   assert(word_count % ((size_t)1 << (LogCardSizeInWords + LogCardValsPerIntPtr)) == 0, "Expected a multiple of CardSizeInWords*CardValsPerIntPtr");
 96   size_t const num = word_count >> (LogCardSizeInWords + LogCardValsPerIntPtr);
 97 
 98   for (size_t i = 0; i < num; i++) {
 99     read_table[i]  = write_table[i];
100     write_table[i] = CardTable::clean_card_row_val();
101   }
102 }
103 
104 ShenandoahScanRememberedTask::ShenandoahScanRememberedTask(ShenandoahObjToScanQueueSet* queue_set,
105                                                            ShenandoahObjToScanQueueSet* old_queue_set,
106                                                            ShenandoahReferenceProcessor* rp,
107                                                            ShenandoahRegionChunkIterator* work_list, bool is_concurrent) :
108   WorkerTask("Scan Remembered Set"),
109   _queue_set(queue_set), _old_queue_set(old_queue_set), _rp(rp), _work_list(work_list), _is_concurrent(is_concurrent) {
110   log_info(gc, remset)("Scan remembered set using bitmap: %s", BOOL_TO_STR(ShenandoahHeap::heap()->is_old_bitmap_stable()));
111 }
112 
113 void ShenandoahScanRememberedTask::work(uint worker_id) {
114   if (_is_concurrent) {
115     // This sets up a thread local reference to the worker_id which is needed by the weak reference processor.
116     ShenandoahConcurrentWorkerSession worker_session(worker_id);
117     ShenandoahSuspendibleThreadSetJoiner stsj;
118     do_work(worker_id);
119   } else {
120     // This sets up a thread local reference to the worker_id which is needed by the weak reference processor.
121     ShenandoahParallelWorkerSession worker_session(worker_id);
122     do_work(worker_id);
123   }
124 }
125 
126 void ShenandoahScanRememberedTask::do_work(uint worker_id) {
127   ShenandoahWorkerTimingsTracker x(ShenandoahPhaseTimings::init_scan_rset, ShenandoahPhaseTimings::ScanClusters, worker_id);
128 
129   ShenandoahObjToScanQueue* q = _queue_set->queue(worker_id);
130   ShenandoahObjToScanQueue* old = _old_queue_set == nullptr ? nullptr : _old_queue_set->queue(worker_id);
131   ShenandoahMarkRefsClosure<YOUNG> cl(q, _rp, old);
132   ShenandoahHeap* heap = ShenandoahHeap::heap();
133   RememberedScanner* scanner = heap->card_scan();
134 
135   // set up thread local closure for shen ref processor
136   _rp->set_mark_closure(worker_id, &cl);
137   struct ShenandoahRegionChunk assignment;
138   while (_work_list->next(&assignment)) {
139     ShenandoahHeapRegion* region = assignment._r;
140     log_debug(gc)("ShenandoahScanRememberedTask::do_work(%u), processing slice of region "
141                   SIZE_FORMAT " at offset " SIZE_FORMAT ", size: " SIZE_FORMAT,
142                   worker_id, region->index(), assignment._chunk_offset, assignment._chunk_size);
143     if (region->is_old()) {
144       size_t cluster_size =
145         CardTable::card_size_in_words() * ShenandoahCardCluster<ShenandoahDirectCardMarkRememberedSet>::CardsPerCluster;
146       size_t clusters = assignment._chunk_size / cluster_size;
147       assert(clusters * cluster_size == assignment._chunk_size, "Chunk assignments must align on cluster boundaries");
148       HeapWord* end_of_range = region->bottom() + assignment._chunk_offset + assignment._chunk_size;
149 
150       // During concurrent mark, region->top() equals TAMS with respect to the current young-gen pass.
151       if (end_of_range > region->top()) {
152         end_of_range = region->top();
153       }
154       scanner->process_region_slice(region, assignment._chunk_offset, clusters, end_of_range, &cl, false, worker_id);
155     }
156 #ifdef ENABLE_REMEMBERED_SET_CANCELLATION
157     // This check is currently disabled to avoid crashes that occur
158     // when we try to cancel remembered set scanning; it should be re-enabled
159     // after the issues are fixed, as it would allow more prompt cancellation and
160     // transition to degenerated / full GCs. Note that work that has been assigned/
161     // claimed above must be completed before we return here upon cancellation.
162     if (heap->check_cancelled_gc_and_yield(_is_concurrent)) {
163       return;
164     }
165 #endif
166   }
167 }
168 
169 size_t ShenandoahRegionChunkIterator::calc_regular_group_size() {
170   // The group size is calculated from the number of regions.  Suppose the heap has N regions.  The first group processes
171   // N/2 regions.  The second group processes N/4 regions, the third group N/8 regions and so on.
172   // Note that infinite series N/2 + N/4 + N/8 + N/16 + ...  sums to N.
173   //
174   // The normal group size is the number of regions / 2.
175   //
176   // In the case that the region_size_words is greater than _maximum_chunk_size_words, the first group_size is
177   // larger than the normal group size because each chunk in the group will be smaller than the region size.
178   //
179   // The last group also has more than the normal entries because it finishes the total scanning effort.  The chunk sizes are
180   // different for each group.  The intention is that the first group processes roughly half of the heap, the second processes
181   // half of the remaining heap, the third processes half of what remains and so on.  The smallest chunk size
182   // is represented by _smallest_chunk_size_words.  We do not divide work any smaller than this.
183   //
184 
185   size_t group_size = _heap->num_regions() / 2;
186   return group_size;
187 }
188 
189 size_t ShenandoahRegionChunkIterator::calc_first_group_chunk_size_b4_rebalance() {
190   size_t words_in_first_chunk = ShenandoahHeapRegion::region_size_words();
191   return words_in_first_chunk;
192 }
193 
194 size_t ShenandoahRegionChunkIterator::calc_num_groups() {
195   size_t total_heap_size = _heap->num_regions() * ShenandoahHeapRegion::region_size_words();
196   size_t num_groups = 0;
197   size_t cumulative_group_span = 0;
198   size_t current_group_span = _first_group_chunk_size_b4_rebalance * _regular_group_size;
199   size_t smallest_group_span = smallest_chunk_size_words() * _regular_group_size;
200   while ((num_groups < _maximum_groups) && (cumulative_group_span + current_group_span <= total_heap_size)) {
201     num_groups++;
202     cumulative_group_span += current_group_span;
203     if (current_group_span <= smallest_group_span) {
204       break;
205     } else {
206       current_group_span /= 2;    // Each group spans half of what the preceding group spanned.
207     }
208   }
209   // Loop post condition:
210   //   num_groups <= _maximum_groups
211   //   cumulative_group_span is the memory spanned by num_groups
212   //   current_group_span is the span of the last fully populated group (assuming loop iterates at least once)
213   //   each of num_groups is fully populated with _regular_group_size chunks in each
214   // Non post conditions:
215   //   cumulative_group_span may be less than total_heap size for one or more of the folowing reasons
216   //   a) The number of regions remaining to be spanned is smaller than a complete group, or
217   //   b) We have filled up all groups through _maximum_groups and still have not spanned all regions
218 
219   if (cumulative_group_span < total_heap_size) {
220     // We've got more regions to span
221     if ((num_groups < _maximum_groups) && (current_group_span > smallest_group_span)) {
222       num_groups++;             // Place all remaining regions into a new not-full group (chunk_size half that of previous group)
223     }
224     // Else we are unable to create a new group because we've exceed the number of allowed groups or have reached the
225     // minimum chunk size.
226 
227     // Any remaining regions will be treated as if they are part of the most recently created group.  This group will
228     // have more than _regular_group_size chunks within it.
229   }
230   return num_groups;
231 }
232 
233 size_t ShenandoahRegionChunkIterator::calc_total_chunks() {
234   size_t region_size_words = ShenandoahHeapRegion::region_size_words();
235   size_t unspanned_heap_size = _heap->num_regions() * region_size_words;
236   size_t num_chunks = 0;
237   size_t cumulative_group_span = 0;
238   size_t current_group_span = _first_group_chunk_size_b4_rebalance * _regular_group_size;
239   size_t smallest_group_span = smallest_chunk_size_words() * _regular_group_size;
240 
241   // The first group gets special handling because the first chunk size can be no larger than _largest_chunk_size_words
242   if (region_size_words > _maximum_chunk_size_words) {
243     // In the case that we shrink the first group's chunk size, certain other groups will also be subsumed within the first group
244     size_t effective_chunk_size = _first_group_chunk_size_b4_rebalance;
245     while (effective_chunk_size >= _maximum_chunk_size_words) {
246       num_chunks += current_group_span / _maximum_chunk_size_words;
247       unspanned_heap_size -= current_group_span;
248       effective_chunk_size /= 2;
249       current_group_span /= 2;
250     }
251   } else {
252     num_chunks = _regular_group_size;
253     unspanned_heap_size -= current_group_span;
254     current_group_span /= 2;
255   }
256   size_t spanned_groups = 1;
257   while (unspanned_heap_size > 0) {
258     if (current_group_span <= unspanned_heap_size) {
259       unspanned_heap_size -= current_group_span;
260       num_chunks += _regular_group_size;
261       spanned_groups++;
262 
263       // _num_groups is the number of groups required to span the configured heap size.  We are not allowed
264       // to change the number of groups.  The last group is responsible for spanning all chunks not spanned
265       // by previously processed groups.
266       if (spanned_groups >= _num_groups) {
267         // The last group has more than _regular_group_size entries.
268         size_t chunk_span = current_group_span / _regular_group_size;
269         size_t extra_chunks = unspanned_heap_size / chunk_span;
270         assert (extra_chunks * chunk_span == unspanned_heap_size, "Chunks must precisely span regions");
271         num_chunks += extra_chunks;
272         return num_chunks;
273       } else if (current_group_span <= smallest_group_span) {
274         // We cannot introduce new groups because we've reached the lower bound on group size.  So this last
275         // group may hold extra chunks.
276         size_t chunk_span = smallest_chunk_size_words();
277         size_t extra_chunks = unspanned_heap_size / chunk_span;
278         assert (extra_chunks * chunk_span == unspanned_heap_size, "Chunks must precisely span regions");
279         num_chunks += extra_chunks;
280         return num_chunks;
281       } else {
282         current_group_span /= 2;
283       }
284     } else {
285       // This last group has fewer than _regular_group_size entries.
286       size_t chunk_span = current_group_span / _regular_group_size;
287       size_t last_group_size = unspanned_heap_size / chunk_span;
288       assert (last_group_size * chunk_span == unspanned_heap_size, "Chunks must precisely span regions");
289       num_chunks += last_group_size;
290       return num_chunks;
291     }
292   }
293   return num_chunks;
294 }
295 
296 ShenandoahRegionChunkIterator::ShenandoahRegionChunkIterator(size_t worker_count) :
297     ShenandoahRegionChunkIterator(ShenandoahHeap::heap(), worker_count)
298 {
299 }
300 
301 ShenandoahRegionChunkIterator::ShenandoahRegionChunkIterator(ShenandoahHeap* heap, size_t worker_count) :
302     _heap(heap),
303     _regular_group_size(calc_regular_group_size()),
304     _first_group_chunk_size_b4_rebalance(calc_first_group_chunk_size_b4_rebalance()),
305     _num_groups(calc_num_groups()),
306     _total_chunks(calc_total_chunks()),
307     _index(0)
308 {
309 #ifdef ASSERT
310   size_t expected_chunk_size_words = _clusters_in_smallest_chunk * CardTable::card_size_in_words() * ShenandoahCardCluster<ShenandoahDirectCardMarkRememberedSet>::CardsPerCluster;
311   assert(smallest_chunk_size_words() == expected_chunk_size_words, "_smallest_chunk_size (" SIZE_FORMAT") is not valid because it does not equal (" SIZE_FORMAT ")",
312          smallest_chunk_size_words(), expected_chunk_size_words);
313 #endif
314   assert(_num_groups <= _maximum_groups,
315          "The number of remembered set scanning groups must be less than or equal to maximum groups");
316   assert(smallest_chunk_size_words() << (_maximum_groups - 1) == _maximum_chunk_size_words,
317          "Maximum number of groups needs to span maximum chunk size to smallest chunk size");
318 
319   size_t words_in_region = ShenandoahHeapRegion::region_size_words();
320   _region_index[0] = 0;
321   _group_offset[0] = 0;
322   if (words_in_region > _maximum_chunk_size_words) {
323     // In the case that we shrink the first group's chunk size, certain other groups will also be subsumed within the first group
324     size_t num_chunks = 0;
325     size_t effective_chunk_size = _first_group_chunk_size_b4_rebalance;
326     size_t  current_group_span = effective_chunk_size * _regular_group_size;
327     while (effective_chunk_size >= _maximum_chunk_size_words) {
328       num_chunks += current_group_span / _maximum_chunk_size_words;
329       effective_chunk_size /= 2;
330       current_group_span /= 2;
331     }
332     _group_entries[0] = num_chunks;
333     _group_chunk_size[0] = _maximum_chunk_size_words;
334   } else {
335     _group_entries[0] = _regular_group_size;
336     _group_chunk_size[0] = _first_group_chunk_size_b4_rebalance;
337   }
338 
339   size_t previous_group_span = _group_entries[0] * _group_chunk_size[0];
340   for (size_t i = 1; i < _num_groups; i++) {
341     size_t previous_group_entries = (i == 1)? _group_entries[0]: (_group_entries[i-1] - _group_entries[i-2]);
342     _group_chunk_size[i] = _group_chunk_size[i-1] / 2;
343     size_t chunks_in_group = _regular_group_size;
344     size_t this_group_span = _group_chunk_size[i] * chunks_in_group;
345     size_t total_span_of_groups = previous_group_span + this_group_span;
346     _region_index[i] = previous_group_span / words_in_region;
347     _group_offset[i] = previous_group_span % words_in_region;
348     _group_entries[i] = _group_entries[i-1] + _regular_group_size;
349     previous_group_span = total_span_of_groups;
350   }
351   if (_group_entries[_num_groups-1] < _total_chunks) {
352     assert((_total_chunks - _group_entries[_num_groups-1]) * _group_chunk_size[_num_groups-1] + previous_group_span ==
353            heap->num_regions() * words_in_region, "Total region chunks (" SIZE_FORMAT
354            ") do not span total heap regions (" SIZE_FORMAT ")", _total_chunks, _heap->num_regions());
355     previous_group_span += (_total_chunks - _group_entries[_num_groups-1]) * _group_chunk_size[_num_groups-1];
356     _group_entries[_num_groups-1] = _total_chunks;
357   }
358   assert(previous_group_span == heap->num_regions() * words_in_region, "Total region chunks (" SIZE_FORMAT
359          ") do not span total heap regions (" SIZE_FORMAT "): " SIZE_FORMAT " does not equal " SIZE_FORMAT,
360          _total_chunks, _heap->num_regions(), previous_group_span, heap->num_regions() * words_in_region);
361 
362   // Not necessary, but keeps things tidy
363   for (size_t i = _num_groups; i < _maximum_groups; i++) {
364     _region_index[i] = 0;
365     _group_offset[i] = 0;
366     _group_entries[i] = _group_entries[i-1];
367     _group_chunk_size[i] = 0;
368   }
369 }
370 
371 void ShenandoahRegionChunkIterator::reset() {
372   _index = 0;
373 }