/*
 * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"

#include "gc/shenandoah/shenandoahHeap.inline.hpp"
#include "gc/shenandoah/shenandoahOldGeneration.hpp"
#include "gc/shenandoah/shenandoahOopClosures.inline.hpp"
#include "gc/shenandoah/shenandoahReferenceProcessor.hpp"
#include "gc/shenandoah/shenandoahScanRemembered.inline.hpp"
#include "logging/log.hpp"

// A closure that takes an oop in the old generation and, if it's pointing
// into the young generation, dirties the corresponding remembered set entry.
// This is only used to rebuild the remembered set after a full GC.
class ShenandoahDirtyRememberedSetClosure : public BasicOopIterateClosure {
protected:
  ShenandoahHeap*    const _heap;
  RememberedScanner* const _scanner;

public:
  ShenandoahDirtyRememberedSetClosure() :
          _heap(ShenandoahHeap::heap()),
          _scanner(_heap->card_scan()) {}

  template<class T>
  inline void work(T* p) {
    assert(_heap->is_in_old(p), "Expecting to get an old gen address");
    T o = RawAccess<>::oop_load(p);
    if (!CompressedOops::is_null(o)) {
      oop obj = CompressedOops::decode_not_null(o);
      if (_heap->is_in_young(obj)) {
        // Dirty the card containing the cross-generational pointer.
        _scanner->mark_card_as_dirty((HeapWord*) p);
      }
    }
  }

  virtual void do_oop(narrowOop* p) { work(p); }
  virtual void do_oop(oop* p)       { work(p); }
};

ShenandoahDirectCardMarkRememberedSet::ShenandoahDirectCardMarkRememberedSet(ShenandoahCardTable* card_table, size_t total_card_count) :
  LogCardValsPerIntPtr(log2i_exact(sizeof(intptr_t)) - log2i_exact(sizeof(CardValue))),
  LogCardSizeInWords(log2i_exact(CardTable::card_size_in_words())) {

  // Paranoid assert for the LogCardValsPerIntPtr calculation above
  assert(sizeof(intptr_t) > sizeof(CardValue), "LogCardValsPerIntPtr would underflow");

  _heap = ShenandoahHeap::heap();
  _card_table = card_table;
  _total_card_count = total_card_count;
  _cluster_count = total_card_count / ShenandoahCardCluster<ShenandoahDirectCardMarkRememberedSet>::CardsPerCluster;
  _card_shift = CardTable::card_shift();

  _byte_map = _card_table->byte_for_index(0);

  _whole_heap_base = _card_table->addr_for(_byte_map);
  _byte_map_base = _byte_map - (uintptr_t(_whole_heap_base) >> _card_shift);
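  // With this biased base, the card entry for a heap address addr can be found directly as
  //   _byte_map_base + (uintptr_t(addr) >> _card_shift)
  // which is the same biased-card-table arithmetic that CardTable itself relies on.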

  assert(total_card_count % ShenandoahCardCluster<ShenandoahDirectCardMarkRememberedSet>::CardsPerCluster == 0, "Invalid card count.");
  assert(total_card_count > 0, "Card count cannot be zero.");
}

// Merge any dirty values from write table into the read table, while leaving
// the write table unchanged.
void ShenandoahDirectCardMarkRememberedSet::merge_write_table(HeapWord* start, size_t word_count) {
  size_t start_index = card_index_for_addr(start);
#ifdef ASSERT
  // avoid querying card_index_for_addr() for an address past end of heap
  size_t end_index = card_index_for_addr(start + word_count - 1) + 1;
#endif
  assert(start_index % ((size_t)1 << LogCardValsPerIntPtr) == 0, "Expected a multiple of CardValsPerIntPtr");
  assert(end_index % ((size_t)1 << LogCardValsPerIntPtr) == 0, "Expected a multiple of CardValsPerIntPtr");

  // We'll access in groups of intptr_t worth of card entries
  intptr_t* const read_table  = (intptr_t*) &(_card_table->read_byte_map())[start_index];
  intptr_t* const write_table = (intptr_t*) &(_card_table->write_byte_map())[start_index];

  // Avoid division, use shift instead
  assert(word_count % ((size_t)1 << (LogCardSizeInWords + LogCardValsPerIntPtr)) == 0, "Expected a multiple of CardSizeInWords*CardValsPerIntPtr");
  size_t const num = word_count >> (LogCardSizeInWords + LogCardValsPerIntPtr);

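  // In the card table encoding, dirty_card_val() is zero and clean_card_val() has all bits set, so a
  // bitwise AND over word-sized chunks copies every dirty mark from the write table into the read table
  // while leaving entries that are already dirty in the read table dirty.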
  for (size_t i = 0; i < num; i++) {
    read_table[i] &= write_table[i];
  }
}

// Destructively copy the write table to the read table, and clean the write table.
void ShenandoahDirectCardMarkRememberedSet::reset_remset(HeapWord* start, size_t word_count) {
  size_t start_index = card_index_for_addr(start);
#ifdef ASSERT
  // avoid querying card_index_for_addr() for an address past end of heap
  size_t end_index = card_index_for_addr(start + word_count - 1) + 1;
#endif
  assert(start_index % ((size_t)1 << LogCardValsPerIntPtr) == 0, "Expected a multiple of CardValsPerIntPtr");
  assert(end_index % ((size_t)1 << LogCardValsPerIntPtr) == 0, "Expected a multiple of CardValsPerIntPtr");

  // We'll access in groups of intptr_t worth of card entries
  intptr_t* const read_table  = (intptr_t*) &(_card_table->read_byte_map())[start_index];
  intptr_t* const write_table = (intptr_t*) &(_card_table->write_byte_map())[start_index];

  // Avoid division, use shift instead
  assert(word_count % ((size_t)1 << (LogCardSizeInWords + LogCardValsPerIntPtr)) == 0, "Expected a multiple of CardSizeInWords*CardValsPerIntPtr");
  size_t const num = word_count >> (LogCardSizeInWords + LogCardValsPerIntPtr);

  for (size_t i = 0; i < num; i++) {
    read_table[i]  = write_table[i];
    write_table[i] = CardTable::clean_card_row_val();
  }
}

ShenandoahScanRememberedTask::ShenandoahScanRememberedTask(ShenandoahObjToScanQueueSet* queue_set,
                                                           ShenandoahObjToScanQueueSet* old_queue_set,
                                                           ShenandoahReferenceProcessor* rp,
                                                           ShenandoahRegionChunkIterator* work_list, bool is_concurrent) :
  WorkerTask("Scan Remembered Set"),
  _queue_set(queue_set), _old_queue_set(old_queue_set), _rp(rp), _work_list(work_list), _is_concurrent(is_concurrent) {
  bool old_bitmap_stable = ShenandoahHeap::heap()->old_generation()->is_mark_complete();
  log_info(gc, remset)("Scan remembered set using bitmap: %s", BOOL_TO_STR(old_bitmap_stable));
}

void ShenandoahScanRememberedTask::work(uint worker_id) {
  if (_is_concurrent) {
    // This sets up a thread local reference to the worker_id which is needed by the weak reference processor.
    ShenandoahConcurrentWorkerSession worker_session(worker_id);
    ShenandoahSuspendibleThreadSetJoiner stsj;
    do_work(worker_id);
  } else {
    // This sets up a thread local reference to the worker_id which is needed by the weak reference processor.
    ShenandoahParallelWorkerSession worker_session(worker_id);
    do_work(worker_id);
  }
}

void ShenandoahScanRememberedTask::do_work(uint worker_id) {
  ShenandoahWorkerTimingsTracker x(ShenandoahPhaseTimings::init_scan_rset, ShenandoahPhaseTimings::ScanClusters, worker_id);

  ShenandoahObjToScanQueue* q = _queue_set->queue(worker_id);
  ShenandoahObjToScanQueue* old = _old_queue_set == nullptr ? nullptr : _old_queue_set->queue(worker_id);
  ShenandoahMarkRefsClosure<YOUNG> cl(q, _rp, old);
  ShenandoahHeap* heap = ShenandoahHeap::heap();
  RememberedScanner* scanner = heap->card_scan();

  // Set up the thread-local mark closure for the Shenandoah reference processor.
  _rp->set_mark_closure(worker_id, &cl);
  struct ShenandoahRegionChunk assignment;
  while (_work_list->next(&assignment)) {
    ShenandoahHeapRegion* region = assignment._r;
    log_debug(gc)("ShenandoahScanRememberedTask::do_work(%u), processing slice of region "
                  SIZE_FORMAT " at offset " SIZE_FORMAT ", size: " SIZE_FORMAT,
                  worker_id, region->index(), assignment._chunk_offset, assignment._chunk_size);
    if (region->is_old()) {
      size_t cluster_size =
        CardTable::card_size_in_words() * ShenandoahCardCluster<ShenandoahDirectCardMarkRememberedSet>::CardsPerCluster;
      size_t clusters = assignment._chunk_size / cluster_size;
      assert(clusters * cluster_size == assignment._chunk_size, "Chunk assignments must align on cluster boundaries");
      HeapWord* end_of_range = region->bottom() + assignment._chunk_offset + assignment._chunk_size;

      // During concurrent mark, region->top() equals TAMS with respect to the current young-gen pass.
      if (end_of_range > region->top()) {
        end_of_range = region->top();
      }
      scanner->process_region_slice(region, assignment._chunk_offset, clusters, end_of_range, &cl, false, worker_id);
    }
#ifdef ENABLE_REMEMBERED_SET_CANCELLATION
    // This check is currently disabled to avoid crashes that occur
    // when we try to cancel remembered set scanning; it should be re-enabled
    // after the issues are fixed, as it would allow more prompt cancellation and
    // transition to degenerated / full GCs. Note that work that has been assigned/
    // claimed above must be completed before we return here upon cancellation.
    if (heap->check_cancelled_gc_and_yield(_is_concurrent)) {
      return;
    }
#endif
  }
}

size_t ShenandoahRegionChunkIterator::calc_regular_group_size() {
  // The group size is calculated from the number of regions.  Suppose the heap has N regions.  The first group processes
  // N/2 regions.  The second group processes N/4 regions, the third group N/8 regions and so on.
  // Note that the infinite series N/2 + N/4 + N/8 + N/16 + ...  sums to N.
  //
  // The normal group size is the number of regions / 2.
  //
  // In the case that region_size_words is greater than _maximum_chunk_size_words, the first group's size is
  // larger than the normal group size because each chunk in the group will be smaller than the region size.
  //
  // The last group may also have more than the normal number of entries because it finishes the total scanning effort.  The
  // chunk sizes are different for each group.  The intention is that the first group processes roughly half of the heap, the
  // second processes half of the remaining heap, the third processes half of what remains and so on.  The smallest chunk size
  // is represented by _smallest_chunk_size_words.  We do not divide work any smaller than this.
  //
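  // Illustration only (hypothetical numbers): with 1024 regions the regular group size is 512 chunks.
  // If each chunk of the first group spans a whole region, that group covers half of the heap; the second
  // group's chunks each span half a region, covering a quarter of the heap, and so on, halving each time.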

  size_t group_size = _heap->num_regions() / 2;
  return group_size;
}

size_t ShenandoahRegionChunkIterator::calc_first_group_chunk_size_b4_rebalance() {
  size_t words_in_first_chunk = ShenandoahHeapRegion::region_size_words();
  return words_in_first_chunk;
}

size_t ShenandoahRegionChunkIterator::calc_num_groups() {
  size_t total_heap_size = _heap->num_regions() * ShenandoahHeapRegion::region_size_words();
  size_t num_groups = 0;
  size_t cumulative_group_span = 0;
  size_t current_group_span = _first_group_chunk_size_b4_rebalance * _regular_group_size;
  size_t smallest_group_span = smallest_chunk_size_words() * _regular_group_size;
  while ((num_groups < _maximum_groups) && (cumulative_group_span + current_group_span <= total_heap_size)) {
    num_groups++;
    cumulative_group_span += current_group_span;
    if (current_group_span <= smallest_group_span) {
      break;
    } else {
      current_group_span /= 2;    // Each group spans half of what the preceding group spanned.
    }
  }
  // Loop post condition:
  //   num_groups <= _maximum_groups
  //   cumulative_group_span is the memory spanned by num_groups
  //   current_group_span is the span of the last fully populated group (assuming the loop iterates at least once)
  //   each of the num_groups is fully populated with _regular_group_size chunks
  // Non post conditions:
  //   cumulative_group_span may be less than total_heap_size for one or more of the following reasons:
  //   a) The number of regions remaining to be spanned is smaller than a complete group, or
  //   b) We have filled up all groups through _maximum_groups and still have not spanned all regions

  if (cumulative_group_span < total_heap_size) {
    // We've got more regions to span
    if ((num_groups < _maximum_groups) && (current_group_span > smallest_group_span)) {
      num_groups++;             // Place all remaining regions into a new not-full group (chunk_size half that of the previous group)
    }
    // Else we are unable to create a new group because we've exceeded the number of allowed groups or have reached the
    // minimum chunk size.

    // Any remaining regions will be treated as if they are part of the most recently created group.  This group will
    // have more than _regular_group_size chunks within it.
  }
  return num_groups;
}

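// Calculates the total number of chunks across all groups.  Each fully populated group contributes
// _regular_group_size chunks; the first group may contribute more when the region size exceeds the
// maximum chunk size, and the final group absorbs whatever of the heap remains at its chunk granularity.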
size_t ShenandoahRegionChunkIterator::calc_total_chunks() {
  size_t region_size_words = ShenandoahHeapRegion::region_size_words();
  size_t unspanned_heap_size = _heap->num_regions() * region_size_words;
  size_t num_chunks = 0;
  size_t cumulative_group_span = 0;
  size_t current_group_span = _first_group_chunk_size_b4_rebalance * _regular_group_size;
  size_t smallest_group_span = smallest_chunk_size_words() * _regular_group_size;

  // The first group gets special handling because the first chunk size can be no larger than _maximum_chunk_size_words
  if (region_size_words > _maximum_chunk_size_words) {
    // In the case that we shrink the first group's chunk size, certain other groups will also be subsumed within the first group
    size_t effective_chunk_size = _first_group_chunk_size_b4_rebalance;
    while (effective_chunk_size >= _maximum_chunk_size_words) {
      num_chunks += current_group_span / _maximum_chunk_size_words;
      unspanned_heap_size -= current_group_span;
      effective_chunk_size /= 2;
      current_group_span /= 2;
    }
  } else {
    num_chunks = _regular_group_size;
    unspanned_heap_size -= current_group_span;
    current_group_span /= 2;
  }
  size_t spanned_groups = 1;
  while (unspanned_heap_size > 0) {
    if (current_group_span <= unspanned_heap_size) {
      unspanned_heap_size -= current_group_span;
      num_chunks += _regular_group_size;
      spanned_groups++;

      // _num_groups is the number of groups required to span the configured heap size.  We are not allowed
      // to change the number of groups.  The last group is responsible for spanning all chunks not spanned
      // by previously processed groups.
      if (spanned_groups >= _num_groups) {
        // The last group has more than _regular_group_size entries.
        size_t chunk_span = current_group_span / _regular_group_size;
        size_t extra_chunks = unspanned_heap_size / chunk_span;
        assert (extra_chunks * chunk_span == unspanned_heap_size, "Chunks must precisely span regions");
        num_chunks += extra_chunks;
        return num_chunks;
      } else if (current_group_span <= smallest_group_span) {
        // We cannot introduce new groups because we've reached the lower bound on group size.  So this last
        // group may hold extra chunks.
        size_t chunk_span = smallest_chunk_size_words();
        size_t extra_chunks = unspanned_heap_size / chunk_span;
        assert (extra_chunks * chunk_span == unspanned_heap_size, "Chunks must precisely span regions");
        num_chunks += extra_chunks;
        return num_chunks;
      } else {
        current_group_span /= 2;
      }
    } else {
      // This last group has fewer than _regular_group_size entries.
      size_t chunk_span = current_group_span / _regular_group_size;
      size_t last_group_size = unspanned_heap_size / chunk_span;
      assert (last_group_size * chunk_span == unspanned_heap_size, "Chunks must precisely span regions");
      num_chunks += last_group_size;
      return num_chunks;
    }
  }
  return num_chunks;
}

ShenandoahRegionChunkIterator::ShenandoahRegionChunkIterator(size_t worker_count) :
    ShenandoahRegionChunkIterator(ShenandoahHeap::heap(), worker_count)
{
}

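// Precomputes the per-group lookup tables: _group_chunk_size[] holds each group's chunk size in words,
// _group_entries[] the cumulative chunk count through each group, and _region_index[] / _group_offset[]
// the heap region and intra-region offset at which each group begins.  next() consults these tables to
// translate a global chunk index into a (region, offset, size) assignment.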
ShenandoahRegionChunkIterator::ShenandoahRegionChunkIterator(ShenandoahHeap* heap, size_t worker_count) :
    _heap(heap),
    _regular_group_size(calc_regular_group_size()),
    _first_group_chunk_size_b4_rebalance(calc_first_group_chunk_size_b4_rebalance()),
    _num_groups(calc_num_groups()),
    _total_chunks(calc_total_chunks()),
    _index(0)
{
#ifdef ASSERT
  size_t expected_chunk_size_words = _clusters_in_smallest_chunk * CardTable::card_size_in_words() * ShenandoahCardCluster<ShenandoahDirectCardMarkRememberedSet>::CardsPerCluster;
  assert(smallest_chunk_size_words() == expected_chunk_size_words, "_smallest_chunk_size (" SIZE_FORMAT ") is not valid because it does not equal (" SIZE_FORMAT ")",
         smallest_chunk_size_words(), expected_chunk_size_words);
#endif
  assert(_num_groups <= _maximum_groups,
         "The number of remembered set scanning groups must be less than or equal to maximum groups");
  assert(smallest_chunk_size_words() << (_maximum_groups - 1) == _maximum_chunk_size_words,
         "Maximum number of groups needs to span maximum chunk size to smallest chunk size");

  size_t words_in_region = ShenandoahHeapRegion::region_size_words();
  _region_index[0] = 0;
  _group_offset[0] = 0;
  if (words_in_region > _maximum_chunk_size_words) {
    // In the case that we shrink the first group's chunk size, certain other groups will also be subsumed within the first group
    size_t num_chunks = 0;
    size_t effective_chunk_size = _first_group_chunk_size_b4_rebalance;
    size_t current_group_span = effective_chunk_size * _regular_group_size;
    while (effective_chunk_size >= _maximum_chunk_size_words) {
      num_chunks += current_group_span / _maximum_chunk_size_words;
      effective_chunk_size /= 2;
      current_group_span /= 2;
    }
    _group_entries[0] = num_chunks;
    _group_chunk_size[0] = _maximum_chunk_size_words;
  } else {
    _group_entries[0] = _regular_group_size;
    _group_chunk_size[0] = _first_group_chunk_size_b4_rebalance;
  }

  size_t previous_group_span = _group_entries[0] * _group_chunk_size[0];
  for (size_t i = 1; i < _num_groups; i++) {
    size_t previous_group_entries = (i == 1)? _group_entries[0]: (_group_entries[i-1] - _group_entries[i-2]);
    _group_chunk_size[i] = _group_chunk_size[i-1] / 2;
    size_t chunks_in_group = _regular_group_size;
    size_t this_group_span = _group_chunk_size[i] * chunks_in_group;
    size_t total_span_of_groups = previous_group_span + this_group_span;
    _region_index[i] = previous_group_span / words_in_region;
    _group_offset[i] = previous_group_span % words_in_region;
    _group_entries[i] = _group_entries[i-1] + _regular_group_size;
    previous_group_span = total_span_of_groups;
  }
  if (_group_entries[_num_groups-1] < _total_chunks) {
    assert((_total_chunks - _group_entries[_num_groups-1]) * _group_chunk_size[_num_groups-1] + previous_group_span ==
           heap->num_regions() * words_in_region, "Total region chunks (" SIZE_FORMAT
           ") do not span total heap regions (" SIZE_FORMAT ")", _total_chunks, _heap->num_regions());
    previous_group_span += (_total_chunks - _group_entries[_num_groups-1]) * _group_chunk_size[_num_groups-1];
    _group_entries[_num_groups-1] = _total_chunks;
  }
  assert(previous_group_span == heap->num_regions() * words_in_region, "Total region chunks (" SIZE_FORMAT
         ") do not span total heap regions (" SIZE_FORMAT "): " SIZE_FORMAT " does not equal " SIZE_FORMAT,
         _total_chunks, _heap->num_regions(), previous_group_span, heap->num_regions() * words_in_region);

  // Not necessary, but keeps things tidy
  for (size_t i = _num_groups; i < _maximum_groups; i++) {
    _region_index[i] = 0;
    _group_offset[i] = 0;
    _group_entries[i] = _group_entries[i-1];
    _group_chunk_size[i] = 0;
  }
}

void ShenandoahRegionChunkIterator::reset() {
  _index = 0;
}

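// Rebuilds the remembered set after a full GC: for each active old region, the card table and the
// object registration metadata are reset, and then each object is re-registered and any cards covering
// old-to-young pointers are re-dirtied via ShenandoahDirtyRememberedSetClosure.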
ShenandoahReconstructRememberedSetTask::ShenandoahReconstructRememberedSetTask(ShenandoahRegionIterator* regions)
  : WorkerTask("Shenandoah Reconstruct Remembered Set")
  , _regions(regions) { }

void ShenandoahReconstructRememberedSetTask::work(uint worker_id) {
  ShenandoahParallelWorkerSession worker_session(worker_id);
  ShenandoahHeapRegion* r = _regions->next();
  ShenandoahHeap* heap = ShenandoahHeap::heap();
  RememberedScanner* scanner = heap->card_scan();
  ShenandoahDirtyRememberedSetClosure dirty_cards_for_cross_generational_pointers;

  while (r != nullptr) {
    if (r->is_old() && r->is_active()) {
      HeapWord* obj_addr = r->bottom();
      if (r->is_humongous_start()) {
        // Determine the extent of the humongous object, which may span multiple regions.
        oop obj = cast_to_oop(obj_addr);
        size_t size = obj->size();

        // First, clear the remembered set for all spanned humongous regions
        size_t num_regions = ShenandoahHeapRegion::required_regions(size * HeapWordSize);
        size_t region_span = num_regions * ShenandoahHeapRegion::region_size_words();
        scanner->reset_remset(r->bottom(), region_span);
        size_t region_index = r->index();
        while (num_regions-- != 0) {
          ShenandoahHeapRegion* humongous_region = heap->get_region(region_index++);
          scanner->reset_object_range(humongous_region->bottom(), humongous_region->end());
        }

        // Then register the humongous object and DIRTY relevant remembered set cards
        scanner->register_object_without_lock(obj_addr);
        obj->oop_iterate(&dirty_cards_for_cross_generational_pointers);
      } else if (!r->is_humongous()) {
        // First, clear the remembered set
        scanner->reset_remset(r->bottom(), ShenandoahHeapRegion::region_size_words());
        scanner->reset_object_range(r->bottom(), r->end());

        // Then iterate over all objects, registering each object and DIRTYing relevant remembered set cards
        HeapWord* t = r->top();
        while (obj_addr < t) {
          oop obj = cast_to_oop(obj_addr);
          scanner->register_object_without_lock(obj_addr);
          obj_addr += obj->oop_iterate_size(&dirty_cards_for_cross_generational_pointers);
        }
      } // else, ignore humongous continuation region
    }
    // else, this region is FREE or YOUNG or inactive and we can ignore it.
    r = _regions->next();
  }
}