/*
 * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */


#include "precompiled.hpp"

#include "gc/shenandoah/shenandoahClosures.inline.hpp"
#include "gc/shenandoah/shenandoahHeap.inline.hpp"
#include "gc/shenandoah/shenandoahOldGeneration.hpp"
#include "gc/shenandoah/shenandoahReferenceProcessor.hpp"
#include "gc/shenandoah/shenandoahScanRemembered.inline.hpp"
#include "logging/log.hpp"

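// Simple accessors and card-marking primitives for the direct card-mark remembered set.
// Throughout, the "write" card table is the one actively dirtied by mutators, while the
// "read" table is the stable copy consulted by the remembered set scanner.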
size_t ShenandoahDirectCardMarkRememberedSet::last_valid_index() const {
  return _card_table->last_valid_index();
}

size_t ShenandoahDirectCardMarkRememberedSet::total_cards() const {
  return _total_card_count;
}

size_t ShenandoahDirectCardMarkRememberedSet::card_index_for_addr(HeapWord *p) const {
  return _card_table->index_for(p);
}

HeapWord* ShenandoahDirectCardMarkRememberedSet::addr_for_card_index(size_t card_index) const {
  return _whole_heap_base + CardTable::card_size_in_words() * card_index;
}

bool ShenandoahDirectCardMarkRememberedSet::is_write_card_dirty(size_t card_index) const {
  CardValue* bp = &(_card_table->write_byte_map())[card_index];
  return (bp[0] == CardTable::dirty_card_val());
}

bool ShenandoahDirectCardMarkRememberedSet::is_card_dirty(size_t card_index) const {
  CardValue* bp = &(_card_table->read_byte_map())[card_index];
  return (bp[0] == CardTable::dirty_card_val());
}

void ShenandoahDirectCardMarkRememberedSet::mark_card_as_dirty(size_t card_index) {
  CardValue* bp = &(_card_table->write_byte_map())[card_index];
  bp[0] = CardTable::dirty_card_val();
}

void ShenandoahDirectCardMarkRememberedSet::mark_range_as_dirty(size_t card_index, size_t num_cards) {
  CardValue* bp = &(_card_table->write_byte_map())[card_index];
  while (num_cards-- > 0) {
    *bp++ = CardTable::dirty_card_val();
  }
}

void ShenandoahDirectCardMarkRememberedSet::mark_card_as_clean(size_t card_index) {
  CardValue* bp = &(_card_table->write_byte_map())[card_index];
  bp[0] = CardTable::clean_card_val();
}

void ShenandoahDirectCardMarkRememberedSet::mark_range_as_clean(size_t card_index, size_t num_cards) {
  CardValue* bp = &(_card_table->write_byte_map())[card_index];
  while (num_cards-- > 0) {
    *bp++ = CardTable::clean_card_val();
  }
}

bool ShenandoahDirectCardMarkRememberedSet::is_card_dirty(HeapWord* p) const {
  size_t index = card_index_for_addr(p);
  CardValue* bp = &(_card_table->read_byte_map())[index];
  return (bp[0] == CardTable::dirty_card_val());
}

bool ShenandoahDirectCardMarkRememberedSet::is_write_card_dirty(HeapWord* p) const {
  size_t index = card_index_for_addr(p);
  CardValue* bp = &(_card_table->write_byte_map())[index];
  return (bp[0] == CardTable::dirty_card_val());
}

void ShenandoahDirectCardMarkRememberedSet::mark_card_as_dirty(HeapWord* p) {
  size_t index = card_index_for_addr(p);
  CardValue* bp = &(_card_table->write_byte_map())[index];
  bp[0] = CardTable::dirty_card_val();
}

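// Dirty every card covering [p, p + num_heap_words), rounding the end up so that a
// partially covered final card is also dirtied.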
void ShenandoahDirectCardMarkRememberedSet::mark_range_as_dirty(HeapWord* p, size_t num_heap_words) {
  CardValue* bp = &(_card_table->write_byte_map_base())[uintptr_t(p) >> _card_shift];
  CardValue* end_bp = &(_card_table->write_byte_map_base())[uintptr_t(p + num_heap_words) >> _card_shift];
  // If (p + num_heap_words) is not aligned on a card boundary, we also need to dirty the last card.
  if (((unsigned long long) (p + num_heap_words)) & (CardTable::card_size() - 1)) {
    end_bp++;
  }
  while (bp < end_bp) {
    *bp++ = CardTable::dirty_card_val();
  }
}

void ShenandoahDirectCardMarkRememberedSet::mark_card_as_clean(HeapWord* p) {
  size_t index = card_index_for_addr(p);
  CardValue* bp = &(_card_table->write_byte_map())[index];
  bp[0] = CardTable::clean_card_val();
}

void ShenandoahDirectCardMarkRememberedSet::mark_range_as_clean(HeapWord* p, size_t num_heap_words) {
  CardValue* bp = &(_card_table->write_byte_map_base())[uintptr_t(p) >> _card_shift];
  CardValue* end_bp = &(_card_table->write_byte_map_base())[uintptr_t(p + num_heap_words) >> _card_shift];
  // If (p + num_heap_words) is not aligned on a card boundary, we also need to clean the last card.
  if (((unsigned long long) (p + num_heap_words)) & (CardTable::card_size() - 1)) {
    end_bp++;
  }
  while (bp < end_bp) {
    *bp++ = CardTable::clean_card_val();
  }
}

// No lock required because arguments align with card boundaries.
void ShenandoahCardCluster::reset_object_range(HeapWord* from, HeapWord* to) {
  assert(((((unsigned long long) from) & (CardTable::card_size() - 1)) == 0) &&
         ((((unsigned long long) to) & (CardTable::card_size() - 1)) == 0),
         "reset_object_range bounds must align with card boundaries");
  size_t card_at_start = _rs->card_index_for_addr(from);
  size_t num_cards = (to - from) / CardTable::card_size_in_words();

  for (size_t i = 0; i < num_cards; i++) {
    _object_starts[card_at_start + i].short_word = 0;
  }
}

// Assume only one thread at a time registers objects pertaining to
// each card-table entry's range of memory.
void ShenandoahCardCluster::register_object(HeapWord* address) {
  shenandoah_assert_heaplocked();

  register_object_without_lock(address);
}

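// Record that an object starts at this address: update the first/last object-start
// offsets tracked for the card containing the address.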
void ShenandoahCardCluster::register_object_without_lock(HeapWord* address) {
  size_t card_at_start = _rs->card_index_for_addr(address);
  HeapWord* card_start_address = _rs->addr_for_card_index(card_at_start);
  uint8_t offset_in_card = checked_cast<uint8_t>(pointer_delta(address, card_start_address));

  if (!starts_object(card_at_start)) {
    set_starts_object_bit(card_at_start);
    set_first_start(card_at_start, offset_in_card);
    set_last_start(card_at_start, offset_in_card);
  } else {
    if (offset_in_card < get_first_start(card_at_start))
      set_first_start(card_at_start, offset_in_card);
    if (offset_in_card > get_last_start(card_at_start))
      set_last_start(card_at_start, offset_in_card);
  }
}

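// A single object now spans [address, address + length_in_words), subsuming objects that
// were previously registered there. Remove object-start records that fall inside the
// coalesced range, adjusting the boundary cards' first/last starts as needed.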
void ShenandoahCardCluster::coalesce_objects(HeapWord* address, size_t length_in_words) {

  size_t card_at_start = _rs->card_index_for_addr(address);
  HeapWord* card_start_address = _rs->addr_for_card_index(card_at_start);
  size_t card_at_end = card_at_start + ((address + length_in_words) - card_start_address) / CardTable::card_size_in_words();

  if (card_at_start == card_at_end) {
    // There are no changes to the get_first_start array.  Either get_first_start(card_at_start) returns this coalesced object,
    // or it returns an object that precedes the coalesced object.
    if (card_start_address + get_last_start(card_at_start) < address + length_in_words) {
      uint8_t coalesced_offset = checked_cast<uint8_t>(pointer_delta(address, card_start_address));
      // The object that used to be the last object starting within this card is being subsumed within the coalesced
      // object.  Since we always coalesce entire objects, this condition only occurs if the last object ends before or at
      // the end of the card's memory range and there is no object following this object.  In this case, adjust last_start
      // to represent the start of the coalesced range.
      set_last_start(card_at_start, coalesced_offset);
    }
    // Else, no changes to last_starts information.  Either get_last_start(card_at_start) returns the object that immediately
    // follows the coalesced object, or it returns an object that follows the object immediately following the coalesced object.
  } else {
    uint8_t coalesced_offset = checked_cast<uint8_t>(pointer_delta(address, card_start_address));
    if (get_last_start(card_at_start) > coalesced_offset) {
      // Existing last start is being coalesced, create new last start
      set_last_start(card_at_start, coalesced_offset);
    }
    // otherwise, get_last_start(card_at_start) must equal coalesced_offset

    // All the cards between first and last get cleared.
    for (size_t i = card_at_start + 1; i < card_at_end; i++) {
      clear_starts_object_bit(i);
    }

    uint8_t follow_offset = checked_cast<uint8_t>((address + length_in_words) - _rs->addr_for_card_index(card_at_end));
    if (starts_object(card_at_end) && (get_first_start(card_at_end) < follow_offset)) {
      // It may be that after coalescing within this last card's memory range, the last card
      // no longer holds an object.
      if (get_last_start(card_at_end) >= follow_offset) {
        set_first_start(card_at_end, follow_offset);
      } else {
        // last_start is being coalesced so this card no longer has any objects.
        clear_starts_object_bit(card_at_end);
      }
    }
    // else
    //  card_at_end did not have an object, so it still does not have an object, or
    //  card_at_end had an object that starts after the coalesced object, so no changes required for card_at_end

  }
}


size_t ShenandoahCardCluster::get_first_start(size_t card_index) const {
  assert(starts_object(card_index), "Can't get first start because no object starts here");
  return _object_starts[card_index].offsets.first & FirstStartBits;
}

size_t ShenandoahCardCluster::get_last_start(size_t card_index) const {
  assert(starts_object(card_index), "Can't get last start because no object starts here");
  return _object_starts[card_index].offsets.last;
}

// Given a card_index, return the starting address of the first block in the heap
// that straddles into this card. If this card is co-initial with an object, then
// this would return the first address of the range that this card covers, which is
// where the card's first object also begins.
HeapWord* ShenandoahCardCluster::block_start(const size_t card_index) const {

  HeapWord* left = _rs->addr_for_card_index(card_index);

#ifdef ASSERT
  assert(ShenandoahHeap::heap()->mode()->is_generational(), "Do not use in non-generational mode");
  ShenandoahHeapRegion* region = ShenandoahHeap::heap()->heap_region_containing(left);
  assert(region->is_old(), "Do not use for young regions");
  // For humongous regions it's more efficient to jump directly to the
  // humongous start region.
  assert(!region->is_humongous(), "Use region->humongous_start_region() instead");
#endif
  if (starts_object(card_index) && get_first_start(card_index) == 0) {
    // This card contains a co-initial object; a fortiori, it covers
    // also the case of a card being the first in a region.
    assert(oopDesc::is_oop(cast_to_oop(left)), "Should be an object");
    return left;
  }

  HeapWord* p = nullptr;
  oop obj = cast_to_oop(p);
  ssize_t cur_index = (ssize_t)card_index;
  assert(cur_index >= 0, "Overflow");
  assert(cur_index > 0, "Should have returned above");
  // Walk backwards over the cards...
  while (--cur_index > 0 && !starts_object(cur_index)) {
   // ... to the one that starts the object
  }
  // cur_index should start an object: we should not have walked
  // past the left end of the region.
  assert(cur_index >= 0 && (cur_index <= (ssize_t)card_index), "Error");
  assert(region->bottom() <= _rs->addr_for_card_index(cur_index),
         "Fell off the bottom of containing region");
  assert(starts_object(cur_index), "Error");
  size_t offset = get_last_start(cur_index);
  // We could avoid this call by using card-size arithmetic instead.
  p = _rs->addr_for_card_index(cur_index) + offset;
  // Recall that we already dealt with the co-initial object case above
  assert(p < left, "obj should start before left");
  // While it is safe to ask an object its size in the loop that
  // follows, the (ifdef'd out) loop should never be needed.
  // 1. we ask this question only for regions in the old generation
  // 2. there is no direct allocation ever by mutators in old generation
  //    regions. Only GC will ever allocate in old regions, and then
  //    too only during promotion/evacuation phases. Thus there is no danger
  //    of races between reading from and writing to the object start array,
  //    or of asking partially initialized objects their size (in the loop below).
  // 3. only GC asks this question during phases when it is not concurrently
  //    evacuating/promoting, viz. during concurrent root scanning (before
  //    the evacuation phase) and during concurrent update refs (after the
  //    evacuation phase) of young collections. This is never called
  //    during old or global collections.
  // 4. Every allocation under TAMS updates the object start array.
  NOT_PRODUCT(obj = cast_to_oop(p);)
  assert(oopDesc::is_oop(obj), "Should be an object");
#define WALK_FORWARD_IN_BLOCK_START false
  while (WALK_FORWARD_IN_BLOCK_START && p + obj->size() < left) {
    p += obj->size();
  }
#undef WALK_FORWARD_IN_BLOCK_START // false
  assert(p + obj->size() > left, "obj should end after left");
  return p;
}

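// The ShenandoahScanRemembered methods below are thin wrappers that delegate to the
// underlying remembered set (_rs) and object-start bookkeeping (_scc).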
size_t ShenandoahScanRemembered::card_index_for_addr(HeapWord* p) {
  return _rs->card_index_for_addr(p);
}

HeapWord* ShenandoahScanRemembered::addr_for_card_index(size_t card_index) {
  return _rs->addr_for_card_index(card_index);
}

bool ShenandoahScanRemembered::is_card_dirty(size_t card_index) {
  return _rs->is_card_dirty(card_index);
}

bool ShenandoahScanRemembered::is_write_card_dirty(size_t card_index) {
  return _rs->is_write_card_dirty(card_index);
}

bool ShenandoahScanRemembered::is_card_dirty(HeapWord* p) {
  return _rs->is_card_dirty(p);
}

void ShenandoahScanRemembered::mark_card_as_dirty(HeapWord* p) {
  _rs->mark_card_as_dirty(p);
}

bool ShenandoahScanRemembered::is_write_card_dirty(HeapWord* p) {
  return _rs->is_write_card_dirty(p);
}

void ShenandoahScanRemembered::mark_range_as_dirty(HeapWord* p, size_t num_heap_words) {
  _rs->mark_range_as_dirty(p, num_heap_words);
}

void ShenandoahScanRemembered::mark_card_as_clean(HeapWord* p) {
  _rs->mark_card_as_clean(p);
}

void ShenandoahScanRemembered::mark_range_as_clean(HeapWord* p, size_t num_heap_words) {
  _rs->mark_range_as_clean(p, num_heap_words);
}

void ShenandoahScanRemembered::reset_object_range(HeapWord* from, HeapWord* to) {
  _scc->reset_object_range(from, to);
}

void ShenandoahScanRemembered::register_object(HeapWord* addr) {
  _scc->register_object(addr);
}

void ShenandoahScanRemembered::register_object_without_lock(HeapWord* addr) {
  _scc->register_object_without_lock(addr);
}

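// Verify that the object at address is properly registered: it must be reachable by
// walking forward from the enclosing card's first_start, and that card's last_start must
// be consistent with the last object that begins within the card.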
bool ShenandoahScanRemembered::verify_registration(HeapWord* address, ShenandoahMarkingContext* ctx) {

  size_t index = card_index_for_addr(address);
  if (!_scc->starts_object(index)) {
    return false;
  }
  HeapWord* base_addr = addr_for_card_index(index);
  size_t offset = _scc->get_first_start(index);
  ShenandoahHeap* heap = ShenandoahHeap::heap();

  // Verify that I can find this object within its enclosing card by scanning forward from first_start.
  while (base_addr + offset < address) {
    oop obj = cast_to_oop(base_addr + offset);
    if (!ctx || ctx->is_marked(obj)) {
      offset += obj->size();
    } else {
      // If this object is not live, don't trust its size(); all objects above tams are live.
      ShenandoahHeapRegion* r = heap->heap_region_containing(obj);
      HeapWord* tams = ctx->top_at_mark_start(r);
      offset = ctx->get_next_marked_addr(base_addr + offset, tams) - base_addr;
    }
  }
  if (base_addr + offset != address) {
    return false;
  }

  // At this point, offset represents the object whose registration we are verifying.  We know that at least this object resides
  // within this card's memory.

  // Make sure that last_offset is properly set for the enclosing card, but we can't verify this for
  // candidate collection-set regions during mixed evacuations, so disable this check in general
  // during mixed evacuations.

  ShenandoahHeapRegion* r = heap->heap_region_containing(base_addr + offset);
  size_t max_offset = r->top() - base_addr;
  if (max_offset > CardTable::card_size_in_words()) {
    max_offset = CardTable::card_size_in_words();
  }
  size_t prev_offset;
  if (!ctx) {
    do {
      oop obj = cast_to_oop(base_addr + offset);
      prev_offset = offset;
      offset += obj->size();
    } while (offset < max_offset);
    if (_scc->get_last_start(index) != prev_offset) {
      return false;
    }

    // base + offset represents address of first object that starts on following card, if there is one.

    // Notes: base_addr is addr_for_card_index(index)
    //        base_addr + offset is end of the object we are verifying
    //        cannot use card_index_for_addr(base_addr + offset) because it asserts arg < end of whole heap
    size_t end_card_index = index + offset / CardTable::card_size_in_words();

    if (end_card_index > index && end_card_index <= _rs->last_valid_index()) {
      // If there is a following object registered on the next card, it should begin where this object ends.
      if (_scc->starts_object(end_card_index) &&
          ((addr_for_card_index(end_card_index) + _scc->get_first_start(end_card_index)) != (base_addr + offset))) {
        return false;
      }
    }

    // Assure that no other objects are registered "inside" of this one.
    for (index++; index < end_card_index; index++) {
      if (_scc->starts_object(index)) {
        return false;
      }
    }
  } else {
    // This is a mixed evacuation or a global collection: rely on mark bits to identify which objects need to be properly registered
    assert(!ShenandoahHeap::heap()->is_concurrent_old_mark_in_progress(), "Cannot rely on mark context here.");
    // If the object reaching or spanning the end of this card's memory is marked, then last_offset for this card
    // should represent this object.  Otherwise, last_offset is a don't care.
    ShenandoahHeapRegion* region = heap->heap_region_containing(base_addr + offset);
    HeapWord* tams = ctx->top_at_mark_start(region);
    oop last_obj = nullptr;
    do {
      oop obj = cast_to_oop(base_addr + offset);
      if (ctx->is_marked(obj)) {
        prev_offset = offset;
        offset += obj->size();
        last_obj = obj;
      } else {
        offset = ctx->get_next_marked_addr(base_addr + offset, tams) - base_addr;
        // If there are no marked objects remaining in this region, offset equals tams - base_addr.  If this offset is
        // greater than max_offset, we will immediately exit this loop.  Otherwise, the next iteration of the loop will
        // treat the object at offset as marked and live (because address >= tams) and we will continue iterating objects
        // by consulting the size() fields of each.
      }
    } while (offset < max_offset);
    if (last_obj != nullptr && prev_offset + last_obj->size() >= max_offset) {
      // The last marked object reaches or extends beyond the end of this card
      if (_scc->get_last_start(index) != prev_offset) {
        return false;
      }
      // otherwise, the value of _scc->get_last_start(index) is a don't care because it represents a dead object and we
      // cannot verify its context
    }
  }
  return true;
}

void ShenandoahScanRemembered::coalesce_objects(HeapWord* addr, size_t length_in_words) {
  _scc->coalesce_objects(addr, length_in_words);
}

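// Clean the cards covering this range and drop all object-start information for them.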
void ShenandoahScanRemembered::mark_range_as_empty(HeapWord* addr, size_t length_in_words) {
  _rs->mark_range_as_clean(addr, length_in_words);
  _scc->clear_objects_in_range(addr, length_in_words);
}

size_t ShenandoahScanRemembered::cluster_for_addr(HeapWordImpl **addr) {
  size_t card_index = _rs->card_index_for_addr(addr);
  size_t result = card_index / ShenandoahCardCluster::CardsPerCluster;
  return result;
}

HeapWord* ShenandoahScanRemembered::addr_for_cluster(size_t cluster_no) {
  size_t card_index = cluster_no * ShenandoahCardCluster::CardsPerCluster;
  return addr_for_card_index(card_index);
}

// This is used only for debug verification so don't worry about making the scan parallel.
void ShenandoahScanRemembered::roots_do(OopIterateClosure* cl) {
  ShenandoahHeap* heap = ShenandoahHeap::heap();
  bool old_bitmap_stable = heap->old_generation()->is_mark_complete();
  log_info(gc, remset)("Scan remembered set using bitmap: %s", BOOL_TO_STR(old_bitmap_stable));
  for (size_t i = 0, n = heap->num_regions(); i < n; ++i) {
    ShenandoahHeapRegion* region = heap->get_region(i);
    if (region->is_old() && region->is_active() && !region->is_cset()) {
      HeapWord* start_of_range = region->bottom();
      HeapWord* end_of_range = region->top();
      size_t start_cluster_no = cluster_for_addr(start_of_range);
      size_t num_heapwords = end_of_range - start_of_range;
      unsigned int cluster_size = CardTable::card_size_in_words() * ShenandoahCardCluster::CardsPerCluster;
      size_t num_clusters = (size_t) ((num_heapwords - 1 + cluster_size) / cluster_size);

      // Remembered set scanner
      if (region->is_humongous()) {
        process_humongous_clusters(region->humongous_start_region(), start_cluster_no, num_clusters, end_of_range, cl,
                                   false /* use_write_table */);
      } else {
        process_clusters(start_cluster_no, num_clusters, end_of_range, cl,
                         false /* use_write_table */, 0 /* fake worker id */);
      }
    }
  }
}

#ifndef PRODUCT
// Log given card stats
void ShenandoahScanRemembered::log_card_stats(HdrSeq* stats) {
  for (int i = 0; i < MAX_CARD_STAT_TYPE; i++) {
    log_info(gc, remset)("%18s: [ %8.2f %8.2f %8.2f %8.2f %8.2f ]",
      _card_stats_name[i],
      stats[i].percentile(0), stats[i].percentile(25),
      stats[i].percentile(50), stats[i].percentile(75),
      stats[i].maximum());
  }
}

// Log card stats for all nworkers for a specific phase t
void ShenandoahScanRemembered::log_card_stats(uint nworkers, CardStatLogType t) {
  assert(ShenandoahEnableCardStats, "Do not call");
  HdrSeq* sum_stats = card_stats_for_phase(t);
  log_info(gc, remset)("%s", _card_stat_log_type[t]);
  for (uint i = 0; i < nworkers; i++) {
    log_worker_card_stats(i, sum_stats);
  }

  // Every so often, log the cumulative global stats
  if (++_card_stats_log_counter[t] >= ShenandoahCardStatsLogInterval) {
    _card_stats_log_counter[t] = 0;
    log_info(gc, remset)("Cumulative stats");
    log_card_stats(sum_stats);
  }
}

// Log card stats for given worker_id, & clear them after merging into given cumulative stats
void ShenandoahScanRemembered::log_worker_card_stats(uint worker_id, HdrSeq* sum_stats) {
  assert(ShenandoahEnableCardStats, "Do not call");

  HdrSeq* worker_card_stats = card_stats(worker_id);
  log_info(gc, remset)("Worker %u Card Stats: ", worker_id);
  log_card_stats(worker_card_stats);
  // Merge worker stats into the cumulative stats & clear worker stats
  merge_worker_card_stats_cumulative(worker_card_stats, sum_stats);
}

void ShenandoahScanRemembered::merge_worker_card_stats_cumulative(
  HdrSeq* worker_stats, HdrSeq* sum_stats) {
  for (int i = 0; i < MAX_CARD_STAT_TYPE; i++) {
    sum_stats[i].add(worker_stats[i]);
    worker_stats[i].clear();
  }
}
#endif

// A closure that takes an oop in the old generation and, if it's pointing
// into the young generation, dirties the corresponding remembered set entry.
// This is only used to rebuild the remembered set after a full GC.
class ShenandoahDirtyRememberedSetClosure : public BasicOopIterateClosure {
protected:
  ShenandoahGenerationalHeap* const _heap;
  ShenandoahScanRemembered*   const _scanner;

public:
  ShenandoahDirtyRememberedSetClosure() :
          _heap(ShenandoahGenerationalHeap::heap()),
          _scanner(_heap->old_generation()->card_scan()) {}

  template<class T>
  inline void work(T* p) {
    assert(_heap->is_in_old(p), "Expecting to get an old gen address");
    T o = RawAccess<>::oop_load(p);
    if (!CompressedOops::is_null(o)) {
      oop obj = CompressedOops::decode_not_null(o);
      if (_heap->is_in_young(obj)) {
        // Dirty the card containing the cross-generational pointer.
        _scanner->mark_card_as_dirty((HeapWord*) p);
      }
    }
  }

  virtual void do_oop(narrowOop* p) { work(p); }
  virtual void do_oop(oop* p)       { work(p); }
};

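// The remembered set is backed by the Shenandoah card table. The number of cards must be
// non-zero and a multiple of the cluster size, since the remembered set is scanned in
// cluster-sized units.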
ShenandoahDirectCardMarkRememberedSet::ShenandoahDirectCardMarkRememberedSet(ShenandoahCardTable* card_table, size_t total_card_count) :
  LogCardValsPerIntPtr(log2i_exact(sizeof(intptr_t)) - log2i_exact(sizeof(CardValue))),
  LogCardSizeInWords(log2i_exact(CardTable::card_size_in_words())) {

  // Paranoid assert for LogCardValsPerIntPtr calculation above
  assert(sizeof(intptr_t) > sizeof(CardValue), "LogCardValsPerIntPtr would underflow");

  _heap = ShenandoahHeap::heap();
  _card_table = card_table;
  _total_card_count = total_card_count;
  _card_shift = CardTable::card_shift();

  _byte_map = _card_table->byte_for_index(0);

  _whole_heap_base = _card_table->addr_for(_byte_map);
  _byte_map_base = _byte_map - (uintptr_t(_whole_heap_base) >> _card_shift);

  assert(total_card_count % ShenandoahCardCluster::CardsPerCluster == 0, "Invalid card count.");
  assert(total_card_count > 0, "Card count cannot be zero.");
}

// Merge any dirty values from write table into the read table, while leaving
// the write table unchanged.
void ShenandoahDirectCardMarkRememberedSet::merge_write_table(HeapWord* start, size_t word_count) {
  size_t start_index = card_index_for_addr(start);
#ifdef ASSERT
  // avoid querying card_index_for_addr() for an address past end of heap
  size_t end_index = card_index_for_addr(start + word_count - 1) + 1;
#endif
  assert(start_index % ((size_t)1 << LogCardValsPerIntPtr) == 0, "Expected a multiple of CardValsPerIntPtr");
  assert(end_index % ((size_t)1 << LogCardValsPerIntPtr) == 0, "Expected a multiple of CardValsPerIntPtr");

  // We'll access in groups of intptr_t worth of card entries
  intptr_t* const read_table  = (intptr_t*) &(_card_table->read_byte_map())[start_index];
  intptr_t* const write_table = (intptr_t*) &(_card_table->write_byte_map())[start_index];

  // Avoid division, use shift instead
  assert(word_count % ((size_t)1 << (LogCardSizeInWords + LogCardValsPerIntPtr)) == 0, "Expected a multiple of CardSizeInWords*CardValsPerIntPtr");
  size_t const num = word_count >> (LogCardSizeInWords + LogCardValsPerIntPtr);

  for (size_t i = 0; i < num; i++) {
    read_table[i] &= write_table[i];
  }
}

// Destructively copy the write table to the read table, and clean the write table.
void ShenandoahDirectCardMarkRememberedSet::reset_remset(HeapWord* start, size_t word_count) {
  size_t start_index = card_index_for_addr(start);
#ifdef ASSERT
  // avoid querying card_index_for_addr() for an address past end of heap
  size_t end_index = card_index_for_addr(start + word_count - 1) + 1;
#endif
  assert(start_index % ((size_t)1 << LogCardValsPerIntPtr) == 0, "Expected a multiple of CardValsPerIntPtr");
  assert(end_index % ((size_t)1 << LogCardValsPerIntPtr) == 0, "Expected a multiple of CardValsPerIntPtr");

  // We'll access in groups of intptr_t worth of card entries
  intptr_t* const read_table  = (intptr_t*) &(_card_table->read_byte_map())[start_index];
  intptr_t* const write_table = (intptr_t*) &(_card_table->write_byte_map())[start_index];

  // Avoid division, use shift instead
  assert(word_count % ((size_t)1 << (LogCardSizeInWords + LogCardValsPerIntPtr)) == 0, "Expected a multiple of CardSizeInWords*CardValsPerIntPtr");
  size_t const num = word_count >> (LogCardSizeInWords + LogCardValsPerIntPtr);

  for (size_t i = 0; i < num; i++) {
    read_table[i]  = write_table[i];
    write_table[i] = CardTable::clean_card_row_val();
  }
}

ShenandoahScanRememberedTask::ShenandoahScanRememberedTask(ShenandoahObjToScanQueueSet* queue_set,
                                                           ShenandoahObjToScanQueueSet* old_queue_set,
                                                           ShenandoahReferenceProcessor* rp,
                                                           ShenandoahRegionChunkIterator* work_list, bool is_concurrent) :
  WorkerTask("Scan Remembered Set"),
  _queue_set(queue_set), _old_queue_set(old_queue_set), _rp(rp), _work_list(work_list), _is_concurrent(is_concurrent) {
  bool old_bitmap_stable = ShenandoahHeap::heap()->old_generation()->is_mark_complete();
  log_info(gc, remset)("Scan remembered set using bitmap: %s", BOOL_TO_STR(old_bitmap_stable));
}

void ShenandoahScanRememberedTask::work(uint worker_id) {
  if (_is_concurrent) {
    // This sets up a thread local reference to the worker_id which is needed by the weak reference processor.
    ShenandoahConcurrentWorkerSession worker_session(worker_id);
    ShenandoahSuspendibleThreadSetJoiner stsj;
    do_work(worker_id);
  } else {
    // This sets up a thread local reference to the worker_id which is needed by the weak reference processor.
    ShenandoahParallelWorkerSession worker_session(worker_id);
    do_work(worker_id);
  }
}

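// Claim chunks of work from the shared region-chunk iterator and scan the dirty cards in
// each claimed slice of old-gen memory, applying the marking closure to the references found there.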
void ShenandoahScanRememberedTask::do_work(uint worker_id) {
  ShenandoahWorkerTimingsTracker x(ShenandoahPhaseTimings::init_scan_rset, ShenandoahPhaseTimings::ScanClusters, worker_id);

  ShenandoahObjToScanQueue* q = _queue_set->queue(worker_id);
  ShenandoahObjToScanQueue* old = _old_queue_set == nullptr ? nullptr : _old_queue_set->queue(worker_id);
  ShenandoahMarkRefsClosure<YOUNG> cl(q, _rp, old);
  ShenandoahGenerationalHeap* heap = ShenandoahGenerationalHeap::heap();
  ShenandoahScanRemembered* scanner = heap->old_generation()->card_scan();

  // set up thread local closure for shen ref processor
  _rp->set_mark_closure(worker_id, &cl);
  struct ShenandoahRegionChunk assignment;
  while (_work_list->next(&assignment)) {
    ShenandoahHeapRegion* region = assignment._r;
    log_debug(gc)("ShenandoahScanRememberedTask::do_work(%u), processing slice of region "
                  SIZE_FORMAT " at offset " SIZE_FORMAT ", size: " SIZE_FORMAT,
                  worker_id, region->index(), assignment._chunk_offset, assignment._chunk_size);
    if (region->is_old()) {
      size_t cluster_size =
        CardTable::card_size_in_words() * ShenandoahCardCluster::CardsPerCluster;
      size_t clusters = assignment._chunk_size / cluster_size;
      assert(clusters * cluster_size == assignment._chunk_size, "Chunk assignments must align on cluster boundaries");
      HeapWord* end_of_range = region->bottom() + assignment._chunk_offset + assignment._chunk_size;

      // During concurrent mark, region->top() equals TAMS with respect to the current young-gen pass.
      if (end_of_range > region->top()) {
        end_of_range = region->top();
      }
      scanner->process_region_slice(region, assignment._chunk_offset, clusters, end_of_range, &cl, false, worker_id);
    }
#ifdef ENABLE_REMEMBERED_SET_CANCELLATION
    // This check is currently disabled to avoid crashes that occur
    // when we try to cancel remembered set scanning; it should be re-enabled
    // after the issues are fixed, as it would allow more prompt cancellation and
    // transition to degenerated / full GCs. Note that work that has been assigned/
    // claimed above must be completed before we return here upon cancellation.
    if (heap->check_cancelled_gc_and_yield(_is_concurrent)) {
      return;
    }
#endif
  }
}

size_t ShenandoahRegionChunkIterator::calc_regular_group_size() {
  // The group size is calculated from the number of regions.  Suppose the heap has N regions.  The first group processes
  // N/2 regions.  The second group processes N/4 regions, the third group N/8 regions and so on.
  // Note that the infinite series N/2 + N/4 + N/8 + N/16 + ...  sums to N.
  //
  // The normal group size is the number of regions / 2.
  //
  // In the case that the region_size_words is greater than _maximum_chunk_size_words, the first group_size is
  // larger than the normal group size because each chunk in the group will be smaller than the region size.
  //
  // The last group also has more than the normal number of entries because it finishes the total scanning effort.  The chunk sizes are
  // different for each group.  The intention is that the first group processes roughly half of the heap, the second processes
  // half of the remaining heap, the third processes half of what remains and so on.  The smallest chunk size
  // is represented by _smallest_chunk_size_words.  We do not divide work any smaller than this.
  //

  size_t group_size = _heap->num_regions() / 2;
  return group_size;
}

size_t ShenandoahRegionChunkIterator::calc_first_group_chunk_size_b4_rebalance() {
  size_t words_in_first_chunk = ShenandoahHeapRegion::region_size_words();
  return words_in_first_chunk;
}

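// Compute how many groups are needed, given that each successive group spans half the
// memory of its predecessor, without exceeding _maximum_groups or dropping below the
// smallest permitted chunk size.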
size_t ShenandoahRegionChunkIterator::calc_num_groups() {
  size_t total_heap_size = _heap->num_regions() * ShenandoahHeapRegion::region_size_words();
  size_t num_groups = 0;
  size_t cumulative_group_span = 0;
  size_t current_group_span = _first_group_chunk_size_b4_rebalance * _regular_group_size;
  size_t smallest_group_span = smallest_chunk_size_words() * _regular_group_size;
  while ((num_groups < _maximum_groups) && (cumulative_group_span + current_group_span <= total_heap_size)) {
    num_groups++;
    cumulative_group_span += current_group_span;
    if (current_group_span <= smallest_group_span) {
      break;
    } else {
      current_group_span /= 2;    // Each group spans half of what the preceding group spanned.
    }
  }
  // Loop post condition:
  //   num_groups <= _maximum_groups
  //   cumulative_group_span is the memory spanned by num_groups
  //   current_group_span is the span of the last fully populated group (assuming loop iterates at least once)
  //   each of num_groups is fully populated with _regular_group_size chunks in each
  // Not guaranteed by the loop post condition:
  //   cumulative_group_span may be less than total_heap_size for one or more of the following reasons
  //   a) The number of regions remaining to be spanned is smaller than a complete group, or
  //   b) We have filled up all groups through _maximum_groups and still have not spanned all regions

  if (cumulative_group_span < total_heap_size) {
    // We've got more regions to span
    if ((num_groups < _maximum_groups) && (current_group_span > smallest_group_span)) {
      num_groups++;             // Place all remaining regions into a new not-full group (chunk_size half that of previous group)
    }
    // Else we are unable to create a new group because we've exceeded the number of allowed groups or have reached the
    // minimum chunk size.

    // Any remaining regions will be treated as if they are part of the most recently created group.  This group will
    // have more than _regular_group_size chunks within it.
  }
  return num_groups;
}

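// Count the total number of chunks across all groups, including the extra chunks absorbed
// by the rebalanced first group and by the final group.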
size_t ShenandoahRegionChunkIterator::calc_total_chunks() {
  size_t region_size_words = ShenandoahHeapRegion::region_size_words();
  size_t unspanned_heap_size = _heap->num_regions() * region_size_words;
  size_t num_chunks = 0;
  size_t cumulative_group_span = 0;
  size_t current_group_span = _first_group_chunk_size_b4_rebalance * _regular_group_size;
  size_t smallest_group_span = smallest_chunk_size_words() * _regular_group_size;

  // The first group gets special handling because the first chunk size can be no larger than _maximum_chunk_size_words
  if (region_size_words > _maximum_chunk_size_words) {
    // In the case that we shrink the first group's chunk size, certain other groups will also be subsumed within the first group
    size_t effective_chunk_size = _first_group_chunk_size_b4_rebalance;
    while (effective_chunk_size >= _maximum_chunk_size_words) {
      num_chunks += current_group_span / _maximum_chunk_size_words;
      unspanned_heap_size -= current_group_span;
      effective_chunk_size /= 2;
      current_group_span /= 2;
    }
  } else {
    num_chunks = _regular_group_size;
    unspanned_heap_size -= current_group_span;
    current_group_span /= 2;
  }
  size_t spanned_groups = 1;
  while (unspanned_heap_size > 0) {
    if (current_group_span <= unspanned_heap_size) {
      unspanned_heap_size -= current_group_span;
      num_chunks += _regular_group_size;
      spanned_groups++;

      // _num_groups is the number of groups required to span the configured heap size.  We are not allowed
      // to change the number of groups.  The last group is responsible for spanning all chunks not spanned
      // by previously processed groups.
      if (spanned_groups >= _num_groups) {
        // The last group has more than _regular_group_size entries.
        size_t chunk_span = current_group_span / _regular_group_size;
        size_t extra_chunks = unspanned_heap_size / chunk_span;
        assert (extra_chunks * chunk_span == unspanned_heap_size, "Chunks must precisely span regions");
        num_chunks += extra_chunks;
        return num_chunks;
      } else if (current_group_span <= smallest_group_span) {
        // We cannot introduce new groups because we've reached the lower bound on group size.  So this last
        // group may hold extra chunks.
        size_t chunk_span = smallest_chunk_size_words();
        size_t extra_chunks = unspanned_heap_size / chunk_span;
        assert (extra_chunks * chunk_span == unspanned_heap_size, "Chunks must precisely span regions");
        num_chunks += extra_chunks;
        return num_chunks;
      } else {
        current_group_span /= 2;
      }
    } else {
      // This last group has fewer than _regular_group_size entries.
      size_t chunk_span = current_group_span / _regular_group_size;
      size_t last_group_size = unspanned_heap_size / chunk_span;
      assert (last_group_size * chunk_span == unspanned_heap_size, "Chunks must precisely span regions");
      num_chunks += last_group_size;
      return num_chunks;
    }
  }
  return num_chunks;
}

ShenandoahRegionChunkIterator::ShenandoahRegionChunkIterator(size_t worker_count) :
    ShenandoahRegionChunkIterator(ShenandoahHeap::heap(), worker_count)
{
}

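// Precompute the per-group tables: each group's chunk size, the cumulative chunk count at
// which it ends (_group_entries), and the region index and in-region offset at which it
// begins (_region_index, _group_offset).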
ShenandoahRegionChunkIterator::ShenandoahRegionChunkIterator(ShenandoahHeap* heap, size_t worker_count) :
    _heap(heap),
    _regular_group_size(calc_regular_group_size()),
    _first_group_chunk_size_b4_rebalance(calc_first_group_chunk_size_b4_rebalance()),
    _num_groups(calc_num_groups()),
    _total_chunks(calc_total_chunks()),
    _index(0)
{
#ifdef ASSERT
  size_t expected_chunk_size_words = _clusters_in_smallest_chunk * CardTable::card_size_in_words() * ShenandoahCardCluster::CardsPerCluster;
  assert(smallest_chunk_size_words() == expected_chunk_size_words, "_smallest_chunk_size (" SIZE_FORMAT") is not valid because it does not equal (" SIZE_FORMAT ")",
         smallest_chunk_size_words(), expected_chunk_size_words);
#endif
  assert(_num_groups <= _maximum_groups,
         "The number of remembered set scanning groups must be less than or equal to maximum groups");
  assert(smallest_chunk_size_words() << (_maximum_groups - 1) == _maximum_chunk_size_words,
         "Maximum number of groups needs to span maximum chunk size to smallest chunk size");

  size_t words_in_region = ShenandoahHeapRegion::region_size_words();
  _region_index[0] = 0;
  _group_offset[0] = 0;
  if (words_in_region > _maximum_chunk_size_words) {
    // In the case that we shrink the first group's chunk size, certain other groups will also be subsumed within the first group
    size_t num_chunks = 0;
    size_t effective_chunk_size = _first_group_chunk_size_b4_rebalance;
    size_t current_group_span = effective_chunk_size * _regular_group_size;
    while (effective_chunk_size >= _maximum_chunk_size_words) {
      num_chunks += current_group_span / _maximum_chunk_size_words;
      effective_chunk_size /= 2;
      current_group_span /= 2;
    }
    _group_entries[0] = num_chunks;
    _group_chunk_size[0] = _maximum_chunk_size_words;
  } else {
    _group_entries[0] = _regular_group_size;
    _group_chunk_size[0] = _first_group_chunk_size_b4_rebalance;
  }

  size_t previous_group_span = _group_entries[0] * _group_chunk_size[0];
  for (size_t i = 1; i < _num_groups; i++) {
    _group_chunk_size[i] = _group_chunk_size[i-1] / 2;
    size_t chunks_in_group = _regular_group_size;
    size_t this_group_span = _group_chunk_size[i] * chunks_in_group;
    size_t total_span_of_groups = previous_group_span + this_group_span;
    _region_index[i] = previous_group_span / words_in_region;
    _group_offset[i] = previous_group_span % words_in_region;
    _group_entries[i] = _group_entries[i-1] + _regular_group_size;
    previous_group_span = total_span_of_groups;
  }
  if (_group_entries[_num_groups-1] < _total_chunks) {
    assert((_total_chunks - _group_entries[_num_groups-1]) * _group_chunk_size[_num_groups-1] + previous_group_span ==
           heap->num_regions() * words_in_region, "Total region chunks (" SIZE_FORMAT
           ") do not span total heap regions (" SIZE_FORMAT ")", _total_chunks, _heap->num_regions());
    previous_group_span += (_total_chunks - _group_entries[_num_groups-1]) * _group_chunk_size[_num_groups-1];
    _group_entries[_num_groups-1] = _total_chunks;
  }
  assert(previous_group_span == heap->num_regions() * words_in_region, "Total region chunks (" SIZE_FORMAT
         ") do not span total heap regions (" SIZE_FORMAT "): " SIZE_FORMAT " does not equal " SIZE_FORMAT,
         _total_chunks, _heap->num_regions(), previous_group_span, heap->num_regions() * words_in_region);

  // Not necessary, but keeps things tidy
  for (size_t i = _num_groups; i < _maximum_groups; i++) {
    _region_index[i] = 0;
    _group_offset[i] = 0;
    _group_entries[i] = _group_entries[i-1];
    _group_chunk_size[i] = 0;
  }
}

void ShenandoahRegionChunkIterator::reset() {
  _index = 0;
}

ShenandoahReconstructRememberedSetTask::ShenandoahReconstructRememberedSetTask(ShenandoahRegionIterator* regions)
  : WorkerTask("Shenandoah Reconstruct Remembered Set")
  , _regions(regions) { }

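// Rebuild the remembered set for old regions from scratch: clear card marks and
// object-start data, re-register every object, and dirty the cards of any object field
// that points into the young generation.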
void ShenandoahReconstructRememberedSetTask::work(uint worker_id) {
  ShenandoahParallelWorkerSession worker_session(worker_id);
  ShenandoahHeapRegion* r = _regions->next();
  ShenandoahGenerationalHeap* heap = ShenandoahGenerationalHeap::heap();
  ShenandoahScanRemembered* scanner = heap->old_generation()->card_scan();
  ShenandoahDirtyRememberedSetClosure dirty_cards_for_cross_generational_pointers;

  while (r != nullptr) {
    if (r->is_old() && r->is_active()) {
      HeapWord* obj_addr = r->bottom();
      if (r->is_humongous_start()) {
        // Need the size of the humongous object to determine how many regions it spans
        oop obj = cast_to_oop(obj_addr);
        size_t size = obj->size();

        // First, clear the remembered set for all spanned humongous regions
        size_t num_regions = ShenandoahHeapRegion::required_regions(size * HeapWordSize);
        size_t region_span = num_regions * ShenandoahHeapRegion::region_size_words();
        scanner->reset_remset(r->bottom(), region_span);
        size_t region_index = r->index();
        ShenandoahHeapRegion* humongous_region = heap->get_region(region_index);
        while (num_regions-- != 0) {
          scanner->reset_object_range(humongous_region->bottom(), humongous_region->end());
          region_index++;
          humongous_region = heap->get_region(region_index);
        }

        // Then register the humongous object and DIRTY relevant remembered set cards
        scanner->register_object_without_lock(obj_addr);
        obj->oop_iterate(&dirty_cards_for_cross_generational_pointers);
      } else if (!r->is_humongous()) {
        // First, clear the remembered set
        scanner->reset_remset(r->bottom(), ShenandoahHeapRegion::region_size_words());
        scanner->reset_object_range(r->bottom(), r->end());

        // Then iterate over all objects, registering object and DIRTYing relevant remembered set cards
        HeapWord* t = r->top();
        while (obj_addr < t) {
          oop obj = cast_to_oop(obj_addr);
          scanner->register_object_without_lock(obj_addr);
          obj_addr += obj->oop_iterate_size(&dirty_cards_for_cross_generational_pointers);
        }
      } // else, ignore humongous continuation region
    }
    // else, this region is FREE or YOUNG or inactive and we can ignore it.
    r = _regions->next();
  }
}