/*
 * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */


#include "precompiled.hpp"

#include "gc/shenandoah/shenandoahOldGeneration.hpp"
#include "gc/shenandoah/shenandoahHeap.inline.hpp"
#include "gc/shenandoah/shenandoahOopClosures.inline.hpp"
#include "gc/shenandoah/shenandoahReferenceProcessor.hpp"
#include "gc/shenandoah/shenandoahScanRemembered.inline.hpp"
#include "logging/log.hpp"
#include "runtime/threads.hpp"

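// Note on the card-table scheme assumed by the helpers below (inferred from this file):
// the generational mode maintains two card tables. Mutators dirty the "write" table through
// the post-write barrier, while remembered-set scanning reads from the stable "read" table.
// The two tables are merged (see merge_write_table()) and swapped (see swap_card_tables())
// so that scanning does not race with concurrent mutator card marking.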
size_t ShenandoahDirectCardMarkRememberedSet::last_valid_index() const {
  return _card_table->last_valid_index();
}

size_t ShenandoahDirectCardMarkRememberedSet::total_cards() const {
  return _total_card_count;
}

size_t ShenandoahDirectCardMarkRememberedSet::card_index_for_addr(HeapWord *p) const {
  return _card_table->index_for(p);
}

HeapWord* ShenandoahDirectCardMarkRememberedSet::addr_for_card_index(size_t card_index) const {
  return _whole_heap_base + CardTable::card_size_in_words() * card_index;
}

bool ShenandoahDirectCardMarkRememberedSet::is_write_card_dirty(size_t card_index) const {
  CardValue* bp = &(_card_table->write_byte_map())[card_index];
  return (bp[0] == CardTable::dirty_card_val());
}

bool ShenandoahDirectCardMarkRememberedSet::is_card_dirty(size_t card_index) const {
  CardValue* bp = &(_card_table->read_byte_map())[card_index];
  return (bp[0] == CardTable::dirty_card_val());
}

void ShenandoahDirectCardMarkRememberedSet::mark_card_as_dirty(size_t card_index) {
  CardValue* bp = &(_card_table->write_byte_map())[card_index];
  bp[0] = CardTable::dirty_card_val();
}

void ShenandoahDirectCardMarkRememberedSet::mark_range_as_dirty(size_t card_index, size_t num_cards) {
  CardValue* bp = &(_card_table->write_byte_map())[card_index];
  while (num_cards-- > 0) {
    *bp++ = CardTable::dirty_card_val();
  }
}

bool ShenandoahDirectCardMarkRememberedSet::is_card_dirty(HeapWord* p) const {
  size_t index = card_index_for_addr(p);
  CardValue* bp = &(_card_table->read_byte_map())[index];
  return (bp[0] == CardTable::dirty_card_val());
}

bool ShenandoahDirectCardMarkRememberedSet::is_write_card_dirty(HeapWord* p) const {
  size_t index = card_index_for_addr(p);
  CardValue* bp = &(_card_table->write_byte_map())[index];
  return (bp[0] == CardTable::dirty_card_val());
}

void ShenandoahDirectCardMarkRememberedSet::mark_card_as_dirty(HeapWord* p) {
  size_t index = card_index_for_addr(p);
  CardValue* bp = &(_card_table->write_byte_map())[index];
  bp[0] = CardTable::dirty_card_val();
}

void ShenandoahDirectCardMarkRememberedSet::mark_range_as_dirty(HeapWord* p, size_t num_heap_words) {
  CardValue* bp = &(_card_table->write_byte_map_base())[uintptr_t(p) >> _card_shift];
  CardValue* end_bp = &(_card_table->write_byte_map_base())[uintptr_t(p + num_heap_words) >> _card_shift];
  // If (p + num_heap_words) is not aligned on a card boundary, we also need to dirty the last card.
  if (((unsigned long long) (p + num_heap_words)) & (CardTable::card_size() - 1)) {
    end_bp++;
  }
  while (bp < end_bp) {
    *bp++ = CardTable::dirty_card_val();
  }
}

void ShenandoahDirectCardMarkRememberedSet::mark_range_as_clean(HeapWord* p, size_t num_heap_words) {
  CardValue* bp = &(_card_table->write_byte_map_base())[uintptr_t(p) >> _card_shift];
  CardValue* end_bp = &(_card_table->write_byte_map_base())[uintptr_t(p + num_heap_words) >> _card_shift];
  // If (p + num_heap_words) is not aligned on a card boundary, we also need to clean the last card.
  if (((unsigned long long) (p + num_heap_words)) & (CardTable::card_size() - 1)) {
    end_bp++;
  }
  while (bp < end_bp) {
    *bp++ = CardTable::clean_card_val();
  }
}

void ShenandoahDirectCardMarkRememberedSet::mark_read_table_as_clean() {
  CardValue* read_table = _card_table->read_byte_map();
  CardValue* bp = &(read_table)[0];
  CardValue* end_bp = &(read_table)[_card_table->last_valid_index()];

  while (bp <= end_bp) {
    *bp++ = CardTable::clean_card_val();
  }

  log_info(gc, barrier)("Cleaned read_table from " PTR_FORMAT " to " PTR_FORMAT, p2i(&(read_table)[0]), p2i(end_bp));
}

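// The ShenandoahCardCluster functions below maintain the per-card object-start ("crossing map")
// information: for each card, a starts-object bit plus the offsets (in heap words from the card
// base) of the first and last objects that start within that card. Remembered-set scanning uses
// this to find the first parsable object for a dirty card (see block_start() below).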
// No lock required because arguments align with card boundaries.
void ShenandoahCardCluster::reset_object_range(HeapWord* from, HeapWord* to) {
  assert(((((unsigned long long) from) & (CardTable::card_size() - 1)) == 0) &&
         ((((unsigned long long) to) & (CardTable::card_size() - 1)) == 0),
         "reset_object_range bounds must align with card boundaries");
  size_t card_at_start = _rs->card_index_for_addr(from);
  size_t num_cards = (to - from) / CardTable::card_size_in_words();

  for (size_t i = 0; i < num_cards; i++) {
    _object_starts[card_at_start + i].short_word = 0;
  }
}

// Assume only one thread at a time registers objects pertaining to
// each card-table entry's range of memory.
void ShenandoahCardCluster::register_object(HeapWord* address) {
  shenandoah_assert_heaplocked();

  register_object_without_lock(address);
}

void ShenandoahCardCluster::register_object_without_lock(HeapWord* address) {
  size_t card_at_start = _rs->card_index_for_addr(address);
  HeapWord* card_start_address = _rs->addr_for_card_index(card_at_start);
  uint8_t offset_in_card = checked_cast<uint8_t>(pointer_delta(address, card_start_address));

  if (!starts_object(card_at_start)) {
    set_starts_object_bit(card_at_start);
    set_first_start(card_at_start, offset_in_card);
    set_last_start(card_at_start, offset_in_card);
  } else {
    if (offset_in_card < get_first_start(card_at_start))
      set_first_start(card_at_start, offset_in_card);
    if (offset_in_card > get_last_start(card_at_start))
      set_last_start(card_at_start, offset_in_card);
  }
}

void ShenandoahCardCluster::coalesce_objects(HeapWord* address, size_t length_in_words) {
  size_t card_at_start = _rs->card_index_for_addr(address);
  HeapWord* card_start_address = _rs->addr_for_card_index(card_at_start);
  size_t card_at_end = card_at_start + ((address + length_in_words) - card_start_address) / CardTable::card_size_in_words();

  if (card_at_start == card_at_end) {
    // There are no changes to the get_first_start array. Either get_first_start(card_at_start) returns this coalesced object,
    // or it returns an object that precedes the coalesced object.
    if (card_start_address + get_last_start(card_at_start) < address + length_in_words) {
      uint8_t coalesced_offset = checked_cast<uint8_t>(pointer_delta(address, card_start_address));
      // The object that used to be the last object starting within this card is being subsumed within the coalesced
      // object. Since we always coalesce entire objects, this condition only occurs if the last object ends before or at
      // the end of the card's memory range and there is no object following this object. In this case, adjust last_start
      // to represent the start of the coalesced range.
      set_last_start(card_at_start, coalesced_offset);
    }
    // Else, no changes to last_starts information. Either get_last_start(card_at_start) returns the object that immediately
    // follows the coalesced object, or it returns an object that follows the object immediately following the coalesced object.
  } else {
    uint8_t coalesced_offset = checked_cast<uint8_t>(pointer_delta(address, card_start_address));
    if (get_last_start(card_at_start) > coalesced_offset) {
      // The existing last start is being coalesced; establish a new last start.
      set_last_start(card_at_start, coalesced_offset);
    }
    // otherwise, get_last_start(card_at_start) must equal coalesced_offset

    // All the cards between first and last get cleared.
    for (size_t i = card_at_start + 1; i < card_at_end; i++) {
      clear_starts_object_bit(i);
    }

    uint8_t follow_offset = checked_cast<uint8_t>((address + length_in_words) - _rs->addr_for_card_index(card_at_end));
    if (starts_object(card_at_end) && (get_first_start(card_at_end) < follow_offset)) {
      // It may be that after coalescing within this last card's memory range, the last card
      // no longer holds an object.
      if (get_last_start(card_at_end) >= follow_offset) {
        set_first_start(card_at_end, follow_offset);
      } else {
        // last_start is being coalesced so this card no longer has any objects.
        clear_starts_object_bit(card_at_end);
      }
    }
    // else
    //  card_at_end did not have an object, so it still does not have an object, or
    //  card_at_end had an object that starts after the coalesced object, so no changes required for card_at_end

  }
}


size_t ShenandoahCardCluster::get_first_start(size_t card_index) const {
  assert(starts_object(card_index), "Can't get first start because no object starts here");
  return _object_starts[card_index].offsets.first & FirstStartBits;
}

size_t ShenandoahCardCluster::get_last_start(size_t card_index) const {
  assert(starts_object(card_index), "Can't get last start because no object starts here");
  return _object_starts[card_index].offsets.last;
}

// Given a card_index, return the starting address of the first block in the heap
// that straddles into this card. If this card is co-initial with an object, then
// this would return the first address of the range that this card covers, which is
// where the card's first object also begins.
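// For example (illustration only): if an object starts at offset 8 of card k-2 and extends
// across cards k-1 and k, then block_start(k) walks backwards from card k to card k-2 and
// returns addr_for_card_index(k-2) + 8, the start of the object that straddles into card k.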
HeapWord* ShenandoahCardCluster::block_start(const size_t card_index) const {
  HeapWord* left = _rs->addr_for_card_index(card_index);

#ifdef ASSERT
  assert(ShenandoahHeap::heap()->mode()->is_generational(), "Do not use in non-generational mode");
  ShenandoahHeapRegion* region = ShenandoahHeap::heap()->heap_region_containing(left);
  assert(region->is_old(), "Do not use for young regions");
  // For humongous regions it's more efficient to jump directly to the
  // start region.
  assert(!region->is_humongous(), "Use region->humongous_start_region() instead");
#endif
  if (starts_object(card_index) && get_first_start(card_index) == 0) {
    // This card contains a co-initial object; a fortiori, it covers
    // also the case of a card being the first in a region.
    assert(oopDesc::is_oop(cast_to_oop(left)), "Should be an object");
    return left;
  }

  HeapWord* p = nullptr;
  oop obj = cast_to_oop(p);
  ssize_t cur_index = (ssize_t)card_index;
  assert(cur_index >= 0, "Overflow");
  assert(cur_index > 0, "Should have returned above");
  // Walk backwards over the cards...
  while (--cur_index > 0 && !starts_object(cur_index)) {
    // ... to the one that starts the object
  }
  // cur_index should start an object: we should not have walked
  // past the left end of the region.
  assert(cur_index >= 0 && (cur_index <= (ssize_t)card_index), "Error");
  assert(region->bottom() <= _rs->addr_for_card_index(cur_index),
         "Fell off the bottom of containing region");
  assert(starts_object(cur_index), "Error");
  size_t offset = get_last_start(cur_index);
  // We could avoid this call by using card-size arithmetic instead.
  p = _rs->addr_for_card_index(cur_index) + offset;
  // Recall that we already dealt with the co-initial object case above
  assert(p < left, "obj should start before left");
  // While it is safe to ask an object its size in the loop that
  // follows, the (disabled) loop should never be needed.
  // 1. we ask this question only for regions in the old generation
  // 2. there is no direct allocation ever by mutators in old generation
  //    regions. Only GC will ever allocate in old regions, and then
  //    too only during promotion/evacuation phases. Thus there is no danger
  //    of races between reading from and writing to the object start array,
  //    or of asking partially initialized objects their size (in the loop below).
  // 3. only GC asks this question during phases when it is not concurrently
  //    evacuating/promoting, viz. during concurrent root scanning (before
  //    the evacuation phase) and during concurrent update refs (after the
  //    evacuation phase) of young collections. This is never called
  //    during old or global collections.
  // 4. Every allocation under TAMS updates the object start array.
  NOT_PRODUCT(obj = cast_to_oop(p);)
  assert(oopDesc::is_oop(obj), "Should be an object");
#define WALK_FORWARD_IN_BLOCK_START false
  while (WALK_FORWARD_IN_BLOCK_START && p + obj->size() < left) {
    p += obj->size();
  }
#undef WALK_FORWARD_IN_BLOCK_START
  assert(p + obj->size() > left, "obj should end after left");
  return p;
}

size_t ShenandoahScanRemembered::card_index_for_addr(HeapWord* p) {
  return _rs->card_index_for_addr(p);
}

HeapWord* ShenandoahScanRemembered::addr_for_card_index(size_t card_index) {
  return _rs->addr_for_card_index(card_index);
}

bool ShenandoahScanRemembered::is_card_dirty(size_t card_index) {
  return _rs->is_card_dirty(card_index);
}

bool ShenandoahScanRemembered::is_write_card_dirty(size_t card_index) {
  return _rs->is_write_card_dirty(card_index);
}

bool ShenandoahScanRemembered::is_card_dirty(HeapWord* p) {
  return _rs->is_card_dirty(p);
}

void ShenandoahScanRemembered::mark_card_as_dirty(HeapWord* p) {
  _rs->mark_card_as_dirty(p);
}

bool ShenandoahScanRemembered::is_write_card_dirty(HeapWord* p) {
  return _rs->is_write_card_dirty(p);
}

void ShenandoahScanRemembered::mark_range_as_dirty(HeapWord* p, size_t num_heap_words) {
  _rs->mark_range_as_dirty(p, num_heap_words);
}

void ShenandoahScanRemembered::mark_range_as_clean(HeapWord* p, size_t num_heap_words) {
  _rs->mark_range_as_clean(p, num_heap_words);
}

void ShenandoahScanRemembered::mark_read_table_as_clean() {
  _rs->mark_read_table_as_clean();
}

void ShenandoahScanRemembered::reset_object_range(HeapWord* from, HeapWord* to) {
  _scc->reset_object_range(from, to);
}

void ShenandoahScanRemembered::register_object(HeapWord* addr) {
  _scc->register_object(addr);
}

void ShenandoahScanRemembered::register_object_without_lock(HeapWord* addr) {
  _scc->register_object_without_lock(addr);
}

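// Consistency check used for verification: starting from the registered first_start of the card
// containing 'address', walk forward object by object (consulting the marking context for
// liveness when one is supplied) and confirm that 'address' is reached, that last_start names
// the last object starting in the card, and that no spurious starts are registered inside the
// object being verified.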
bool ShenandoahScanRemembered::verify_registration(HeapWord* address, ShenandoahMarkingContext* ctx) {
  size_t index = card_index_for_addr(address);
  if (!_scc->starts_object(index)) {
    return false;
  }
  HeapWord* base_addr = addr_for_card_index(index);
  size_t offset = _scc->get_first_start(index);
  ShenandoahHeap* heap = ShenandoahHeap::heap();

  // Verify that I can find this object within its enclosing card by scanning forward from first_start.
  while (base_addr + offset < address) {
    oop obj = cast_to_oop(base_addr + offset);
    if (!ctx || ctx->is_marked(obj)) {
      offset += obj->size();
    } else {
      // If this object is not live, don't trust its size(); all objects above tams are live.
      ShenandoahHeapRegion* r = heap->heap_region_containing(obj);
      HeapWord* tams = ctx->top_at_mark_start(r);
      offset = ctx->get_next_marked_addr(base_addr + offset, tams) - base_addr;
    }
  }
  if (base_addr + offset != address) {
    return false;
  }

  // At this point, offset represents the object whose registration we are verifying. We know that at least this object
  // resides within this card's memory.

  // Make sure that last_offset is properly set for the enclosing card, but we can't verify this for
  // candidate collection-set regions during mixed evacuations, so disable this check in general
  // during mixed evacuations.

  ShenandoahHeapRegion* r = heap->heap_region_containing(base_addr + offset);
  size_t max_offset = r->top() - base_addr;
  if (max_offset > CardTable::card_size_in_words()) {
    max_offset = CardTable::card_size_in_words();
  }
  size_t prev_offset;
  if (!ctx) {
    do {
      oop obj = cast_to_oop(base_addr + offset);
      prev_offset = offset;
      offset += obj->size();
    } while (offset < max_offset);
    if (_scc->get_last_start(index) != prev_offset) {
      return false;
    }

    // base_addr + offset is the address of the first object that starts on the following card, if there is one.

    // Notes: base_addr is addr_for_card_index(index)
    //        base_addr + offset is end of the object we are verifying
    //        cannot use card_index_for_addr(base_addr + offset) because it asserts arg < end of whole heap
    size_t end_card_index = index + offset / CardTable::card_size_in_words();

    if (end_card_index > index && end_card_index <= _rs->last_valid_index()) {
      // If there is a following object registered on the next card, it should begin where this object ends.
      if (_scc->starts_object(end_card_index) &&
          ((addr_for_card_index(end_card_index) + _scc->get_first_start(end_card_index)) != (base_addr + offset))) {
        return false;
      }
    }

    // Ensure that no other objects are registered "inside" of this one.
    for (index++; index < end_card_index; index++) {
      if (_scc->starts_object(index)) {
        return false;
      }
    }
  } else {
    // This is a mixed evacuation or a global collection: rely on mark bits to identify which objects need to be properly registered.
    assert(!ShenandoahHeap::heap()->is_concurrent_old_mark_in_progress(), "Cannot rely on mark context here.");
    // If the object reaching or spanning the end of this card's memory is marked, then last_offset for this card
    // should represent this object. Otherwise, last_offset is a don't care.
    ShenandoahHeapRegion* region = heap->heap_region_containing(base_addr + offset);
    HeapWord* tams = ctx->top_at_mark_start(region);
    oop last_obj = nullptr;
    do {
      oop obj = cast_to_oop(base_addr + offset);
      if (ctx->is_marked(obj)) {
        prev_offset = offset;
        offset += obj->size();
        last_obj = obj;
      } else {
        offset = ctx->get_next_marked_addr(base_addr + offset, tams) - base_addr;
        // If there are no marked objects remaining in this region, offset equals tams - base_addr. If this offset is
        // greater than max_offset, we will immediately exit this loop. Otherwise, the next iteration of the loop will
        // treat the object at offset as marked and live (because address >= tams) and we will continue iterating object
        // by object, consulting the size() fields of each.
      }
    } while (offset < max_offset);
    if (last_obj != nullptr && prev_offset + last_obj->size() >= max_offset) {
      // The last marked object extends beyond the end of the card.
      if (_scc->get_last_start(index) != prev_offset) {
        return false;
      }
      // otherwise, the value of _scc->get_last_start(index) is a don't care because it represents a dead object and we
      // cannot verify its context
    }
  }
  return true;
}

void ShenandoahScanRemembered::coalesce_objects(HeapWord* addr, size_t length_in_words) {
  _scc->coalesce_objects(addr, length_in_words);
}

void ShenandoahScanRemembered::mark_range_as_empty(HeapWord* addr, size_t length_in_words) {
  _rs->mark_range_as_clean(addr, length_in_words);
  _scc->clear_objects_in_range(addr, length_in_words);
}

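// A "cluster" is a fixed-size group of contiguous cards (ShenandoahCardCluster::CardsPerCluster,
// defined with the ShenandoahCardCluster class) that serves as the unit of work distribution for
// remembered-set scanning; with the default 512-byte cards this is a modest slice of the heap.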
size_t ShenandoahScanRemembered::cluster_for_addr(HeapWordImpl **addr) {
  size_t card_index = _rs->card_index_for_addr(addr);
  size_t result = card_index / ShenandoahCardCluster::CardsPerCluster;
  return result;
}

HeapWord* ShenandoahScanRemembered::addr_for_cluster(size_t cluster_no) {
  size_t card_index = cluster_no * ShenandoahCardCluster::CardsPerCluster;
  return addr_for_card_index(card_index);
}

// This is used only for debug verification so don't worry about making the scan parallel.
void ShenandoahScanRemembered::roots_do(OopIterateClosure* cl) {
  ShenandoahHeap* heap = ShenandoahHeap::heap();
  bool old_bitmap_stable = heap->old_generation()->is_mark_complete();
  log_debug(gc, remset)("Scan remembered set using bitmap: %s", BOOL_TO_STR(old_bitmap_stable));
  for (size_t i = 0, n = heap->num_regions(); i < n; ++i) {
    ShenandoahHeapRegion* region = heap->get_region(i);
    if (region->is_old() && region->is_active() && !region->is_cset()) {
      HeapWord* start_of_range = region->bottom();
      HeapWord* end_of_range = region->top();
      size_t start_cluster_no = cluster_for_addr(start_of_range);
      size_t num_heapwords = end_of_range - start_of_range;
      unsigned int cluster_size = CardTable::card_size_in_words() * ShenandoahCardCluster::CardsPerCluster;
      size_t num_clusters = (size_t) ((num_heapwords - 1 + cluster_size) / cluster_size);

      // Remembered set scanner
      if (region->is_humongous()) {
        process_humongous_clusters(region->humongous_start_region(), start_cluster_no, num_clusters, end_of_range, cl,
                                   false /* use_write_table */);
      } else {
        process_clusters(start_cluster_no, num_clusters, end_of_range, cl,
                         false /* use_write_table */, 0 /* fake worker id */);
      }
    }
  }
}

#ifndef PRODUCT
// Log given card stats
void ShenandoahScanRemembered::log_card_stats(HdrSeq* stats) {
  for (int i = 0; i < MAX_CARD_STAT_TYPE; i++) {
    log_info(gc, remset)("%18s: [ %8.2f %8.2f %8.2f %8.2f %8.2f ]",
                         _card_stats_name[i],
                         stats[i].percentile(0), stats[i].percentile(25),
                         stats[i].percentile(50), stats[i].percentile(75),
                         stats[i].maximum());
  }
}

// Log card stats for all nworkers for a specific phase t
void ShenandoahScanRemembered::log_card_stats(uint nworkers, CardStatLogType t) {
  assert(ShenandoahEnableCardStats, "Do not call");
  HdrSeq* sum_stats = card_stats_for_phase(t);
  log_info(gc, remset)("%s", _card_stat_log_type[t]);
  for (uint i = 0; i < nworkers; i++) {
    log_worker_card_stats(i, sum_stats);
  }

  // Every so often, log the cumulative global stats
  if (++_card_stats_log_counter[t] >= ShenandoahCardStatsLogInterval) {
    _card_stats_log_counter[t] = 0;
    log_info(gc, remset)("Cumulative stats");
    log_card_stats(sum_stats);
  }
}

// Log card stats for the given worker_id, and clear them after merging into the given cumulative stats
void ShenandoahScanRemembered::log_worker_card_stats(uint worker_id, HdrSeq* sum_stats) {
  assert(ShenandoahEnableCardStats, "Do not call");

  HdrSeq* worker_card_stats = card_stats(worker_id);
  log_info(gc, remset)("Worker %u Card Stats: ", worker_id);
  log_card_stats(worker_card_stats);
  // Merge worker stats into the cumulative stats and clear worker stats
  merge_worker_card_stats_cumulative(worker_card_stats, sum_stats);
}

void ShenandoahScanRemembered::merge_worker_card_stats_cumulative(
  HdrSeq* worker_stats, HdrSeq* sum_stats) {
  for (int i = 0; i < MAX_CARD_STAT_TYPE; i++) {
    sum_stats[i].add(worker_stats[i]);
    worker_stats[i].clear();
  }
}
#endif

// A closure that takes an oop in the old generation and, if it's pointing
// into the young generation, dirties the corresponding remembered set entry.
// This is only used to rebuild the remembered set after a full GC.
class ShenandoahDirtyRememberedSetClosure : public BasicOopIterateClosure {
protected:
  ShenandoahGenerationalHeap* const _heap;
  ShenandoahScanRemembered* const _scanner;

public:
  ShenandoahDirtyRememberedSetClosure() :
    _heap(ShenandoahGenerationalHeap::heap()),
    _scanner(_heap->old_generation()->card_scan()) {}

  template<class T>
  inline void work(T* p) {
    assert(_heap->is_in_old(p), "Expecting to get an old gen address");
    T o = RawAccess<>::oop_load(p);
    if (!CompressedOops::is_null(o)) {
      oop obj = CompressedOops::decode_not_null(o);
      if (_heap->is_in_young(obj)) {
        // Dirty the card containing the cross-generational pointer.
        _scanner->mark_card_as_dirty((HeapWord*) p);
      }
    }
  }

  virtual void do_oop(narrowOop* p) { work(p); }
  virtual void do_oop(oop* p) { work(p); }
};

ShenandoahDirectCardMarkRememberedSet::ShenandoahDirectCardMarkRememberedSet(ShenandoahCardTable* card_table, size_t total_card_count) :
  LogCardValsPerIntPtr(log2i_exact(sizeof(intptr_t)) - log2i_exact(sizeof(CardValue))),
  LogCardSizeInWords(log2i_exact(CardTable::card_size_in_words())) {

  // Paranoid assert for the LogCardValsPerIntPtr calculation above
  assert(sizeof(intptr_t) > sizeof(CardValue), "LogCardValsPerIntPtr would underflow");

  _heap = ShenandoahHeap::heap();
  _card_table = card_table;
  _total_card_count = total_card_count;
  _card_shift = CardTable::card_shift();

  _byte_map = _card_table->byte_for_index(0);

  _whole_heap_base = _card_table->addr_for(_byte_map);
  _byte_map_base = _byte_map - (uintptr_t(_whole_heap_base) >> _card_shift);

  assert(total_card_count % ShenandoahCardCluster::CardsPerCluster == 0, "Invalid card count.");
  assert(total_card_count > 0, "Card count cannot be zero.");
}

// Merge any dirty values from the write table into the read table, while leaving
// the write table unchanged.
void ShenandoahDirectCardMarkRememberedSet::merge_write_table(HeapWord* start, size_t word_count) {
  size_t start_index = card_index_for_addr(start);
#ifdef ASSERT
  // avoid querying card_index_for_addr() for an address past end of heap
  size_t end_index = card_index_for_addr(start + word_count - 1) + 1;
#endif
  assert(start_index % ((size_t)1 << LogCardValsPerIntPtr) == 0, "Expected a multiple of CardValsPerIntPtr");
  assert(end_index % ((size_t)1 << LogCardValsPerIntPtr) == 0, "Expected a multiple of CardValsPerIntPtr");

  // We'll access in groups of intptr_t worth of card entries
  intptr_t* const read_table  = (intptr_t*) &(_card_table->read_byte_map())[start_index];
  intptr_t* const write_table = (intptr_t*) &(_card_table->write_byte_map())[start_index];

  // Avoid division, use shift instead
  assert(word_count % ((size_t)1 << (LogCardSizeInWords + LogCardValsPerIntPtr)) == 0, "Expected a multiple of CardSizeInWords*CardValsPerIntPtr");
  size_t const num = word_count >> (LogCardSizeInWords + LogCardValsPerIntPtr);

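  // dirty_card_val() is 0 and clean_card_val() is 0xff, so a bytewise AND of the two tables
  // leaves a byte dirty (0) exactly when it is dirty in either table; processing an intptr_t
  // worth of card bytes at a time merges sizeof(intptr_t) cards per iteration.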
  for (size_t i = 0; i < num; i++) {
    read_table[i] &= write_table[i];
  }

  log_info(gc, remset)("Finished merging write_table into read_table.");
}

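// Exchange the roles of the read and write card tables, and point every thread's local
// card-table base at the new write table so the post-write barrier keeps dirtying the write
// table while remembered-set scanning consumes the (now stable) read table. The new write
// table is expected to be entirely clean at this point, as the debug-only loop below asserts.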
void ShenandoahDirectCardMarkRememberedSet::swap_card_tables() {
  CardTable::CardValue* new_ptr = _card_table->swap_read_and_write_tables();

#ifdef ASSERT
  CardValue* start_bp = &(_card_table->write_byte_map())[0];
  CardValue* end_bp = &(start_bp[_card_table->last_valid_index()]);

  while (start_bp <= end_bp) {
    assert(*start_bp == CardTable::clean_card_val(), "Should be clean: " PTR_FORMAT, p2i(start_bp));
    start_bp++;
  }
#endif

  struct SwapTLSCardTable : public ThreadClosure {
    CardTable::CardValue* _new_ptr;
    SwapTLSCardTable(CardTable::CardValue* np) : _new_ptr(np) {}
    virtual void do_thread(Thread* t) {
      ShenandoahThreadLocalData::set_card_table(t, _new_ptr);
    }
  } swap_it(new_ptr);

  // Iterate over threads and adjust their thread-local data
  Threads::threads_do(&swap_it);

  log_info(gc, barrier)("Current write_card_table: " PTR_FORMAT, p2i(swap_it._new_ptr));
}

ShenandoahScanRememberedTask::ShenandoahScanRememberedTask(ShenandoahObjToScanQueueSet* queue_set,
                                                           ShenandoahObjToScanQueueSet* old_queue_set,
                                                           ShenandoahReferenceProcessor* rp,
                                                           ShenandoahRegionChunkIterator* work_list, bool is_concurrent) :
  WorkerTask("Scan Remembered Set"),
  _queue_set(queue_set), _old_queue_set(old_queue_set), _rp(rp), _work_list(work_list), _is_concurrent(is_concurrent) {
  bool old_bitmap_stable = ShenandoahHeap::heap()->old_generation()->is_mark_complete();
  log_debug(gc, remset)("Scan remembered set using bitmap: %s", BOOL_TO_STR(old_bitmap_stable));
}

void ShenandoahScanRememberedTask::work(uint worker_id) {
  if (_is_concurrent) {
    // This sets up a thread local reference to the worker_id which is needed by the weak reference processor.
    ShenandoahConcurrentWorkerSession worker_session(worker_id);
    ShenandoahSuspendibleThreadSetJoiner stsj;
    do_work(worker_id);
  } else {
    // This sets up a thread local reference to the worker_id which is needed by the weak reference processor.
    ShenandoahParallelWorkerSession worker_session(worker_id);
    do_work(worker_id);
  }
}

void ShenandoahScanRememberedTask::do_work(uint worker_id) {
  ShenandoahWorkerTimingsTracker x(ShenandoahPhaseTimings::init_scan_rset, ShenandoahPhaseTimings::ScanClusters, worker_id);

  ShenandoahObjToScanQueue* q = _queue_set->queue(worker_id);
  ShenandoahObjToScanQueue* old = _old_queue_set == nullptr ? nullptr : _old_queue_set->queue(worker_id);
  ShenandoahMarkRefsClosure<YOUNG> cl(q, _rp, old);
  ShenandoahGenerationalHeap* heap = ShenandoahGenerationalHeap::heap();
  ShenandoahScanRemembered* scanner = heap->old_generation()->card_scan();

  // Set up the thread-local closure for the Shenandoah reference processor
  _rp->set_mark_closure(worker_id, &cl);
  struct ShenandoahRegionChunk assignment;
  while (_work_list->next(&assignment)) {
    ShenandoahHeapRegion* region = assignment._r;
    log_debug(gc)("ShenandoahScanRememberedTask::do_work(%u), processing slice of region "
                  SIZE_FORMAT " at offset " SIZE_FORMAT ", size: " SIZE_FORMAT,
                  worker_id, region->index(), assignment._chunk_offset, assignment._chunk_size);
    if (region->is_old()) {
      size_t cluster_size =
        CardTable::card_size_in_words() * ShenandoahCardCluster::CardsPerCluster;
      size_t clusters = assignment._chunk_size / cluster_size;
      assert(clusters * cluster_size == assignment._chunk_size, "Chunk assignments must align on cluster boundaries");
      HeapWord* end_of_range = region->bottom() + assignment._chunk_offset + assignment._chunk_size;

      // During concurrent mark, region->top() equals TAMS with respect to the current young-gen pass.
      if (end_of_range > region->top()) {
        end_of_range = region->top();
      }
      scanner->process_region_slice(region, assignment._chunk_offset, clusters, end_of_range, &cl, false, worker_id);
    }
#ifdef ENABLE_REMEMBERED_SET_CANCELLATION
    // This check is currently disabled to avoid crashes that occur
    // when we try to cancel remembered set scanning; it should be re-enabled
    // after the issues are fixed, as it would allow more prompt cancellation and
    // transition to degenerated / full GCs. Note that work that has been assigned/
    // claimed above must be completed before we return here upon cancellation.
    if (heap->check_cancelled_gc_and_yield(_is_concurrent)) {
      return;
    }
#endif
  }
}

size_t ShenandoahRegionChunkIterator::calc_regular_group_size() {
  // The group size is calculated from the number of regions. Suppose the heap has N regions. The first group processes
  // N/2 regions. The second group processes N/4 regions, the third group N/8 regions and so on.
  // Note that the infinite series N/2 + N/4 + N/8 + N/16 + ... sums to N.
  //
  // The normal group size is the number of regions / 2.
  //
  // If region_size_words is greater than _maximum_chunk_size_words, the first group_size is
  // larger than the normal group size because each chunk in the group will be smaller than the region size.
  //
  // The last group also has more than the normal number of entries because it finishes the total scanning effort. The chunk
  // sizes are different for each group. The intention is that the first group processes roughly half of the heap, the second
  // processes half of the remaining heap, the third processes half of what remains and so on. The smallest chunk size
  // is represented by _smallest_chunk_size_words. We do not divide work any smaller than this.
  //
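  // For example (illustration only): with 2048 regions the regular group size is 1024 chunks, so
  // the first group covers roughly half the heap in region-sized chunks, the second group roughly
  // a quarter in half-region chunks, and so on, down to _smallest_chunk_size_words.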
  size_t group_size = _heap->num_regions() / 2;
  return group_size;
}

size_t ShenandoahRegionChunkIterator::calc_first_group_chunk_size_b4_rebalance() {
  size_t words_in_first_chunk = ShenandoahHeapRegion::region_size_words();
  return words_in_first_chunk;
}

size_t ShenandoahRegionChunkIterator::calc_num_groups() {
  size_t total_heap_size = _heap->num_regions() * ShenandoahHeapRegion::region_size_words();
  size_t num_groups = 0;
  size_t cumulative_group_span = 0;
  size_t current_group_span = _first_group_chunk_size_b4_rebalance * _regular_group_size;
  size_t smallest_group_span = smallest_chunk_size_words() * _regular_group_size;
  while ((num_groups < _maximum_groups) && (cumulative_group_span + current_group_span <= total_heap_size)) {
    num_groups++;
    cumulative_group_span += current_group_span;
    if (current_group_span <= smallest_group_span) {
      break;
    } else {
      current_group_span /= 2;    // Each group spans half of what the preceding group spanned.
    }
  }
  // Loop post condition:
  //   num_groups <= _maximum_groups
  //   cumulative_group_span is the memory spanned by num_groups
  //   current_group_span is the span of the last fully populated group (assuming the loop iterates at least once)
  //   each of num_groups is fully populated with _regular_group_size chunks in each
  // Non post conditions:
  //   cumulative_group_span may be less than total_heap_size for one or more of the following reasons
  //   a) The number of regions remaining to be spanned is smaller than a complete group, or
  //   b) We have filled up all groups through _maximum_groups and still have not spanned all regions

  if (cumulative_group_span < total_heap_size) {
    // We've got more regions to span
    if ((num_groups < _maximum_groups) && (current_group_span > smallest_group_span)) {
      num_groups++;    // Place all remaining regions into a new not-full group (chunk_size half that of previous group)
    }
    // Else we are unable to create a new group because we've exceeded the number of allowed groups or have reached the
    // minimum chunk size.

    // Any remaining regions will be treated as if they are part of the most recently created group. This group will
    // have more than _regular_group_size chunks within it.
  }
  assert(num_groups <= _maximum_groups, "Cannot have more than %zu groups", _maximum_groups);
  return num_groups;
}

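// Count the total number of chunks produced by the group schedule above: each group normally
// contributes _regular_group_size chunks, the first group is rebalanced into chunks of at most
// _maximum_chunk_size_words when regions exceed that limit, and the final group absorbs whatever
// remains of the heap in chunks of its (smallest) size.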
size_t ShenandoahRegionChunkIterator::calc_total_chunks() {
  size_t region_size_words = ShenandoahHeapRegion::region_size_words();
  size_t unspanned_heap_size = _heap->num_regions() * region_size_words;
  size_t num_chunks = 0;
  size_t cumulative_group_span = 0;
  size_t current_group_span = _first_group_chunk_size_b4_rebalance * _regular_group_size;
  size_t smallest_group_span = smallest_chunk_size_words() * _regular_group_size;

  // The first group gets special handling because the first chunk size can be no larger than _maximum_chunk_size_words
  if (region_size_words > _maximum_chunk_size_words) {
    // In the case that we shrink the first group's chunk size, certain other groups will also be subsumed within the first group
    size_t effective_chunk_size = _first_group_chunk_size_b4_rebalance;
    uint coalesced_groups = 0;
    while (effective_chunk_size >= _maximum_chunk_size_words) {
      // Each iteration of this loop subsumes one original group into a new rebalanced initial group.
      num_chunks += current_group_span / _maximum_chunk_size_words;
      unspanned_heap_size -= current_group_span;
      effective_chunk_size /= 2;
      current_group_span /= 2;
      coalesced_groups++;
    }
    assert(effective_chunk_size * 2 == _maximum_chunk_size_words,
           "We assume _first_group_chunk_size_b4_rebalance is _maximum_chunk_size_words * a power of two");
    _largest_chunk_size_words = _maximum_chunk_size_words;
    _adjusted_num_groups = _num_groups - (coalesced_groups - 1);
  } else {
    num_chunks = _regular_group_size;
    unspanned_heap_size -= current_group_span;
    _largest_chunk_size_words = current_group_span / num_chunks;
    _adjusted_num_groups = _num_groups;
    current_group_span /= 2;
  }

  size_t spanned_groups = 1;
  while (unspanned_heap_size > 0) {
    if (current_group_span <= unspanned_heap_size) {
      unspanned_heap_size -= current_group_span;
      num_chunks += _regular_group_size;
      spanned_groups++;

      // _num_groups is the number of groups required to span the configured heap size. We are not allowed
      // to change the number of groups. The last group is responsible for spanning all chunks not spanned
      // by previously processed groups.
      if (spanned_groups >= _num_groups) {
        // The last group has more than _regular_group_size entries.
        size_t chunk_span = current_group_span / _regular_group_size;
        size_t extra_chunks = unspanned_heap_size / chunk_span;
        assert(extra_chunks * chunk_span == unspanned_heap_size, "Chunks must precisely span regions");
        num_chunks += extra_chunks;
        return num_chunks;
      } else if (current_group_span <= smallest_group_span) {
        // We cannot introduce new groups because we've reached the lower bound on group size. So this last
        // group may hold extra chunks.
        size_t chunk_span = smallest_chunk_size_words();
        size_t extra_chunks = unspanned_heap_size / chunk_span;
        assert(extra_chunks * chunk_span == unspanned_heap_size, "Chunks must precisely span regions");
        num_chunks += extra_chunks;
        return num_chunks;
      } else {
        current_group_span /= 2;
      }
    } else {
      // This last group has fewer than _regular_group_size entries.
      size_t chunk_span = current_group_span / _regular_group_size;
      size_t last_group_size = unspanned_heap_size / chunk_span;
      assert(last_group_size * chunk_span == unspanned_heap_size, "Chunks must precisely span regions");
      num_chunks += last_group_size;
      return num_chunks;
    }
  }
  return num_chunks;
}

ShenandoahRegionChunkIterator::ShenandoahRegionChunkIterator(size_t worker_count) :
  ShenandoahRegionChunkIterator(ShenandoahHeap::heap(), worker_count)
{
}

ShenandoahRegionChunkIterator::ShenandoahRegionChunkIterator(ShenandoahHeap* heap, size_t worker_count) :
  _heap(heap),
  _regular_group_size(calc_regular_group_size()),
  _first_group_chunk_size_b4_rebalance(calc_first_group_chunk_size_b4_rebalance()),
  _num_groups(calc_num_groups()),
  _total_chunks(calc_total_chunks()),
  _index(0)
{
#ifdef ASSERT
  size_t expected_chunk_size_words = _clusters_in_smallest_chunk * CardTable::card_size_in_words() * ShenandoahCardCluster::CardsPerCluster;
  assert(smallest_chunk_size_words() == expected_chunk_size_words, "_smallest_chunk_size (" SIZE_FORMAT ") is not valid because it does not equal (" SIZE_FORMAT ")",
         smallest_chunk_size_words(), expected_chunk_size_words);
  assert(_num_groups <= _maximum_groups,
         "The number of remembered set scanning groups must be less than or equal to maximum groups");
  assert(smallest_chunk_size_words() << (_adjusted_num_groups - 1) == _largest_chunk_size_words,
         "The number of groups (%zu) needs to span smallest chunk size (%zu) to largest chunk size (%zu)",
         _adjusted_num_groups, smallest_chunk_size_words(), _largest_chunk_size_words);
#endif

  size_t words_in_region = ShenandoahHeapRegion::region_size_words();
  _region_index[0] = 0;
  _group_offset[0] = 0;
  if (words_in_region > _maximum_chunk_size_words) {
    // In the case that we shrink the first group's chunk size, certain other groups will also be subsumed within the first group
    size_t num_chunks = 0;
    size_t effective_chunk_size = _first_group_chunk_size_b4_rebalance;
    size_t current_group_span = effective_chunk_size * _regular_group_size;
    while (effective_chunk_size >= _maximum_chunk_size_words) {
      num_chunks += current_group_span / _maximum_chunk_size_words;
      effective_chunk_size /= 2;
      current_group_span /= 2;
    }
    _group_entries[0] = num_chunks;
    _group_chunk_size[0] = _maximum_chunk_size_words;
  } else {
    _group_entries[0] = _regular_group_size;
    _group_chunk_size[0] = _first_group_chunk_size_b4_rebalance;
  }

  size_t previous_group_span = _group_entries[0] * _group_chunk_size[0];
  for (size_t i = 1; i < _adjusted_num_groups; i++) {
    _group_chunk_size[i] = _group_chunk_size[i-1] / 2;
    size_t chunks_in_group = _regular_group_size;
    size_t this_group_span = _group_chunk_size[i] * chunks_in_group;
    size_t total_span_of_groups = previous_group_span + this_group_span;
    _region_index[i] = previous_group_span / words_in_region;
    _group_offset[i] = previous_group_span % words_in_region;
    _group_entries[i] = _group_entries[i-1] + _regular_group_size;
    previous_group_span = total_span_of_groups;
  }
  if (_group_entries[_adjusted_num_groups-1] < _total_chunks) {
    assert((_total_chunks - _group_entries[_adjusted_num_groups-1]) * _group_chunk_size[_adjusted_num_groups-1] + previous_group_span ==
           heap->num_regions() * words_in_region, "Total region chunks (" SIZE_FORMAT
           ") do not span total heap regions (" SIZE_FORMAT ")", _total_chunks, _heap->num_regions());
    previous_group_span += (_total_chunks - _group_entries[_adjusted_num_groups-1]) * _group_chunk_size[_adjusted_num_groups-1];
    _group_entries[_adjusted_num_groups-1] = _total_chunks;
  }
  assert(previous_group_span == heap->num_regions() * words_in_region, "Total region chunks (" SIZE_FORMAT
         ") do not span total heap regions (" SIZE_FORMAT "): " SIZE_FORMAT " does not equal " SIZE_FORMAT,
         _total_chunks, _heap->num_regions(), previous_group_span, heap->num_regions() * words_in_region);

  // Not necessary, but keeps things tidy
  for (size_t i = _adjusted_num_groups; i < _maximum_groups; i++) {
    _region_index[i] = 0;
    _group_offset[i] = 0;
    _group_entries[i] = _group_entries[i-1];
    _group_chunk_size[i] = 0;
  }
}

void ShenandoahRegionChunkIterator::reset() {
  _index = 0;
}

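// Parallel task that rebuilds the remembered set (after a full GC has moved old objects): for
// each old, active region it clears the object-start information, re-registers every object,
// and re-dirties the cards of fields that still point into the young generation, using the
// ShenandoahDirtyRememberedSetClosure defined above.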
ShenandoahReconstructRememberedSetTask::ShenandoahReconstructRememberedSetTask(ShenandoahRegionIterator* regions)
  : WorkerTask("Shenandoah Reset Bitmap")
  , _regions(regions) { }

void ShenandoahReconstructRememberedSetTask::work(uint worker_id) {
  ShenandoahParallelWorkerSession worker_session(worker_id);
  ShenandoahHeapRegion* r = _regions->next();
  ShenandoahGenerationalHeap* heap = ShenandoahGenerationalHeap::heap();
  ShenandoahScanRemembered* scanner = heap->old_generation()->card_scan();
  ShenandoahDirtyRememberedSetClosure dirty_cards_for_cross_generational_pointers;

  while (r != nullptr) {
    if (r->is_old() && r->is_active()) {
      HeapWord* obj_addr = r->bottom();
      if (r->is_humongous_start()) {
        // First, clear the remembered set
        oop obj = cast_to_oop(obj_addr);
        size_t size = obj->size();

        size_t num_regions = ShenandoahHeapRegion::required_regions(size * HeapWordSize);
        size_t region_index = r->index();
        ShenandoahHeapRegion* humongous_region = heap->get_region(region_index);
        while (num_regions-- != 0) {
          scanner->reset_object_range(humongous_region->bottom(), humongous_region->end());
          region_index++;
          humongous_region = heap->get_region(region_index);
        }

        // Then register the humongous object and DIRTY relevant remembered set cards
        scanner->register_object_without_lock(obj_addr);
        obj->oop_iterate(&dirty_cards_for_cross_generational_pointers);
      } else if (!r->is_humongous()) {
        scanner->reset_object_range(r->bottom(), r->end());

        // Then iterate over all objects, registering each object and DIRTYing relevant remembered set cards
        HeapWord* t = r->top();
        while (obj_addr < t) {
          oop obj = cast_to_oop(obj_addr);
          scanner->register_object_without_lock(obj_addr);
          obj_addr += obj->oop_iterate_size(&dirty_cards_for_cross_generational_pointers);
        }
      } // else, ignore humongous continuation region
    }
    // else, this region is FREE or YOUNG or inactive and we can ignore it.
    r = _regions->next();
  }
}