/*
 * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */


#include "precompiled.hpp"

#include "gc/shenandoah/shenandoahOldGeneration.hpp"
#include "gc/shenandoah/shenandoahHeap.inline.hpp"
#include "gc/shenandoah/shenandoahOopClosures.inline.hpp"
#include "gc/shenandoah/shenandoahReferenceProcessor.hpp"
#include "gc/shenandoah/shenandoahScanRemembered.inline.hpp"
#include "logging/log.hpp"

size_t ShenandoahDirectCardMarkRememberedSet::last_valid_index() const {
  return _card_table->last_valid_index();
}

size_t ShenandoahDirectCardMarkRememberedSet::total_cards() const {
  return _total_card_count;
}

size_t ShenandoahDirectCardMarkRememberedSet::card_index_for_addr(HeapWord *p) const {
  return _card_table->index_for(p);
}

HeapWord* ShenandoahDirectCardMarkRememberedSet::addr_for_card_index(size_t card_index) const {
  return _whole_heap_base + CardTable::card_size_in_words() * card_index;
}

bool ShenandoahDirectCardMarkRememberedSet::is_write_card_dirty(size_t card_index) const {
  CardValue* bp = &(_card_table->write_byte_map())[card_index];
  return (bp[0] == CardTable::dirty_card_val());
}

bool ShenandoahDirectCardMarkRememberedSet::is_card_dirty(size_t card_index) const {
  CardValue* bp = &(_card_table->read_byte_map())[card_index];
  return (bp[0] == CardTable::dirty_card_val());
}

void ShenandoahDirectCardMarkRememberedSet::mark_card_as_dirty(size_t card_index) {
  CardValue* bp = &(_card_table->write_byte_map())[card_index];
  bp[0] = CardTable::dirty_card_val();
}

void ShenandoahDirectCardMarkRememberedSet::mark_range_as_dirty(size_t card_index, size_t num_cards) {
  CardValue* bp = &(_card_table->write_byte_map())[card_index];
  while (num_cards-- > 0) {
    *bp++ = CardTable::dirty_card_val();
  }
}

void ShenandoahDirectCardMarkRememberedSet::mark_card_as_clean(size_t card_index) {
  CardValue* bp = &(_card_table->write_byte_map())[card_index];
  bp[0] = CardTable::clean_card_val();
}

void ShenandoahDirectCardMarkRememberedSet::mark_range_as_clean(size_t card_index, size_t num_cards) {
  CardValue* bp = &(_card_table->write_byte_map())[card_index];
  while (num_cards-- > 0) {
    *bp++ = CardTable::clean_card_val();
  }
}

bool ShenandoahDirectCardMarkRememberedSet::is_card_dirty(HeapWord* p) const {
  size_t index = card_index_for_addr(p);
  CardValue* bp = &(_card_table->read_byte_map())[index];
  return (bp[0] == CardTable::dirty_card_val());
}
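// Note on the accessors above and below: generational Shenandoah keeps two card tables.
// Mutator post-write barriers dirty the "write" table, while remembered-set scanning works
// from the "read" table, which holds a stable snapshot; reset_remset() and
// merge_write_table() further down in this file move state between the two. Queries with
// the write_ prefix consult the write table, the unprefixed queries consult the read table,
// and all of the mark_* mutators update the write table.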
bool ShenandoahDirectCardMarkRememberedSet::is_write_card_dirty(HeapWord* p) const {
  size_t index = card_index_for_addr(p);
  CardValue* bp = &(_card_table->write_byte_map())[index];
  return (bp[0] == CardTable::dirty_card_val());
}

void ShenandoahDirectCardMarkRememberedSet::mark_card_as_dirty(HeapWord* p) {
  size_t index = card_index_for_addr(p);
  CardValue* bp = &(_card_table->write_byte_map())[index];
  bp[0] = CardTable::dirty_card_val();
}

void ShenandoahDirectCardMarkRememberedSet::mark_range_as_dirty(HeapWord* p, size_t num_heap_words) {
  CardValue* bp = &(_card_table->write_byte_map_base())[uintptr_t(p) >> _card_shift];
  CardValue* end_bp = &(_card_table->write_byte_map_base())[uintptr_t(p + num_heap_words) >> _card_shift];
  // If (p + num_heap_words) is not aligned on a card boundary, we also need to dirty the last card.
  if (((unsigned long long) (p + num_heap_words)) & (CardTable::card_size() - 1)) {
    end_bp++;
  }
  while (bp < end_bp) {
    *bp++ = CardTable::dirty_card_val();
  }
}

void ShenandoahDirectCardMarkRememberedSet::mark_card_as_clean(HeapWord* p) {
  size_t index = card_index_for_addr(p);
  CardValue* bp = &(_card_table->write_byte_map())[index];
  bp[0] = CardTable::clean_card_val();
}

void ShenandoahDirectCardMarkRememberedSet::mark_range_as_clean(HeapWord* p, size_t num_heap_words) {
  CardValue* bp = &(_card_table->write_byte_map_base())[uintptr_t(p) >> _card_shift];
  CardValue* end_bp = &(_card_table->write_byte_map_base())[uintptr_t(p + num_heap_words) >> _card_shift];
  // If (p + num_heap_words) is not aligned on a card boundary, we also need to clean the last card.
  if (((unsigned long long) (p + num_heap_words)) & (CardTable::card_size() - 1)) {
    end_bp++;
  }
  while (bp < end_bp) {
    *bp++ = CardTable::clean_card_val();
  }
}

// No lock required because arguments align with card boundaries.
void ShenandoahCardCluster::reset_object_range(HeapWord* from, HeapWord* to) {
  assert(((((unsigned long long) from) & (CardTable::card_size() - 1)) == 0) &&
         ((((unsigned long long) to) & (CardTable::card_size() - 1)) == 0),
         "reset_object_range bounds must align with card boundaries");
  size_t card_at_start = _rs->card_index_for_addr(from);
  size_t num_cards = (to - from) / CardTable::card_size_in_words();

  for (size_t i = 0; i < num_cards; i++) {
    _object_starts[card_at_start + i].short_word = 0;
  }
}
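// Illustrative example of the object-start bookkeeping maintained below (assuming the
// default 512-byte cards, i.e. 64 HeapWords on a 64-bit VM): if objects begin at
// card-relative word offsets 3, 17 and 50 within some card, that card's starts_object bit
// is set, its first_start is 3 and its last_start is 50. register_object() maintains this
// state one object at a time; coalesce_objects() repairs it when neighboring dead objects
// are merged into a single filler object.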
// Assume only one thread at a time registers objects pertaining to
// each card-table entry's range of memory.
void ShenandoahCardCluster::register_object(HeapWord* address) {
  shenandoah_assert_heaplocked();

  register_object_without_lock(address);
}

void ShenandoahCardCluster::register_object_without_lock(HeapWord* address) {
  size_t card_at_start = _rs->card_index_for_addr(address);
  HeapWord* card_start_address = _rs->addr_for_card_index(card_at_start);
  uint8_t offset_in_card = address - card_start_address;

  if (!starts_object(card_at_start)) {
    set_starts_object_bit(card_at_start);
    set_first_start(card_at_start, offset_in_card);
    set_last_start(card_at_start, offset_in_card);
  } else {
    if (offset_in_card < get_first_start(card_at_start))
      set_first_start(card_at_start, offset_in_card);
    if (offset_in_card > get_last_start(card_at_start))
      set_last_start(card_at_start, offset_in_card);
  }
}

void ShenandoahCardCluster::coalesce_objects(HeapWord* address, size_t length_in_words) {

  size_t card_at_start = _rs->card_index_for_addr(address);
  HeapWord* card_start_address = _rs->addr_for_card_index(card_at_start);
  size_t card_at_end = card_at_start + ((address + length_in_words) - card_start_address) / CardTable::card_size_in_words();

  if (card_at_start == card_at_end) {
    // There are no changes to the get_first_start array. Either get_first_start(card_at_start) returns this coalesced object,
    // or it returns an object that precedes the coalesced object.
    if (card_start_address + get_last_start(card_at_start) < address + length_in_words) {
      uint8_t coalesced_offset = static_cast<uint8_t>(address - card_start_address);
      // The object that used to be the last object starting within this card is being subsumed within the coalesced
      // object. Since we always coalesce entire objects, this condition only occurs if the last object ends before or at
      // the end of the card's memory range and there is no object following this object. In this case, adjust last_start
      // to represent the start of the coalesced range.
      set_last_start(card_at_start, coalesced_offset);
    }
    // Else, no changes to last_starts information. Either get_last_start(card_at_start) returns the object that immediately
    // follows the coalesced object, or it returns an object that follows the object immediately following the coalesced object.
  } else {
    uint8_t coalesced_offset = static_cast<uint8_t>(address - card_start_address);
    if (get_last_start(card_at_start) > coalesced_offset) {
      // Existing last start is being coalesced, create new last start
      set_last_start(card_at_start, coalesced_offset);
    }
    // otherwise, get_last_start(card_at_start) must equal coalesced_offset

    // All the cards between first and last get cleared.
    for (size_t i = card_at_start + 1; i < card_at_end; i++) {
      clear_starts_object_bit(i);
    }

    uint8_t follow_offset = static_cast<uint8_t>((address + length_in_words) - _rs->addr_for_card_index(card_at_end));
    if (starts_object(card_at_end) && (get_first_start(card_at_end) < follow_offset)) {
      // It may be that after coalescing within this last card's memory range, the last card
      // no longer holds an object.
      if (get_last_start(card_at_end) >= follow_offset) {
        set_first_start(card_at_end, follow_offset);
      } else {
        // last_start is being coalesced so this card no longer has any objects.
        clear_starts_object_bit(card_at_end);
      }
    }
    // else
    //   card_at_end did not have an object, so it still does not have an object, or
    //   card_at_end had an object that starts after the coalesced object, so no changes required for card_at_end

  }
}


size_t ShenandoahCardCluster::get_first_start(size_t card_index) const {
  assert(starts_object(card_index), "Can't get first start because no object starts here");
  return _object_starts[card_index].offsets.first & FirstStartBits;
}

size_t ShenandoahCardCluster::get_last_start(size_t card_index) const {
  assert(starts_object(card_index), "Can't get last start because no object starts here");
  return _object_starts[card_index].offsets.last;
}

// Given a card_index, return the starting address of the first block in the heap
// that straddles into this card. If this card is co-initial with an object, then
// this would return the first address of the range that this card covers, which is
// where the card's first object also begins.
HeapWord* ShenandoahCardCluster::block_start(const size_t card_index) const {

  HeapWord* left = _rs->addr_for_card_index(card_index);

#ifdef ASSERT
  assert(ShenandoahHeap::heap()->mode()->is_generational(), "Do not use in non-generational mode");
  ShenandoahHeapRegion* region = ShenandoahHeap::heap()->heap_region_containing(left);
  assert(region->is_old(), "Do not use for young regions");
  // For humongous regions it's more efficient to jump directly to the
  // start region.
  assert(!region->is_humongous(), "Use region->humongous_start_region() instead");
#endif
  if (starts_object(card_index) && get_first_start(card_index) == 0) {
    // This card contains a co-initial object; a fortiori, it covers
    // also the case of a card being the first in a region.
    assert(oopDesc::is_oop(cast_to_oop(left)), "Should be an object");
    return left;
  }

  HeapWord* p = nullptr;
  oop obj = cast_to_oop(p);
  ssize_t cur_index = (ssize_t)card_index;
  assert(cur_index >= 0, "Overflow");
  assert(cur_index > 0, "Should have returned above");
  // Walk backwards over the cards...
  while (--cur_index > 0 && !starts_object(cur_index)) {
    // ... to the one that starts the object
  }
  // cur_index should start an object: we should not have walked
  // past the left end of the region.
  assert(cur_index >= 0 && (cur_index <= (ssize_t)card_index), "Error");
  assert(region->bottom() <= _rs->addr_for_card_index(cur_index),
         "Fell off the bottom of containing region");
  assert(starts_object(cur_index), "Error");
  size_t offset = get_last_start(cur_index);
  // can avoid call via card size arithmetic below instead
  p = _rs->addr_for_card_index(cur_index) + offset;
  // Recall that we already dealt with the co-initial object case above
  assert(p < left, "obj should start before left");
  // While it is safe to ask an object its size in the loop that
  // follows, the (ifdef'd out) loop should never be needed.
  // 1. we ask this question only for regions in the old generation
  // 2. there is no direct allocation ever by mutators in old generation
  //    regions. Only GC will ever allocate in old regions, and then
  //    too only during promotion/evacuation phases. Thus there is no danger
  //    of races between reading from and writing to the object start array,
  //    or of asking partially initialized objects their size (in the loop below).
  // 3. only GC asks this question during phases when it is not concurrently
  //    evacuating/promoting, viz. during concurrent root scanning (before
  //    the evacuation phase) and during concurrent update refs (after the
  //    evacuation phase) of young collections. This is never called
  //    during old or global collections.
  // 4. Every allocation under TAMS updates the object start array.
  NOT_PRODUCT(obj = cast_to_oop(p);)
  assert(oopDesc::is_oop(obj), "Should be an object");
#define WALK_FORWARD_IN_BLOCK_START false
  while (WALK_FORWARD_IN_BLOCK_START && p + obj->size() < left) {
    p += obj->size();
  }
#undef WALK_FORWARD_IN_BLOCK_START // false
  assert(p + obj->size() > left, "obj should end after left");
  return p;
}

size_t ShenandoahScanRemembered::card_index_for_addr(HeapWord* p) {
  return _rs->card_index_for_addr(p);
}

HeapWord* ShenandoahScanRemembered::addr_for_card_index(size_t card_index) {
  return _rs->addr_for_card_index(card_index);
}

bool ShenandoahScanRemembered::is_card_dirty(size_t card_index) {
  return _rs->is_card_dirty(card_index);
}

bool ShenandoahScanRemembered::is_write_card_dirty(size_t card_index) {
  return _rs->is_write_card_dirty(card_index);
}

bool ShenandoahScanRemembered::is_card_dirty(HeapWord* p) {
  return _rs->is_card_dirty(p);
}

void ShenandoahScanRemembered::mark_card_as_dirty(HeapWord* p) {
  _rs->mark_card_as_dirty(p);
}

bool ShenandoahScanRemembered::is_write_card_dirty(HeapWord* p) {
  return _rs->is_write_card_dirty(p);
}

void ShenandoahScanRemembered::mark_range_as_dirty(HeapWord* p, size_t num_heap_words) {
  _rs->mark_range_as_dirty(p, num_heap_words);
}

void ShenandoahScanRemembered::mark_card_as_clean(HeapWord* p) {
  _rs->mark_card_as_clean(p);
}

void ShenandoahScanRemembered::mark_range_as_clean(HeapWord* p, size_t num_heap_words) {
  _rs->mark_range_as_clean(p, num_heap_words);
}

void ShenandoahScanRemembered::reset_object_range(HeapWord* from, HeapWord* to) {
  _scc->reset_object_range(from, to);
}

void ShenandoahScanRemembered::register_object(HeapWord* addr) {
  _scc->register_object(addr);
}

void ShenandoahScanRemembered::register_object_without_lock(HeapWord* addr) {
  _scc->register_object_without_lock(addr);
}

bool ShenandoahScanRemembered::verify_registration(HeapWord* address, ShenandoahMarkingContext* ctx) {

  size_t index = card_index_for_addr(address);
  if (!_scc->starts_object(index)) {
    return false;
  }
  HeapWord* base_addr = addr_for_card_index(index);
  size_t offset = _scc->get_first_start(index);
  ShenandoahHeap* heap = ShenandoahHeap::heap();

  // Verify that I can find this object within its enclosing card by scanning forward from first_start.
  while (base_addr + offset < address) {
    oop obj = cast_to_oop(base_addr + offset);
    if (!ctx || ctx->is_marked(obj)) {
      offset += obj->size();
    } else {
      // If this object is not live, don't trust its size(); all objects above TAMS are live.
      ShenandoahHeapRegion* r = heap->heap_region_containing(obj);
      HeapWord* tams = ctx->top_at_mark_start(r);
      offset = ctx->get_next_marked_addr(base_addr + offset, tams) - base_addr;
    }
  }
  if (base_addr + offset != address) {
    return false;
  }

  // At this point, offset represents the object whose registration we are verifying. We know that at least this object resides
  // within this card's memory.

  // Make sure that last_offset is properly set for the enclosing card, but we can't verify this for
  // candidate collection-set regions during mixed evacuations, so disable this check in general
  // during mixed evacuations.

  ShenandoahHeapRegion* r = heap->heap_region_containing(base_addr + offset);
  size_t max_offset = r->top() - base_addr;
  if (max_offset > CardTable::card_size_in_words()) {
    max_offset = CardTable::card_size_in_words();
  }
  size_t prev_offset;
  if (!ctx) {
    do {
      oop obj = cast_to_oop(base_addr + offset);
      prev_offset = offset;
      offset += obj->size();
    } while (offset < max_offset);
    if (_scc->get_last_start(index) != prev_offset) {
      return false;
    }

    // base + offset represents address of first object that starts on following card, if there is one.

    // Notes: base_addr is addr_for_card_index(index)
    //        base_addr + offset is end of the object we are verifying
    //        cannot use card_index_for_addr(base_addr + offset) because it asserts arg < end of whole heap
    size_t end_card_index = index + offset / CardTable::card_size_in_words();

    if (end_card_index > index && end_card_index <= _rs->last_valid_index()) {
      // If there is a following object registered on the next card, it should begin where this object ends.
      if (_scc->starts_object(end_card_index) &&
          ((addr_for_card_index(end_card_index) + _scc->get_first_start(end_card_index)) != (base_addr + offset))) {
        return false;
      }
    }

    // Assure that no other objects are registered "inside" of this one.
    for (index++; index < end_card_index; index++) {
      if (_scc->starts_object(index)) {
        return false;
      }
    }
  } else {
    // This is a mixed evacuation or a global collection: rely on mark bits to identify which objects need to be properly registered.
    assert(!ShenandoahHeap::heap()->is_concurrent_old_mark_in_progress(), "Cannot rely on mark context here.");
    // If the object reaching or spanning the end of this card's memory is marked, then last_offset for this card
    // should represent this object. Otherwise, last_offset is a don't care.
    ShenandoahHeapRegion* region = heap->heap_region_containing(base_addr + offset);
    HeapWord* tams = ctx->top_at_mark_start(region);
    oop last_obj = nullptr;
    do {
      oop obj = cast_to_oop(base_addr + offset);
      if (ctx->is_marked(obj)) {
        prev_offset = offset;
        offset += obj->size();
        last_obj = obj;
      } else {
        offset = ctx->get_next_marked_addr(base_addr + offset, tams) - base_addr;
        // If there are no marked objects remaining in this region, offset equals tams - base_addr. If this offset is
        // greater than max_offset, we will immediately exit this loop. Otherwise, the next iteration of the loop will
        // treat the object at offset as marked and live (because address >= tams), and we will continue iterating,
        // object by object, consulting the size() fields of each.
      }
    } while (offset < max_offset);
    if (last_obj != nullptr && prev_offset + last_obj->size() >= max_offset) {
      // last marked object extends beyond end of card
      if (_scc->get_last_start(index) != prev_offset) {
        return false;
      }
      // otherwise, the value of _scc->get_last_start(index) is a don't care because it represents a dead object and we
      // cannot verify its context
    }
  }
  return true;
}

void ShenandoahScanRemembered::coalesce_objects(HeapWord* addr, size_t length_in_words) {
  _scc->coalesce_objects(addr, length_in_words);
}

void ShenandoahScanRemembered::mark_range_as_empty(HeapWord* addr, size_t length_in_words) {
  _rs->mark_range_as_clean(addr, length_in_words);
  _scc->clear_objects_in_range(addr, length_in_words);
}

size_t ShenandoahScanRemembered::cluster_for_addr(HeapWordImpl** addr) {
  size_t card_index = _rs->card_index_for_addr(addr);
  size_t result = card_index / ShenandoahCardCluster::CardsPerCluster;
  return result;
}

HeapWord* ShenandoahScanRemembered::addr_for_cluster(size_t cluster_no) {
  size_t card_index = cluster_no * ShenandoahCardCluster::CardsPerCluster;
  return addr_for_card_index(card_index);
}

// This is used only for debug verification so don't worry about making the scan parallel.
void ShenandoahScanRemembered::roots_do(OopIterateClosure* cl) {
  ShenandoahHeap* heap = ShenandoahHeap::heap();
  bool old_bitmap_stable = heap->old_generation()->is_mark_complete();
  log_info(gc, remset)("Scan remembered set using bitmap: %s", BOOL_TO_STR(old_bitmap_stable));
  for (size_t i = 0, n = heap->num_regions(); i < n; ++i) {
    ShenandoahHeapRegion* region = heap->get_region(i);
    if (region->is_old() && region->is_active() && !region->is_cset()) {
      HeapWord* start_of_range = region->bottom();
      HeapWord* end_of_range = region->top();
      size_t start_cluster_no = cluster_for_addr(start_of_range);
      size_t num_heapwords = end_of_range - start_of_range;
      unsigned int cluster_size = CardTable::card_size_in_words() * ShenandoahCardCluster::CardsPerCluster;
      size_t num_clusters = (size_t) ((num_heapwords - 1 + cluster_size) / cluster_size);

      // Remembered set scanner
      if (region->is_humongous()) {
        process_humongous_clusters(region->humongous_start_region(), start_cluster_no, num_clusters, end_of_range, cl,
                                   false /* use_write_table */);
      } else {
        process_clusters(start_cluster_no, num_clusters, end_of_range, cl,
                         false /* use_write_table */, 0 /* fake worker id */);
      }
    }
  }
}
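// Sizing note (illustrative numbers): a cluster is ShenandoahCardCluster::CardsPerCluster
// consecutive cards, so with, e.g., 64 cards per cluster and the default 512-byte cards a
// cluster would cover 32 KiB of heap (4096 HeapWords on a 64-bit VM). Clusters are the
// granularity at which process_clusters() and process_region_slice() divide remembered-set
// scanning work among workers.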
#ifndef PRODUCT
// Log given card stats
void ShenandoahScanRemembered::log_card_stats(HdrSeq* stats) {
  for (int i = 0; i < MAX_CARD_STAT_TYPE; i++) {
    log_info(gc, remset)("%18s: [ %8.2f %8.2f %8.2f %8.2f %8.2f ]",
                         _card_stats_name[i],
                         stats[i].percentile(0), stats[i].percentile(25),
                         stats[i].percentile(50), stats[i].percentile(75),
                         stats[i].maximum());
  }
}

// Log card stats for all nworkers for a specific phase t
void ShenandoahScanRemembered::log_card_stats(uint nworkers, CardStatLogType t) {
  assert(ShenandoahEnableCardStats, "Do not call");
  HdrSeq* sum_stats = card_stats_for_phase(t);
  log_info(gc, remset)("%s", _card_stat_log_type[t]);
  for (uint i = 0; i < nworkers; i++) {
    log_worker_card_stats(i, sum_stats);
  }

  // Every so often, log the cumulative global stats
  if (++_card_stats_log_counter[t] >= ShenandoahCardStatsLogInterval) {
    _card_stats_log_counter[t] = 0;
    log_info(gc, remset)("Cumulative stats");
    log_card_stats(sum_stats);
  }
}

// Log card stats for given worker_id, & clear them after merging into given cumulative stats
void ShenandoahScanRemembered::log_worker_card_stats(uint worker_id, HdrSeq* sum_stats) {
  assert(ShenandoahEnableCardStats, "Do not call");

  HdrSeq* worker_card_stats = card_stats(worker_id);
  log_info(gc, remset)("Worker %u Card Stats: ", worker_id);
  log_card_stats(worker_card_stats);
  // Merge worker stats into the cumulative stats & clear worker stats
  merge_worker_card_stats_cumulative(worker_card_stats, sum_stats);
}

void ShenandoahScanRemembered::merge_worker_card_stats_cumulative(
  HdrSeq* worker_stats, HdrSeq* sum_stats) {
  for (int i = 0; i < MAX_CARD_STAT_TYPE; i++) {
    sum_stats[i].add(worker_stats[i]);
    worker_stats[i].clear();
  }
}
#endif

// A closure that takes an oop in the old generation and, if it's pointing
// into the young generation, dirties the corresponding remembered set entry.
// This is only used to rebuild the remembered set after a full GC.
class ShenandoahDirtyRememberedSetClosure : public BasicOopIterateClosure {
protected:
  ShenandoahGenerationalHeap* const _heap;
  ShenandoahScanRemembered* const _scanner;

public:
  ShenandoahDirtyRememberedSetClosure() :
    _heap(ShenandoahGenerationalHeap::heap()),
    _scanner(_heap->old_generation()->card_scan()) {}

  template<class T>
  inline void work(T* p) {
    assert(_heap->is_in_old(p), "Expecting to get an old gen address");
    T o = RawAccess<>::oop_load(p);
    if (!CompressedOops::is_null(o)) {
      oop obj = CompressedOops::decode_not_null(o);
      if (_heap->is_in_young(obj)) {
        // Dirty the card containing the cross-generational pointer.
        _scanner->mark_card_as_dirty((HeapWord*) p);
      }
    }
  }

  virtual void do_oop(narrowOop* p) { work(p); }
  virtual void do_oop(oop* p)       { work(p); }
};

ShenandoahDirectCardMarkRememberedSet::ShenandoahDirectCardMarkRememberedSet(ShenandoahCardTable* card_table, size_t total_card_count) :
  LogCardValsPerIntPtr(log2i_exact(sizeof(intptr_t)) - log2i_exact(sizeof(CardValue))),
  LogCardSizeInWords(log2i_exact(CardTable::card_size_in_words())) {

  // Paranoid assert for LogCardValsPerIntPtr calculation above
  assert(sizeof(intptr_t) > sizeof(CardValue), "LogCardValsPerIntPtr would underflow");

  _heap = ShenandoahHeap::heap();
  _card_table = card_table;
  _total_card_count = total_card_count;
  _card_shift = CardTable::card_shift();

  _byte_map = _card_table->byte_for_index(0);

  _whole_heap_base = _card_table->addr_for(_byte_map);
  _byte_map_base = _byte_map - (uintptr_t(_whole_heap_base) >> _card_shift);

  assert(total_card_count % ShenandoahCardCluster::CardsPerCluster == 0, "Invalid card count.");
  assert(total_card_count > 0, "Card count cannot be zero.");
}
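// Note on the two bulk operations below: CardTable encodes dirty_card_val() as 0 and
// clean_card_val() as all-ones, which is what makes word-at-a-time processing safe here.
// ANDing a read-table word with a write-table word leaves a byte dirty (zero) if either
// table had it dirty, and whole-word assignment copies card values unchanged, so
// merge_write_table() and reset_remset() can stride by intptr_t rather than by card byte.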
// Merge any dirty values from write table into the read table, while leaving
// the write table unchanged.
void ShenandoahDirectCardMarkRememberedSet::merge_write_table(HeapWord* start, size_t word_count) {
  size_t start_index = card_index_for_addr(start);
#ifdef ASSERT
  // avoid querying card_index_for_addr() for an address past end of heap
  size_t end_index = card_index_for_addr(start + word_count - 1) + 1;
#endif
  assert(start_index % ((size_t)1 << LogCardValsPerIntPtr) == 0, "Expected a multiple of CardValsPerIntPtr");
  assert(end_index % ((size_t)1 << LogCardValsPerIntPtr) == 0, "Expected a multiple of CardValsPerIntPtr");

  // We'll access in groups of intptr_t worth of card entries
  intptr_t* const read_table  = (intptr_t*) &(_card_table->read_byte_map())[start_index];
  intptr_t* const write_table = (intptr_t*) &(_card_table->write_byte_map())[start_index];

  // Avoid division, use shift instead
  assert(word_count % ((size_t)1 << (LogCardSizeInWords + LogCardValsPerIntPtr)) == 0, "Expected a multiple of CardSizeInWords*CardValsPerIntPtr");
  size_t const num = word_count >> (LogCardSizeInWords + LogCardValsPerIntPtr);

  for (size_t i = 0; i < num; i++) {
    read_table[i] &= write_table[i];
  }
}

// Destructively copy the write table to the read table, and clean the write table.
void ShenandoahDirectCardMarkRememberedSet::reset_remset(HeapWord* start, size_t word_count) {
  size_t start_index = card_index_for_addr(start);
#ifdef ASSERT
  // avoid querying card_index_for_addr() for an address past end of heap
  size_t end_index = card_index_for_addr(start + word_count - 1) + 1;
#endif
  assert(start_index % ((size_t)1 << LogCardValsPerIntPtr) == 0, "Expected a multiple of CardValsPerIntPtr");
  assert(end_index % ((size_t)1 << LogCardValsPerIntPtr) == 0, "Expected a multiple of CardValsPerIntPtr");

  // We'll access in groups of intptr_t worth of card entries
  intptr_t* const read_table  = (intptr_t*) &(_card_table->read_byte_map())[start_index];
  intptr_t* const write_table = (intptr_t*) &(_card_table->write_byte_map())[start_index];

  // Avoid division, use shift instead
  assert(word_count % ((size_t)1 << (LogCardSizeInWords + LogCardValsPerIntPtr)) == 0, "Expected a multiple of CardSizeInWords*CardValsPerIntPtr");
  size_t const num = word_count >> (LogCardSizeInWords + LogCardValsPerIntPtr);

  for (size_t i = 0; i < num; i++) {
    read_table[i]  = write_table[i];
    write_table[i] = CardTable::clean_card_row_val();
  }
}

ShenandoahScanRememberedTask::ShenandoahScanRememberedTask(ShenandoahObjToScanQueueSet* queue_set,
                                                           ShenandoahObjToScanQueueSet* old_queue_set,
                                                           ShenandoahReferenceProcessor* rp,
                                                           ShenandoahRegionChunkIterator* work_list, bool is_concurrent) :
  WorkerTask("Scan Remembered Set"),
  _queue_set(queue_set), _old_queue_set(old_queue_set), _rp(rp), _work_list(work_list), _is_concurrent(is_concurrent) {
  bool old_bitmap_stable = ShenandoahHeap::heap()->old_generation()->is_mark_complete();
  log_info(gc, remset)("Scan remembered set using bitmap: %s", BOOL_TO_STR(old_bitmap_stable));
}
void ShenandoahScanRememberedTask::work(uint worker_id) {
  if (_is_concurrent) {
    // This sets up a thread local reference to the worker_id which is needed by the weak reference processor.
    ShenandoahConcurrentWorkerSession worker_session(worker_id);
    ShenandoahSuspendibleThreadSetJoiner stsj;
    do_work(worker_id);
  } else {
    // This sets up a thread local reference to the worker_id which is needed by the weak reference processor.
    ShenandoahParallelWorkerSession worker_session(worker_id);
    do_work(worker_id);
  }
}

void ShenandoahScanRememberedTask::do_work(uint worker_id) {
  ShenandoahWorkerTimingsTracker x(ShenandoahPhaseTimings::init_scan_rset, ShenandoahPhaseTimings::ScanClusters, worker_id);

  ShenandoahObjToScanQueue* q = _queue_set->queue(worker_id);
  ShenandoahObjToScanQueue* old = _old_queue_set == nullptr ? nullptr : _old_queue_set->queue(worker_id);
  ShenandoahMarkRefsClosure<YOUNG> cl(q, _rp, old);
  ShenandoahGenerationalHeap* heap = ShenandoahGenerationalHeap::heap();
  ShenandoahScanRemembered* scanner = heap->old_generation()->card_scan();

  // set up thread local closure for shen ref processor
  _rp->set_mark_closure(worker_id, &cl);
  struct ShenandoahRegionChunk assignment;
  while (_work_list->next(&assignment)) {
    ShenandoahHeapRegion* region = assignment._r;
    log_debug(gc)("ShenandoahScanRememberedTask::do_work(%u), processing slice of region "
                  SIZE_FORMAT " at offset " SIZE_FORMAT ", size: " SIZE_FORMAT,
                  worker_id, region->index(), assignment._chunk_offset, assignment._chunk_size);
    if (region->is_old()) {
      size_t cluster_size =
        CardTable::card_size_in_words() * ShenandoahCardCluster::CardsPerCluster;
      size_t clusters = assignment._chunk_size / cluster_size;
      assert(clusters * cluster_size == assignment._chunk_size, "Chunk assignments must align on cluster boundaries");
      HeapWord* end_of_range = region->bottom() + assignment._chunk_offset + assignment._chunk_size;

      // During concurrent mark, region->top() equals TAMS with respect to the current young-gen pass.
      if (end_of_range > region->top()) {
        end_of_range = region->top();
      }
      scanner->process_region_slice(region, assignment._chunk_offset, clusters, end_of_range, &cl, false, worker_id);
    }
#ifdef ENABLE_REMEMBERED_SET_CANCELLATION
    // This check is currently disabled to avoid crashes that occur
    // when we try to cancel remembered set scanning; it should be re-enabled
    // after the issues are fixed, as it would allow more prompt cancellation and
    // transition to degenerated / full GCs. Note that work that has been assigned/
    // claimed above must be completed before we return here upon cancellation.
    if (heap->check_cancelled_gc_and_yield(_is_concurrent)) {
      return;
    }
#endif
  }
}

size_t ShenandoahRegionChunkIterator::calc_regular_group_size() {
  // The group size is calculated from the number of regions. Suppose the heap has N regions. The first group processes
  // N/2 regions. The second group processes N/4 regions, the third group N/8 regions, and so on.
  // Note that the infinite series N/2 + N/4 + N/8 + N/16 + ... sums to N.
  //
  // The normal group size is the number of regions / 2.
  //
  // In the case that the region_size_words is greater than _maximum_chunk_size_words, the first group_size is
  // larger than the normal group size because each chunk in the group will be smaller than the region size.
  //
  // The last group also has more than the normal entries because it finishes the total scanning effort. The chunk sizes are
  // different for each group. The intention is that the first group processes roughly half of the heap, the second processes
  // half of the remaining heap, the third processes half of what remains, and so on. The smallest chunk size
  // is represented by _smallest_chunk_size_words. We do not divide work any smaller than this.
  //

  size_t group_size = _heap->num_regions() / 2;
  return group_size;
}

size_t ShenandoahRegionChunkIterator::calc_first_group_chunk_size_b4_rebalance() {
  size_t words_in_first_chunk = ShenandoahHeapRegion::region_size_words();
  return words_in_first_chunk;
}

size_t ShenandoahRegionChunkIterator::calc_num_groups() {
  size_t total_heap_size = _heap->num_regions() * ShenandoahHeapRegion::region_size_words();
  size_t num_groups = 0;
  size_t cumulative_group_span = 0;
  size_t current_group_span = _first_group_chunk_size_b4_rebalance * _regular_group_size;
  size_t smallest_group_span = smallest_chunk_size_words() * _regular_group_size;
  while ((num_groups < _maximum_groups) && (cumulative_group_span + current_group_span <= total_heap_size)) {
    num_groups++;
    cumulative_group_span += current_group_span;
    if (current_group_span <= smallest_group_span) {
      break;
    } else {
      current_group_span /= 2;    // Each group spans half of what the preceding group spanned.
    }
  }
  // Loop post condition:
  //   num_groups <= _maximum_groups
  //   cumulative_group_span is the memory spanned by num_groups
  //   current_group_span is the span of the last fully populated group (assuming loop iterates at least once)
  //   each of num_groups is fully populated with _regular_group_size chunks in each
  // Non post conditions:
  //   cumulative_group_span may be less than total_heap_size for one or more of the following reasons:
  //    a) The number of regions remaining to be spanned is smaller than a complete group, or
  //    b) We have filled up all groups through _maximum_groups and still have not spanned all regions

  if (cumulative_group_span < total_heap_size) {
    // We've got more regions to span
    if ((num_groups < _maximum_groups) && (current_group_span > smallest_group_span)) {
      num_groups++;    // Place all remaining regions into a new not-full group (chunk_size half that of previous group)
    }
    // Else we are unable to create a new group because we've exceeded the number of allowed groups or have reached the
    // minimum chunk size.

    // Any remaining regions will be treated as if they are part of the most recently created group. This group will
    // have more than _regular_group_size chunks within it.
  }
  return num_groups;
}
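// Worked example (illustrative numbers only): with 1024 regions, _regular_group_size is 512.
// Before rebalancing the first chunk size is one region, so the first group spans 512
// regions, the second spans 256 (512 chunks of half a region each), the third 128, and so
// on, halving until either _maximum_groups groups exist or the chunk size reaches
// smallest_chunk_size_words(). Whatever remains unspanned is folded into the last group,
// which then holds more than _regular_group_size chunks.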
size_t ShenandoahRegionChunkIterator::calc_total_chunks() {
  size_t region_size_words = ShenandoahHeapRegion::region_size_words();
  size_t unspanned_heap_size = _heap->num_regions() * region_size_words;
  size_t num_chunks = 0;
  size_t cumulative_group_span = 0;
  size_t current_group_span = _first_group_chunk_size_b4_rebalance * _regular_group_size;
  size_t smallest_group_span = smallest_chunk_size_words() * _regular_group_size;

  // The first group gets special handling because the first chunk size can be no larger than _maximum_chunk_size_words
  if (region_size_words > _maximum_chunk_size_words) {
    // In the case that we shrink the first group's chunk size, certain other groups will also be subsumed within the first group
    size_t effective_chunk_size = _first_group_chunk_size_b4_rebalance;
    while (effective_chunk_size >= _maximum_chunk_size_words) {
      num_chunks += current_group_span / _maximum_chunk_size_words;
      unspanned_heap_size -= current_group_span;
      effective_chunk_size /= 2;
      current_group_span /= 2;
    }
  } else {
    num_chunks = _regular_group_size;
    unspanned_heap_size -= current_group_span;
    current_group_span /= 2;
  }
  size_t spanned_groups = 1;
  while (unspanned_heap_size > 0) {
    if (current_group_span <= unspanned_heap_size) {
      unspanned_heap_size -= current_group_span;
      num_chunks += _regular_group_size;
      spanned_groups++;

      // _num_groups is the number of groups required to span the configured heap size. We are not allowed
      // to change the number of groups. The last group is responsible for spanning all chunks not spanned
      // by previously processed groups.
      if (spanned_groups >= _num_groups) {
        // The last group has more than _regular_group_size entries.
        size_t chunk_span = current_group_span / _regular_group_size;
        size_t extra_chunks = unspanned_heap_size / chunk_span;
        assert(extra_chunks * chunk_span == unspanned_heap_size, "Chunks must precisely span regions");
        num_chunks += extra_chunks;
        return num_chunks;
      } else if (current_group_span <= smallest_group_span) {
        // We cannot introduce new groups because we've reached the lower bound on group size. So this last
        // group may hold extra chunks.
        size_t chunk_span = smallest_chunk_size_words();
        size_t extra_chunks = unspanned_heap_size / chunk_span;
        assert(extra_chunks * chunk_span == unspanned_heap_size, "Chunks must precisely span regions");
        num_chunks += extra_chunks;
        return num_chunks;
      } else {
        current_group_span /= 2;
      }
    } else {
      // This last group has fewer than _regular_group_size entries.
      size_t chunk_span = current_group_span / _regular_group_size;
      size_t last_group_size = unspanned_heap_size / chunk_span;
      assert(last_group_size * chunk_span == unspanned_heap_size, "Chunks must precisely span regions");
      num_chunks += last_group_size;
      return num_chunks;
    }
  }
  return num_chunks;
}

ShenandoahRegionChunkIterator::ShenandoahRegionChunkIterator(size_t worker_count) :
  ShenandoahRegionChunkIterator(ShenandoahHeap::heap(), worker_count)
{
}

ShenandoahRegionChunkIterator::ShenandoahRegionChunkIterator(ShenandoahHeap* heap, size_t worker_count) :
  _heap(heap),
  _regular_group_size(calc_regular_group_size()),
  _first_group_chunk_size_b4_rebalance(calc_first_group_chunk_size_b4_rebalance()),
  _num_groups(calc_num_groups()),
  _total_chunks(calc_total_chunks()),
  _index(0)
{
#ifdef ASSERT
  size_t expected_chunk_size_words = _clusters_in_smallest_chunk * CardTable::card_size_in_words() * ShenandoahCardCluster::CardsPerCluster;
  assert(smallest_chunk_size_words() == expected_chunk_size_words, "_smallest_chunk_size (" SIZE_FORMAT ") is not valid because it does not equal (" SIZE_FORMAT ")",
         smallest_chunk_size_words(), expected_chunk_size_words);
#endif
  assert(_num_groups <= _maximum_groups,
         "The number of remembered set scanning groups must be less than or equal to maximum groups");
  assert(smallest_chunk_size_words() << (_maximum_groups - 1) == _maximum_chunk_size_words,
         "Maximum number of groups needs to span maximum chunk size to smallest chunk size");

  size_t words_in_region = ShenandoahHeapRegion::region_size_words();
  _region_index[0] = 0;
  _group_offset[0] = 0;
  if (words_in_region > _maximum_chunk_size_words) {
    // In the case that we shrink the first group's chunk size, certain other groups will also be subsumed within the first group
    size_t num_chunks = 0;
    size_t effective_chunk_size = _first_group_chunk_size_b4_rebalance;
    size_t current_group_span = effective_chunk_size * _regular_group_size;
    while (effective_chunk_size >= _maximum_chunk_size_words) {
      num_chunks += current_group_span / _maximum_chunk_size_words;
      effective_chunk_size /= 2;
      current_group_span /= 2;
    }
    _group_entries[0] = num_chunks;
    _group_chunk_size[0] = _maximum_chunk_size_words;
  } else {
    _group_entries[0] = _regular_group_size;
    _group_chunk_size[0] = _first_group_chunk_size_b4_rebalance;
  }

  size_t previous_group_span = _group_entries[0] * _group_chunk_size[0];
  for (size_t i = 1; i < _num_groups; i++) {
    size_t previous_group_entries = (i == 1) ? _group_entries[0] : (_group_entries[i-1] - _group_entries[i-2]);
    _group_chunk_size[i] = _group_chunk_size[i-1] / 2;
    size_t chunks_in_group = _regular_group_size;
    size_t this_group_span = _group_chunk_size[i] * chunks_in_group;
    size_t total_span_of_groups = previous_group_span + this_group_span;
    _region_index[i] = previous_group_span / words_in_region;
    _group_offset[i] = previous_group_span % words_in_region;
    _group_entries[i] = _group_entries[i-1] + _regular_group_size;
    previous_group_span = total_span_of_groups;
  }
  if (_group_entries[_num_groups-1] < _total_chunks) {
    assert((_total_chunks - _group_entries[_num_groups-1]) * _group_chunk_size[_num_groups-1] + previous_group_span ==
           heap->num_regions() * words_in_region, "Total region chunks (" SIZE_FORMAT
           ") do not span total heap regions (" SIZE_FORMAT ")", _total_chunks, _heap->num_regions());
    previous_group_span += (_total_chunks - _group_entries[_num_groups-1]) * _group_chunk_size[_num_groups-1];
    _group_entries[_num_groups-1] = _total_chunks;
  }
  assert(previous_group_span == heap->num_regions() * words_in_region, "Total region chunks (" SIZE_FORMAT
         ") do not span total heap regions (" SIZE_FORMAT "): " SIZE_FORMAT " does not equal " SIZE_FORMAT,
         _total_chunks, _heap->num_regions(), previous_group_span, heap->num_regions() * words_in_region);

  // Not necessary, but keeps things tidy
  for (size_t i = _num_groups; i < _maximum_groups; i++) {
    _region_index[i] = 0;
    _group_offset[i] = 0;
    _group_entries[i] = _group_entries[i-1];
    _group_chunk_size[i] = 0;
  }
}

void ShenandoahRegionChunkIterator::reset() {
  _index = 0;
}

ShenandoahReconstructRememberedSetTask::ShenandoahReconstructRememberedSetTask(ShenandoahRegionIterator* regions)
  : WorkerTask("Shenandoah Reset Bitmap")
  , _regions(regions) { }

void ShenandoahReconstructRememberedSetTask::work(uint worker_id) {
  ShenandoahParallelWorkerSession worker_session(worker_id);
  ShenandoahHeapRegion* r = _regions->next();
  ShenandoahGenerationalHeap* heap = ShenandoahGenerationalHeap::heap();
  ShenandoahScanRemembered* scanner = heap->old_generation()->card_scan();
  ShenandoahDirtyRememberedSetClosure dirty_cards_for_cross_generational_pointers;

  while (r != nullptr) {
    if (r->is_old() && r->is_active()) {
      HeapWord* obj_addr = r->bottom();
      if (r->is_humongous_start()) {
        // Fetch the humongous object's size so we know how many regions it spans
        oop obj = cast_to_oop(obj_addr);
        size_t size = obj->size();

        // First, clear the remembered set for all spanned humongous regions
        size_t num_regions = ShenandoahHeapRegion::required_regions(size * HeapWordSize);
        size_t region_span = num_regions * ShenandoahHeapRegion::region_size_words();
        scanner->reset_remset(r->bottom(), region_span);
        size_t region_index = r->index();
        ShenandoahHeapRegion* humongous_region = heap->get_region(region_index);
        while (num_regions-- != 0) {
          scanner->reset_object_range(humongous_region->bottom(), humongous_region->end());
          region_index++;
          humongous_region = heap->get_region(region_index);
        }

        // Then register the humongous object and DIRTY relevant remembered set cards
        scanner->register_object_without_lock(obj_addr);
        obj->oop_iterate(&dirty_cards_for_cross_generational_pointers);
      } else if (!r->is_humongous()) {
        // First, clear the remembered set
        scanner->reset_remset(r->bottom(), ShenandoahHeapRegion::region_size_words());
        scanner->reset_object_range(r->bottom(), r->end());

        // Then iterate over all objects, registering each object and DIRTYing relevant remembered set cards
        HeapWord* t = r->top();
        while (obj_addr < t) {
          oop obj = cast_to_oop(obj_addr);
          scanner->register_object_without_lock(obj_addr);
          obj_addr += obj->oop_iterate_size(&dirty_cards_for_cross_generational_pointers);
        }
      } // else, ignore humongous continuation region
    }
    // else, this region is FREE or YOUNG or inactive and we can ignore it.
    r = _regions->next();
  }
}