/*
 * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"

#include "gc/shenandoah/shenandoahOldGeneration.hpp"
#include "gc/shenandoah/shenandoahHeap.inline.hpp"
#include "gc/shenandoah/shenandoahOopClosures.inline.hpp"
#include "gc/shenandoah/shenandoahReferenceProcessor.hpp"
#include "gc/shenandoah/shenandoahScanRemembered.inline.hpp"
#include "logging/log.hpp"
#include "runtime/threads.hpp"

size_t ShenandoahDirectCardMarkRememberedSet::last_valid_index() const {
  return _card_table->last_valid_index();
}

size_t ShenandoahDirectCardMarkRememberedSet::total_cards() const {
  return _total_card_count;
}

size_t ShenandoahDirectCardMarkRememberedSet::card_index_for_addr(HeapWord *p) const {
  return _card_table->index_for(p);
}

HeapWord* ShenandoahDirectCardMarkRememberedSet::addr_for_card_index(size_t card_index) const {
  return _whole_heap_base + CardTable::card_size_in_words() * card_index;
}
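// Note on the two card tables referenced below: in generational mode Shenandoah keeps a
// "write" table, which mutator barriers dirty, and a stable "read" table that remembered-set
// scanning consumes. merge_write_table() and swap_card_tables() further down reconcile and
// exchange the two.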
bool ShenandoahDirectCardMarkRememberedSet::is_write_card_dirty(size_t card_index) const {
  CardValue* bp = &(_card_table->write_byte_map())[card_index];
  return (bp[0] == CardTable::dirty_card_val());
}

bool ShenandoahDirectCardMarkRememberedSet::is_card_dirty(size_t card_index) const {
  CardValue* bp = &(_card_table->read_byte_map())[card_index];
  return (bp[0] == CardTable::dirty_card_val());
}

void ShenandoahDirectCardMarkRememberedSet::mark_card_as_dirty(size_t card_index) {
  CardValue* bp = &(_card_table->write_byte_map())[card_index];
  bp[0] = CardTable::dirty_card_val();
}

void ShenandoahDirectCardMarkRememberedSet::mark_range_as_dirty(size_t card_index, size_t num_cards) {
  CardValue* bp = &(_card_table->write_byte_map())[card_index];
  while (num_cards-- > 0) {
    *bp++ = CardTable::dirty_card_val();
  }
}

bool ShenandoahDirectCardMarkRememberedSet::is_card_dirty(HeapWord* p) const {
  size_t index = card_index_for_addr(p);
  CardValue* bp = &(_card_table->read_byte_map())[index];
  return (bp[0] == CardTable::dirty_card_val());
}

bool ShenandoahDirectCardMarkRememberedSet::is_write_card_dirty(HeapWord* p) const {
  size_t index = card_index_for_addr(p);
  CardValue* bp = &(_card_table->write_byte_map())[index];
  return (bp[0] == CardTable::dirty_card_val());
}

void ShenandoahDirectCardMarkRememberedSet::mark_card_as_dirty(HeapWord* p) {
  size_t index = card_index_for_addr(p);
  CardValue* bp = &(_card_table->write_byte_map())[index];
  bp[0] = CardTable::dirty_card_val();
}

void ShenandoahDirectCardMarkRememberedSet::mark_range_as_dirty(HeapWord* p, size_t num_heap_words) {
  CardValue* bp = &(_card_table->write_byte_map_base())[uintptr_t(p) >> _card_shift];
  CardValue* end_bp = &(_card_table->write_byte_map_base())[uintptr_t(p + num_heap_words) >> _card_shift];
  // If (p + num_heap_words) is not aligned on card boundary, we also need to dirty last card.
  if (((unsigned long long) (p + num_heap_words)) & (CardTable::card_size() - 1)) {
    end_bp++;
  }
  while (bp < end_bp) {
    *bp++ = CardTable::dirty_card_val();
  }
}

void ShenandoahDirectCardMarkRememberedSet::mark_range_as_clean(HeapWord* p, size_t num_heap_words) {
  CardValue* bp = &(_card_table->write_byte_map_base())[uintptr_t(p) >> _card_shift];
  CardValue* end_bp = &(_card_table->write_byte_map_base())[uintptr_t(p + num_heap_words) >> _card_shift];
  // If (p + num_heap_words) is not aligned on card boundary, we also need to clean last card.
  if (((unsigned long long) (p + num_heap_words)) & (CardTable::card_size() - 1)) {
    end_bp++;
  }
  while (bp < end_bp) {
    *bp++ = CardTable::clean_card_val();
  }
}
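// Illustration of the range arithmetic above (assuming the default 512-byte cards, i.e. 64 heap
// words per card on a 64-bit VM): a 100-word range that begins 16 words into a card covers words
// 16..115 relative to that card's base, so it overlaps two cards; the end_bp++ adjustment ensures
// the partially covered trailing card is written as well.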
void ShenandoahDirectCardMarkRememberedSet::mark_read_table_as_clean() {
  CardValue* read_table = _card_table->read_byte_map();
  CardValue* bp = &(read_table)[0];
  CardValue* end_bp = &(read_table)[_card_table->last_valid_index()];

  while (bp <= end_bp) {
    *bp++ = CardTable::clean_card_val();
  }

  log_info(gc, barrier)("Cleaned read_table from " PTR_FORMAT " to " PTR_FORMAT, p2i(&(read_table)[0]), p2i(end_bp));
}

// No lock required because arguments align with card boundaries.
void ShenandoahCardCluster::reset_object_range(HeapWord* from, HeapWord* to) {
  assert(((((unsigned long long) from) & (CardTable::card_size() - 1)) == 0) &&
         ((((unsigned long long) to) & (CardTable::card_size() - 1)) == 0),
         "reset_object_range bounds must align with card boundaries");
  size_t card_at_start = _rs->card_index_for_addr(from);
  size_t num_cards = (to - from) / CardTable::card_size_in_words();

  for (size_t i = 0; i < num_cards; i++) {
    _object_starts[card_at_start + i].short_word = 0;
  }
}

// Assume only one thread at a time registers objects pertaining to
// each card-table entry's range of memory.
void ShenandoahCardCluster::register_object(HeapWord* address) {
  shenandoah_assert_heaplocked();

  register_object_without_lock(address);
}

void ShenandoahCardCluster::register_object_without_lock(HeapWord* address) {
  size_t card_at_start = _rs->card_index_for_addr(address);
  HeapWord* card_start_address = _rs->addr_for_card_index(card_at_start);
  uint8_t offset_in_card = checked_cast<uint8_t>(pointer_delta(address, card_start_address));

  if (!starts_object(card_at_start)) {
    set_starts_object_bit(card_at_start);
    set_first_start(card_at_start, offset_in_card);
    set_last_start(card_at_start, offset_in_card);
  } else {
    if (offset_in_card < get_first_start(card_at_start))
      set_first_start(card_at_start, offset_in_card);
    if (offset_in_card > get_last_start(card_at_start))
      set_last_start(card_at_start, offset_in_card);
  }
}
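// Illustration (hypothetical offsets): registering objects that start at word offsets 12, 0 and 40
// within the same card leaves that card with starts_object set, first_start == 0 and last_start == 40.
// Objects that merely extend into a card from a preceding card do not update that card's entry.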
void ShenandoahCardCluster::coalesce_objects(HeapWord* address, size_t length_in_words) {

  size_t card_at_start = _rs->card_index_for_addr(address);
  HeapWord* card_start_address = _rs->addr_for_card_index(card_at_start);
  size_t card_at_end = card_at_start + ((address + length_in_words) - card_start_address) / CardTable::card_size_in_words();

  if (card_at_start == card_at_end) {
    // There are no changes to the get_first_start array.  Either get_first_start(card_at_start) returns this coalesced object,
    // or it returns an object that precedes the coalesced object.
    if (card_start_address + get_last_start(card_at_start) < address + length_in_words) {
      uint8_t coalesced_offset = checked_cast<uint8_t>(pointer_delta(address, card_start_address));
      // The object that used to be the last object starting within this card is being subsumed within the coalesced
      // object.  Since we always coalesce entire objects, this condition only occurs if the last object ends before or at
      // the end of the card's memory range and there is no object following this object.  In this case, adjust last_start
      // to represent the start of the coalesced range.
      set_last_start(card_at_start, coalesced_offset);
    }
    // Else, no changes to last_starts information.  Either get_last_start(card_at_start) returns the object that immediately
    // follows the coalesced object, or it returns an object that follows the object immediately following the coalesced object.
  } else {
    uint8_t coalesced_offset = checked_cast<uint8_t>(pointer_delta(address, card_start_address));
    if (get_last_start(card_at_start) > coalesced_offset) {
      // Existing last start is being coalesced, create new last start
      set_last_start(card_at_start, coalesced_offset);
    }
    // otherwise, get_last_start(card_at_start) must equal coalesced_offset

    // All the cards between first and last get cleared.
    for (size_t i = card_at_start + 1; i < card_at_end; i++) {
      clear_starts_object_bit(i);
    }

    uint8_t follow_offset = checked_cast<uint8_t>((address + length_in_words) - _rs->addr_for_card_index(card_at_end));
    if (starts_object(card_at_end) && (get_first_start(card_at_end) < follow_offset)) {
      // It may be that after coalescing within this last card's memory range, the last card
      // no longer holds an object.
      if (get_last_start(card_at_end) >= follow_offset) {
        set_first_start(card_at_end, follow_offset);
      } else {
        // last_start is being coalesced so this card no longer has any objects.
        clear_starts_object_bit(card_at_end);
      }
    }
    // else
    //   card_at_end did not have an object, so it still does not have an object, or
    //   card_at_end had an object that starts after the coalesced object, so no changes required for card_at_end

  }
}
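// Sketch of the multi-card case above (hypothetical offsets): coalescing dead objects into one
// filler that starts 8 words into card c and ends 16 words into card c+3 sets last_start(c) to 8
// if a later start on c was subsumed, clears the starts-object bits of cards c+1 and c+2, and
// either advances first_start(c+3) to 16 or clears its bit when no surviving object starts at or
// beyond offset 16.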
size_t ShenandoahCardCluster::get_first_start(size_t card_index) const {
  assert(starts_object(card_index), "Can't get first start because no object starts here");
  return _object_starts[card_index].offsets.first & FirstStartBits;
}

size_t ShenandoahCardCluster::get_last_start(size_t card_index) const {
  assert(starts_object(card_index), "Can't get last start because no object starts here");
  return _object_starts[card_index].offsets.last;
}

// Given a card_index, return the starting address of the first block in the heap
// that straddles into this card. If this card is co-initial with an object, then
// this would return the first address of the range that this card covers, which is
// where the card's first object also begins.
HeapWord* ShenandoahCardCluster::block_start(const size_t card_index) const {

  HeapWord* left = _rs->addr_for_card_index(card_index);

#ifdef ASSERT
  assert(ShenandoahHeap::heap()->mode()->is_generational(), "Do not use in non-generational mode");
  ShenandoahHeapRegion* region = ShenandoahHeap::heap()->heap_region_containing(left);
  assert(region->is_old(), "Do not use for young regions");
  // For HumongousRegion:s it's more efficient to jump directly to the
  // start region.
  assert(!region->is_humongous(), "Use region->humongous_start_region() instead");
#endif
  if (starts_object(card_index) && get_first_start(card_index) == 0) {
    // This card contains a co-initial object; a fortiori, it covers
    // also the case of a card being the first in a region.
    assert(oopDesc::is_oop(cast_to_oop(left)), "Should be an object");
    return left;
  }

  HeapWord* p = nullptr;
  oop obj = cast_to_oop(p);
  ssize_t cur_index = (ssize_t)card_index;
  assert(cur_index >= 0, "Overflow");
  assert(cur_index > 0, "Should have returned above");
  // Walk backwards over the cards...
  while (--cur_index > 0 && !starts_object(cur_index)) {
    // ... to the one that starts the object
  }
  // cur_index should start an object: we should not have walked
  // past the left end of the region.
  assert(cur_index >= 0 && (cur_index <= (ssize_t)card_index), "Error");
  assert(region->bottom() <= _rs->addr_for_card_index(cur_index),
         "Fell off the bottom of containing region");
  assert(starts_object(cur_index), "Error");
  size_t offset = get_last_start(cur_index);
  // can avoid call via card size arithmetic below instead
  p = _rs->addr_for_card_index(cur_index) + offset;
  // Recall that we already dealt with the co-initial object case above
  assert(p < left, "obj should start before left");
  // While it is safe to ask an object its size in the loop that
  // follows, the (ifdef'd out) loop should never be needed.
  // 1. we ask this question only for regions in the old generation
  // 2. there is no direct allocation ever by mutators in old generation
  //    regions. Only GC will ever allocate in old regions, and then
  //    too only during promotion/evacuation phases. Thus there is no danger
  //    of races between reading from and writing to the object start array,
  //    or of asking partially initialized objects their size (in the loop below).
  // 3. only GC asks this question during phases when it is not concurrently
  //    evacuating/promoting, viz. during concurrent root scanning (before
  //    the evacuation phase) and during concurrent update refs (after the
  //    evacuation phase) of young collections. This is never called
  //    during old or global collections.
  // 4. Every allocation under TAMS updates the object start array.
  NOT_PRODUCT(obj = cast_to_oop(p);)
  assert(oopDesc::is_oop(obj), "Should be an object");
#define WALK_FORWARD_IN_BLOCK_START false
  while (WALK_FORWARD_IN_BLOCK_START && p + obj->size() < left) {
    p += obj->size();
  }
#undef WALK_FORWARD_IN_BLOCK_START  // false
  assert(p + obj->size() > left, "obj should end after left");
  return p;
}

size_t ShenandoahScanRemembered::card_index_for_addr(HeapWord* p) {
  return _rs->card_index_for_addr(p);
}

HeapWord* ShenandoahScanRemembered::addr_for_card_index(size_t card_index) {
  return _rs->addr_for_card_index(card_index);
}

bool ShenandoahScanRemembered::is_card_dirty(size_t card_index) {
  return _rs->is_card_dirty(card_index);
}

bool ShenandoahScanRemembered::is_write_card_dirty(size_t card_index) {
  return _rs->is_write_card_dirty(card_index);
}

bool ShenandoahScanRemembered::is_card_dirty(HeapWord* p) {
  return _rs->is_card_dirty(p);
}

void ShenandoahScanRemembered::mark_card_as_dirty(HeapWord* p) {
  _rs->mark_card_as_dirty(p);
}

bool ShenandoahScanRemembered::is_write_card_dirty(HeapWord* p) {
  return _rs->is_write_card_dirty(p);
}

void ShenandoahScanRemembered::mark_range_as_dirty(HeapWord* p, size_t num_heap_words) {
  _rs->mark_range_as_dirty(p, num_heap_words);
}

void ShenandoahScanRemembered::mark_range_as_clean(HeapWord* p, size_t num_heap_words) {
  _rs->mark_range_as_clean(p, num_heap_words);
}

void ShenandoahScanRemembered::mark_read_table_as_clean() {
  _rs->mark_read_table_as_clean();
}

void ShenandoahScanRemembered::reset_object_range(HeapWord* from, HeapWord* to) {
  _scc->reset_object_range(from, to);
}

void ShenandoahScanRemembered::register_object(HeapWord* addr) {
  _scc->register_object(addr);
}

void ShenandoahScanRemembered::register_object_without_lock(HeapWord* addr) {
  _scc->register_object_without_lock(addr);
}

bool ShenandoahScanRemembered::verify_registration(HeapWord* address, ShenandoahMarkingContext* ctx) {

  size_t index = card_index_for_addr(address);
  if (!_scc->starts_object(index)) {
    return false;
  }
  HeapWord* base_addr = addr_for_card_index(index);
  size_t offset = _scc->get_first_start(index);
  ShenandoahHeap* heap = ShenandoahHeap::heap();

  // Verify that I can find this object within its enclosing card by scanning forward from first_start.
  while (base_addr + offset < address) {
    oop obj = cast_to_oop(base_addr + offset);
    if (!ctx || ctx->is_marked(obj)) {
      offset += obj->size();
    } else {
      // If this object is not live, don't trust its size(); all objects above tams are live.
      ShenandoahHeapRegion* r = heap->heap_region_containing(obj);
      HeapWord* tams = ctx->top_at_mark_start(r);
      offset = ctx->get_next_marked_addr(base_addr + offset, tams) - base_addr;
    }
  }
  if (base_addr + offset != address) {
    return false;
  }

  // At this point, offset represents the object whose registration we are verifying.  We know that at least this object
  // resides within this card's memory.

  // Make sure that last_offset is properly set for the enclosing card, but we can't verify this for
  // candidate collection-set regions during mixed evacuations, so disable this check in general
  // during mixed evacuations.

  ShenandoahHeapRegion* r = heap->heap_region_containing(base_addr + offset);
  size_t max_offset = r->top() - base_addr;
  if (max_offset > CardTable::card_size_in_words()) {
    max_offset = CardTable::card_size_in_words();
  }
  size_t prev_offset;
  if (!ctx) {
    do {
      oop obj = cast_to_oop(base_addr + offset);
      prev_offset = offset;
      offset += obj->size();
    } while (offset < max_offset);
    if (_scc->get_last_start(index) != prev_offset) {
      return false;
    }

    // base + offset represents address of first object that starts on following card, if there is one.

    // Notes: base_addr is addr_for_card_index(index)
    //        base_addr + offset is end of the object we are verifying
    //        cannot use card_index_for_addr(base_addr + offset) because it asserts arg < end of whole heap
    size_t end_card_index = index + offset / CardTable::card_size_in_words();

    if (end_card_index > index && end_card_index <= _rs->last_valid_index()) {
      // If there is a following object registered on the next card, it should begin where this object ends.
      if (_scc->starts_object(end_card_index) &&
          ((addr_for_card_index(end_card_index) + _scc->get_first_start(end_card_index)) != (base_addr + offset))) {
        return false;
      }
    }

    // Assure that no other objects are registered "inside" of this one.
    for (index++; index < end_card_index; index++) {
      if (_scc->starts_object(index)) {
        return false;
      }
    }
  } else {
    // This is a mixed evacuation or a global collect: rely on mark bits to identify which objects need to be properly registered
    assert(!ShenandoahHeap::heap()->is_concurrent_old_mark_in_progress(), "Cannot rely on mark context here.");
    // If the object reaching or spanning the end of this card's memory is marked, then last_offset for this card
    // should represent this object.  Otherwise, last_offset is a don't care.
    ShenandoahHeapRegion* region = heap->heap_region_containing(base_addr + offset);
    HeapWord* tams = ctx->top_at_mark_start(region);
    oop last_obj = nullptr;
    do {
      oop obj = cast_to_oop(base_addr + offset);
      if (ctx->is_marked(obj)) {
        prev_offset = offset;
        offset += obj->size();
        last_obj = obj;
      } else {
        offset = ctx->get_next_marked_addr(base_addr + offset, tams) - base_addr;
        // If there are no marked objects remaining in this region, offset equals tams - base_addr.  If this offset is
        // greater than max_offset, we will immediately exit this loop.  Otherwise, the next iteration of the loop will
        // treat the object at offset as marked and live (because address >= tams) and we will continue iterating object
        // by consulting the size() fields of each.
      }
    } while (offset < max_offset);
    if (last_obj != nullptr && prev_offset + last_obj->size() >= max_offset) {
      // last marked object extends beyond end of card
      if (_scc->get_last_start(index) != prev_offset) {
        return false;
      }
      // otherwise, the value of _scc->get_last_start(index) is a don't care because it represents a dead object and we
      // cannot verify its context
    }
  }
  return true;
}

void ShenandoahScanRemembered::coalesce_objects(HeapWord* addr, size_t length_in_words) {
  _scc->coalesce_objects(addr, length_in_words);
}

void ShenandoahScanRemembered::mark_range_as_empty(HeapWord* addr, size_t length_in_words) {
  _rs->mark_range_as_clean(addr, length_in_words);
  _scc->clear_objects_in_range(addr, length_in_words);
}

size_t ShenandoahScanRemembered::cluster_for_addr(HeapWordImpl **addr) {
  size_t card_index = _rs->card_index_for_addr(addr);
  size_t result = card_index / ShenandoahCardCluster::CardsPerCluster;
  return result;
}

HeapWord* ShenandoahScanRemembered::addr_for_cluster(size_t cluster_no) {
  size_t card_index = cluster_no * ShenandoahCardCluster::CardsPerCluster;
  return addr_for_card_index(card_index);
}
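// Clusters group a fixed number of consecutive cards so remembered-set work can be divided among
// workers. As an illustration (the actual constants live in ShenandoahCardCluster): with 64 cards
// per cluster and 512-byte cards, each cluster covers 32 KB of old-generation address space.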
// This is used only for debug verification so don't worry about making the scan parallel.
void ShenandoahScanRemembered::roots_do(OopIterateClosure* cl) {
  ShenandoahHeap* heap = ShenandoahHeap::heap();
  bool old_bitmap_stable = heap->old_generation()->is_mark_complete();
  log_debug(gc, remset)("Scan remembered set using bitmap: %s", BOOL_TO_STR(old_bitmap_stable));
  for (size_t i = 0, n = heap->num_regions(); i < n; ++i) {
    ShenandoahHeapRegion* region = heap->get_region(i);
    if (region->is_old() && region->is_active() && !region->is_cset()) {
      HeapWord* start_of_range = region->bottom();
      HeapWord* end_of_range = region->top();
      size_t start_cluster_no = cluster_for_addr(start_of_range);
      size_t num_heapwords = end_of_range - start_of_range;
      unsigned int cluster_size = CardTable::card_size_in_words() * ShenandoahCardCluster::CardsPerCluster;
      size_t num_clusters = (size_t) ((num_heapwords - 1 + cluster_size) / cluster_size);

      // Remembered set scanner
      if (region->is_humongous()) {
        process_humongous_clusters(region->humongous_start_region(), start_cluster_no, num_clusters, end_of_range, cl,
                                   false /* use_write_table */);
      } else {
        process_clusters(start_cluster_no, num_clusters, end_of_range, cl,
                         false /* use_write_table */, 0 /* fake worker id */);
      }
    }
  }
}

#ifndef PRODUCT
// Log given card stats
void ShenandoahScanRemembered::log_card_stats(HdrSeq* stats) {
  for (int i = 0; i < MAX_CARD_STAT_TYPE; i++) {
    log_info(gc, remset)("%18s: [ %8.2f %8.2f %8.2f %8.2f %8.2f ]",
                         _card_stats_name[i],
                         stats[i].percentile(0), stats[i].percentile(25),
                         stats[i].percentile(50), stats[i].percentile(75),
                         stats[i].maximum());
  }
}

// Log card stats for all nworkers for a specific phase t
void ShenandoahScanRemembered::log_card_stats(uint nworkers, CardStatLogType t) {
  assert(ShenandoahEnableCardStats, "Do not call");
  HdrSeq* sum_stats = card_stats_for_phase(t);
  log_info(gc, remset)("%s", _card_stat_log_type[t]);
  for (uint i = 0; i < nworkers; i++) {
    log_worker_card_stats(i, sum_stats);
  }

  // Every so often, log the cumulative global stats
  if (++_card_stats_log_counter[t] >= ShenandoahCardStatsLogInterval) {
    _card_stats_log_counter[t] = 0;
    log_info(gc, remset)("Cumulative stats");
    log_card_stats(sum_stats);
  }
}

// Log card stats for given worker_id, & clear them after merging into given cumulative stats
void ShenandoahScanRemembered::log_worker_card_stats(uint worker_id, HdrSeq* sum_stats) {
  assert(ShenandoahEnableCardStats, "Do not call");

  HdrSeq* worker_card_stats = card_stats(worker_id);
  log_info(gc, remset)("Worker %u Card Stats: ", worker_id);
  log_card_stats(worker_card_stats);
  // Merge worker stats into the cumulative stats & clear worker stats
  merge_worker_card_stats_cumulative(worker_card_stats, sum_stats);
}

void ShenandoahScanRemembered::merge_worker_card_stats_cumulative(
  HdrSeq* worker_stats, HdrSeq* sum_stats) {
  for (int i = 0; i < MAX_CARD_STAT_TYPE; i++) {
    sum_stats[i].add(worker_stats[i]);
    worker_stats[i].clear();
  }
}
#endif

// A closure that takes an oop in the old generation and, if it's pointing
// into the young generation, dirties the corresponding remembered set entry.
// This is only used to rebuild the remembered set after a full GC.
class ShenandoahDirtyRememberedSetClosure : public BasicOopIterateClosure {
protected:
  ShenandoahGenerationalHeap* const _heap;
  ShenandoahScanRemembered*   const _scanner;

public:
  ShenandoahDirtyRememberedSetClosure() :
      _heap(ShenandoahGenerationalHeap::heap()),
      _scanner(_heap->old_generation()->card_scan()) {}

  template<class T>
  inline void work(T* p) {
    assert(_heap->is_in_old(p), "Expecting to get an old gen address");
    T o = RawAccess<>::oop_load(p);
    if (!CompressedOops::is_null(o)) {
      oop obj = CompressedOops::decode_not_null(o);
      if (_heap->is_in_young(obj)) {
        // Dirty the card containing the cross-generational pointer.
        _scanner->mark_card_as_dirty((HeapWord*) p);
      }
    }
  }

  virtual void do_oop(narrowOop* p) { work(p); }
  virtual void do_oop(oop* p)       { work(p); }
};

ShenandoahDirectCardMarkRememberedSet::ShenandoahDirectCardMarkRememberedSet(ShenandoahCardTable* card_table, size_t total_card_count) :
  LogCardValsPerIntPtr(log2i_exact(sizeof(intptr_t)) - log2i_exact(sizeof(CardValue))),
  LogCardSizeInWords(log2i_exact(CardTable::card_size_in_words())) {

  // Paranoid assert for LogCardValsPerIntPtr calculation above
  assert(sizeof(intptr_t) > sizeof(CardValue), "LogCardValsPerIntPtr would underflow");

  _heap = ShenandoahHeap::heap();
  _card_table = card_table;
  _total_card_count = total_card_count;
  _card_shift = CardTable::card_shift();

  _byte_map = _card_table->byte_for_index(0);

  _whole_heap_base = _card_table->addr_for(_byte_map);
  _byte_map_base = _byte_map - (uintptr_t(_whole_heap_base) >> _card_shift);

  assert(total_card_count % ShenandoahCardCluster::CardsPerCluster == 0, "Invalid card count.");
  assert(total_card_count > 0, "Card count cannot be zero.");
}

// Merge any dirty values from write table into the read table, while leaving
// the write table unchanged.
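// The merge relies on the card-table encoding (dirty_card_val() is 0 and clean_card_val() is
// all one-bits), so ANDing the two tables an intptr_t at a time leaves a card dirty in the read
// table if it is dirty in either table.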
void ShenandoahDirectCardMarkRememberedSet::merge_write_table(HeapWord* start, size_t word_count) {
  size_t start_index = card_index_for_addr(start);
#ifdef ASSERT
  // avoid querying card_index_for_addr() for an address past end of heap
  size_t end_index = card_index_for_addr(start + word_count - 1) + 1;
#endif
  assert(start_index % ((size_t)1 << LogCardValsPerIntPtr) == 0, "Expected a multiple of CardValsPerIntPtr");
  assert(end_index % ((size_t)1 << LogCardValsPerIntPtr) == 0, "Expected a multiple of CardValsPerIntPtr");

  // We'll access in groups of intptr_t worth of card entries
  intptr_t* const read_table  = (intptr_t*) &(_card_table->read_byte_map())[start_index];
  intptr_t* const write_table = (intptr_t*) &(_card_table->write_byte_map())[start_index];

  // Avoid division, use shift instead
  assert(word_count % ((size_t)1 << (LogCardSizeInWords + LogCardValsPerIntPtr)) == 0, "Expected a multiple of CardSizeInWords*CardValsPerIntPtr");
  size_t const num = word_count >> (LogCardSizeInWords + LogCardValsPerIntPtr);

  for (size_t i = 0; i < num; i++) {
    read_table[i] &= write_table[i];
  }

  log_info(gc, remset)("Finished merging write_table into read_table.");
}

void ShenandoahDirectCardMarkRememberedSet::swap_card_tables() {
  CardTable::CardValue* new_ptr = _card_table->swap_read_and_write_tables();

#ifdef ASSERT
  CardValue* start_bp = &(_card_table->write_byte_map())[0];
  CardValue* end_bp = &(new_ptr)[_card_table->last_valid_index()];

  while (start_bp <= end_bp) {
    assert(*start_bp == CardTable::clean_card_val(), "Should be clean: " PTR_FORMAT, p2i(start_bp));
    start_bp++;
  }
#endif

  struct SwapTLSCardTable : public ThreadClosure {
    CardTable::CardValue* _new_ptr;
    SwapTLSCardTable(CardTable::CardValue* np) : _new_ptr(np) {}
    virtual void do_thread(Thread* t) {
      ShenandoahThreadLocalData::set_card_table(t, _new_ptr);
    }
  } swap_it(new_ptr);

  // Iterate on threads and adjust thread local data
  Threads::threads_do(&swap_it);

  log_info(gc, barrier)("Current write_card_table: " PTR_FORMAT, p2i(swap_it._new_ptr));
}

ShenandoahScanRememberedTask::ShenandoahScanRememberedTask(ShenandoahObjToScanQueueSet* queue_set,
                                                           ShenandoahObjToScanQueueSet* old_queue_set,
                                                           ShenandoahReferenceProcessor* rp,
                                                           ShenandoahRegionChunkIterator* work_list, bool is_concurrent) :
  WorkerTask("Scan Remembered Set"),
  _queue_set(queue_set), _old_queue_set(old_queue_set), _rp(rp), _work_list(work_list), _is_concurrent(is_concurrent) {
  bool old_bitmap_stable = ShenandoahHeap::heap()->old_generation()->is_mark_complete();
  log_debug(gc, remset)("Scan remembered set using bitmap: %s", BOOL_TO_STR(old_bitmap_stable));
}

void ShenandoahScanRememberedTask::work(uint worker_id) {
  if (_is_concurrent) {
    // This sets up a thread local reference to the worker_id which is needed by the weak reference processor.
    ShenandoahConcurrentWorkerSession worker_session(worker_id);
    ShenandoahSuspendibleThreadSetJoiner stsj;
    do_work(worker_id);
  } else {
    // This sets up a thread local reference to the worker_id which is needed by the weak reference processor.
    ShenandoahParallelWorkerSession worker_session(worker_id);
    do_work(worker_id);
  }
}

void ShenandoahScanRememberedTask::do_work(uint worker_id) {
  ShenandoahWorkerTimingsTracker x(ShenandoahPhaseTimings::init_scan_rset, ShenandoahPhaseTimings::ScanClusters, worker_id);

  ShenandoahObjToScanQueue* q = _queue_set->queue(worker_id);
  ShenandoahObjToScanQueue* old = _old_queue_set == nullptr ? nullptr : _old_queue_set->queue(worker_id);
  ShenandoahMarkRefsClosure<YOUNG> cl(q, _rp, old);
  ShenandoahGenerationalHeap* heap = ShenandoahGenerationalHeap::heap();
  ShenandoahScanRemembered* scanner = heap->old_generation()->card_scan();

  // set up thread local closure for shen ref processor
  _rp->set_mark_closure(worker_id, &cl);
  struct ShenandoahRegionChunk assignment;
  while (_work_list->next(&assignment)) {
    ShenandoahHeapRegion* region = assignment._r;
    log_debug(gc)("ShenandoahScanRememberedTask::do_work(%u), processing slice of region "
                  SIZE_FORMAT " at offset " SIZE_FORMAT ", size: " SIZE_FORMAT,
                  worker_id, region->index(), assignment._chunk_offset, assignment._chunk_size);
    if (region->is_old()) {
      size_t cluster_size =
        CardTable::card_size_in_words() * ShenandoahCardCluster::CardsPerCluster;
      size_t clusters = assignment._chunk_size / cluster_size;
      assert(clusters * cluster_size == assignment._chunk_size, "Chunk assignments must align on cluster boundaries");
      HeapWord* end_of_range = region->bottom() + assignment._chunk_offset + assignment._chunk_size;

      // During concurrent mark, region->top() equals TAMS with respect to the current young-gen pass.
      if (end_of_range > region->top()) {
        end_of_range = region->top();
      }
      scanner->process_region_slice(region, assignment._chunk_offset, clusters, end_of_range, &cl, false, worker_id);
    }
#ifdef ENABLE_REMEMBERED_SET_CANCELLATION
    // This check is currently disabled to avoid crashes that occur
    // when we try to cancel remembered set scanning; it should be re-enabled
    // after the issues are fixed, as it would allow more prompt cancellation and
    // transition to degenerated / full GCs. Note that work that has been assigned/
    // claimed above must be completed before we return here upon cancellation.
    if (heap->check_cancelled_gc_and_yield(_is_concurrent)) {
      return;
    }
#endif
  }
}

size_t ShenandoahRegionChunkIterator::calc_regular_group_size() {
  // The group size is calculated from the number of regions.  Suppose the heap has N regions.  The first group processes
  // N/2 regions.  The second group processes N/4 regions, the third group N/8 regions and so on.
  // Note that the infinite series N/2 + N/4 + N/8 + N/16 + ...  sums to N.
  //
  // The normal group size is the number of regions / 2.
  //
  // In the case that the region_size_words is greater than _maximum_chunk_size_words, the first group_size is
  // larger than the normal group size because each chunk in the group will be smaller than the region size.
  //
  // The last group also has more than the normal entries because it finishes the total scanning effort.  The chunk sizes are
  // different for each group.  The intention is that the first group processes roughly half of the heap, the second processes
  // half of the remaining heap, the third processes half of what remains and so on.  The smallest chunk size
  // is represented by _smallest_chunk_size_words.  We do not divide work any smaller than this.
  //
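  // For example (illustrative numbers), with 2048 regions each group holds 1024 chunks; chunk
  // sizes halve from group to group, so successive groups cover roughly 1/2, 1/4, 1/8, ... of the
  // heap until the smallest permitted chunk size is reached.
  //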
  size_t group_size = _heap->num_regions() / 2;
  return group_size;
}

size_t ShenandoahRegionChunkIterator::calc_first_group_chunk_size_b4_rebalance() {
  size_t words_in_first_chunk = ShenandoahHeapRegion::region_size_words();
  return words_in_first_chunk;
}

size_t ShenandoahRegionChunkIterator::calc_num_groups() {
  size_t total_heap_size = _heap->num_regions() * ShenandoahHeapRegion::region_size_words();
  size_t num_groups = 0;
  size_t cumulative_group_span = 0;
  size_t current_group_span = _first_group_chunk_size_b4_rebalance * _regular_group_size;
  size_t smallest_group_span = smallest_chunk_size_words() * _regular_group_size;
  while ((num_groups < _maximum_groups) && (cumulative_group_span + current_group_span <= total_heap_size)) {
    num_groups++;
    cumulative_group_span += current_group_span;
    if (current_group_span <= smallest_group_span) {
      break;
    } else {
      current_group_span /= 2;    // Each group spans half of what the preceding group spanned.
    }
  }
  // Loop post condition:
  //   num_groups <= _maximum_groups
  //   cumulative_group_span is the memory spanned by num_groups
  //   current_group_span is the span of the last fully populated group (assuming loop iterates at least once)
  //   each of num_groups is fully populated with _regular_group_size chunks in each
  // Non post conditions:
  //   cumulative_group_span may be less than total_heap_size for one or more of the following reasons
  //   a) The number of regions remaining to be spanned is smaller than a complete group, or
  //   b) We have filled up all groups through _maximum_groups and still have not spanned all regions

  if (cumulative_group_span < total_heap_size) {
    // We've got more regions to span
    if ((num_groups < _maximum_groups) && (current_group_span > smallest_group_span)) {
      num_groups++;             // Place all remaining regions into a new not-full group (chunk_size half that of previous group)
    }
    // Else we are unable to create a new group because we've exceeded the number of allowed groups or have reached the
    // minimum chunk size.

    // Any remaining regions will be treated as if they are part of the most recently created group.  This group will
    // have more than _regular_group_size chunks within it.
  }
  assert (num_groups <= _maximum_groups, "Cannot have more than %zu groups", _maximum_groups);
  return num_groups;
}

size_t ShenandoahRegionChunkIterator::calc_total_chunks() {
  size_t region_size_words = ShenandoahHeapRegion::region_size_words();
  size_t unspanned_heap_size = _heap->num_regions() * region_size_words;
  size_t num_chunks = 0;
  size_t cumulative_group_span = 0;
  size_t current_group_span = _first_group_chunk_size_b4_rebalance * _regular_group_size;
  size_t smallest_group_span = smallest_chunk_size_words() * _regular_group_size;

  // The first group gets special handling because the first chunk size can be no larger than _maximum_chunk_size_words
  if (region_size_words > _maximum_chunk_size_words) {
    // In the case that we shrink the first group's chunk size, certain other groups will also be subsumed within the first group
    size_t effective_chunk_size = _first_group_chunk_size_b4_rebalance;
    uint coalesced_groups = 0;
    while (effective_chunk_size >= _maximum_chunk_size_words) {
      // Each iteration of this loop subsumes one original group into a new rebalanced initial group.
      num_chunks += current_group_span / _maximum_chunk_size_words;
      unspanned_heap_size -= current_group_span;
      effective_chunk_size /= 2;
      current_group_span /= 2;
      coalesced_groups++;
    }
    assert(effective_chunk_size * 2 == _maximum_chunk_size_words,
           "We assume _first_group_chunk_size_b4_rebalance is _maximum_chunk_size_words * a power of two");
    _largest_chunk_size_words = _maximum_chunk_size_words;
    _adjusted_num_groups = _num_groups - (coalesced_groups - 1);
  } else {
    num_chunks = _regular_group_size;
    unspanned_heap_size -= current_group_span;
    _largest_chunk_size_words = current_group_span / num_chunks;
    _adjusted_num_groups = _num_groups;
    current_group_span /= 2;
  }

  size_t spanned_groups = 1;
  while (unspanned_heap_size > 0) {
    if (current_group_span <= unspanned_heap_size) {
      unspanned_heap_size -= current_group_span;
      num_chunks += _regular_group_size;
      spanned_groups++;

      // _num_groups is the number of groups required to span the configured heap size.  We are not allowed
      // to change the number of groups.  The last group is responsible for spanning all chunks not spanned
      // by previously processed groups.
      if (spanned_groups >= _num_groups) {
        // The last group has more than _regular_group_size entries.
        size_t chunk_span = current_group_span / _regular_group_size;
        size_t extra_chunks = unspanned_heap_size / chunk_span;
        assert (extra_chunks * chunk_span == unspanned_heap_size, "Chunks must precisely span regions");
        num_chunks += extra_chunks;
        return num_chunks;
      } else if (current_group_span <= smallest_group_span) {
        // We cannot introduce new groups because we've reached the lower bound on group size.  So this last
        // group may hold extra chunks.
        size_t chunk_span = smallest_chunk_size_words();
        size_t extra_chunks = unspanned_heap_size / chunk_span;
        assert (extra_chunks * chunk_span == unspanned_heap_size, "Chunks must precisely span regions");
        num_chunks += extra_chunks;
        return num_chunks;
      } else {
        current_group_span /= 2;
      }
    } else {
      // This last group has fewer than _regular_group_size entries.
      size_t chunk_span = current_group_span / _regular_group_size;
      size_t last_group_size = unspanned_heap_size / chunk_span;
      assert (last_group_size * chunk_span == unspanned_heap_size, "Chunks must precisely span regions");
      num_chunks += last_group_size;
      return num_chunks;
    }
  }
  return num_chunks;
}

ShenandoahRegionChunkIterator::ShenandoahRegionChunkIterator(size_t worker_count) :
    ShenandoahRegionChunkIterator(ShenandoahHeap::heap(), worker_count)
{
}

ShenandoahRegionChunkIterator::ShenandoahRegionChunkIterator(ShenandoahHeap* heap, size_t worker_count) :
    _heap(heap),
    _regular_group_size(calc_regular_group_size()),
    _first_group_chunk_size_b4_rebalance(calc_first_group_chunk_size_b4_rebalance()),
    _num_groups(calc_num_groups()),
    _total_chunks(calc_total_chunks()),
    _index(0)
{
#ifdef ASSERT
  size_t expected_chunk_size_words = _clusters_in_smallest_chunk * CardTable::card_size_in_words() * ShenandoahCardCluster::CardsPerCluster;
  assert(smallest_chunk_size_words() == expected_chunk_size_words, "_smallest_chunk_size (" SIZE_FORMAT ") is not valid because it does not equal (" SIZE_FORMAT ")",
         smallest_chunk_size_words(), expected_chunk_size_words);
  assert(_num_groups <= _maximum_groups,
         "The number of remembered set scanning groups must be less than or equal to maximum groups");
  assert(smallest_chunk_size_words() << (_adjusted_num_groups - 1) == _largest_chunk_size_words,
         "The number of groups (%zu) needs to span smallest chunk size (%zu) to largest chunk size (%zu)",
         _adjusted_num_groups, smallest_chunk_size_words(), _largest_chunk_size_words);
#endif

  size_t words_in_region = ShenandoahHeapRegion::region_size_words();
  _region_index[0] = 0;
  _group_offset[0] = 0;
  if (words_in_region > _maximum_chunk_size_words) {
    // In the case that we shrink the first group's chunk size, certain other groups will also be subsumed within the first group
    size_t num_chunks = 0;
    size_t effective_chunk_size = _first_group_chunk_size_b4_rebalance;
    size_t current_group_span = effective_chunk_size * _regular_group_size;
    while (effective_chunk_size >= _maximum_chunk_size_words) {
      num_chunks += current_group_span / _maximum_chunk_size_words;
      effective_chunk_size /= 2;
      current_group_span /= 2;
    }
    _group_entries[0] = num_chunks;
    _group_chunk_size[0] = _maximum_chunk_size_words;
  } else {
    _group_entries[0] = _regular_group_size;
    _group_chunk_size[0] = _first_group_chunk_size_b4_rebalance;
  }

  size_t previous_group_span = _group_entries[0] * _group_chunk_size[0];
  for (size_t i = 1; i < _adjusted_num_groups; i++) {
    _group_chunk_size[i] = _group_chunk_size[i-1] / 2;
    size_t chunks_in_group = _regular_group_size;
    size_t this_group_span = _group_chunk_size[i] * chunks_in_group;
    size_t total_span_of_groups = previous_group_span + this_group_span;
    _region_index[i] = previous_group_span / words_in_region;
    _group_offset[i] = previous_group_span % words_in_region;
    _group_entries[i] = _group_entries[i-1] + _regular_group_size;
    previous_group_span = total_span_of_groups;
  }
  if (_group_entries[_adjusted_num_groups-1] < _total_chunks) {
    assert((_total_chunks - _group_entries[_adjusted_num_groups-1]) * _group_chunk_size[_adjusted_num_groups-1] + previous_group_span ==
           heap->num_regions() * words_in_region, "Total region chunks (" SIZE_FORMAT
           ") do not span total heap regions (" SIZE_FORMAT ")", _total_chunks, _heap->num_regions());
    previous_group_span += (_total_chunks - _group_entries[_adjusted_num_groups-1]) * _group_chunk_size[_adjusted_num_groups-1];
    _group_entries[_adjusted_num_groups-1] = _total_chunks;
  }
  assert(previous_group_span == heap->num_regions() * words_in_region, "Total region chunks (" SIZE_FORMAT
         ") do not span total heap regions (" SIZE_FORMAT "): " SIZE_FORMAT " does not equal " SIZE_FORMAT,
         _total_chunks, _heap->num_regions(), previous_group_span, heap->num_regions() * words_in_region);

  // Not necessary, but keeps things tidy
  for (size_t i = _adjusted_num_groups; i < _maximum_groups; i++) {
    _region_index[i] = 0;
    _group_offset[i] = 0;
    _group_entries[i] = _group_entries[i-1];
    _group_chunk_size[i] = 0;
  }
}

void ShenandoahRegionChunkIterator::reset() {
  _index = 0;
}
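// Rebuilds old-generation remembered set information after a full GC: workers claim old regions,
// re-register object starts with the card scanner, and use ShenandoahDirtyRememberedSetClosure to
// re-dirty cards that hold old-to-young pointers.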
SIZE_FORMAT ")", _total_chunks, _heap->num_regions()); 913 previous_group_span += (_total_chunks - _group_entries[_adjusted_num_groups-1]) * _group_chunk_size[_adjusted_num_groups-1]; 914 _group_entries[_adjusted_num_groups-1] = _total_chunks; 915 } 916 assert(previous_group_span == heap->num_regions() * words_in_region, "Total region chunks (" SIZE_FORMAT 917 ") do not span total heap regions (" SIZE_FORMAT "): " SIZE_FORMAT " does not equal " SIZE_FORMAT, 918 _total_chunks, _heap->num_regions(), previous_group_span, heap->num_regions() * words_in_region); 919 920 // Not necessary, but keeps things tidy 921 for (size_t i = _adjusted_num_groups; i < _maximum_groups; i++) { 922 _region_index[i] = 0; 923 _group_offset[i] = 0; 924 _group_entries[i] = _group_entries[i-1]; 925 _group_chunk_size[i] = 0; 926 } 927 } 928 929 void ShenandoahRegionChunkIterator::reset() { 930 _index = 0; 931 } 932 933 ShenandoahReconstructRememberedSetTask::ShenandoahReconstructRememberedSetTask(ShenandoahRegionIterator* regions) 934 : WorkerTask("Shenandoah Reset Bitmap") 935 , _regions(regions) { } 936 937 void ShenandoahReconstructRememberedSetTask::work(uint worker_id) { 938 ShenandoahParallelWorkerSession worker_session(worker_id); 939 ShenandoahHeapRegion* r = _regions->next(); 940 ShenandoahGenerationalHeap* heap = ShenandoahGenerationalHeap::heap(); 941 ShenandoahScanRemembered* scanner = heap->old_generation()->card_scan(); 942 ShenandoahDirtyRememberedSetClosure dirty_cards_for_cross_generational_pointers; 943 944 while (r != nullptr) { 945 if (r->is_old() && r->is_active()) { 946 HeapWord* obj_addr = r->bottom(); 947 if (r->is_humongous_start()) { 948 // First, clear the remembered set 949 oop obj = cast_to_oop(obj_addr); 950 size_t size = obj->size(); 951 952 size_t num_regions = ShenandoahHeapRegion::required_regions(size * HeapWordSize); 953 size_t region_index = r->index(); 954 ShenandoahHeapRegion* humongous_region = heap->get_region(region_index); 955 while (num_regions-- != 0) { 956 scanner->reset_object_range(humongous_region->bottom(), humongous_region->end()); 957 region_index++; 958 humongous_region = heap->get_region(region_index); 959 } 960 961 // Then register the humongous object and DIRTY relevant remembered set cards 962 scanner->register_object_without_lock(obj_addr); 963 obj->oop_iterate(&dirty_cards_for_cross_generational_pointers); 964 } else if (!r->is_humongous()) { 965 scanner->reset_object_range(r->bottom(), r->end()); 966 967 // Then iterate over all objects, registering object and DIRTYing relevant remembered set cards 968 HeapWord* t = r->top(); 969 while (obj_addr < t) { 970 oop obj = cast_to_oop(obj_addr); 971 scanner->register_object_without_lock(obj_addr); 972 obj_addr += obj->oop_iterate_size(&dirty_cards_for_cross_generational_pointers); 973 } 974 } // else, ignore humongous continuation region 975 } 976 // else, this region is FREE or YOUNG or inactive and we can ignore it. 977 r = _regions->next(); 978 } 979 }