/*
 * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef SHARE_GC_SHENANDOAH_SHENANDOAHSCANREMEMBEREDINLINE_HPP
#define SHARE_GC_SHENANDOAH_SHENANDOAHSCANREMEMBEREDINLINE_HPP

#include "memory/iterator.hpp"
#include "oops/oop.hpp"
#include "oops/objArrayOop.hpp"
#include "gc/shared/collectorCounters.hpp"
#include "gc/shenandoah/shenandoahCardStats.hpp"
#include "gc/shenandoah/shenandoahCardTable.hpp"
#include "gc/shenandoah/shenandoahHeap.hpp"
#include "gc/shenandoah/shenandoahHeapRegion.hpp"
#include "gc/shenandoah/shenandoahOldGeneration.hpp"
#include "gc/shenandoah/shenandoahScanRemembered.hpp"
#include "gc/shenandoah/mode/shenandoahMode.hpp"
#include "logging/log.hpp"

// Process all objects starting within count clusters beginning with first_cluster and for which the start address is
// less than end_of_range. For any non-array object whose header lies on a dirty card, scan the entire object,
// even if its end reaches beyond end_of_range. Object arrays, on the other hand, are precisely dirtied and
// only the portions of the array on dirty cards need to be scanned.
//
// Do not CANCEL within process_clusters. It is assumed that if a worker thread accepts responsibility for processing
// a chunk of work, it will finish the work it starts. Otherwise, the chunk of work will be lost in the transition to
// degenerated execution, leading to dangling references.
template <typename ClosureType>
void ShenandoahScanRemembered::process_clusters(size_t first_cluster, size_t count, HeapWord* end_of_range,
                                                ClosureType* cl, bool use_write_table, uint worker_id) {

  assert(ShenandoahHeap::heap()->old_generation()->is_parsable(), "Old generation regions must be parsable for remembered set scan");
  // If old-gen evacuation is active, then MarkingContext for old-gen heap regions is valid. We use the MarkingContext
  // bits to determine which objects within a DIRTY card need to be scanned. This is necessary because old-gen heap
  // regions that are in the candidate collection set have not been coalesced and filled. Thus, these heap regions
  // may contain zombie objects. Zombie objects are known to be dead, but have not yet been "collected". Scanning
  // zombie objects is unsafe because the Klass pointer is not reliable, objects referenced from a zombie may have been
  // collected (if dead), or relocated (if live), or if dead but not yet collected, we don't want to "revive" them
  // by marking them (when marking) or evacuating them (when updating references).

  // start and end addresses of range of objects to be scanned, clipped to end_of_range
  const size_t start_card_index = first_cluster * ShenandoahCardCluster::CardsPerCluster;
  const HeapWord* start_addr = _rs->addr_for_card_index(start_card_index);
  // clip at end_of_range (exclusive)
  HeapWord* end_addr = MIN2(end_of_range, (HeapWord*)start_addr + (count * ShenandoahCardCluster::CardsPerCluster
                                                                   * CardTable::card_size_in_words()));
  assert(start_addr < end_addr, "Empty region?");

  const size_t whole_cards = (end_addr - start_addr + CardTable::card_size_in_words() - 1)/CardTable::card_size_in_words();
  const size_t end_card_index = start_card_index + whole_cards - 1;
  log_debug(gc, remset)("Worker %u: cluster = " SIZE_FORMAT " count = " SIZE_FORMAT " eor = " INTPTR_FORMAT
                        " start_addr = " INTPTR_FORMAT " end_addr = " INTPTR_FORMAT " cards = " SIZE_FORMAT,
                        worker_id, first_cluster, count, p2i(end_of_range), p2i(start_addr), p2i(end_addr), whole_cards);

  // use_write_table states whether we are using the card table that is being
  // marked by the mutators. If false, we are using a snapshot of the card table
  // that is not subject to modifications. Even when this arg is true, and
  // the card table is being actively marked, SATB marking ensures that we need not
  // worry about cards marked after the processing here has passed them.
  const CardValue* const ctbm = _rs->get_card_table_byte_map(use_write_table);

  // If old gen evacuation is active, ctx will hold the completed marking of
  // old generation objects. We'll only scan objects that are marked live by
  // the old generation marking. These include objects allocated since the
  // start of old generation marking (being those above TAMS).
  const ShenandoahHeap* heap = ShenandoahHeap::heap();
  const ShenandoahMarkingContext* ctx = heap->old_generation()->is_mark_complete() ?
                                        heap->marking_context() : nullptr;

  // The region we will scan is the half-open interval [start_addr, end_addr),
  // and lies entirely within a single region.
  const ShenandoahHeapRegion* region = ShenandoahHeap::heap()->heap_region_containing(start_addr);
  assert(region->contains(end_addr - 1), "Slice shouldn't cross regions");

  // This code may have implicit assumptions of examining only old gen regions.
  assert(region->is_old(), "We only expect to be processing old regions");
  assert(!region->is_humongous(), "Humongous regions can be processed more efficiently;"
                                  "see process_humongous_clusters()");
  // tams and ctx below are for old generation marking. As such, young gen roots must
  // consider everything above tams, since it doesn't represent a TAMS for young gen's
  // SATB marking.
  const HeapWord* tams = (ctx == nullptr ? region->bottom() : ctx->top_at_mark_start(region));

  NOT_PRODUCT(ShenandoahCardStats stats(whole_cards, card_stats(worker_id));)

  // In the case of imprecise marking, we remember the lowest address
  // scanned in a range of dirty cards, as we work our way left from the
  // highest end_addr. This serves as another upper bound on the address we will
  // scan as we move left over each contiguous range of dirty cards.
  HeapWord* upper_bound = nullptr;

  // Starting at the right end of the address range, walk backwards accumulating
  // a maximal dirty range of cards, then process those cards.
  ssize_t cur_index = (ssize_t) end_card_index;
  assert(cur_index >= 0, "Overflow");
  assert(((ssize_t)start_card_index) >= 0, "Overflow");
  while (cur_index >= (ssize_t)start_card_index) {

    // We'll continue the search starting with the card for the upper bound
    // address identified by the last dirty range that we processed, if any,
    // skipping any cards at higher addresses.
    if (upper_bound != nullptr) {
      ssize_t right_index = _rs->card_index_for_addr(upper_bound);
      assert(right_index >= 0, "Overflow");
      cur_index = MIN2(cur_index, right_index);
      assert(upper_bound < end_addr, "Program logic");
      end_addr = upper_bound;   // lower end_addr
      upper_bound = nullptr;    // and clear upper_bound
      if (end_addr <= start_addr) {
        assert(right_index <= (ssize_t)start_card_index, "Program logic");
        // We are done with our cluster
        return;
      }
    }

    if (ctbm[cur_index] == CardTable::dirty_card_val()) {
      // ==== BEGIN DIRTY card range processing ====

      const size_t dirty_r = cur_index;  // record right end of dirty range (inclusive)
      while (--cur_index >= (ssize_t)start_card_index && ctbm[cur_index] == CardTable::dirty_card_val()) {
        // walk back over contiguous dirty cards to find left end of dirty range (inclusive)
      }
      // [dirty_l, dirty_r] is a "maximal" closed interval range of dirty card indices:
      // it may not be maximal if we are using the write_table, because of concurrent
      // mutations dirtying the card-table. It may also not be maximal if an upper bound
      // was established by the scan of the previous chunk.
      const size_t dirty_l = cur_index + 1;   // record left end of dirty range (inclusive)
      // Check that we identified a boundary on our left
      assert(ctbm[dirty_l] == CardTable::dirty_card_val(), "First card in range should be dirty");
      assert(dirty_l == start_card_index || use_write_table
             || ctbm[dirty_l - 1] == CardTable::clean_card_val(),
             "Interval isn't maximal on the left");
      assert(dirty_r >= dirty_l, "Error");
      assert(ctbm[dirty_r] == CardTable::dirty_card_val(), "Last card in range should be dirty");
      // Record alternations, dirty run length, and dirty card count
      NOT_PRODUCT(stats.record_dirty_run(dirty_r - dirty_l + 1);)

      // Find first object that starts this range:
      // [left, right) is a maximal right-open interval of dirty cards
      HeapWord* left = _rs->addr_for_card_index(dirty_l);        // inclusive
      HeapWord* right = _rs->addr_for_card_index(dirty_r + 1);   // exclusive
      // Clip right to end_addr established above (still exclusive)
      right = MIN2(right, end_addr);
      assert(right <= region->top() && end_addr <= region->top(), "Busted bounds");
      const MemRegion mr(left, right);

      // NOTE: We'll not call block_start() repeatedly
      // on a very large object if its head card is dirty. If not,
      // (i.e. the head card is clean) we'll call it each time we
      // process a new dirty range on the object. This is always
      // the case for large object arrays, which are typically more
      // common.
      HeapWord* p = _scc->block_start(dirty_l);
      oop obj = cast_to_oop(p);

      // PREFIX: The object that straddles into this range of dirty cards
      // from the left may be subject to special treatment unless
      // it is an object array.
      if (p < left && !obj->is_objArray()) {
        // The mutator (both compiler and interpreter, but not JNI?)
        // typically dirties imprecisely (i.e. only the head of an object),
        // but GC closures typically dirty the object precisely. (It would
        // be nice to have everything be precise for maximum efficiency.)
        //
        // To handle this, we check the head card of the object here and,
        // if dirty, (arrange to) scan the object in its entirety. If we
        // find the head card clean, we'll scan only the portion of the
        // object lying in the dirty card range below, assuming this was
        // the result of precise marking by GC closures.

        // index of the "head card" for p
        const size_t hc_index = _rs->card_index_for_addr(p);
        if (ctbm[hc_index] == CardTable::dirty_card_val()) {
          // Scan or skip the object, depending on location of its
          // head card, and remember that we'll have processed all
          // the objects back up to p, which is thus an upper bound
          // for the next iteration of a dirty card loop.
          upper_bound = p;   // remember upper bound for next chunk
          if (p < start_addr) {
            // if object starts in a previous slice, it'll be handled
            // in its entirety by the thread processing that slice; we can
            // skip over it and avoid an unnecessary extra scan.
            assert(obj == cast_to_oop(p), "Inconsistency detected");
            p += obj->size();
          } else {
            // the object starts in our slice, we scan it in its entirety
            assert(obj == cast_to_oop(p), "Inconsistency detected");
            if (ctx == nullptr || ctx->is_marked(obj)) {
              // Scan the object in its entirety
              p += obj->oop_iterate_size(cl);
            } else {
              assert(p < tams, "Error 1 in ctx/marking/tams logic");
              // Skip over any intermediate dead objects
              p = ctx->get_next_marked_addr(p, tams);
              assert(p <= tams, "Error 2 in ctx/marking/tams logic");
            }
          }
          assert(p > left, "Should have processed into interior of dirty range");
        }
      }

      size_t i = 0;
      HeapWord* last_p = nullptr;

      // BODY: Deal with (other) objects in this dirty card range
      while (p < right) {
        obj = cast_to_oop(p);
        // walk right scanning eligible objects
        if (ctx == nullptr || ctx->is_marked(obj)) {
          // we need to remember the last object ptr we scanned, in case we need to
          // complete a partial suffix scan after mr, see below
          last_p = p;
          // apply the closure to the oops in the portion of
          // the object within mr.
          p += obj->oop_iterate_size(cl, mr);
          NOT_PRODUCT(i++);
        } else {
          // forget the last object pointer we remembered
          last_p = nullptr;
          assert(p < tams, "Tams and above are implicitly marked in ctx");
          // object under tams isn't marked: skip to next live object
          p = ctx->get_next_marked_addr(p, tams);
          assert(p <= tams, "Error 3 in ctx/marking/tams logic");
        }
      }

      // SUFFIX: Fix up a possible incomplete scan at right end of window
      // by scanning the portion of a non-objArray that wasn't done.
      if (p > right && last_p != nullptr) {
        assert(last_p < right, "Error");
        // check if last_p suffix needs scanning
        const oop last_obj = cast_to_oop(last_p);
        if (!last_obj->is_objArray()) {
          // scan the remaining suffix of the object
          const MemRegion last_mr(right, p);
          assert(p == last_p + last_obj->size(), "Would miss portion of last_obj");
          last_obj->oop_iterate(cl, last_mr);
          log_develop_debug(gc, remset)("Fixed up non-objArray suffix scan in [" INTPTR_FORMAT ", " INTPTR_FORMAT ")",
                                        p2i(last_mr.start()), p2i(last_mr.end()));
        } else {
          log_develop_debug(gc, remset)("Skipped suffix scan of objArray in [" INTPTR_FORMAT ", " INTPTR_FORMAT ")",
                                        p2i(right), p2i(p));
        }
      }
      NOT_PRODUCT(stats.record_scan_obj_cnt(i);)

      // ==== END DIRTY card range processing ====
    } else {
      // ==== BEGIN CLEAN card range processing ====

      // If we are using the write table (during update refs, e.g.), a mutator may dirty
      // a card at any time. This is fine for the algorithm below because it is only
      // counting contiguous runs of clean cards (and only for non-product builds).
      assert(use_write_table || ctbm[cur_index] == CardTable::clean_card_val(), "Error");

      // walk back over contiguous clean cards
      size_t i = 0;
      while (--cur_index >= (ssize_t)start_card_index && ctbm[cur_index] == CardTable::clean_card_val()) {
        NOT_PRODUCT(i++);
      }
      // Record alternations, clean run length, and clean card count
      NOT_PRODUCT(stats.record_clean_run(i);)

      // ==== END CLEAN card range processing ====
    }
  }
}

// Given that this range of clusters is known to span a humongous object spanned by region r, scan the
// portion of the humongous object that corresponds to the specified range.
template <typename ClosureType>
inline void
ShenandoahScanRemembered::process_humongous_clusters(ShenandoahHeapRegion* r, size_t first_cluster, size_t count,
                                                     HeapWord *end_of_range, ClosureType *cl, bool use_write_table) {
  ShenandoahHeapRegion* start_region = r->humongous_start_region();
  HeapWord* p = start_region->bottom();
  oop obj = cast_to_oop(p);
  assert(r->is_humongous(), "Only process humongous regions here");
  assert(start_region->is_humongous_start(), "Should be start of humongous region");
  assert(p + obj->size() >= end_of_range, "Humongous object ends before range ends");

  size_t first_card_index = first_cluster * ShenandoahCardCluster::CardsPerCluster;
  HeapWord* first_cluster_addr = _rs->addr_for_card_index(first_card_index);
  size_t spanned_words = count * ShenandoahCardCluster::CardsPerCluster * CardTable::card_size_in_words();
  start_region->oop_iterate_humongous_slice_dirty(cl, first_cluster_addr, spanned_words, use_write_table);
}
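
// Illustrative sketch (not part of this header's API): a worker thread is expected to drain chunk
// assignments from a ShenandoahRegionChunkIterator and hand each slice to process_region_slice()
// below, roughly as follows. The names scanner, iterator, cl, use_write_table and worker_id stand
// in for the caller's locals, and the derivation of the cluster count and end_of_range from the
// chunk's offset and size is the caller's responsibility:
//
//   ShenandoahRegionChunk chunk;
//   while (iterator.next(&chunk)) {
//     HeapWord* end_of_range = chunk._r->bottom() + chunk._chunk_offset + chunk._chunk_size;
//     size_t clusters = chunk._chunk_size /
//                       (ShenandoahCardCluster::CardsPerCluster * CardTable::card_size_in_words());
//     scanner->process_region_slice(chunk._r, chunk._chunk_offset, clusters, end_of_range,
//                                   cl, use_write_table, worker_id);
//   }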

// This method takes a region & determines the end of the region that the worker can scan.
template <typename ClosureType>
inline void
ShenandoahScanRemembered::process_region_slice(ShenandoahHeapRegion *region, size_t start_offset, size_t clusters,
                                               HeapWord *end_of_range, ClosureType *cl, bool use_write_table,
                                               uint worker_id) {

  // This is called only for young gen collection, when we scan old gen regions
  assert(region->is_old(), "Expecting an old region");
  HeapWord *start_of_range = region->bottom() + start_offset;
  size_t start_cluster_no = cluster_for_addr(start_of_range);
  assert(addr_for_cluster(start_cluster_no) == start_of_range, "process_region_slice range must align on cluster boundary");

  // region->end() represents the end of memory spanned by this region, but not all of this
  // memory is eligible to be scanned because some of this memory has not yet been allocated.
  //
  // region->top() represents the end of allocated memory within this region. Any addresses
  // beyond region->top() should not be scanned as that memory does not hold valid objects.

  if (use_write_table) {
    // This is update-refs servicing.
    if (end_of_range > region->get_update_watermark()) {
      end_of_range = region->get_update_watermark();
    }
  } else {
    // This is concurrent mark servicing. Note that TAMS for this region is TAMS at start of old-gen
    // collection. Here, we need to scan up to TAMS for most recently initiated young-gen collection.
    // Since all LABs are retired at init mark, and since replacement LABs are allocated lazily, and since no
    // promotions occur until evacuation phase, TAMS for most recent young-gen is same as top().
    if (end_of_range > region->top()) {
      end_of_range = region->top();
    }
  }

  log_debug(gc)("Remembered set scan processing Region " SIZE_FORMAT ", from " PTR_FORMAT " to " PTR_FORMAT ", using %s table",
                region->index(), p2i(start_of_range), p2i(end_of_range),
                use_write_table? "read/write (updating)": "read (marking)");

  // Note that end_of_range may point to the middle of a cluster because we limit scanning to
  // region->top() or region->get_update_watermark(). We avoid processing past end_of_range.
  // Objects that start between start_of_range and end_of_range, including humongous objects, will
  // be fully processed by process_clusters. In no case should we need to scan past end_of_range.
  if (start_of_range < end_of_range) {
    if (region->is_humongous()) {
      ShenandoahHeapRegion* start_region = region->humongous_start_region();
      process_humongous_clusters(start_region, start_cluster_no, clusters, end_of_range, cl, use_write_table);
    } else {
      process_clusters(start_cluster_no, clusters, end_of_range, cl, use_write_table, worker_id);
    }
  }
}

inline bool ShenandoahRegionChunkIterator::has_next() const {
  return _index < _total_chunks;
}
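
// Descriptive note (added commentary): next() atomically claims the next chunk of remembered set
// scanning work. On success it returns true and fills *assignment with the region to scan, the
// chunk's starting offset within that region (in heap words), and the chunk size (in heap words).
// It returns false once all chunks have been claimed. has_next() above is only an inexpensive,
// racy hint; contention between workers is resolved here by the atomic increment of _index.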
inline bool ShenandoahRegionChunkIterator::next(struct ShenandoahRegionChunk *assignment) {
  if (_index >= _total_chunks) {
    return false;
  }
  size_t new_index = Atomic::add(&_index, (size_t) 1, memory_order_relaxed);
  if (new_index > _total_chunks) {
    // First worker that hits new_index == _total_chunks continues, other
    // contending workers return false.
    return false;
  }
  // convert to zero-based indexing
  new_index--;
  assert(new_index < _total_chunks, "Error");

  // Find the group number for the assigned chunk index
  size_t group_no;
  for (group_no = 0; new_index >= _group_entries[group_no]; group_no++)
    ;
  assert(group_no < _num_groups, "Cannot have group no greater or equal to _num_groups");

  // All size computations measured in HeapWord
  size_t region_size_words = ShenandoahHeapRegion::region_size_words();
  size_t group_region_index = _region_index[group_no];
  size_t group_region_offset = _group_offset[group_no];

  size_t index_within_group = (group_no == 0)? new_index: new_index - _group_entries[group_no - 1];
  size_t group_chunk_size = _group_chunk_size[group_no];
  size_t offset_of_this_chunk = group_region_offset + index_within_group * group_chunk_size;
  size_t regions_spanned_by_chunk_offset = offset_of_this_chunk / region_size_words;
  size_t offset_within_region = offset_of_this_chunk % region_size_words;

  size_t region_index = group_region_index + regions_spanned_by_chunk_offset;

  assignment->_r = _heap->get_region(region_index);
  assignment->_chunk_offset = offset_within_region;
  assignment->_chunk_size = group_chunk_size;
  return true;
}

#endif // SHARE_GC_SHENANDOAH_SHENANDOAHSCANREMEMBEREDINLINE_HPP