/*
 * Copyright Amazon.com Inc. or its affiliates. All Rights Reserved.
 * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef SHARE_GC_SHENANDOAH_SHENANDOAHSCANREMEMBEREDINLINE_HPP
#define SHARE_GC_SHENANDOAH_SHENANDOAHSCANREMEMBEREDINLINE_HPP

#include "gc/shenandoah/shenandoahScanRemembered.hpp"

#include "gc/shared/collectorCounters.hpp"
#include "gc/shenandoah/mode/shenandoahMode.hpp"
#include "gc/shenandoah/shenandoahCardStats.hpp"
#include "gc/shenandoah/shenandoahCardTable.hpp"
#include "gc/shenandoah/shenandoahHeap.hpp"
#include "gc/shenandoah/shenandoahHeapRegion.hpp"
#include "gc/shenandoah/shenandoahOldGeneration.hpp"
#include "logging/log.hpp"
#include "memory/iterator.hpp"
#include "oops/objArrayOop.hpp"
#include "oops/oop.hpp"

// Process all objects that start within the count clusters beginning at first_cluster and whose start address
// is less than end_of_range.  For any non-array object whose header lies on a dirty card, scan the entire object,
// even if its end reaches beyond end_of_range.  Object arrays, on the other hand, are dirtied precisely, and
// only the portions of the array on dirty cards need to be scanned.
//
// Do not CANCEL within process_clusters.  It is assumed that if a worker thread accepts responsibility for processing
// a chunk of work, it will finish the work it starts.  Otherwise, the chunk of work will be lost in the transition to
// degenerated execution, leading to dangling references.
template <typename ClosureType>
void ShenandoahScanRemembered::process_clusters(size_t first_cluster, size_t count, HeapWord* end_of_range,
                                                ClosureType* cl, bool use_write_table, uint worker_id) {

  assert(ShenandoahHeap::heap()->old_generation()->is_parsable(), "Old generation regions must be parsable for remembered set scan");
  // If old-gen evacuation is active, then the MarkingContext for old-gen heap regions is valid.  We use the MarkingContext
  // bits to determine which objects within a DIRTY card need to be scanned.  This is necessary because old-gen heap
  // regions that are in the candidate collection set have not been coalesced and filled.  Thus, these heap regions
  // may contain zombie objects.  Zombie objects are known to be dead, but have not yet been "collected".  Scanning
  // zombie objects is unsafe because the Klass pointer is not reliable, and objects referenced from a zombie may
  // themselves have been collected (if dead) or relocated (if live).  Further, we must not "revive" dead objects
  // by marking them (when marking) or evacuating them (when updating references).

  // start and end addresses of range of objects to be scanned, clipped to end_of_range
  const size_t start_card_index = first_cluster * ShenandoahCardCluster::CardsPerCluster;
  const HeapWord* start_addr = _rs->addr_for_card_index(start_card_index);
  // clip at end_of_range (exclusive)
  HeapWord* end_addr = MIN2(end_of_range, (HeapWord*)start_addr + (count * ShenandoahCardCluster::CardsPerCluster
                                                                   * CardTable::card_size_in_words()));
  assert(start_addr < end_addr, "Empty region?");

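  // Number of whole cards spanned by [start_addr, end_addr), rounding a partial final card up.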
  const size_t whole_cards = (end_addr - start_addr + CardTable::card_size_in_words() - 1)/CardTable::card_size_in_words();
  const size_t end_card_index = start_card_index + whole_cards - 1;
  log_debug(gc, remset)("Worker %u: cluster = %zu count = %zu eor = " INTPTR_FORMAT
                        " start_addr = " INTPTR_FORMAT " end_addr = " INTPTR_FORMAT " cards = %zu",
                        worker_id, first_cluster, count, p2i(end_of_range), p2i(start_addr), p2i(end_addr), whole_cards);

  // use_write_table states whether we are using the card table that is being
  // marked by the mutators. If false, we are using a snapshot of the card table
  // that is not subject to modifications. Even when this arg is true, and
  // the card table is being actively marked, SATB marking ensures that we need not
  // worry about cards marked after the processing here has passed them.
  const CardValue* const ctbm = _rs->get_card_table_byte_map(use_write_table);

  // If old gen evacuation is active, ctx will hold the completed marking of
  // old generation objects. We'll only scan objects that are marked live by
  // the old generation marking. These include objects allocated since the
  // start of old generation marking (being those above TAMS).
  const ShenandoahHeap* heap = ShenandoahHeap::heap();
  const ShenandoahMarkingContext* ctx = heap->old_generation()->is_mark_complete() ?
                                        heap->marking_context() : nullptr;

  // The region we will scan is the half-open interval [start_addr, end_addr),
  // and lies entirely within a single region.
  const ShenandoahHeapRegion* region = ShenandoahHeap::heap()->heap_region_containing(start_addr);
  assert(region->contains(end_addr - 1), "Slice shouldn't cross regions");

  // This code may have implicit assumptions of examining only old gen regions.
  assert(region->is_old(), "We only expect to be processing old regions");
  assert(!region->is_humongous(), "Humongous regions can be processed more efficiently; "
                                  "see process_humongous_clusters()");
  // tams and ctx below are for old generation marking. As such, young gen roots must
  // consider everything above tams, since it doesn't represent a TAMS for young gen's
  // SATB marking.
  HeapWord* const tams = (ctx == nullptr ? region->bottom() : ctx->top_at_mark_start(region));

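  // Per-worker card statistics (dirty/clean run lengths and scanned-object counts) are gathered only in non-product builds.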
  NOT_PRODUCT(ShenandoahCardStats stats(whole_cards, card_stats(worker_id));)

  // In the case of imprecise marking, we remember the lowest address
  // scanned in a range of dirty cards, as we work our way left from the
  // highest end_addr. This serves as another upper bound on the address we will
  // scan as we move left over each contiguous range of dirty cards.
  HeapWord* upper_bound = nullptr;

  // Starting at the right end of the address range, walk backwards accumulating
  // a maximal dirty range of cards, then process those cards.
  ssize_t cur_index = (ssize_t) end_card_index;
  assert(cur_index >= 0, "Overflow");
  assert(((ssize_t)start_card_index) >= 0, "Overflow");
  while (cur_index >= (ssize_t)start_card_index) {

    // We'll continue the search starting with the card for the upper bound
    // address identified by the last dirty range that we processed, if any,
    // skipping any cards at higher addresses.
    if (upper_bound != nullptr) {
      ssize_t right_index = _rs->card_index_for_addr(upper_bound);
      assert(right_index >= 0, "Overflow");
      cur_index = MIN2(cur_index, right_index);
      assert(upper_bound < end_addr, "Program logic");
      end_addr  = upper_bound;   // lower end_addr
      upper_bound = nullptr;     // and clear upper_bound
      if (end_addr <= start_addr) {
        assert(right_index <= (ssize_t)start_card_index, "Program logic");
        // We are done with our cluster
        return;
      }
    }

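    // Dispatch on the value of the current card: a dirty card anchors a run of contiguous dirty
    // cards that must be scanned; a clean card anchors a run of clean cards that is skipped.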
    if (ctbm[cur_index] == CardTable::dirty_card_val()) {
      // ==== BEGIN DIRTY card range processing ====

      const size_t dirty_r = cur_index;  // record right end of dirty range (inclusive)
      while (--cur_index >= (ssize_t)start_card_index && ctbm[cur_index] == CardTable::dirty_card_val()) {
        // walk back over contiguous dirty cards to find left end of dirty range (inclusive)
      }
      // [dirty_l, dirty_r] is a "maximal" closed interval range of dirty card indices:
      // it may not be maximal if we are using the write_table, because of concurrent
      // mutations dirtying the card-table. It may also not be maximal if an upper bound
      // was established by the scan of the previous chunk.
      const size_t dirty_l = cur_index + 1;   // record left end of dirty range (inclusive)
      // Check that we identified a boundary on our left
      assert(ctbm[dirty_l] == CardTable::dirty_card_val(), "First card in range should be dirty");
      assert(dirty_l == start_card_index || use_write_table
             || ctbm[dirty_l - 1] == CardTable::clean_card_val(),
             "Interval isn't maximal on the left");
      assert(dirty_r >= dirty_l, "Error");
      assert(ctbm[dirty_r] == CardTable::dirty_card_val(), "Last card in range should be dirty");
      // Record alternations, dirty run length, and dirty card count
      NOT_PRODUCT(stats.record_dirty_run(dirty_r - dirty_l + 1);)

      // Find first object that starts this range:
      // [left, right) is a maximal right-open interval of dirty cards
      HeapWord* left = _rs->addr_for_card_index(dirty_l);        // inclusive
      HeapWord* right = _rs->addr_for_card_index(dirty_r + 1);   // exclusive
      if (end_addr <= left) {
        // The range of addresses to be scanned is empty
        continue;
      }
      // Clip right to end_addr established above (still exclusive)
      right = MIN2(right, end_addr);
      assert(right <= region->top() && end_addr <= region->top(), "Busted bounds");
      const MemRegion mr(left, right);

      // NOTE: We'll not call first_object_start() repeatedly
      // on a very large object, i.e. one spanning multiple cards,
      // if its head card is dirty. If not (i.e. its head card is clean),
      // we'll call it each time we process a new dirty range on the object.
      // This is always the case for large object arrays, which are typically more
      // common.
      assert(ctx != nullptr || heap->old_generation()->is_parsable(), "Error");
      HeapWord* p = _scc->first_object_start(dirty_l, ctx, tams, right);
      assert((p == nullptr) || (p < right), "No first object found is denoted by nullptr, p: "
             PTR_FORMAT ", right: " PTR_FORMAT ", end_addr: " PTR_FORMAT ", next card addr: " PTR_FORMAT,
             p2i(p), p2i(right), p2i(end_addr), p2i(_rs->addr_for_card_index(dirty_r + 1)));
      if (p == nullptr) {
        // There are no live objects to be scanned in this dirty range.  cur_index now points at the card
        // just to the left of this uninteresting dirty range.  At the top of the next loop iteration, we will
        // either end the loop (because cur_index < start_card_index) or begin the search for a range of clean cards.
        continue;
      }

      oop obj = cast_to_oop(p);
      assert(oopDesc::is_oop(obj), "Not an object at " PTR_FORMAT ", left: " PTR_FORMAT ", right: " PTR_FORMAT,
             p2i(p), p2i(left), p2i(right));
      assert(ctx == nullptr || ctx->is_marked(obj), "Error");

      // PREFIX: The object that straddles into this range of dirty cards
      // from the left may be subject to special treatment unless
      // it is an object array.
      if (p < left && !obj->is_refArray()) {
        // The mutator (both compiler and interpreter, but not JNI?)
        // typically dirties imprecisely (i.e. only the head of an object),
        // but GC closures typically dirty the object precisely. (It would
        // be nice to have everything be precise for maximum efficiency.)
        //
        // To handle this, we check the head card of the object here and,
        // if dirty, (arrange to) scan the object in its entirety. If we
        // find the head card clean, we'll scan only the portion of the
        // object lying in the dirty card range below, assuming this was
        // the result of precise marking by GC closures.

        // index of the "head card" for p
        const size_t hc_index = _rs->card_index_for_addr(p);
        if (ctbm[hc_index] == CardTable::dirty_card_val()) {
          // Scan or skip the object, depending on location of its
          // head card, and remember that we'll have processed all
          // the objects back up to p, which is thus an upper bound
          // for the next iteration of a dirty card loop.
          upper_bound = p;   // remember upper bound for next chunk
          if (p < start_addr) {
            // if object starts in a previous slice, it'll be handled
            // in its entirety by the thread processing that slice; we can
            // skip over it and avoid an unnecessary extra scan.
            assert(obj == cast_to_oop(p), "Inconsistency detected");
            p += obj->size();
          } else {
            // the object starts in our slice, we scan it in its entirety
            assert(obj == cast_to_oop(p), "Inconsistency detected");
            if (ctx == nullptr || ctx->is_marked(obj)) {
              // Scan the object in its entirety
              p += obj->oop_iterate_size(cl);
            } else {
              assert(p < tams, "Error 1 in ctx/marking/tams logic");
              // Skip over any intermediate dead objects
              p = ctx->get_next_marked_addr(p, tams);
              assert(p <= tams, "Error 2 in ctx/marking/tams logic");
            }
          }
          assert(p > left, "Should have processed into interior of dirty range");
        }
      }

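      // i counts objects scanned in this dirty range (non-product statistics only); last_p remembers
      // the start of the last live object scanned, so that a straddling suffix can be completed below.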
      size_t i = 0;
      HeapWord* last_p = nullptr;

      // BODY: Deal with (other) objects in this dirty card range
      while (p < right) {
        obj = cast_to_oop(p);
        // walk right scanning eligible objects
        if (ctx == nullptr || ctx->is_marked(obj)) {
          // we need to remember the last object ptr we scanned, in case we need to
          // complete a partial suffix scan after mr, see below
          last_p = p;
          // apply the closure to the oops in the portion of
          // the object within mr.
          p += obj->oop_iterate_size(cl, mr);
          NOT_PRODUCT(i++);
        } else {
          // forget the last object pointer we remembered
          last_p = nullptr;
          assert(p < tams, "Tams and above are implicitly marked in ctx");
          // object under tams isn't marked: skip to next live object
          p = ctx->get_next_marked_addr(p, tams);
          assert(p <= tams, "Error 3 in ctx/marking/tams logic");
        }
      }

      // SUFFIX: Fix up a possible incomplete scan at right end of window
      // by scanning the portion of a non-objArray that wasn't done.
      if (p > right && last_p != nullptr) {
        assert(last_p < right, "Error");
        // check if last_p suffix needs scanning
        const oop last_obj = cast_to_oop(last_p);
        if (!last_obj->is_refArray()) {
          // scan the remaining suffix of the object
          const MemRegion last_mr(right, p);
          assert(p == last_p + last_obj->size(), "Would miss portion of last_obj");
          last_obj->oop_iterate(cl, last_mr);
          log_develop_debug(gc, remset)("Fixed up non-objArray suffix scan in [" INTPTR_FORMAT ", " INTPTR_FORMAT ")",
                                        p2i(last_mr.start()), p2i(last_mr.end()));
        } else {
          log_develop_debug(gc, remset)("Skipped suffix scan of objArray in [" INTPTR_FORMAT ", " INTPTR_FORMAT ")",
                                        p2i(right), p2i(p));
        }
      }
      NOT_PRODUCT(stats.record_scan_obj_cnt(i);)

      // ==== END   DIRTY card range processing ====
    } else {
      // ==== BEGIN CLEAN card range processing ====

      // If we are using the write table (e.g. during update refs), a mutator may dirty
      // a card at any time. This is fine for the algorithm below because it is only
      // counting contiguous runs of clean cards (and only for non-product builds).
      assert(use_write_table || ctbm[cur_index] == CardTable::clean_card_val(), "Error");

      // walk back over contiguous clean cards
      size_t i = 0;
      while (--cur_index >= (ssize_t)start_card_index && ctbm[cur_index] == CardTable::clean_card_val()) {
        NOT_PRODUCT(i++);
      }
      // Record alternations, clean run length, and clean card count
      NOT_PRODUCT(stats.record_clean_run(i);)

      // ==== END CLEAN card range processing ====
    }
  }
}

// Given that this range of clusters is known to lie within the humongous object that spans region r, scan the
// portion of the humongous object that corresponds to the specified range.
template <typename ClosureType>
inline void
ShenandoahScanRemembered::process_humongous_clusters(ShenandoahHeapRegion* r, size_t first_cluster, size_t count,
                                                     HeapWord *end_of_range, ClosureType *cl, bool use_write_table) {
  ShenandoahHeapRegion* start_region = r->humongous_start_region();
  HeapWord* p = start_region->bottom();
  oop obj = cast_to_oop(p);
  assert(r->is_humongous(), "Only process humongous regions here");
  assert(start_region->is_humongous_start(), "Should be start of humongous region");
  assert(p + obj->size() >= end_of_range, "Humongous object ends before range ends");

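  // Translate the cluster range into a starting address and word count, then scan only the dirty
  // portions of that slice of the humongous object.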
  size_t first_card_index = first_cluster * ShenandoahCardCluster::CardsPerCluster;
  HeapWord* first_cluster_addr = _rs->addr_for_card_index(first_card_index);
  size_t spanned_words = count * ShenandoahCardCluster::CardsPerCluster * CardTable::card_size_in_words();
  start_region->oop_iterate_humongous_slice_dirty(cl, first_cluster_addr, spanned_words, use_write_table);
}


// This method takes a region and determines the end of the region that the worker can scan.
template <typename ClosureType>
inline void
ShenandoahScanRemembered::process_region_slice(ShenandoahHeapRegion *region, size_t start_offset, size_t clusters,
                                               HeapWord *end_of_range, ClosureType *cl, bool use_write_table,
                                               uint worker_id) {

  // This is called only for young gen collection, when we scan old gen regions
  assert(region->is_old(), "Expecting an old region");
  HeapWord *start_of_range = region->bottom() + start_offset;
  size_t start_cluster_no = cluster_for_addr(start_of_range);
  assert(addr_for_cluster(start_cluster_no) == start_of_range, "process_region_slice range must align on cluster boundary");

  // region->end() represents the end of memory spanned by this region, but not all of this
  //   memory is eligible to be scanned because some of this memory has not yet been allocated.
  //
  // region->top() represents the end of allocated memory within this region.  Any addresses
  //   beyond region->top() should not be scanned as that memory does not hold valid objects.

  if (use_write_table) {
    // This is update-refs servicing.
    if (end_of_range > region->get_update_watermark()) {
      end_of_range = region->get_update_watermark();
    }
  } else {
    // This is concurrent mark servicing.  Note that TAMS for this region is the TAMS at the start of the old-gen
    // collection.  Here, we need to scan up to the TAMS for the most recently initiated young-gen collection.
    // Since all LABs are retired at init mark, since replacement LABs are allocated lazily, and since no promotions
    // occur until the evacuation phase, the TAMS for the most recent young-gen collection is the same as top().
    if (end_of_range > region->top()) {
      end_of_range = region->top();
    }
  }

  log_debug(gc, remset)("Remembered set scan processing Region %zu, from " PTR_FORMAT " to " PTR_FORMAT ", using %s table",
                        region->index(), p2i(start_of_range), p2i(end_of_range),
                        use_write_table ? "read/write (updating)" : "read (marking)");

  // Note that end_of_range may point to the middle of a cluster because we limit scanning to
  // region->top() or region->get_update_watermark(). We avoid processing past end_of_range.
  // Objects that start between start_of_range and end_of_range, including humongous objects, will
  // be fully processed by process_clusters. In no case should we need to scan past end_of_range.
  if (start_of_range < end_of_range) {
    if (region->is_humongous()) {
      ShenandoahHeapRegion* start_region = region->humongous_start_region();
      process_humongous_clusters(start_region, start_cluster_no, clusters, end_of_range, cl, use_write_table);
    } else {
      process_clusters(start_cluster_no, clusters, end_of_range, cl, use_write_table, worker_id);
    }
  }
}

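// has_next() is only a racy hint: next() below makes the authoritative claim with an atomic increment.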
inline bool ShenandoahRegionChunkIterator::has_next() const {
  return _index < _total_chunks;
}

inline bool ShenandoahRegionChunkIterator::next(struct ShenandoahRegionChunk *assignment) {
  if (_index >= _total_chunks) {
    return false;
  }
  size_t new_index = AtomicAccess::add(&_index, (size_t) 1, memory_order_relaxed);
  if (new_index > _total_chunks) {
    // First worker that hits new_index == _total_chunks continues, other
    // contending workers return false.
    return false;
  }
  // convert to zero-based indexing
  new_index--;
  assert(new_index < _total_chunks, "Error");

  // Find the group number for the assigned chunk index
  size_t group_no;
  for (group_no = 0; new_index >= _group_entries[group_no]; group_no++)
    ;
  assert(group_no < _num_groups, "Cannot have group no greater than or equal to _num_groups");

  // All size computations measured in HeapWord
  size_t region_size_words = ShenandoahHeapRegion::region_size_words();
  size_t group_region_index = _region_index[group_no];
  size_t group_region_offset = _group_offset[group_no];

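  // Chunks within a group share a size. Convert the chunk's position within its group into a word
  // offset from the group's first region, then split that offset into a region index and an offset
  // within that region.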
  size_t index_within_group = (group_no == 0) ? new_index : new_index - _group_entries[group_no - 1];
  size_t group_chunk_size = _group_chunk_size[group_no];
  size_t offset_of_this_chunk = group_region_offset + index_within_group * group_chunk_size;
  size_t regions_spanned_by_chunk_offset = offset_of_this_chunk / region_size_words;
  size_t offset_within_region = offset_of_this_chunk % region_size_words;

  size_t region_index = group_region_index + regions_spanned_by_chunk_offset;

  assignment->_r = _heap->get_region(region_index);
  assignment->_chunk_offset = offset_within_region;
  assignment->_chunk_size = group_chunk_size;
  return true;
}

#endif   // SHARE_GC_SHENANDOAH_SHENANDOAHSCANREMEMBEREDINLINE_HPP