19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "precompiled.hpp"
26 #include "gc/g1/g1Allocator.inline.hpp"
27 #include "gc/g1/g1CollectedHeap.inline.hpp"
28 #include "gc/g1/g1CollectionSet.hpp"
29 #include "gc/g1/g1EvacFailureRegions.inline.hpp"
30 #include "gc/g1/g1HeapRegionPrinter.hpp"
31 #include "gc/g1/g1OopClosures.inline.hpp"
32 #include "gc/g1/g1ParScanThreadState.inline.hpp"
33 #include "gc/g1/g1RootClosures.hpp"
34 #include "gc/g1/g1StringDedup.hpp"
35 #include "gc/g1/g1Trace.hpp"
36 #include "gc/g1/g1YoungGCAllocationFailureInjector.inline.hpp"
37 #include "gc/shared/continuationGCSupport.inline.hpp"
38 #include "gc/shared/partialArrayTaskStepper.inline.hpp"
39 #include "gc/shared/preservedMarks.inline.hpp"
40 #include "gc/shared/stringdedup/stringDedup.hpp"
41 #include "gc/shared/taskqueue.inline.hpp"
42 #include "memory/allocation.inline.hpp"
43 #include "oops/access.inline.hpp"
44 #include "oops/oop.inline.hpp"
45 #include "runtime/atomic.hpp"
46 #include "runtime/prefetch.inline.hpp"
47 #include "utilities/globalDefinitions.hpp"
48 #include "utilities/macros.hpp"
49
50 // In fastdebug builds the code size can get out of hand, potentially
51 // tripping over compiler limits (which may be bugs, but nevertheless
52 // need to be taken into consideration). A side benefit of limiting
53 // inlining is that we get more call frames that might aid debugging.
54 // And the fastdebug compile time for this file is much reduced.
55 // Explicit NOINLINE to block ATTRIBUTE_FLATTENing.
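// (NOT_DEBUG(inline) DEBUG_ONLY(NOINLINE) below expands to "inline" in product builds and to NOINLINE in debug builds.)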
56 #define MAYBE_INLINE_EVACUATION NOT_DEBUG(inline) DEBUG_ONLY(NOINLINE)
57
58 G1ParScanThreadState::G1ParScanThreadState(G1CollectedHeap* g1h,
59 G1RedirtyCardsQueueSet* rdcqs,
60 PreservedMarks* preserved_marks,
61 uint worker_id,
62 uint num_workers,
63 G1CollectionSet* collection_set,
64 G1EvacFailureRegions* evac_failure_regions)
65 : _g1h(g1h),
66 _task_queue(g1h->task_queue(worker_id)),
67 _rdc_local_qset(rdcqs),
68 _ct(g1h->card_table()),
69 _closures(nullptr),
70 _plab_allocator(nullptr),
71 _age_table(false),
72 _tenuring_threshold(g1h->policy()->tenuring_threshold()),
73 _scanner(g1h, this),
74 _worker_id(worker_id),
75 _last_enqueued_card(SIZE_MAX),
76 _stack_trim_upper_threshold(GCDrainStackTargetSize * 2 + 1),
77 _stack_trim_lower_threshold(GCDrainStackTargetSize),
78 _trim_ticks(),
79 _surviving_young_words_base(nullptr),
80 _surviving_young_words(nullptr),
81 _surviving_words_length(collection_set->young_region_length() + 1),
82 _old_gen_is_full(false),
83 _partial_objarray_chunk_size(ParGCArrayScanChunk),
84 _partial_array_stepper(num_workers),
85 _string_dedup_requests(),
86 _max_num_optional_regions(collection_set->optional_region_length()),
87 _numa(g1h->numa()),
88 _obj_alloc_stat(nullptr),
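    // The next initializer is compiled in only when the allocation-failure injector is enabled;
    // COMMA supplies the ',' that cannot appear literally inside the macro argument.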
89 ALLOCATION_FAILURE_INJECTOR_ONLY(_allocation_failure_inject_counter(0) COMMA)
90 _preserved_marks(preserved_marks),
91 _evacuation_failed_info(),
92 _evac_failure_regions(evac_failure_regions),
93 _evac_failure_enqueued_cards(0)
94 {
95   // We allocate one entry per young gen region in the collection set, plus one
96   // extra entry, since entry 0 keeps track of surviving bytes for non-young regions.
97   // We also add a few padding elements at the beginning and at the end in
98   // an attempt to eliminate cache line contention.
99 const size_t padding_elem_num = (DEFAULT_PADDING_SIZE / sizeof(size_t));
100 size_t array_length = padding_elem_num + _surviving_words_length + padding_elem_num;
101
102 _surviving_young_words_base = NEW_C_HEAP_ARRAY(size_t, array_length, mtGC);
103 _surviving_young_words = _surviving_young_words_base + padding_elem_num;
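  // Layout: [padding | entry 0: non-young regions | entries 1..young_region_length: young cset regions | padding]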
104 memset(_surviving_young_words, 0, _surviving_words_length * sizeof(size_t));
105
106 _plab_allocator = new G1PLABAllocator(_g1h->allocator());
107
108 _closures = G1EvacuationRootClosures::create_root_closures(_g1h,
109 this,
110 collection_set->only_contains_young_regions());
196
197   // Although we never intentionally push references outside of the collection
198   // set, due to (benign) races in the claim mechanism during RSet scanning, more
199   // than one thread might claim the same card. The same card may therefore be
200   // processed multiple times, and we might get references into the old gen here,
201   // so we need to redo this check.
202 const G1HeapRegionAttr region_attr = _g1h->region_attr(obj);
203   // References pushed onto the work stack should never point to a humongous region,
204   // as humongous regions are never added to the collection set.
205 assert(!region_attr.is_humongous_candidate(),
206 "Obj " PTR_FORMAT " should not refer to humongous region %u from " PTR_FORMAT,
207 p2i(obj), _g1h->addr_to_region(obj), p2i(p));
208
209 if (!region_attr.is_in_cset()) {
210 // In this case somebody else already did all the work.
211 return;
212 }
213
214 markWord m = obj->mark();
215 if (m.is_forwarded()) {
216 obj = m.forwardee();
217 } else {
218 obj = do_copy_to_survivor_space(region_attr, obj, m);
219 }
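  // Update the reference to the object's new location (or to the forwardee another worker installed).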
220 RawAccess<IS_NOT_NULL>::oop_store(p, obj);
221
222 write_ref_field_post(p, obj);
223 }
224
225 MAYBE_INLINE_EVACUATION
226 void G1ParScanThreadState::do_partial_array(PartialArrayScanTask task) {
227 oop from_obj = task.to_source_array();
228
229 assert(_g1h->is_in_reserved(from_obj), "must be in heap.");
230 assert(from_obj->is_objArray(), "must be obj array");
231 assert(from_obj->is_forwarded(), "must be forwarded");
232
233 oop to_obj = from_obj->forwardee();
234 assert(from_obj != to_obj, "should not be chunking self-forwarded objects");
235 assert(to_obj->is_objArray(), "must be obj array");
236 objArrayOop to_array = objArrayOop(to_obj);
237
238 PartialArrayTaskStepper::Step step
239 = _partial_array_stepper.next(objArrayOop(from_obj),
240 to_array,
241 _partial_objarray_chunk_size);
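  // Push the partial-scan tasks requested by the stepper so other workers can steal them
  // while we process this chunk.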
242 for (uint i = 0; i < step._ncreate; ++i) {
243 push_on_queue(ScannerTask(PartialArrayScanTask(from_obj)));
244 }
245
246 G1HeapRegionAttr dest_attr = _g1h->region_attr(to_array);
247 G1SkipCardEnqueueSetter x(&_scanner, dest_attr.is_new_survivor());
248 // Process claimed task. The length of to_array is not correct, but
249 // fortunately the iteration ignores the length field and just relies
250 // on start/end.
251 to_array->oop_iterate_range(&_scanner,
252 step._index,
253 step._index + _partial_objarray_chunk_size);
254 }
255
256 MAYBE_INLINE_EVACUATION
257 void G1ParScanThreadState::start_partial_objarray(G1HeapRegionAttr dest_attr,
258 oop from_obj,
259 oop to_obj) {
260 assert(from_obj->is_objArray(), "precondition");
261 assert(from_obj->is_forwarded(), "precondition");
262 assert(from_obj->forwardee() == to_obj, "precondition");
263 assert(from_obj != to_obj, "should not be scanning self-forwarded objects");
264 assert(to_obj->is_objArray(), "precondition");
265
266 objArrayOop to_array = objArrayOop(to_obj);
267
268 PartialArrayTaskStepper::Step step
269 = _partial_array_stepper.start(objArrayOop(from_obj),
270 to_array,
271 _partial_objarray_chunk_size);
272
273 // Push any needed partial scan tasks. Pushed before processing the
274 // initial chunk to allow other workers to steal while we're processing.
275 for (uint i = 0; i < step._ncreate; ++i) {
276 push_on_queue(ScannerTask(PartialArrayScanTask(from_obj)));
277 }
278
279   // Skip the card enqueue iff the object (to_array) is in a survivor region.
280 // However, G1HeapRegion::is_survivor() is too expensive here.
367 // no other space to try.
368 return nullptr;
369 }
370 }
371
372 G1HeapRegionAttr G1ParScanThreadState::next_region_attr(G1HeapRegionAttr const region_attr, markWord const m, uint& age) {
373 assert(region_attr.is_young() || region_attr.is_old(), "must be either Young or Old");
374
375 if (region_attr.is_young()) {
376 age = !m.has_displaced_mark_helper() ? m.age()
377 : m.displaced_mark_helper().age();
378 if (age < _tenuring_threshold) {
379 return region_attr;
380 }
381 }
382 // young-to-old (promotion) or old-to-old; destination is old in both cases.
383 return G1HeapRegionAttr::Old;
384 }
385
386 void G1ParScanThreadState::report_promotion_event(G1HeapRegionAttr const dest_attr,
387 oop const old, size_t word_sz, uint age,
388 HeapWord * const obj_ptr, uint node_index) const {
389 PLAB* alloc_buf = _plab_allocator->alloc_buffer(dest_attr, node_index);
390 if (alloc_buf->contains(obj_ptr)) {
391 _g1h->gc_tracer_stw()->report_promotion_in_new_plab_event(old->klass(), word_sz * HeapWordSize, age,
392 dest_attr.type() == G1HeapRegionAttr::Old,
393 alloc_buf->word_sz() * HeapWordSize);
394 } else {
395 _g1h->gc_tracer_stw()->report_promotion_outside_plab_event(old->klass(), word_sz * HeapWordSize, age,
396 dest_attr.type() == G1HeapRegionAttr::Old);
397 }
398 }
399
400 NOINLINE
401 HeapWord* G1ParScanThreadState::allocate_copy_slow(G1HeapRegionAttr* dest_attr,
402 oop old,
403 size_t word_sz,
404 uint age,
405 uint node_index) {
406 HeapWord* obj_ptr = nullptr;
407 // Try slow-path allocation unless we're allocating old and old is already full.
408 if (!(dest_attr->is_old() && _old_gen_is_full)) {
409 bool plab_refill_failed = false;
410 obj_ptr = _plab_allocator->allocate_direct_or_new_plab(*dest_attr,
411 word_sz,
412 &plab_refill_failed,
413 node_index);
414 if (obj_ptr == nullptr) {
415 obj_ptr = allocate_in_next_plab(dest_attr,
416 word_sz,
417 plab_refill_failed,
418 node_index);
419 }
420 }
421 if (obj_ptr != nullptr) {
422 update_numa_stats(node_index);
423 if (_g1h->gc_tracer_stw()->should_report_promotion_events()) {
424 // The events are checked individually as part of the actual commit
425 report_promotion_event(*dest_attr, old, word_sz, age, obj_ptr, node_index);
426 }
427 }
428 return obj_ptr;
429 }
430
431 #if ALLOCATION_FAILURE_INJECTOR
432 bool G1ParScanThreadState::inject_allocation_failure(uint region_idx) {
433 return _g1h->allocation_failure_injector()->allocation_should_fail(_allocation_failure_inject_counter, region_idx);
434 }
435 #endif
436
437 NOINLINE
438 void G1ParScanThreadState::undo_allocation(G1HeapRegionAttr dest_attr,
439 HeapWord* obj_ptr,
440 size_t word_sz,
441 uint node_index) {
442 _plab_allocator->undo_allocation(dest_attr, obj_ptr, word_sz, node_index);
443 }
444
445 void G1ParScanThreadState::update_bot_after_copying(oop obj, size_t word_sz) {
446 HeapWord* obj_start = cast_from_oop<HeapWord*>(obj);
447 G1HeapRegion* region = _g1h->heap_region_containing(obj_start);
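  // Record the copied block in the region's block offset table so card scans can locate the object start.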
448 region->update_bot_for_block(obj_start, obj_start + word_sz);
449 }
450
451 // Private inline function, for direct internal use and providing the
452 // implementation of the public not-inline function.
453 MAYBE_INLINE_EVACUATION
454 oop G1ParScanThreadState::do_copy_to_survivor_space(G1HeapRegionAttr const region_attr,
455 oop const old,
456 markWord const old_mark) {
457 assert(region_attr.is_in_cset(),
458 "Unexpected region attr type: %s", region_attr.get_type_str());
459
460 // Get the klass once. We'll need it again later, and this avoids
461 // re-decoding when it's compressed.
462 Klass* klass = old->klass();
463 const size_t word_sz = old->size_given_klass(klass);
464
465 // JNI only allows pinning of typeArrays, so we only need to keep those in place.
466 if (region_attr.is_pinned() && klass->is_typeArray_klass()) {
467 return handle_evacuation_failure_par(old, old_mark, word_sz, true /* cause_pinned */);
468 }
469
470 uint age = 0;
471 G1HeapRegionAttr dest_attr = next_region_attr(region_attr, old_mark, age);
472 G1HeapRegion* const from_region = _g1h->heap_region_containing(old);
473 uint node_index = from_region->node_index();
474
475 HeapWord* obj_ptr = _plab_allocator->plab_allocate(dest_attr, word_sz, node_index);
476
477 // PLAB allocations should succeed most of the time, so we'll
478 // normally check against null once and that's it.
479 if (obj_ptr == nullptr) {
480 obj_ptr = allocate_copy_slow(&dest_attr, old, word_sz, age, node_index);
481 if (obj_ptr == nullptr) {
482 // This will either forward-to-self, or detect that someone else has
483 // installed a forwarding pointer.
484 return handle_evacuation_failure_par(old, old_mark, word_sz, false /* cause_pinned */);
485 }
486 }
487
488 assert(obj_ptr != nullptr, "when we get here, allocation should have succeeded");
489 assert(_g1h->is_in_reserved(obj_ptr), "Allocated memory should be in the heap");
490
491 // Should this evacuation fail?
492 if (inject_allocation_failure(from_region->hrm_index())) {
493     // Doing this after all the allocation attempts also exercises the
494     // undo_allocation() method.
495 undo_allocation(dest_attr, obj_ptr, word_sz, node_index);
496 return handle_evacuation_failure_par(old, old_mark, word_sz, false /* cause_pinned */);
497 }
498
499 // We're going to allocate linearly, so might as well prefetch ahead.
500 Prefetch::write(obj_ptr, PrefetchCopyIntervalInBytes);
561 return obj;
562 } else {
563 _plab_allocator->undo_allocation(dest_attr, obj_ptr, word_sz, node_index);
564 return forward_ptr;
565 }
566 }
567
568 // Public not-inline entry point.
569 ATTRIBUTE_FLATTEN
570 oop G1ParScanThreadState::copy_to_survivor_space(G1HeapRegionAttr region_attr,
571 oop old,
572 markWord old_mark) {
573 return do_copy_to_survivor_space(region_attr, old, old_mark);
574 }
575
576 G1ParScanThreadState* G1ParScanThreadStateSet::state_for_worker(uint worker_id) {
577 assert(worker_id < _num_workers, "out of bounds access");
578 if (_states[worker_id] == nullptr) {
579 _states[worker_id] =
580 new G1ParScanThreadState(_g1h, rdcqs(),
581 _preserved_marks_set.get(worker_id),
582 worker_id,
583 _num_workers,
584 _collection_set,
585 _evac_failure_regions);
586 }
587 return _states[worker_id];
588 }
589
590 const size_t* G1ParScanThreadStateSet::surviving_young_words() const {
591 assert(_flushed, "thread local state from the per thread states should have been flushed");
592 return _surviving_young_words_total;
593 }
594
595 void G1ParScanThreadStateSet::flush_stats() {
596 assert(!_flushed, "thread local state from the per thread states should be flushed once");
597 for (uint worker_id = 0; worker_id < _num_workers; ++worker_id) {
598 G1ParScanThreadState* pss = _states[worker_id];
599 assert(pss != nullptr, "must be initialized");
600
601 G1GCPhaseTimes* p = _g1h->phase_times();
620 dcq.merge_bufferlists(rdcqs());
621 rdcqs()->verify_empty();
622
623 _flushed = true;
624 }
625
626 void G1ParScanThreadStateSet::record_unused_optional_region(G1HeapRegion* hr) {
627 for (uint worker_index = 0; worker_index < _num_workers; ++worker_index) {
628 G1ParScanThreadState* pss = _states[worker_index];
629 assert(pss != nullptr, "must be initialized");
630
631 size_t used_memory = pss->oops_into_optional_region(hr)->used_memory();
632 _g1h->phase_times()->record_or_add_thread_work_item(G1GCPhaseTimes::OptScanHR, worker_index, used_memory, G1GCPhaseTimes::ScanHRUsedMemory);
633 }
634 }
635
636 NOINLINE
637 oop G1ParScanThreadState::handle_evacuation_failure_par(oop old, markWord m, size_t word_sz, bool cause_pinned) {
638 assert(_g1h->is_in_cset(old), "Object " PTR_FORMAT " should be in the CSet", p2i(old));
639
640 oop forward_ptr = old->forward_to_atomic(old, m, memory_order_relaxed);
641 if (forward_ptr == nullptr) {
642 // Forward-to-self succeeded. We are the "owner" of the object.
643 G1HeapRegion* r = _g1h->heap_region_containing(old);
644
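    // The first time an evacuation failure is recorded for this region, print it.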
645 if (_evac_failure_regions->record(_worker_id, r->hrm_index(), cause_pinned)) {
646 G1HeapRegionPrinter::evac_failure(r);
647 }
648
649 // Mark the failing object in the marking bitmap and later use the bitmap to handle
650 // evacuation failure recovery.
651 _g1h->mark_evac_failure_object(_worker_id, old, word_sz);
652
653 _preserved_marks->push_if_necessary(old, m);
654
655 ContinuationGCSupport::transform_stack_chunk(old);
656
657 _evacuation_failed_info.register_copy_failure(word_sz);
658
659     // For iterating objects that failed evacuation we can currently reuse the
660     // existing closure used to scan evacuated objects; since we are iterating from a
661     // collection set region (i.e. never a Survivor region), we always need to
662     // gather cards for this case.
663 G1SkipCardEnqueueSetter x(&_scanner, false /* skip_card_enqueue */);
664 old->oop_iterate_backwards(&_scanner);
665
666 return old;
667 } else {
668 // Forward-to-self failed. Either someone else managed to allocate
669 // space for this object (old != forward_ptr) or they beat us in
670 // self-forwarding it (old == forward_ptr).
671 assert(old == forward_ptr || !_g1h->is_in_cset(forward_ptr),
672 "Object " PTR_FORMAT " forwarded to: " PTR_FORMAT " "
673 "should not be in the CSet",
674 p2i(old), p2i(forward_ptr));
692 void G1ParScanThreadState::flush_numa_stats() {
693 if (_obj_alloc_stat != nullptr) {
694 uint node_index = _numa->index_of_current_thread();
695 _numa->copy_statistics(G1NUMAStats::LocalObjProcessAtCopyToSurv, node_index, _obj_alloc_stat);
696 }
697 }
698
699 void G1ParScanThreadState::update_numa_stats(uint node_index) {
700 if (_obj_alloc_stat != nullptr) {
701 _obj_alloc_stat[node_index]++;
702 }
703 }
704
705 G1ParScanThreadStateSet::G1ParScanThreadStateSet(G1CollectedHeap* g1h,
706 uint num_workers,
707 G1CollectionSet* collection_set,
708 G1EvacFailureRegions* evac_failure_regions) :
709 _g1h(g1h),
710 _collection_set(collection_set),
711 _rdcqs(G1BarrierSet::dirty_card_queue_set().allocator()),
712 _preserved_marks_set(true /* in_c_heap */),
713 _states(NEW_C_HEAP_ARRAY(G1ParScanThreadState*, num_workers, mtGC)),
714 _rdc_buffers(NEW_C_HEAP_ARRAY(BufferNodeList, num_workers, mtGC)),
715 _surviving_young_words_total(NEW_C_HEAP_ARRAY(size_t, collection_set->young_region_length() + 1, mtGC)),
716 _num_workers(num_workers),
717 _flushed(false),
718 _evac_failure_regions(evac_failure_regions) {
719 _preserved_marks_set.init(num_workers);
720 for (uint i = 0; i < num_workers; ++i) {
721 _states[i] = nullptr;
722 _rdc_buffers[i] = BufferNodeList();
723 }
724 memset(_surviving_young_words_total, 0, (collection_set->young_region_length() + 1) * sizeof(size_t));
725 }
726
727 G1ParScanThreadStateSet::~G1ParScanThreadStateSet() {
728 assert(_flushed, "thread local state from the per thread states should have been flushed");
729 FREE_C_HEAP_ARRAY(G1ParScanThreadState*, _states);
730 FREE_C_HEAP_ARRAY(size_t, _surviving_young_words_total);
731 FREE_C_HEAP_ARRAY(BufferNodeList, _rdc_buffers);
732 _preserved_marks_set.reclaim();
733 }
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "precompiled.hpp"
26 #include "gc/g1/g1Allocator.inline.hpp"
27 #include "gc/g1/g1CollectedHeap.inline.hpp"
28 #include "gc/g1/g1CollectionSet.hpp"
29 #include "gc/g1/g1EvacFailureRegions.inline.hpp"
30 #include "gc/g1/g1HeapRegionPrinter.hpp"
31 #include "gc/g1/g1OopClosures.inline.hpp"
32 #include "gc/g1/g1ParScanThreadState.inline.hpp"
33 #include "gc/g1/g1RootClosures.hpp"
34 #include "gc/g1/g1StringDedup.hpp"
35 #include "gc/g1/g1Trace.hpp"
36 #include "gc/g1/g1YoungGCAllocationFailureInjector.inline.hpp"
37 #include "gc/shared/continuationGCSupport.inline.hpp"
38 #include "gc/shared/partialArrayTaskStepper.inline.hpp"
39 #include "gc/shared/stringdedup/stringDedup.hpp"
40 #include "gc/shared/taskqueue.inline.hpp"
41 #include "memory/allocation.inline.hpp"
42 #include "oops/access.inline.hpp"
43 #include "oops/oop.inline.hpp"
44 #include "runtime/atomic.hpp"
45 #include "runtime/prefetch.inline.hpp"
46 #include "utilities/globalDefinitions.hpp"
47 #include "utilities/macros.hpp"
48
49 // In fastdebug builds the code size can get out of hand, potentially
50 // tripping over compiler limits (which may be bugs, but nevertheless
51 // need to be taken into consideration). A side benefit of limiting
52 // inlining is that we get more call frames that might aid debugging.
53 // And the fastdebug compile time for this file is much reduced.
54 // Explicit NOINLINE to block ATTRIBUTE_FLATTENing.
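// (NOT_DEBUG(inline) DEBUG_ONLY(NOINLINE) below expands to "inline" in product builds and to NOINLINE in debug builds.)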
55 #define MAYBE_INLINE_EVACUATION NOT_DEBUG(inline) DEBUG_ONLY(NOINLINE)
56
57 G1ParScanThreadState::G1ParScanThreadState(G1CollectedHeap* g1h,
58 G1RedirtyCardsQueueSet* rdcqs,
59 uint worker_id,
60 uint num_workers,
61 G1CollectionSet* collection_set,
62 G1EvacFailureRegions* evac_failure_regions)
63 : _g1h(g1h),
64 _task_queue(g1h->task_queue(worker_id)),
65 _rdc_local_qset(rdcqs),
66 _ct(g1h->card_table()),
67 _closures(nullptr),
68 _plab_allocator(nullptr),
69 _age_table(false),
70 _tenuring_threshold(g1h->policy()->tenuring_threshold()),
71 _scanner(g1h, this),
72 _worker_id(worker_id),
73 _last_enqueued_card(SIZE_MAX),
74 _stack_trim_upper_threshold(GCDrainStackTargetSize * 2 + 1),
75 _stack_trim_lower_threshold(GCDrainStackTargetSize),
76 _trim_ticks(),
77 _surviving_young_words_base(nullptr),
78 _surviving_young_words(nullptr),
79 _surviving_words_length(collection_set->young_region_length() + 1),
80 _old_gen_is_full(false),
81 _partial_objarray_chunk_size(ParGCArrayScanChunk),
82 _partial_array_stepper(num_workers),
83 _string_dedup_requests(),
84 _max_num_optional_regions(collection_set->optional_region_length()),
85 _numa(g1h->numa()),
86 _obj_alloc_stat(nullptr),
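    // The next initializer is compiled in only when the allocation-failure injector is enabled;
    // COMMA supplies the ',' that cannot appear literally inside the macro argument.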
87 ALLOCATION_FAILURE_INJECTOR_ONLY(_allocation_failure_inject_counter(0) COMMA)
88 _evacuation_failed_info(),
89 _evac_failure_regions(evac_failure_regions),
90 _evac_failure_enqueued_cards(0)
91 {
92   // We allocate one entry per young gen region in the collection set, plus one
93   // extra entry, since entry 0 keeps track of surviving bytes for non-young regions.
94   // We also add a few padding elements at the beginning and at the end in
95   // an attempt to eliminate cache line contention.
96 const size_t padding_elem_num = (DEFAULT_PADDING_SIZE / sizeof(size_t));
97 size_t array_length = padding_elem_num + _surviving_words_length + padding_elem_num;
98
99 _surviving_young_words_base = NEW_C_HEAP_ARRAY(size_t, array_length, mtGC);
100 _surviving_young_words = _surviving_young_words_base + padding_elem_num;
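  // Layout: [padding | entry 0: non-young regions | entries 1..young_region_length: young cset regions | padding]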
101 memset(_surviving_young_words, 0, _surviving_words_length * sizeof(size_t));
102
103 _plab_allocator = new G1PLABAllocator(_g1h->allocator());
104
105 _closures = G1EvacuationRootClosures::create_root_closures(_g1h,
106 this,
107 collection_set->only_contains_young_regions());
193
194   // Although we never intentionally push references outside of the collection
195   // set, due to (benign) races in the claim mechanism during RSet scanning, more
196   // than one thread might claim the same card. The same card may therefore be
197   // processed multiple times, and we might get references into the old gen here,
198   // so we need to redo this check.
199 const G1HeapRegionAttr region_attr = _g1h->region_attr(obj);
200   // References pushed onto the work stack should never point to a humongous region,
201   // as humongous regions are never added to the collection set.
202 assert(!region_attr.is_humongous_candidate(),
203 "Obj " PTR_FORMAT " should not refer to humongous region %u from " PTR_FORMAT,
204 p2i(obj), _g1h->addr_to_region(obj), p2i(p));
205
206 if (!region_attr.is_in_cset()) {
207 // In this case somebody else already did all the work.
208 return;
209 }
210
211 markWord m = obj->mark();
212 if (m.is_forwarded()) {
213 obj = obj->forwardee(m);
214 } else {
215 obj = do_copy_to_survivor_space(region_attr, obj, m);
216 }
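  // Update the reference to the object's new location (or to the forwardee another worker installed).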
217 RawAccess<IS_NOT_NULL>::oop_store(p, obj);
218
219 write_ref_field_post(p, obj);
220 }
221
222 MAYBE_INLINE_EVACUATION
223 void G1ParScanThreadState::do_partial_array(PartialArrayScanTask task) {
224 oop from_obj = task.to_source_array();
225
226 assert(_g1h->is_in_reserved(from_obj), "must be in heap.");
227 assert(from_obj->forward_safe_klass()->is_objArray_klass(), "must be obj array");
228 assert(from_obj->is_forwarded(), "must be forwarded");
229
230 oop to_obj = from_obj->forwardee();
231 assert(from_obj != to_obj, "should not be chunking self-forwarded objects");
232 assert(to_obj->is_objArray(), "must be obj array");
233 objArrayOop to_array = objArrayOop(to_obj);
234
235 PartialArrayTaskStepper::Step step
236 = _partial_array_stepper.next(objArrayOop(from_obj),
237 to_array,
238 _partial_objarray_chunk_size);
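  // Push the partial-scan tasks requested by the stepper so other workers can steal them
  // while we process this chunk.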
239 for (uint i = 0; i < step._ncreate; ++i) {
240 push_on_queue(ScannerTask(PartialArrayScanTask(from_obj)));
241 }
242
243 G1HeapRegionAttr dest_attr = _g1h->region_attr(to_array);
244 G1SkipCardEnqueueSetter x(&_scanner, dest_attr.is_new_survivor());
245 // Process claimed task. The length of to_array is not correct, but
246 // fortunately the iteration ignores the length field and just relies
247 // on start/end.
248 to_array->oop_iterate_range(&_scanner,
249 step._index,
250 step._index + _partial_objarray_chunk_size);
251 }
252
253 MAYBE_INLINE_EVACUATION
254 void G1ParScanThreadState::start_partial_objarray(G1HeapRegionAttr dest_attr,
255 oop from_obj,
256 oop to_obj) {
257 assert(from_obj->forward_safe_klass()->is_objArray_klass(), "precondition");
258 assert(from_obj->is_forwarded(), "precondition");
259 assert(from_obj->forwardee() == to_obj, "precondition");
260 assert(from_obj != to_obj, "should not be scanning self-forwarded objects");
261 assert(to_obj->is_objArray(), "precondition");
262
263 objArrayOop to_array = objArrayOop(to_obj);
264
265 PartialArrayTaskStepper::Step step
266 = _partial_array_stepper.start(objArrayOop(from_obj),
267 to_array,
268 _partial_objarray_chunk_size);
269
270 // Push any needed partial scan tasks. Pushed before processing the
271 // initial chunk to allow other workers to steal while we're processing.
272 for (uint i = 0; i < step._ncreate; ++i) {
273 push_on_queue(ScannerTask(PartialArrayScanTask(from_obj)));
274 }
275
276   // Skip the card enqueue iff the object (to_array) is in a survivor region.
277 // However, G1HeapRegion::is_survivor() is too expensive here.
364 // no other space to try.
365 return nullptr;
366 }
367 }
368
369 G1HeapRegionAttr G1ParScanThreadState::next_region_attr(G1HeapRegionAttr const region_attr, markWord const m, uint& age) {
370 assert(region_attr.is_young() || region_attr.is_old(), "must be either Young or Old");
371
372 if (region_attr.is_young()) {
373 age = !m.has_displaced_mark_helper() ? m.age()
374 : m.displaced_mark_helper().age();
375 if (age < _tenuring_threshold) {
376 return region_attr;
377 }
378 }
379 // young-to-old (promotion) or old-to-old; destination is old in both cases.
380 return G1HeapRegionAttr::Old;
381 }
382
383 void G1ParScanThreadState::report_promotion_event(G1HeapRegionAttr const dest_attr,
384 Klass* klass, size_t word_sz, uint age,
385 HeapWord * const obj_ptr, uint node_index) const {
386 PLAB* alloc_buf = _plab_allocator->alloc_buffer(dest_attr, node_index);
387 if (alloc_buf->contains(obj_ptr)) {
388 _g1h->gc_tracer_stw()->report_promotion_in_new_plab_event(klass, word_sz * HeapWordSize, age,
389 dest_attr.type() == G1HeapRegionAttr::Old,
390 alloc_buf->word_sz() * HeapWordSize);
391 } else {
392 _g1h->gc_tracer_stw()->report_promotion_outside_plab_event(klass, word_sz * HeapWordSize, age,
393 dest_attr.type() == G1HeapRegionAttr::Old);
394 }
395 }
396
397 NOINLINE
398 HeapWord* G1ParScanThreadState::allocate_copy_slow(G1HeapRegionAttr* dest_attr,
399 Klass* klass,
400 size_t word_sz,
401 uint age,
402 uint node_index) {
403 HeapWord* obj_ptr = nullptr;
404 // Try slow-path allocation unless we're allocating old and old is already full.
405 if (!(dest_attr->is_old() && _old_gen_is_full)) {
406 bool plab_refill_failed = false;
407 obj_ptr = _plab_allocator->allocate_direct_or_new_plab(*dest_attr,
408 word_sz,
409 &plab_refill_failed,
410 node_index);
411 if (obj_ptr == nullptr) {
412 obj_ptr = allocate_in_next_plab(dest_attr,
413 word_sz,
414 plab_refill_failed,
415 node_index);
416 }
417 }
418 if (obj_ptr != nullptr) {
419 update_numa_stats(node_index);
420 if (_g1h->gc_tracer_stw()->should_report_promotion_events()) {
421 // The events are checked individually as part of the actual commit
422 report_promotion_event(*dest_attr, klass, word_sz, age, obj_ptr, node_index);
423 }
424 }
425 return obj_ptr;
426 }
427
428 #if ALLOCATION_FAILURE_INJECTOR
429 bool G1ParScanThreadState::inject_allocation_failure(uint region_idx) {
430 return _g1h->allocation_failure_injector()->allocation_should_fail(_allocation_failure_inject_counter, region_idx);
431 }
432 #endif
433
434 NOINLINE
435 void G1ParScanThreadState::undo_allocation(G1HeapRegionAttr dest_attr,
436 HeapWord* obj_ptr,
437 size_t word_sz,
438 uint node_index) {
439 _plab_allocator->undo_allocation(dest_attr, obj_ptr, word_sz, node_index);
440 }
441
442 void G1ParScanThreadState::update_bot_after_copying(oop obj, size_t word_sz) {
443 HeapWord* obj_start = cast_from_oop<HeapWord*>(obj);
444 G1HeapRegion* region = _g1h->heap_region_containing(obj_start);
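  // Record the copied block in the region's block offset table so card scans can locate the object start.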
445 region->update_bot_for_block(obj_start, obj_start + word_sz);
446 }
447
448 // Private inline function, for direct internal use and providing the
449 // implementation of the public not-inline function.
450 MAYBE_INLINE_EVACUATION
451 oop G1ParScanThreadState::do_copy_to_survivor_space(G1HeapRegionAttr const region_attr,
452 oop const old,
453 markWord const old_mark) {
454 assert(region_attr.is_in_cset(),
455 "Unexpected region attr type: %s", region_attr.get_type_str());
456
457 // Get the klass once. We'll need it again later, and this avoids
458 // re-decoding when it's compressed.
459 // NOTE: With compact headers, it is not safe to load the Klass* from o, because
460 // that would access the mark-word, and the mark-word might change at any time by
461 // concurrent promotion. The promoted mark-word would point to the forwardee, which
462 // may not yet have completed copying. Therefore we must load the Klass* from
463 // the mark-word that we have already loaded. This is safe, because we have checked
464 // that this is not yet forwarded in the caller.
465 Klass* klass = old->forward_safe_klass(old_mark);
466 const size_t word_sz = old->size_given_klass(klass);
467
468 // JNI only allows pinning of typeArrays, so we only need to keep those in place.
469 if (region_attr.is_pinned() && klass->is_typeArray_klass()) {
470 return handle_evacuation_failure_par(old, old_mark, word_sz, true /* cause_pinned */);
471 }
472
473 uint age = 0;
474 G1HeapRegionAttr dest_attr = next_region_attr(region_attr, old_mark, age);
475 G1HeapRegion* const from_region = _g1h->heap_region_containing(old);
476 uint node_index = from_region->node_index();
477
478 HeapWord* obj_ptr = _plab_allocator->plab_allocate(dest_attr, word_sz, node_index);
479
480 // PLAB allocations should succeed most of the time, so we'll
481 // normally check against null once and that's it.
482 if (obj_ptr == nullptr) {
483 obj_ptr = allocate_copy_slow(&dest_attr, klass, word_sz, age, node_index);
484 if (obj_ptr == nullptr) {
485 // This will either forward-to-self, or detect that someone else has
486 // installed a forwarding pointer.
487 return handle_evacuation_failure_par(old, old_mark, word_sz, false /* cause_pinned */);
488 }
489 }
490
491 assert(obj_ptr != nullptr, "when we get here, allocation should have succeeded");
492 assert(_g1h->is_in_reserved(obj_ptr), "Allocated memory should be in the heap");
493
494 // Should this evacuation fail?
495 if (inject_allocation_failure(from_region->hrm_index())) {
496     // Doing this after all the allocation attempts also exercises the
497     // undo_allocation() method.
498 undo_allocation(dest_attr, obj_ptr, word_sz, node_index);
499 return handle_evacuation_failure_par(old, old_mark, word_sz, false /* cause_pinned */);
500 }
501
502 // We're going to allocate linearly, so might as well prefetch ahead.
503 Prefetch::write(obj_ptr, PrefetchCopyIntervalInBytes);
564 return obj;
565 } else {
566 _plab_allocator->undo_allocation(dest_attr, obj_ptr, word_sz, node_index);
567 return forward_ptr;
568 }
569 }
570
571 // Public not-inline entry point.
572 ATTRIBUTE_FLATTEN
573 oop G1ParScanThreadState::copy_to_survivor_space(G1HeapRegionAttr region_attr,
574 oop old,
575 markWord old_mark) {
576 return do_copy_to_survivor_space(region_attr, old, old_mark);
577 }
578
579 G1ParScanThreadState* G1ParScanThreadStateSet::state_for_worker(uint worker_id) {
580 assert(worker_id < _num_workers, "out of bounds access");
581 if (_states[worker_id] == nullptr) {
582 _states[worker_id] =
583 new G1ParScanThreadState(_g1h, rdcqs(),
584 worker_id,
585 _num_workers,
586 _collection_set,
587 _evac_failure_regions);
588 }
589 return _states[worker_id];
590 }
591
592 const size_t* G1ParScanThreadStateSet::surviving_young_words() const {
593 assert(_flushed, "thread local state from the per thread states should have been flushed");
594 return _surviving_young_words_total;
595 }
596
597 void G1ParScanThreadStateSet::flush_stats() {
598 assert(!_flushed, "thread local state from the per thread states should be flushed once");
599 for (uint worker_id = 0; worker_id < _num_workers; ++worker_id) {
600 G1ParScanThreadState* pss = _states[worker_id];
601 assert(pss != nullptr, "must be initialized");
602
603 G1GCPhaseTimes* p = _g1h->phase_times();
622 dcq.merge_bufferlists(rdcqs());
623 rdcqs()->verify_empty();
624
625 _flushed = true;
626 }
627
628 void G1ParScanThreadStateSet::record_unused_optional_region(G1HeapRegion* hr) {
629 for (uint worker_index = 0; worker_index < _num_workers; ++worker_index) {
630 G1ParScanThreadState* pss = _states[worker_index];
631 assert(pss != nullptr, "must be initialized");
632
633 size_t used_memory = pss->oops_into_optional_region(hr)->used_memory();
634 _g1h->phase_times()->record_or_add_thread_work_item(G1GCPhaseTimes::OptScanHR, worker_index, used_memory, G1GCPhaseTimes::ScanHRUsedMemory);
635 }
636 }
637
638 NOINLINE
639 oop G1ParScanThreadState::handle_evacuation_failure_par(oop old, markWord m, size_t word_sz, bool cause_pinned) {
640 assert(_g1h->is_in_cset(old), "Object " PTR_FORMAT " should be in the CSet", p2i(old));
641
642 oop forward_ptr = old->forward_to_self_atomic(m, memory_order_relaxed);
643 if (forward_ptr == nullptr) {
644 // Forward-to-self succeeded. We are the "owner" of the object.
645 G1HeapRegion* r = _g1h->heap_region_containing(old);
646
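    // The first time an evacuation failure is recorded for this region, print it.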
647 if (_evac_failure_regions->record(_worker_id, r->hrm_index(), cause_pinned)) {
648 G1HeapRegionPrinter::evac_failure(r);
649 }
650
651 // Mark the failing object in the marking bitmap and later use the bitmap to handle
652 // evacuation failure recovery.
653 _g1h->mark_evac_failure_object(_worker_id, old, word_sz);
654
655 ContinuationGCSupport::transform_stack_chunk(old);
656
657 _evacuation_failed_info.register_copy_failure(word_sz);
658
659     // For iterating objects that failed evacuation we can currently reuse the
660     // existing closure used to scan evacuated objects; since we are iterating from a
661     // collection set region (i.e. never a Survivor region), we always need to
662     // gather cards for this case.
663 G1SkipCardEnqueueSetter x(&_scanner, false /* skip_card_enqueue */);
664 old->oop_iterate_backwards(&_scanner);
665
666 return old;
667 } else {
668 // Forward-to-self failed. Either someone else managed to allocate
669 // space for this object (old != forward_ptr) or they beat us in
670 // self-forwarding it (old == forward_ptr).
671 assert(old == forward_ptr || !_g1h->is_in_cset(forward_ptr),
672 "Object " PTR_FORMAT " forwarded to: " PTR_FORMAT " "
673 "should not be in the CSet",
674 p2i(old), p2i(forward_ptr));
692 void G1ParScanThreadState::flush_numa_stats() {
693 if (_obj_alloc_stat != nullptr) {
694 uint node_index = _numa->index_of_current_thread();
695 _numa->copy_statistics(G1NUMAStats::LocalObjProcessAtCopyToSurv, node_index, _obj_alloc_stat);
696 }
697 }
698
699 void G1ParScanThreadState::update_numa_stats(uint node_index) {
700 if (_obj_alloc_stat != nullptr) {
701 _obj_alloc_stat[node_index]++;
702 }
703 }
704
705 G1ParScanThreadStateSet::G1ParScanThreadStateSet(G1CollectedHeap* g1h,
706 uint num_workers,
707 G1CollectionSet* collection_set,
708 G1EvacFailureRegions* evac_failure_regions) :
709 _g1h(g1h),
710 _collection_set(collection_set),
711 _rdcqs(G1BarrierSet::dirty_card_queue_set().allocator()),
712 _states(NEW_C_HEAP_ARRAY(G1ParScanThreadState*, num_workers, mtGC)),
713 _rdc_buffers(NEW_C_HEAP_ARRAY(BufferNodeList, num_workers, mtGC)),
714 _surviving_young_words_total(NEW_C_HEAP_ARRAY(size_t, collection_set->young_region_length() + 1, mtGC)),
715 _num_workers(num_workers),
716 _flushed(false),
717 _evac_failure_regions(evac_failure_regions) {
718 for (uint i = 0; i < num_workers; ++i) {
719 _states[i] = nullptr;
720 _rdc_buffers[i] = BufferNodeList();
721 }
722 memset(_surviving_young_words_total, 0, (collection_set->young_region_length() + 1) * sizeof(size_t));
723 }
724
725 G1ParScanThreadStateSet::~G1ParScanThreadStateSet() {
726 assert(_flushed, "thread local state from the per thread states should have been flushed");
727 FREE_C_HEAP_ARRAY(G1ParScanThreadState*, _states);
728 FREE_C_HEAP_ARRAY(size_t, _surviving_young_words_total);
729 FREE_C_HEAP_ARRAY(BufferNodeList, _rdc_buffers);
730 }