 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "gc/g1/g1Allocator.inline.hpp"
#include "gc/g1/g1CollectedHeap.inline.hpp"
#include "gc/g1/g1CollectionSet.hpp"
#include "gc/g1/g1EvacFailureRegions.inline.hpp"
#include "gc/g1/g1OopClosures.inline.hpp"
#include "gc/g1/g1ParScanThreadState.inline.hpp"
#include "gc/g1/g1RootClosures.hpp"
#include "gc/g1/g1StringDedup.hpp"
#include "gc/g1/g1Trace.hpp"
#include "gc/g1/g1YoungGCAllocationFailureInjector.inline.hpp"
#include "gc/shared/continuationGCSupport.inline.hpp"
#include "gc/shared/partialArrayTaskStepper.inline.hpp"
#include "gc/shared/stringdedup/stringDedup.hpp"
#include "gc/shared/taskqueue.inline.hpp"
#include "memory/allocation.inline.hpp"
#include "oops/access.inline.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/atomic.hpp"
#include "runtime/prefetch.inline.hpp"
#include "utilities/globalDefinitions.hpp"
#include "utilities/macros.hpp"

// In fastdebug builds the code size can get out of hand, potentially
// tripping over compiler limits (which may be bugs, but nevertheless
// need to be taken into consideration). A side benefit of limiting
// inlining is that we get more call frames that might aid debugging.
// And the fastdebug compile time for this file is much reduced.
// Explicit NOINLINE to block ATTRIBUTE_FLATTENing.
#define MAYBE_INLINE_EVACUATION NOT_DEBUG(inline) DEBUG_ONLY(NOINLINE)
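
// Illustration: in a product build the line above expands roughly to
//
//   #define MAYBE_INLINE_EVACUATION inline
//
// while in a debug build it expands to
//
//   #define MAYBE_INLINE_EVACUATION NOINLINE
//
// since NOT_DEBUG(code) keeps its argument only when ASSERT is undefined,
// and DEBUG_ONLY(code) keeps it only when ASSERT is defined.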

G1ParScanThreadState::G1ParScanThreadState(G1CollectedHeap* g1h,
                                           G1RedirtyCardsQueueSet* rdcqs,
                                           uint worker_id,
                                           uint num_workers,
                                           G1CollectionSet* collection_set,
                                           G1EvacFailureRegions* evac_failure_regions)
  : _g1h(g1h),
    _task_queue(g1h->task_queue(worker_id)),
    _rdc_local_qset(rdcqs),
    _ct(g1h->card_table()),
    _closures(nullptr),
    _plab_allocator(nullptr),
    _age_table(false),
    _tenuring_threshold(g1h->policy()->tenuring_threshold()),
    _scanner(g1h, this),
    _worker_id(worker_id),
    _last_enqueued_card(SIZE_MAX),
    _stack_trim_upper_threshold(GCDrainStackTargetSize * 2 + 1),
    _stack_trim_lower_threshold(GCDrainStackTargetSize),
    _trim_ticks(),
    _surviving_young_words_base(nullptr),
    _surviving_young_words(nullptr),
    _surviving_words_length(collection_set->young_region_length() + 1),
    _old_gen_is_full(false),
    _partial_objarray_chunk_size(ParGCArrayScanChunk),
    _partial_array_stepper(num_workers),
    _string_dedup_requests(),
    _max_num_optional_regions(collection_set->optional_region_length()),
    _numa(g1h->numa()),
    _obj_alloc_stat(nullptr),
    ALLOCATION_FAILURE_INJECTOR_ONLY(_allocation_failure_inject_counter(0) COMMA)
    _evacuation_failed_info(),
    _evac_failure_regions(evac_failure_regions),
    _evac_failure_enqueued_cards(0)
{
  // We allocate one entry per young gen region in the collection set, plus
  // one more: entry 0 keeps track of surviving bytes for non-young regions.
  // We also add a few elements at the beginning and at the end in an
  // attempt to eliminate cache contention.
  const size_t padding_elem_num = (DEFAULT_PADDING_SIZE / sizeof(size_t));
  size_t array_length = padding_elem_num + _surviving_words_length + padding_elem_num;

  _surviving_young_words_base = NEW_C_HEAP_ARRAY(size_t, array_length, mtGC);
  _surviving_young_words = _surviving_young_words_base + padding_elem_num;
  memset(_surviving_young_words, 0, _surviving_words_length * sizeof(size_t));
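
  // Sketch of the resulting layout (element counts, not bytes):
  //
  //   _surviving_young_words_base
  //   |
  //   v
  //   [ padding ][ entry 0 (non-young) | entries 1..N (young regions) ][ padding ]
  //              ^
  //              _surviving_young_words
  //
  // so the N+1 counters used by this worker do not share cache lines with a
  // neighboring worker's allocation.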

  _plab_allocator = new G1PLABAllocator(_g1h->allocator());

  _closures = G1EvacuationRootClosures::create_root_closures(_g1h,
                                                             this,
                                                             collection_set->only_contains_young_regions());

// ...

  oop obj = RawAccess<IS_NOT_NULL>::oop_load(p);

  // Although we never intentionally push references outside of the collection
  // set, (benign) races in the claim mechanism during RSet scanning mean that
  // more than one thread might claim the same card. The same card may thus be
  // processed multiple times, and we might get references into old gen here,
  // so we need to redo this check.
  const G1HeapRegionAttr region_attr = _g1h->region_attr(obj);
  // References pushed onto the work stack should never point to a humongous
  // region, as those are not added to the collection set due to the above
  // precondition.
  assert(!region_attr.is_humongous_candidate(),
         "Obj " PTR_FORMAT " should not refer to humongous region %u from " PTR_FORMAT,
         p2i(obj), _g1h->addr_to_region(obj), p2i(p));

  if (!region_attr.is_in_cset()) {
    // In this case somebody else already did all the work.
    return;
  }

  markWord m = obj->mark();
  if (m.is_forwarded()) {
    obj = obj->forwardee(m);
  } else {
    obj = do_copy_to_survivor_space(region_attr, obj, m);
  }
  RawAccess<IS_NOT_NULL>::oop_store(p, obj);

  write_ref_field_post(p, obj);
}
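
// Note (a reading of the code above, not new behavior): the mark word
// doubles as the forwarding pointer. Once a copy has been installed,
// is_forwarded() is true and forwardee(m) decodes the new location straight
// from the mark we already loaded, so a reference that several workers race
// on is resolved cheaply after the first successful copy.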

MAYBE_INLINE_EVACUATION
void G1ParScanThreadState::do_partial_array(PartialArrayScanTask task) {
  oop from_obj = task.to_source_array();

  assert(_g1h->is_in_reserved(from_obj), "must be in heap.");
  assert(from_obj->forward_safe_klass()->is_objArray_klass(), "must be obj array");
  assert(from_obj->is_forwarded(), "must be forwarded");

  oop to_obj = from_obj->forwardee();
  assert(from_obj != to_obj, "should not be chunking self-forwarded objects");
  assert(to_obj->is_objArray(), "must be obj array");
  objArrayOop to_array = objArrayOop(to_obj);

  PartialArrayTaskStepper::Step step
    = _partial_array_stepper.next(objArrayOop(from_obj),
                                  to_array,
                                  _partial_objarray_chunk_size);
  for (uint i = 0; i < step._ncreate; ++i) {
    push_on_queue(ScannerTask(PartialArrayScanTask(from_obj)));
  }

  G1HeapRegionAttr dest_attr = _g1h->region_attr(to_array);
  G1SkipCardEnqueueSetter x(&_scanner, dest_attr.is_new_survivor());
  // Process claimed task. The length of to_array is not correct, but
  // fortunately the iteration ignores the length field and just relies
  // on start/end.
  to_array->oop_iterate_range(&_scanner,
                              step._index,
                              step._index + _partial_objarray_chunk_size);
}
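
// Sketch of the claiming scheme: each task claims one fixed-size window
// [step._index, step._index + _partial_objarray_chunk_size) of the array and
// scans it with oop_iterate_range(); the step._ncreate follow-up tasks are
// pushed before scanning so that idle workers can steal the remaining
// windows while this chunk is being processed.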

MAYBE_INLINE_EVACUATION
void G1ParScanThreadState::start_partial_objarray(G1HeapRegionAttr dest_attr,
                                                  oop from_obj,
                                                  oop to_obj) {
  assert(from_obj->forward_safe_klass()->is_objArray_klass(), "precondition");
  assert(from_obj->is_forwarded(), "precondition");
  assert(from_obj->forwardee() == to_obj, "precondition");
  assert(from_obj != to_obj, "should not be scanning self-forwarded objects");
  assert(to_obj->is_objArray(), "precondition");

  objArrayOop to_array = objArrayOop(to_obj);

  PartialArrayTaskStepper::Step step
    = _partial_array_stepper.start(objArrayOop(from_obj),
                                   to_array,
                                   _partial_objarray_chunk_size);

  // Push any needed partial scan tasks. Pushed before processing the
  // initial chunk to allow other workers to steal while we're processing.
  for (uint i = 0; i < step._ncreate; ++i) {
    push_on_queue(ScannerTask(PartialArrayScanTask(from_obj)));
  }

  // Skip the card enqueue iff the object (to_array) is in a survivor region.
  // However, HeapRegion::is_survivor() is too expensive here.

// ...

    // no other space to try.
    return nullptr;
  }
}

G1HeapRegionAttr G1ParScanThreadState::next_region_attr(G1HeapRegionAttr const region_attr, markWord const m, uint& age) {
  assert(region_attr.is_young() || region_attr.is_old(), "must be either Young or Old");

  if (region_attr.is_young()) {
    age = !m.has_displaced_mark_helper() ? m.age()
                                         : m.displaced_mark_helper().age();
    if (age < _tenuring_threshold) {
      return region_attr;
    }
  }
  // young-to-old (promotion) or old-to-old; destination is old in both cases.
  return G1HeapRegionAttr::Old;
}
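
// Example: with a current tenuring threshold of, say, 6, an object of age 4
// in a young region keeps its young destination, while one of age 6 (or any
// object in an old region) gets Old as its destination. The age is read from
// the mark word, or from the displaced mark when the object is locked.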

void G1ParScanThreadState::report_promotion_event(G1HeapRegionAttr const dest_attr,
                                                  Klass* klass, size_t word_sz, uint age,
                                                  HeapWord * const obj_ptr, uint node_index) const {
  PLAB* alloc_buf = _plab_allocator->alloc_buffer(dest_attr, node_index);
  if (alloc_buf->contains(obj_ptr)) {
    _g1h->gc_tracer_stw()->report_promotion_in_new_plab_event(klass, word_sz * HeapWordSize, age,
                                                              dest_attr.type() == G1HeapRegionAttr::Old,
                                                              alloc_buf->word_sz() * HeapWordSize);
  } else {
    _g1h->gc_tracer_stw()->report_promotion_outside_plab_event(klass, word_sz * HeapWordSize, age,
                                                               dest_attr.type() == G1HeapRegionAttr::Old);
  }
}

NOINLINE
HeapWord* G1ParScanThreadState::allocate_copy_slow(G1HeapRegionAttr* dest_attr,
                                                   Klass* klass,
                                                   size_t word_sz,
                                                   uint age,
                                                   uint node_index) {
  HeapWord* obj_ptr = nullptr;
  // Try slow-path allocation unless we're allocating old and old is already full.
  if (!(dest_attr->is_old() && _old_gen_is_full)) {
    bool plab_refill_failed = false;
    obj_ptr = _plab_allocator->allocate_direct_or_new_plab(*dest_attr,
                                                           word_sz,
                                                           &plab_refill_failed,
                                                           node_index);
    if (obj_ptr == nullptr) {
      obj_ptr = allocate_in_next_plab(dest_attr,
                                      word_sz,
                                      plab_refill_failed,
                                      node_index);
    }
  }
  if (obj_ptr != nullptr) {
    update_numa_stats(node_index);
    if (_g1h->gc_tracer_stw()->should_report_promotion_events()) {
      // The events are checked individually as part of the actual commit.
      report_promotion_event(*dest_attr, klass, word_sz, age, obj_ptr, node_index);
    }
  }
  return obj_ptr;
}
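
// The complete allocation order for a copy is thus:
//   1. plab_allocate() in the current PLAB (fast path, in the caller);
//   2. allocate_direct_or_new_plab(): refill the PLAB, or allocate the
//      object directly outside a PLAB;
//   3. allocate_in_next_plab(): fall back to the other destination
//      (e.g. old gen when the survivor attempt fails);
//   4. nullptr: the caller treats this as an evacuation failure.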

#if ALLOCATION_FAILURE_INJECTOR
bool G1ParScanThreadState::inject_allocation_failure(uint region_idx) {
  return _g1h->allocation_failure_injector()->allocation_should_fail(_allocation_failure_inject_counter, region_idx);
}
#endif

NOINLINE
void G1ParScanThreadState::undo_allocation(G1HeapRegionAttr dest_attr,
                                           HeapWord* obj_ptr,
                                           size_t word_sz,
                                           uint node_index) {
  _plab_allocator->undo_allocation(dest_attr, obj_ptr, word_sz, node_index);
}

void G1ParScanThreadState::update_bot_after_copying(oop obj, size_t word_sz) {
  HeapWord* obj_start = cast_from_oop<HeapWord*>(obj);
  HeapRegion* region = _g1h->heap_region_containing(obj_start);
  region->update_bot_for_obj(obj_start, word_sz);
}
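
// The block offset table (BOT) lets later card scans find the start of the
// object covering an arbitrary address in the region; it is presumably only
// needed for copies into old regions, since young regions are walked from
// their bottom anyway.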

// Private inline function, for direct internal use and providing the
// implementation of the public not-inline function.
MAYBE_INLINE_EVACUATION
oop G1ParScanThreadState::do_copy_to_survivor_space(G1HeapRegionAttr const region_attr,
                                                    oop const old,
                                                    markWord const old_mark) {
  assert(region_attr.is_in_cset(),
         "Unexpected region attr type: %s", region_attr.get_type_str());

  // Get the klass once. We'll need it again later, and this avoids
  // re-decoding when it's compressed.
  // NOTE: With compact headers, it is not safe to load the Klass* from old,
  // because that would access the mark-word, and the mark-word might change
  // at any time by concurrent promotion. The promoted mark-word would point
  // to the forwardee, which may not yet have completed copying. Therefore we
  // must load the Klass* from the mark-word that we have already loaded.
  // This is safe, because we have checked in the caller that the object is
  // not yet forwarded.
  Klass* klass = old->forward_safe_klass(old_mark);
  const size_t word_sz = old->size_given_klass(klass);

  // JNI only allows pinning of typeArrays, so we only need to keep those in place.
  if (region_attr.is_pinned() && klass->is_typeArray_klass()) {
    return handle_evacuation_failure_par(old, old_mark, word_sz, true /* cause_pinned */);
  }

  uint age = 0;
  G1HeapRegionAttr dest_attr = next_region_attr(region_attr, old_mark, age);
  HeapRegion* const from_region = _g1h->heap_region_containing(old);
  uint node_index = from_region->node_index();

  HeapWord* obj_ptr = _plab_allocator->plab_allocate(dest_attr, word_sz, node_index);

  // PLAB allocations should succeed most of the time, so we'll
  // normally check against null once and that's it.
  if (obj_ptr == nullptr) {
    obj_ptr = allocate_copy_slow(&dest_attr, klass, word_sz, age, node_index);
    if (obj_ptr == nullptr) {
      // This will either forward-to-self, or detect that someone else has
      // installed a forwarding pointer.
      return handle_evacuation_failure_par(old, old_mark, word_sz, false /* cause_pinned */);
    }
  }

  assert(obj_ptr != nullptr, "when we get here, allocation should have succeeded");
  assert(_g1h->is_in_reserved(obj_ptr), "Allocated memory should be in the heap");

  // Should this evacuation fail?
  if (inject_allocation_failure(from_region->hrm_index())) {
    // Doing this after all the allocation attempts also tests the
    // undo_allocation() method.
    undo_allocation(dest_attr, obj_ptr, word_sz, node_index);
    return handle_evacuation_failure_par(old, old_mark, word_sz, false /* cause_pinned */);
  }

  // We're going to allocate linearly, so might as well prefetch ahead.
  Prefetch::write(obj_ptr, PrefetchCopyIntervalInBytes);

// ...

    return obj;
  } else {
    _plab_allocator->undo_allocation(dest_attr, obj_ptr, word_sz, node_index);
    return forward_ptr;
  }
}
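
// Summary of the copy protocol implied above: the object is copied
// speculatively into the PLAB allocation, then the new location is published
// by atomically installing a forwarding pointer in the old object's mark
// word. Exactly one thread wins that race; a loser undoes its allocation and
// adopts the winner's forwardee, so all references converge on a single
// canonical copy.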

// Public not-inline entry point.
ATTRIBUTE_FLATTEN
oop G1ParScanThreadState::copy_to_survivor_space(G1HeapRegionAttr region_attr,
                                                 oop old,
                                                 markWord old_mark) {
  return do_copy_to_survivor_space(region_attr, old, old_mark);
}

G1ParScanThreadState* G1ParScanThreadStateSet::state_for_worker(uint worker_id) {
  assert(worker_id < _num_workers, "out of bounds access");
  if (_states[worker_id] == nullptr) {
    _states[worker_id] =
      new G1ParScanThreadState(_g1h, rdcqs(),
                               worker_id,
                               _num_workers,
                               _collection_set,
                               _evac_failure_regions);
  }
  return _states[worker_id];
}

const size_t* G1ParScanThreadStateSet::surviving_young_words() const {
  assert(_flushed, "thread local state from the per thread states should have been flushed");
  return _surviving_young_words_total;
}

void G1ParScanThreadStateSet::flush_stats() {
  assert(!_flushed, "thread local state from the per thread states should be flushed once");
  for (uint worker_id = 0; worker_id < _num_workers; ++worker_id) {
    G1ParScanThreadState* pss = _states[worker_id];
    assert(pss != nullptr, "must be initialized");

    G1GCPhaseTimes* p = _g1h->phase_times();

// ...

  dcq.merge_bufferlists(rdcqs());
  rdcqs()->verify_empty();

  _flushed = true;
}

void G1ParScanThreadStateSet::record_unused_optional_region(HeapRegion* hr) {
  for (uint worker_index = 0; worker_index < _num_workers; ++worker_index) {
    G1ParScanThreadState* pss = _states[worker_index];
    assert(pss != nullptr, "must be initialized");

    size_t used_memory = pss->oops_into_optional_region(hr)->used_memory();
    _g1h->phase_times()->record_or_add_thread_work_item(G1GCPhaseTimes::OptScanHR, worker_index, used_memory, G1GCPhaseTimes::ScanHRUsedMemory);
  }
}

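// Background for the function below (a reading of the code, not new
// behavior): an object whose evacuation fails is "self-forwarded", i.e. its
// forwarding pointer is atomically pointed at itself. That both marks the
// object as dealt with and lets racing workers discover the failure through
// the ordinary forwardee() path.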
NOINLINE
oop G1ParScanThreadState::handle_evacuation_failure_par(oop old, markWord m, size_t word_sz, bool cause_pinned) {
  assert(_g1h->is_in_cset(old), "Object " PTR_FORMAT " should be in the CSet", p2i(old));

  oop forward_ptr = old->forward_to_self_atomic(m, memory_order_relaxed);
  if (forward_ptr == nullptr) {
    // Forward-to-self succeeded. We are the "owner" of the object.
    HeapRegion* r = _g1h->heap_region_containing(old);

    if (_evac_failure_regions->record(_worker_id, r->hrm_index(), cause_pinned)) {
      _g1h->hr_printer()->evac_failure(r);
    }

    // Mark the failing object in the marking bitmap; the bitmap is used later
    // to handle evacuation failure recovery.
    _g1h->mark_evac_failure_object(_worker_id, old, word_sz);

    ContinuationGCSupport::transform_stack_chunk(old);

    _evacuation_failed_info.register_copy_failure(word_sz);

    // For iterating objects that failed evacuation we can currently reuse the
    // existing closure used for scanning evacuated objects; since we are
    // iterating from a collection set region (i.e. never a Survivor region),
    // we always need to gather cards for this case.
    G1SkipCardEnqueueSetter x(&_scanner, false /* skip_card_enqueue */);
    old->oop_iterate_backwards(&_scanner);

    return old;
  } else {
    // Forward-to-self failed. Either someone else managed to allocate
    // space for this object (old != forward_ptr) or they beat us in
    // self-forwarding it (old == forward_ptr).
    assert(old == forward_ptr || !_g1h->is_in_cset(forward_ptr),
           "Object " PTR_FORMAT " forwarded to: " PTR_FORMAT " "
           "should not be in the CSet",
           p2i(old), p2i(forward_ptr));

// ...
void G1ParScanThreadState::flush_numa_stats() {
  if (_obj_alloc_stat != nullptr) {
    uint node_index = _numa->index_of_current_thread();
    _numa->copy_statistics(G1NUMAStats::LocalObjProcessAtCopyToSurv, node_index, _obj_alloc_stat);
  }
}

void G1ParScanThreadState::update_numa_stats(uint node_index) {
  if (_obj_alloc_stat != nullptr) {
    _obj_alloc_stat[node_index]++;
  }
}

G1ParScanThreadStateSet::G1ParScanThreadStateSet(G1CollectedHeap* g1h,
                                                 uint num_workers,
                                                 G1CollectionSet* collection_set,
                                                 G1EvacFailureRegions* evac_failure_regions) :
  _g1h(g1h),
  _collection_set(collection_set),
  _rdcqs(G1BarrierSet::dirty_card_queue_set().allocator()),
  _states(NEW_C_HEAP_ARRAY(G1ParScanThreadState*, num_workers, mtGC)),
  _rdc_buffers(NEW_C_HEAP_ARRAY(BufferNodeList, num_workers, mtGC)),
  _surviving_young_words_total(NEW_C_HEAP_ARRAY(size_t, collection_set->young_region_length() + 1, mtGC)),
  _num_workers(num_workers),
  _flushed(false),
  _evac_failure_regions(evac_failure_regions) {
  for (uint i = 0; i < num_workers; ++i) {
    _states[i] = nullptr;
    _rdc_buffers[i] = BufferNodeList();
  }
  memset(_surviving_young_words_total, 0, (collection_set->young_region_length() + 1) * sizeof(size_t));
}

G1ParScanThreadStateSet::~G1ParScanThreadStateSet() {
  assert(_flushed, "thread local state from the per thread states should have been flushed");
  FREE_C_HEAP_ARRAY(G1ParScanThreadState*, _states);
  FREE_C_HEAP_ARRAY(size_t, _surviving_young_words_total);
  FREE_C_HEAP_ARRAY(BufferNodeList, _rdc_buffers);
}
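
// Typical lifecycle of the set during an evacuation pause (a sketch based on
// the code above): the coordinator constructs one set per pause, each worker
// lazily obtains its per-thread state via state_for_worker(worker_id), and
// once all workers are done, flush_stats() folds the per-thread data
// together; only then may the set be destroyed.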