1 /* 2 * Copyright (c) 2015, 2020, Red Hat, Inc. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #ifndef SHARE_GC_SHENANDOAH_SHENANDOAHHEAP_INLINE_HPP 26 #define SHARE_GC_SHENANDOAH_SHENANDOAHHEAP_INLINE_HPP 27 28 #include "gc/shenandoah/shenandoahHeap.hpp" 29 30 #include "classfile/javaClasses.inline.hpp" 31 #include "gc/shared/markBitMap.inline.hpp" 32 #include "gc/shared/threadLocalAllocBuffer.inline.hpp" 33 #include "gc/shared/continuationGCSupport.inline.hpp" 34 #include "gc/shared/suspendibleThreadSet.hpp" 35 #include "gc/shared/tlab_globals.hpp" 36 #include "gc/shenandoah/shenandoahAsserts.hpp" 37 #include "gc/shenandoah/shenandoahBarrierSet.inline.hpp" 38 #include "gc/shenandoah/shenandoahCollectionSet.inline.hpp" 39 #include "gc/shenandoah/shenandoahForwarding.inline.hpp" 40 #include "gc/shenandoah/shenandoahWorkGroup.hpp" 41 #include "gc/shenandoah/shenandoahHeapRegionSet.inline.hpp" 42 #include "gc/shenandoah/shenandoahHeapRegion.inline.hpp" 43 #include "gc/shenandoah/shenandoahControlThread.hpp" 44 #include "gc/shenandoah/shenandoahMarkingContext.inline.hpp" 45 #include "gc/shenandoah/shenandoahThreadLocalData.hpp" 46 #include "oops/compressedOops.inline.hpp" 47 #include "oops/oop.inline.hpp" 48 #include "runtime/atomic.hpp" 49 #include "runtime/javaThread.hpp" 50 #include "runtime/prefetch.inline.hpp" 51 #include "utilities/copy.hpp" 52 #include "utilities/globalDefinitions.hpp" 53 54 inline ShenandoahHeap* ShenandoahHeap::heap() { 55 return named_heap<ShenandoahHeap>(CollectedHeap::Shenandoah); 56 } 57 58 inline ShenandoahHeapRegion* ShenandoahRegionIterator::next() { 59 size_t new_index = Atomic::add(&_index, (size_t) 1, memory_order_relaxed); 60 // get_region() provides the bounds-check and returns null on OOB. 61 return _heap->get_region(new_index - 1); 62 } 63 64 inline bool ShenandoahHeap::has_forwarded_objects() const { 65 return _gc_state.is_set(HAS_FORWARDED); 66 } 67 68 inline WorkerThreads* ShenandoahHeap::workers() const { 69 return _workers; 70 } 71 72 inline WorkerThreads* ShenandoahHeap::safepoint_workers() { 73 return _safepoint_workers; 74 } 75 76 inline void ShenandoahHeap::notify_gc_progress() { 77 Atomic::store(&_gc_no_progress_count, (size_t) 0); 78 79 } 80 inline void ShenandoahHeap::notify_gc_no_progress() { 81 Atomic::inc(&_gc_no_progress_count); 82 } 83 84 inline size_t ShenandoahHeap::get_gc_no_progress_count() const { 85 return Atomic::load(&_gc_no_progress_count); 86 } 87 88 inline size_t ShenandoahHeap::heap_region_index_containing(const void* addr) const { 89 uintptr_t region_start = ((uintptr_t) addr); 90 uintptr_t index = (region_start - (uintptr_t) base()) >> ShenandoahHeapRegion::region_size_bytes_shift(); 91 assert(index < num_regions(), "Region index is in bounds: " PTR_FORMAT, p2i(addr)); 92 return index; 93 } 94 95 inline ShenandoahHeapRegion* ShenandoahHeap::heap_region_containing(const void* addr) const { 96 size_t index = heap_region_index_containing(addr); 97 ShenandoahHeapRegion* const result = get_region(index); 98 assert(addr >= result->bottom() && addr < result->end(), "Heap region contains the address: " PTR_FORMAT, p2i(addr)); 99 return result; 100 } 101 102 inline void ShenandoahHeap::enter_evacuation(Thread* t) { 103 _oom_evac_handler.enter_evacuation(t); 104 } 105 106 inline void ShenandoahHeap::leave_evacuation(Thread* t) { 107 _oom_evac_handler.leave_evacuation(t); 108 } 109 110 template <class T> 111 inline void ShenandoahHeap::non_conc_update_with_forwarded(T* p) { 112 T o = RawAccess<>::oop_load(p); 113 if (!CompressedOops::is_null(o)) { 114 oop obj = CompressedOops::decode_not_null(o); 115 if (in_collection_set(obj)) { 116 // Corner case: when evacuation fails, there are objects in collection 117 // set that are not really forwarded. We can still go and try and update them 118 // (uselessly) to simplify the common path. 119 shenandoah_assert_forwarded_except(p, obj, cancelled_gc()); 120 oop fwd = ShenandoahBarrierSet::resolve_forwarded_not_null(obj); 121 shenandoah_assert_not_in_cset_except(p, fwd, cancelled_gc()); 122 123 // Unconditionally store the update: no concurrent updates expected. 124 RawAccess<IS_NOT_NULL>::oop_store(p, fwd); 125 } 126 } 127 } 128 129 template <class T> 130 inline void ShenandoahHeap::conc_update_with_forwarded(T* p) { 131 T o = RawAccess<>::oop_load(p); 132 if (!CompressedOops::is_null(o)) { 133 oop obj = CompressedOops::decode_not_null(o); 134 if (in_collection_set(obj)) { 135 // Corner case: when evacuation fails, there are objects in collection 136 // set that are not really forwarded. We can still go and try CAS-update them 137 // (uselessly) to simplify the common path. 138 shenandoah_assert_forwarded_except(p, obj, cancelled_gc()); 139 oop fwd = ShenandoahBarrierSet::resolve_forwarded_not_null(obj); 140 shenandoah_assert_not_in_cset_except(p, fwd, cancelled_gc()); 141 142 // Sanity check: we should not be updating the cset regions themselves, 143 // unless we are recovering from the evacuation failure. 144 shenandoah_assert_not_in_cset_loc_except(p, !is_in(p) || cancelled_gc()); 145 146 // Either we succeed in updating the reference, or something else gets in our way. 147 // We don't care if that is another concurrent GC update, or another mutator update. 148 atomic_update_oop(fwd, p, obj); 149 } 150 } 151 } 152 153 // Atomic updates of heap location. This is only expected to work with updating the same 154 // logical object with its forwardee. The reason why we need stronger-than-relaxed memory 155 // ordering has to do with coordination with GC barriers and mutator accesses. 156 // 157 // In essence, stronger CAS access is required to maintain the transitive chains that mutator 158 // accesses build by themselves. To illustrate this point, consider the following example. 159 // 160 // Suppose "o" is the object that has a field "x" and the reference to "o" is stored 161 // to field at "addr", which happens to be Java volatile field. Normally, the accesses to volatile 162 // field at "addr" would be matched with release/acquire barriers. This changes when GC moves 163 // the object under mutator feet. 164 // 165 // Thread 1 (Java) 166 // // --- previous access starts here 167 // ... 168 // T1.1: store(&o.x, 1, mo_relaxed) 169 // T1.2: store(&addr, o, mo_release) // volatile store 170 // 171 // // --- new access starts here 172 // // LRB: copy and install the new copy to fwdptr 173 // T1.3: var copy = copy(o) 174 // T1.4: cas(&fwd, t, copy, mo_release) // pointer-mediated publication 175 // <access continues> 176 // 177 // Thread 2 (GC updater) 178 // T2.1: var f = load(&fwd, mo_{consume|acquire}) // pointer-mediated acquisition 179 // T2.2: cas(&addr, o, f, mo_release) // this method 180 // 181 // Thread 3 (Java) 182 // T3.1: var o = load(&addr, mo_acquire) // volatile read 183 // T3.2: if (o != null) 184 // T3.3: var r = load(&o.x, mo_relaxed) 185 // 186 // r is guaranteed to contain "1". 187 // 188 // Without GC involvement, there is synchronizes-with edge from T1.2 to T3.1, 189 // which guarantees this. With GC involvement, when LRB copies the object and 190 // another thread updates the reference to it, we need to have the transitive edge 191 // from T1.4 to T2.1 (that one is guaranteed by forwarding accesses), plus the edge 192 // from T2.2 to T3.1 (which is brought by this CAS). 193 // 194 // Note that we do not need to "acquire" in these methods, because we do not read the 195 // failure witnesses contents on any path, and "release" is enough. 196 // 197 198 inline void ShenandoahHeap::atomic_update_oop(oop update, oop* addr, oop compare) { 199 assert(is_aligned(addr, HeapWordSize), "Address should be aligned: " PTR_FORMAT, p2i(addr)); 200 Atomic::cmpxchg(addr, compare, update, memory_order_release); 201 } 202 203 inline void ShenandoahHeap::atomic_update_oop(oop update, narrowOop* addr, narrowOop compare) { 204 assert(is_aligned(addr, sizeof(narrowOop)), "Address should be aligned: " PTR_FORMAT, p2i(addr)); 205 narrowOop u = CompressedOops::encode(update); 206 Atomic::cmpxchg(addr, compare, u, memory_order_release); 207 } 208 209 inline void ShenandoahHeap::atomic_update_oop(oop update, narrowOop* addr, oop compare) { 210 assert(is_aligned(addr, sizeof(narrowOop)), "Address should be aligned: " PTR_FORMAT, p2i(addr)); 211 narrowOop c = CompressedOops::encode(compare); 212 narrowOop u = CompressedOops::encode(update); 213 Atomic::cmpxchg(addr, c, u, memory_order_release); 214 } 215 216 inline bool ShenandoahHeap::atomic_update_oop_check(oop update, oop* addr, oop compare) { 217 assert(is_aligned(addr, HeapWordSize), "Address should be aligned: " PTR_FORMAT, p2i(addr)); 218 return (oop) Atomic::cmpxchg(addr, compare, update, memory_order_release) == compare; 219 } 220 221 inline bool ShenandoahHeap::atomic_update_oop_check(oop update, narrowOop* addr, narrowOop compare) { 222 assert(is_aligned(addr, sizeof(narrowOop)), "Address should be aligned: " PTR_FORMAT, p2i(addr)); 223 narrowOop u = CompressedOops::encode(update); 224 return (narrowOop) Atomic::cmpxchg(addr, compare, u, memory_order_release) == compare; 225 } 226 227 inline bool ShenandoahHeap::atomic_update_oop_check(oop update, narrowOop* addr, oop compare) { 228 assert(is_aligned(addr, sizeof(narrowOop)), "Address should be aligned: " PTR_FORMAT, p2i(addr)); 229 narrowOop c = CompressedOops::encode(compare); 230 narrowOop u = CompressedOops::encode(update); 231 return CompressedOops::decode(Atomic::cmpxchg(addr, c, u, memory_order_release)) == compare; 232 } 233 234 // The memory ordering discussion above does not apply for methods that store nulls: 235 // then, there is no transitive reads in mutator (as we see nulls), and we can do 236 // relaxed memory ordering there. 237 238 inline void ShenandoahHeap::atomic_clear_oop(oop* addr, oop compare) { 239 assert(is_aligned(addr, HeapWordSize), "Address should be aligned: " PTR_FORMAT, p2i(addr)); 240 Atomic::cmpxchg(addr, compare, oop(), memory_order_relaxed); 241 } 242 243 inline void ShenandoahHeap::atomic_clear_oop(narrowOop* addr, oop compare) { 244 assert(is_aligned(addr, sizeof(narrowOop)), "Address should be aligned: " PTR_FORMAT, p2i(addr)); 245 narrowOop cmp = CompressedOops::encode(compare); 246 Atomic::cmpxchg(addr, cmp, narrowOop(), memory_order_relaxed); 247 } 248 249 inline void ShenandoahHeap::atomic_clear_oop(narrowOop* addr, narrowOop compare) { 250 assert(is_aligned(addr, sizeof(narrowOop)), "Address should be aligned: " PTR_FORMAT, p2i(addr)); 251 Atomic::cmpxchg(addr, compare, narrowOop(), memory_order_relaxed); 252 } 253 254 inline bool ShenandoahHeap::cancelled_gc() const { 255 return _cancelled_gc.get() == CANCELLED; 256 } 257 258 inline bool ShenandoahHeap::check_cancelled_gc_and_yield(bool sts_active) { 259 if (sts_active && !cancelled_gc()) { 260 if (SuspendibleThreadSet::should_yield()) { 261 SuspendibleThreadSet::yield(); 262 } 263 } 264 return cancelled_gc(); 265 } 266 267 inline void ShenandoahHeap::clear_cancelled_gc() { 268 _cancelled_gc.set(CANCELLABLE); 269 _oom_evac_handler.clear(); 270 } 271 272 inline HeapWord* ShenandoahHeap::allocate_from_gclab(Thread* thread, size_t size) { 273 assert(UseTLAB, "TLABs should be enabled"); 274 275 PLAB* gclab = ShenandoahThreadLocalData::gclab(thread); 276 if (gclab == nullptr) { 277 assert(!thread->is_Java_thread() && !thread->is_Worker_thread(), 278 "Performance: thread should have GCLAB: %s", thread->name()); 279 // No GCLABs in this thread, fallback to shared allocation 280 return nullptr; 281 } 282 HeapWord* obj = gclab->allocate(size); 283 if (obj != nullptr) { 284 return obj; 285 } 286 // Otherwise... 287 return allocate_from_gclab_slow(thread, size); 288 } 289 290 inline bool ShenandoahHeap::requires_marking(const void* entry) const { 291 oop obj = cast_to_oop(entry); 292 return !_marking_context->is_marked_strong(obj); 293 } 294 295 inline bool ShenandoahHeap::in_collection_set(oop p) const { 296 assert(collection_set() != nullptr, "Sanity"); 297 return collection_set()->is_in(p); 298 } 299 300 inline bool ShenandoahHeap::in_collection_set_loc(void* p) const { 301 assert(collection_set() != nullptr, "Sanity"); 302 return collection_set()->is_in_loc(p); 303 } 304 305 inline bool ShenandoahHeap::is_stable() const { 306 return _gc_state.is_clear(); 307 } 308 309 inline bool ShenandoahHeap::is_idle() const { 310 return _gc_state.is_unset(MARKING | EVACUATION | UPDATEREFS); 311 } 312 313 inline bool ShenandoahHeap::is_concurrent_mark_in_progress() const { 314 return _gc_state.is_set(MARKING); 315 } 316 317 inline bool ShenandoahHeap::is_evacuation_in_progress() const { 318 return _gc_state.is_set(EVACUATION); 319 } 320 321 inline bool ShenandoahHeap::is_degenerated_gc_in_progress() const { 322 return _degenerated_gc_in_progress.is_set(); 323 } 324 325 inline bool ShenandoahHeap::is_full_gc_in_progress() const { 326 return _full_gc_in_progress.is_set(); 327 } 328 329 inline bool ShenandoahHeap::is_full_gc_move_in_progress() const { 330 return _full_gc_move_in_progress.is_set(); 331 } 332 333 inline bool ShenandoahHeap::is_update_refs_in_progress() const { 334 return _gc_state.is_set(UPDATEREFS); 335 } 336 337 inline bool ShenandoahHeap::is_stw_gc_in_progress() const { 338 return is_full_gc_in_progress() || is_degenerated_gc_in_progress(); 339 } 340 341 inline bool ShenandoahHeap::is_concurrent_strong_root_in_progress() const { 342 return _concurrent_strong_root_in_progress.is_set(); 343 } 344 345 inline bool ShenandoahHeap::is_concurrent_weak_root_in_progress() const { 346 return _gc_state.is_set(WEAK_ROOTS); 347 } 348 349 template<class T> 350 inline void ShenandoahHeap::marked_object_iterate(ShenandoahHeapRegion* region, T* cl) { 351 marked_object_iterate(region, cl, region->top()); 352 } 353 354 template<class T> 355 inline void ShenandoahHeap::marked_object_iterate(ShenandoahHeapRegion* region, T* cl, HeapWord* limit) { 356 assert(! region->is_humongous_continuation(), "no humongous continuation regions here"); 357 358 ShenandoahMarkingContext* const ctx = complete_marking_context(); 359 assert(ctx->is_complete(), "sanity"); 360 361 HeapWord* tams = ctx->top_at_mark_start(region); 362 363 size_t skip_bitmap_delta = 1; 364 HeapWord* start = region->bottom(); 365 HeapWord* end = MIN2(tams, region->end()); 366 367 // Step 1. Scan below the TAMS based on bitmap data. 368 HeapWord* limit_bitmap = MIN2(limit, tams); 369 370 // Try to scan the initial candidate. If the candidate is above the TAMS, it would 371 // fail the subsequent "< limit_bitmap" checks, and fall through to Step 2. 372 HeapWord* cb = ctx->get_next_marked_addr(start, end); 373 374 intx dist = ShenandoahMarkScanPrefetch; 375 if (dist > 0) { 376 // Batched scan that prefetches the oop data, anticipating the access to 377 // either header, oop field, or forwarding pointer. Not that we cannot 378 // touch anything in oop, while it still being prefetched to get enough 379 // time for prefetch to work. This is why we try to scan the bitmap linearly, 380 // disregarding the object size. However, since we know forwarding pointer 381 // precedes the object, we can skip over it. Once we cannot trust the bitmap, 382 // there is no point for prefetching the oop contents, as oop->size() will 383 // touch it prematurely. 384 385 // No variable-length arrays in standard C++, have enough slots to fit 386 // the prefetch distance. 387 static const int SLOT_COUNT = 256; 388 guarantee(dist <= SLOT_COUNT, "adjust slot count"); 389 HeapWord* slots[SLOT_COUNT]; 390 391 int avail; 392 do { 393 avail = 0; 394 for (int c = 0; (c < dist) && (cb < limit_bitmap); c++) { 395 Prefetch::read(cb, oopDesc::mark_offset_in_bytes()); 396 slots[avail++] = cb; 397 cb += skip_bitmap_delta; 398 if (cb < limit_bitmap) { 399 cb = ctx->get_next_marked_addr(cb, limit_bitmap); 400 } 401 } 402 403 for (int c = 0; c < avail; c++) { 404 assert (slots[c] < tams, "only objects below TAMS here: " PTR_FORMAT " (" PTR_FORMAT ")", p2i(slots[c]), p2i(tams)); 405 assert (slots[c] < limit, "only objects below limit here: " PTR_FORMAT " (" PTR_FORMAT ")", p2i(slots[c]), p2i(limit)); 406 oop obj = cast_to_oop(slots[c]); 407 assert(oopDesc::is_oop(obj), "sanity"); 408 assert(ctx->is_marked(obj), "object expected to be marked"); 409 cl->do_object(obj); 410 } 411 } while (avail > 0); 412 } else { 413 while (cb < limit_bitmap) { 414 assert (cb < tams, "only objects below TAMS here: " PTR_FORMAT " (" PTR_FORMAT ")", p2i(cb), p2i(tams)); 415 assert (cb < limit, "only objects below limit here: " PTR_FORMAT " (" PTR_FORMAT ")", p2i(cb), p2i(limit)); 416 oop obj = cast_to_oop(cb); 417 assert(oopDesc::is_oop(obj), "sanity"); 418 assert(ctx->is_marked(obj), "object expected to be marked"); 419 cl->do_object(obj); 420 cb += skip_bitmap_delta; 421 if (cb < limit_bitmap) { 422 cb = ctx->get_next_marked_addr(cb, limit_bitmap); 423 } 424 } 425 } 426 427 // Step 2. Accurate size-based traversal, happens past the TAMS. 428 // This restarts the scan at TAMS, which makes sure we traverse all objects, 429 // regardless of what happened at Step 1. 430 HeapWord* cs = tams; 431 while (cs < limit) { 432 assert (cs >= tams, "only objects past TAMS here: " PTR_FORMAT " (" PTR_FORMAT ")", p2i(cs), p2i(tams)); 433 assert (cs < limit, "only objects below limit here: " PTR_FORMAT " (" PTR_FORMAT ")", p2i(cs), p2i(limit)); 434 oop obj = cast_to_oop(cs); 435 assert(oopDesc::is_oop(obj), "sanity"); 436 assert(ctx->is_marked(obj), "object expected to be marked"); 437 size_t size = ShenandoahForwarding::size(obj); 438 cl->do_object(obj); 439 cs += size; 440 } 441 } 442 443 template <class T> 444 class ShenandoahObjectToOopClosure : public ObjectClosure { 445 T* _cl; 446 public: 447 ShenandoahObjectToOopClosure(T* cl) : _cl(cl) {} 448 449 void do_object(oop obj) { 450 obj->oop_iterate(_cl); 451 } 452 }; 453 454 template <class T> 455 class ShenandoahObjectToOopBoundedClosure : public ObjectClosure { 456 T* _cl; 457 MemRegion _bounds; 458 public: 459 ShenandoahObjectToOopBoundedClosure(T* cl, HeapWord* bottom, HeapWord* top) : 460 _cl(cl), _bounds(bottom, top) {} 461 462 void do_object(oop obj) { 463 obj->oop_iterate(_cl, _bounds); 464 } 465 }; 466 467 template<class T> 468 inline void ShenandoahHeap::marked_object_oop_iterate(ShenandoahHeapRegion* region, T* cl, HeapWord* top) { 469 if (region->is_humongous()) { 470 HeapWord* bottom = region->bottom(); 471 if (top > bottom) { 472 region = region->humongous_start_region(); 473 ShenandoahObjectToOopBoundedClosure<T> objs(cl, bottom, top); 474 marked_object_iterate(region, &objs); 475 } 476 } else { 477 ShenandoahObjectToOopClosure<T> objs(cl); 478 marked_object_iterate(region, &objs, top); 479 } 480 } 481 482 inline ShenandoahHeapRegion* ShenandoahHeap::get_region(size_t region_idx) const { 483 if (region_idx < _num_regions) { 484 return _regions[region_idx]; 485 } else { 486 return nullptr; 487 } 488 } 489 490 inline void ShenandoahHeap::mark_complete_marking_context() { 491 _marking_context->mark_complete(); 492 } 493 494 inline void ShenandoahHeap::mark_incomplete_marking_context() { 495 _marking_context->mark_incomplete(); 496 } 497 498 inline ShenandoahMarkingContext* ShenandoahHeap::complete_marking_context() const { 499 assert (_marking_context->is_complete()," sanity"); 500 return _marking_context; 501 } 502 503 inline ShenandoahMarkingContext* ShenandoahHeap::marking_context() const { 504 return _marking_context; 505 } 506 507 #endif // SHARE_GC_SHENANDOAH_SHENANDOAHHEAP_INLINE_HPP