1 /*
   2  * Copyright (c) 2019, 2021, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "compiler/oopMap.inline.hpp"
  27 #include "oops/instanceStackChunkKlass.hpp"
  28 #include "gc/shared/gc_globals.hpp"
  29 #include "memory/resourceArea.hpp"
  30 #include "oops/oopsHierarchy.hpp"
  31 #include "oops/stackChunkOop.hpp"
  32 #include "code/scopeDesc.hpp"
  33 #include "classfile/javaClasses.inline.hpp"
  34 #include "classfile/systemDictionary.hpp"
  35 #include "gc/shared/collectedHeap.inline.hpp"
  36 #include "jfr/jfrEvents.hpp"
  37 #include "memory/iterator.inline.hpp"
  38 #include "memory/oopFactory.hpp"
  39 #include "memory/universe.hpp"
  40 #include "oops/instanceKlass.hpp"
  41 #include "oops/instanceStackChunkKlass.inline.hpp"
  42 #include "oops/instanceOop.hpp"
  43 #include "oops/oop.inline.hpp"
  44 #include "oops/symbol.hpp"
  45 #include "runtime/continuation.hpp"
  46 #include "runtime/globals.hpp"
  47 #include "utilities/bitMap.hpp"
  48 #include "utilities/globalDefinitions.hpp"
  49 #include "utilities/macros.hpp"
  50 
  51 int InstanceStackChunkKlass::_offset_of_stack = 0;
  52 
  53 #if INCLUDE_CDS
  54 void InstanceStackChunkKlass::serialize_offsets(SerializeClosure* f) {
  55   f->do_u4((u4*)&_offset_of_stack);
  56 }
  57 #endif
  58 
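      // Word-at-a-time copy function: 'count' is in HeapWords, not bytes (see default_memcpy below).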
  59 typedef void (*MemcpyFnT)(void* src, void* dst, size_t count);
  60 
  61 void InstanceStackChunkKlass::default_memcpy(void* from, void* to, size_t size) {
  62   memcpy(to, from, size << LogBytesPerWord);
  63 }
  64 
  65 MemcpyFnT InstanceStackChunkKlass::memcpy_fn_from_stack_to_chunk = nullptr;
  66 MemcpyFnT InstanceStackChunkKlass::memcpy_fn_from_chunk_to_stack = nullptr;
  67 
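      // Picks the stack<->chunk copy routines: either the plain memcpy fallback, or the generated
      // word-copy stubs (the "_nt" variants, presumably non-temporal, when UseContinuationStreamingCopy is set).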
  68 void InstanceStackChunkKlass::resolve_memcpy_functions() {
  69   if (!StubRoutines::has_word_memcpy() || UseNewCode) {
  70     // tty->print_cr(">> Config memcpy: default");
  71     memcpy_fn_from_stack_to_chunk = (MemcpyFnT)InstanceStackChunkKlass::default_memcpy;
  72     memcpy_fn_from_chunk_to_stack = (MemcpyFnT)InstanceStackChunkKlass::default_memcpy;
  73   } else {
  74     // tty->print_cr(">> Config memcpy: %s", UseContinuationStreamingCopy ? "NT" : "T");
  75     memcpy_fn_from_stack_to_chunk = UseContinuationStreamingCopy ? (MemcpyFnT)StubRoutines::word_memcpy_up_nt()
  76                                                                  : (MemcpyFnT)StubRoutines::word_memcpy_up();
  77     memcpy_fn_from_chunk_to_stack = UseContinuationStreamingCopy ? (MemcpyFnT)StubRoutines::word_memcpy_down_nt()
  78                                                                  : (MemcpyFnT)StubRoutines::word_memcpy_down();
  79   }
  80   assert (memcpy_fn_from_stack_to_chunk != nullptr, "");
  81   assert (memcpy_fn_from_chunk_to_stack != nullptr, "");
  82 }
  83 
  84 InstanceStackChunkKlass::InstanceStackChunkKlass(const ClassFileParser& parser)
  85  : InstanceKlass(parser, InstanceKlass::_misc_kind_stack_chunk, ID) {
  86    // see oopDesc::size_given_klass TODO perf
  87    const jint lh = Klass::instance_layout_helper(size_helper(), true);
  88    set_layout_helper(lh);
  89    assert (layout_helper_is_instance(layout_helper()), "");
  90    assert (layout_helper_needs_slow_path(layout_helper()), "");
  91 
  92   //  resolve_memcpy_functions(); -- too early here
  93 }
  94 
  95 int InstanceStackChunkKlass::oop_size(oop obj) const {
  96   // see oopDesc::size_given_klass
  97   return instance_size(jdk_internal_vm_StackChunk::size(obj));
  98 }
  99 
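      // Templated on a dummy int and marked NOINLINE, presumably so that each instantiation shows up
      // as a distinct frame in native stack traces when it is called from asserts.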
 100 template <int x> NOINLINE static bool verify_chunk(stackChunkOop c) { return c->verify(); }
 101 
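      // Copies the chunk verbatim (no trimming of the unused stack prefix) and builds an oop bitmap
      // for the copy if the source chunk did not already have one.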
 102 template <bool disjoint>
 103 size_t InstanceStackChunkKlass::copy(oop obj, HeapWord* to_addr, size_t word_size) {
 104   assert (obj->is_stackChunk(), "");
 105   stackChunkOop chunk = (stackChunkOop)obj;
 106 
 107   // pns2();
 108   // tty->print_cr(">>> CPY %s %p-%p (%zu) -> %p-%p (%zu) -- %d", disjoint ? "DIS" : "CON", cast_from_oop<HeapWord*>(obj), cast_from_oop<HeapWord*>(obj) + word_size, word_size, to_addr, to_addr + word_size, word_size, chunk->is_gc_mode());
 109 
 110   HeapWord* from_addr = cast_from_oop<HeapWord*>(obj);
 111   assert (from_addr != to_addr, "");
 112   disjoint ? Copy::aligned_disjoint_words(from_addr, to_addr, word_size)
 113            : Copy::aligned_conjoint_words(from_addr, to_addr, word_size);
 114 
 115   stackChunkOop to_chunk = (stackChunkOop) cast_to_oop(to_addr);
  116   assert (!to_chunk->has_bitmap() || to_chunk->is_gc_mode(), "");
 117   if (!to_chunk->has_bitmap()) {
 118     build_bitmap(to_chunk);
 119   }
 120 
 121   return word_size;
 122 }
 123 
 124 template size_t InstanceStackChunkKlass::copy<false>(oop obj, HeapWord* to_addr, size_t word_size);
 125 template size_t InstanceStackChunkKlass::copy<true>(oop obj, HeapWord* to_addr, size_t word_size);
 126 
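      // Size of the chunk once the unused stack prefix below sp is trimmed away (header + used stack + bitmap).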
 127 int InstanceStackChunkKlass::compact_oop_size(oop obj) const {
 128   // tty->print_cr(">>>> InstanceStackChunkKlass::compact_oop_size");
 129   assert (obj->is_stackChunk(), "");
 130   stackChunkOop chunk = (stackChunkOop)obj;
 131   // We don't trim chunks with ZGC. See copy_compact
 132   // if (UseZGC) return instance_size(chunk->stack_size());
 133   int used_stack_in_words = chunk->stack_size() - chunk->sp() + metadata_words();
 134   assert (used_stack_in_words <= chunk->stack_size(), "");
 135   return align_object_size(size_helper() + used_stack_in_words + bitmap_size(used_stack_in_words));
 136 }
 137 
 138 template size_t InstanceStackChunkKlass::copy_compact<false>(oop obj, HeapWord* to_addr);
 139 template size_t InstanceStackChunkKlass::copy_compact<true>(oop obj, HeapWord* to_addr);
 140 
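      // Copies the chunk while trimming the unused stack prefix below sp; the header, the used part of
      // the stack, and the tail of the bitmap are copied in an order that is safe for overlapping copies.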
 141 template <bool disjoint>
 142 size_t InstanceStackChunkKlass::copy_compact(oop obj, HeapWord* to_addr) {
 143   // tty->print_cr(">>> InstanceStackChunkKlass::copy_compact from: %p to: %p", (oopDesc*)obj, to_addr);
 144   assert (obj->is_stackChunk(), "");
 145   stackChunkOop chunk = (stackChunkOop)obj;
 146 
 147   assert (chunk->verify(), "");
 148 
 149 #ifdef ASSERT
 150   int old_compact_size = obj->compact_size();
 151   int old_size = obj->size();
 152   assert (old_compact_size <= old_size, "");
 153 #endif
 154 
 155   const int from_sp = chunk->sp();
 156   assert (from_sp >= metadata_words(), "");
 157 
  158   // ZGC usually relocates objects into allocating regions that don't require barriers, which keeps the chunk mutable (or makes it mutable again).
 159   if (from_sp <= metadata_words() /*|| UseZGC*/) {
 160     assert (oop_size(obj) == compact_oop_size(obj), "");
 161     return copy<disjoint>(obj, to_addr, oop_size(obj));
 162   }
 163 
 164   const int header = size_helper();
 165   const int from_stack_size = chunk->stack_size();
 166   const int to_stack_size = from_stack_size - from_sp + metadata_words();
 167   const int from_bitmap_size = bitmap_size(from_stack_size);
 168   const int to_bitmap_size = bitmap_size(to_stack_size);
 169   const bool has_bitmap = chunk->has_bitmap();
 170   assert (to_stack_size >= 0, "");
 171   assert (to_stack_size > 0 || chunk->argsize() == 0, "");
 172 #ifdef ASSERT
 173   HeapWord* const start_of_stack0 = start_of_stack(obj);
 174   HeapWord* const start_of_bitmap0 = start_of_bitmap(obj);
 175 #endif
 176 
 177   // pns2();
 178   // tty->print_cr(">>> CPY %s %p-%p (%d) -> %p-%p (%d) [%d] -- %d", disjoint ? "DIS" : "CON", cast_from_oop<HeapWord*>(obj), cast_from_oop<HeapWord*>(obj) + header + from_size, from_size, to_addr, to_addr + header + to_stack_size, to_stack_size, old_compact_size, chunk->is_gc_mode());
 179 
 180   stackChunkOop to_chunk = (stackChunkOop) cast_to_oop(to_addr);
 181   HeapWord* from_addr = cast_from_oop<HeapWord*>(obj);
 182 
  183   // copy the header, stack, and bitmap in the appropriate order if conjoint; must not touch the old chunk after we start b/c this can be a conjoint copy
 184   const int first  = (disjoint || to_addr <= from_addr) ? 0 : 2;
 185   const int stride = 1 - first;
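        // first/stride make the loop run 0,1,2 (header first) when the destination is at or below the source
        // (or the copy is disjoint), and 2,1,0 (bitmap first) when the destination is above the source,
        // so nothing is overwritten before it has been read.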
 186   for (int i = first; 0 <= i && i <= 2; i += stride) {
 187     switch(i) { // copy header and update it
 188     case 0:
 189       // tty->print_cr(">>> CPY header %p-%p -> %p-%p (%d)", from_addr, from_addr + header , to_addr, to_addr + header, header);
 190       if (to_addr != from_addr) {
 191         disjoint ? Copy::aligned_disjoint_words(from_addr, to_addr, header)
 192                  : Copy::aligned_conjoint_words(from_addr, to_addr, header);
 193       }
 194 
 195       jdk_internal_vm_StackChunk::set_size(to_addr, to_stack_size);
 196       to_chunk->set_sp(metadata_words());
 197       break;
 198     case 1: // copy stack
 199       if (to_stack_size > 0) {
 200         assert ((from_addr + header) == start_of_stack0, "");
 201         HeapWord* from_start = from_addr + header + from_sp - metadata_words();
 202         HeapWord* to_start = to_addr + header;
 203         // tty->print_cr(">>> CPY stack  %p-%p -> %p-%p (%d)", from_start, from_start + to_stack_size , to_start, to_start + to_stack_size, to_stack_size);
 204         disjoint ? Copy::aligned_disjoint_words(from_start, to_start, to_stack_size)
 205                  : Copy::aligned_conjoint_words(from_start, to_start, to_stack_size);
 206       }
 207       break;
 208     case 2: // copy bitmap
 209       if (to_stack_size > 0 && has_bitmap) {
 210         assert ((from_addr + header + from_stack_size) == start_of_bitmap0, "");
 211         assert (from_bitmap_size >= to_bitmap_size, "");
 212         HeapWord* from_start = from_addr + header + from_stack_size + (from_bitmap_size - to_bitmap_size);
 213         HeapWord* to_start = to_addr + header + to_stack_size;
 214         // tty->print_cr(">>> CPY bitmap  %p-%p -> %p-%p (%d)", from_start, from_start + to_bitmap_size , to_start, to_start + to_bitmap_size, to_bitmap_size);
 215         disjoint ? Copy::aligned_disjoint_words(from_start, to_start, to_bitmap_size)
 216                  : Copy::aligned_conjoint_words(from_start, to_start, to_bitmap_size);
 217       }
 218       break;
 219     }
 220   }
 221 
  222   assert (!to_chunk->has_bitmap() || to_chunk->is_gc_mode(), "");
 223   assert (to_chunk->has_bitmap() == has_bitmap, "");
 224   if (!to_chunk->has_bitmap()) {
 225     build_bitmap(to_chunk);
 226   }
 227 
 228   assert (to_chunk->size() == old_compact_size, "");
 229   assert (to_chunk->size() == instance_size(to_stack_size), "");
 230   assert (from_sp <= metadata_words() ?  (to_chunk->size() == old_size) : (to_chunk->size() < old_size), "");
 231   assert (to_chunk->verify(), "");
 232 
 233   // assert (to_chunk->requires_barriers(), ""); // G1 sometimes compacts a young region and *then* turns it old ((G1CollectedHeap*)Universe::heap())->heap_region_containing(oop(to_addr))->print();
 234 
 235   return align_object_size(header + to_stack_size + to_bitmap_size);
 236 }
 237 
 238 // template class StackChunkFrameStream<true>;
 239 // template class StackChunkFrameStream<false>;
 240 
 241 template <bool mixed>
 242 int InstanceStackChunkKlass::count_frames(stackChunkOop chunk) {
 243   int frames = 0;
 244   for (StackChunkFrameStream<mixed> f(chunk); !f.is_done(); f.next(SmallRegisterMap::instance)) frames++;
 245   return frames;
 246 }
 247 
 248 #ifndef PRODUCT
 249 void InstanceStackChunkKlass::oop_print_on(oop obj, outputStream* st) {
 250   // InstanceKlass::oop_print_on(obj, st);
 251   print_chunk((stackChunkOop)obj, false, st);
 252 }
 253 #endif
 254 
 255 
 256 // We replace derived pointers with offsets; the converse is done in DerelativizeDerivedPointers
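      // While the chunk is in GC mode a derived pointer is stored as the negated offset from its base
      // (e.g. base + 0x10 is stored as -0x10), so the sign bit distinguishes offsets from addresses.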
 257 template <bool concurrent_gc>
 258 class RelativizeDerivedPointers : public DerivedOopClosure {
 259 public:
 260   RelativizeDerivedPointers() {}
 261 
 262   virtual void do_derived_oop(oop* base_loc, derived_pointer* derived_loc) override {
 263     // The ordering in the following is crucial
 264     OrderAccess::loadload();
 265     oop base = Atomic::load((oop*)base_loc);
 266     // assert (Universe::heap()->is_in_or_null(base), "not an oop"); -- base might be invalid at this point
 267     if (base != (oop)nullptr) {
 268       assert (!CompressedOops::is_base(base), "");
 269 
 270 #if INCLUDE_ZGC
 271       if (concurrent_gc && UseZGC) {
 272         if (ZAddress::is_good(cast_from_oop<uintptr_t>(base)))
 273           return;
 274       }
 275 #endif
 276 #if INCLUDE_SHENANDOAHGC
 277       if (concurrent_gc && UseShenandoahGC) {
 278         if (!ShenandoahHeap::heap()->in_collection_set(base)) {
 279           return;
 280         }
 281       }
 282 #endif
 283 
 284       OrderAccess::loadload();
 285       intptr_t derived_int_val = Atomic::load((intptr_t*)derived_loc); // *derived_loc;
 286       if (derived_int_val < 0) {
 287         return;
 288       }
 289 
 290       // at this point, we've seen a non-offset value *after* we've read the base, but we write the offset *before* fixing the base,
 291       // so we are guaranteed that the value in derived_loc is consistent with base (i.e. points into the object).
 292       intptr_t offset = derived_int_val - cast_from_oop<intptr_t>(base);
 293       // assert (offset >= 0 && offset <= (base->size() << LogHeapWordSize), "offset: %ld size: %d", offset, (base->size() << LogHeapWordSize)); -- base might be invalid at this point
 294       Atomic::store((intptr_t*)derived_loc, -offset); // there could be a benign race here; we write a negative offset to let the sign bit signify it's an offset rather than an address
 295     } else {
 296       assert (*derived_loc == derived_pointer(0), "");
 297     }
 298   }
 299 };
 300 
 301 class DerelativizeDerivedPointers : public DerivedOopClosure {
 302 public:
 303   virtual void do_derived_oop(oop* base_loc, derived_pointer* derived_loc) override {
 304     // The ordering in the following is crucial
 305     OrderAccess::loadload();
 306     oop base = Atomic::load(base_loc);
 307     if (base != (oop)nullptr) {
 308       assert (!CompressedOops::is_base(base), "");
 309       ZGC_ONLY(assert (ZAddress::is_good(cast_from_oop<uintptr_t>(base)), "");)
 310 
 311       OrderAccess::loadload();
 312       intptr_t offset = Atomic::load((intptr_t*)derived_loc); // *derived_loc;
 313       if (offset >= 0)
 314         return;
 315 
  316       // at this point, we've seen an offset value *after* we've read the base, but the offset was written *before* the base was fixed,
  317       // so we are guaranteed that the offset in derived_loc is consistent with base (i.e. it is an offset into the object).
 318       if (offset < 0) {
 319         offset = -offset;
 320         assert (offset >= 0 && offset <= (base->size() << LogHeapWordSize), "");
 321         Atomic::store((intptr_t*)derived_loc, cast_from_oop<intptr_t>(base) + offset);
 322       }
 323   #ifdef ASSERT
 324       else { // DEBUG ONLY
 325         offset = offset - cast_from_oop<intptr_t>(base);
 326         assert (offset >= 0 && offset <= (base->size() << LogHeapWordSize), "offset: " PTR_FORMAT " size: %d", offset, (base->size() << LogHeapWordSize));
 327       }
 328   #endif
 329     }
 330   }
 331 };
 332 
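      // Loads every oop in a frame through the heap access API (and stores it back when 'store' is true)
      // so that the relevant GC load/store barriers are executed for it.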
 333 template <bool store, bool compressedOopsWithBitmap>
 334 class BarrierClosure: public OopClosure {
 335   NOT_PRODUCT(intptr_t* _sp;)
 336 public:
 337   BarrierClosure(intptr_t* sp) NOT_PRODUCT(: _sp(sp)) {}
 338 
 339   virtual void do_oop(oop* p)       override { compressedOopsWithBitmap ? do_oop_work((narrowOop*)p) : do_oop_work(p); }
 340   virtual void do_oop(narrowOop* p) override { do_oop_work(p); }
 341 
 342   template <class T> inline void do_oop_work(T* p) {
 343     oop value = (oop)HeapAccess<>::oop_load(p);
 344     if (store) HeapAccess<>::oop_store(p, value);
 345     log_develop_trace(jvmcont)("barriers_for_oops_in_frame narrow: %d p: " INTPTR_FORMAT " sp offset: " INTPTR_FORMAT, sizeof(T) < sizeof(intptr_t), p2i(p), (intptr_t*)p - _sp);
 346   }
 347 
 348 // virtual void do_oop(oop* p) override { *(oop*)p = (oop)NativeAccess<>::oop_load((oop*)p); }
 349 // virtual void do_oop(narrowOop* p) override { *(narrowOop*)p = CompressedOops::encode((oop)NativeAccess<>::oop_load((narrowOop*)p)); }
 350 };
 351 
 352 
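      // Applies the wrapped closure only to oop slots that lie within the given MemRegion, and records
      // how many oops were visited and whether any of them were mutated.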
 353 template<typename OopClosureType>
 354 class StackChunkOopIterateFilterClosure: public OopClosure {
 355 private:
 356   OopClosureType* const _closure;
 357   stackChunkOop _chunk;
 358   MemRegion _bound;
 359 
 360 public:
 361   StackChunkOopIterateFilterClosure(OopClosureType* closure, stackChunkOop chunk, MemRegion bound)
 362     : _closure(closure),
 363       _chunk(chunk),
 364       _bound(bound),
 365       _mutated(false),
 366       _num_oops(0) {}
 367 
 368   virtual void do_oop(oop* p)       override { do_oop_work(p); }
 369   virtual void do_oop(narrowOop* p) override { do_oop_work(p); }
 370 
 371   bool _mutated;
 372   int _num_oops;
 373 
 374   template <typename T>
 375   void do_oop_work(T* p) {
 376     if (_bound.contains(p)) {
 377       T before = *p;
 378       Devirtualizer::do_oop(_closure, p);
 379       _mutated |= before != *p;
 380       _num_oops++;
 381     }
 382   }
 383 };
 384 
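      // Per-frame closure for oop_oop_iterate_stack_slow: visits frame metadata (Methods/nmethods),
      // relativizes derived pointers when doing destructive processing, and applies the oop closure
      // to each frame's oops within the given bounds.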
 385 template <bool concurrent_gc, typename OopClosureType>
 386 class OopOopIterateStackClosure {
 387   stackChunkOop _chunk;
 388   const bool _do_destructive_processing;
 389   OopClosureType * const _closure;
 390   MemRegion _bound;
 391 
 392 public:
 393   int _num_frames, _num_oops;
 394   OopOopIterateStackClosure(stackChunkOop chunk, bool do_destructive_processing, OopClosureType* closure, MemRegion mr)
 395     : _chunk(chunk),
 396       _do_destructive_processing(do_destructive_processing),
 397       _closure(closure),
 398       _bound(mr),
 399       _num_frames(0),
 400       _num_oops(0) {}
 401 
 402   template <bool mixed, typename RegisterMapT>
 403   bool do_frame(const StackChunkFrameStream<mixed>& f, const RegisterMapT* map) {
 404     log_develop_trace(jvmcont)("stack_chunk_iterate_stack sp: " INTPTR_FORMAT " pc: " INTPTR_FORMAT, f.sp() - _chunk->start_address(), p2i(f.pc()));
 405     // if (Continuation::is_return_barrier_entry(f.pc())) {
 406     //   assert ((int)(f.sp() - chunk->start_address(chunk)) < chunk->sp(), ""); // only happens when starting from gcSP
 407     //   return;
 408     // }
 409 
 410     _num_frames++;
 411     assert (_closure != nullptr, "");
 412 
 413     assert (mixed || !f.is_deoptimized(), "");
 414     if (mixed && f.is_compiled()) f.handle_deopted();
 415 
 416     // For unload method debugging
 417     // tty->print_cr(">>>> OopOopIterateStackClosure::do_frame is_compiled: %d return_barrier: %d pc: %p", f.is_compiled(), Continuation::is_return_barrier_entry(f.pc()), f.pc()); f.print_on(tty);
 418     // if (f.is_compiled()) tty->print_cr(">>>> OopOopIterateStackClosure::do_frame nmethod: %p method: %p", f.cb()->as_nmethod(), f.cb()->as_compiled_method()->method());
 419 
 420     // if (log_develop_is_enabled(Trace, jvmcont)) cb->print_value_on(tty);
 421 
 422     if (Devirtualizer::do_metadata(_closure)) {
 423       if (f.is_interpreted()) {
 424         Method* im = f.to_frame().interpreter_frame_method();
 425         _closure->do_method(im);
 426       } else if (f.is_compiled()) {
 427         nmethod* nm = f.cb()->as_nmethod();
 428         // The do_nmethod function takes care of having the right synchronization
 429         // when keeping the nmethod alive during concurrent execution.
 430         _closure->do_nmethod(nm);
 431       }
 432     }
 433 
 434     if (_do_destructive_processing) { // evacuation always takes place at a safepoint; for concurrent iterations, we skip derived pointers, which is ok b/c coarse card marking is used for chunks
 435       assert (!f.is_compiled() || f.oopmap()->has_derived_oops() == f.oopmap()->has_any(OopMapValue::derived_oop_value), "");
 436       if (f.is_compiled() && f.oopmap()->has_derived_oops()) {
 437         if (concurrent_gc) {
 438           _chunk->set_gc_mode(true);
 439           OrderAccess::storestore();
 440         }
 441         InstanceStackChunkKlass::relativize_derived_pointers<concurrent_gc>(f, map);
 442         // OrderAccess::storestore();
 443       }
 444     }
 445 
 446     StackChunkOopIterateFilterClosure<OopClosureType> cl(_closure, _chunk, _bound);
 447     f.iterate_oops(&cl, map);
 448     bool mutated_oops = cl._mutated;
 449     _num_oops += cl._num_oops;// f.oopmap()->num_oops();
 450 
 451     // if (FIX_DERIVED_POINTERS && concurrent_gc && mutated_oops && _chunk->is_gc_mode()) { // TODO: this is a ZGC-specific optimization that depends on the one in iterate_derived_pointers
 452     //   InstanceStackChunkKlass::derelativize_derived_pointers(f, map);
 453     // }
 454     return true;
 455   }
 456 };
 457 
 458 template <bool concurrent_gc>
 459 void InstanceStackChunkKlass::oop_oop_iterate_stack_slow(stackChunkOop chunk, OopIterateClosure* closure, MemRegion mr) {
 460   // oop_oop_iterate_stack_bounded<concurrent_gc, OopClosureType>(chunk, closure, MemRegion(nullptr, SIZE_MAX));
 461   assert (Continuation::debug_is_stack_chunk(chunk), "");
  462   log_develop_trace(jvmcont)("stack_chunk_iterate_stack requires_barriers: %d", chunk->requires_barriers());
 463 
 464   bool do_destructive_processing; // should really be `= closure.is_destructive()`, if we had such a thing
 465   if (concurrent_gc) {
 466     do_destructive_processing = true;
 467   } else {
 468     if (SafepointSynchronize::is_at_safepoint() /*&& !chunk->is_gc_mode()*/) {
 469       do_destructive_processing = true;
 470       chunk->set_gc_mode(true);
 471     } else {
 472       do_destructive_processing = false;
 473     }
 474     assert (!SafepointSynchronize::is_at_safepoint() || chunk->is_gc_mode(), "gc_mode: %d is_at_safepoint: %d", chunk->is_gc_mode(), SafepointSynchronize::is_at_safepoint());
 475   }
 476 
 477   // tty->print_cr(">>>> OopOopIterateStackClosure::oop_oop_iterate_stack");
 478   OopOopIterateStackClosure<concurrent_gc, OopIterateClosure> frame_closure(chunk, do_destructive_processing, closure, mr);
 479   chunk->iterate_stack(&frame_closure);
 480 
 481   // if (FIX_DERIVED_POINTERS && concurrent_gc) {
 482   //   OrderAccess::storestore(); // to preserve that we set the offset *before* fixing the base oop
 483   //   chunk->set_gc_mode(false);
 484   // }
 485 
 486   assert (frame_closure._num_frames >= 0, "");
 487   assert (frame_closure._num_oops >= 0, "");
 488   if (do_destructive_processing || closure == nullptr) {
 489     // chunk->set_numFrames(frame_closure._num_frames); -- TODO: remove those fields
 490     // chunk->set_numOops(frame_closure._num_oops);
 491   }
 492 
 493   if (closure != nullptr) {
 494     Continuation::emit_chunk_iterate_event(chunk, frame_closure._num_frames, frame_closure._num_oops);
 495   }
 496 
 497   log_develop_trace(jvmcont)("stack_chunk_iterate_stack ------- end -------");
 498   // tty->print_cr("<<< stack_chunk_iterate_stack %p %p", (oopDesc*)chunk, Thread::current());
 499 }
 500 
 501 template void InstanceStackChunkKlass::oop_oop_iterate_stack_slow<false>(stackChunkOop chunk, OopIterateClosure* closure, MemRegion mr);
 502 template void InstanceStackChunkKlass::oop_oop_iterate_stack_slow<true> (stackChunkOop chunk, OopIterateClosure* closure, MemRegion mr);
 503 
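      // Visits the metadata of every frame: the Method of interpreted frames and the nmethod of compiled ones.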
 504 class MarkMethodsStackClosure {
 505   OopIterateClosure* _closure;
 506 
 507 public:
 508   MarkMethodsStackClosure(OopIterateClosure* cl) : _closure(cl) {}
 509 
 510   template <bool mixed, typename RegisterMapT>
 511   bool do_frame(const StackChunkFrameStream<mixed>& f, const RegisterMapT* map) {
 512     if (f.is_interpreted()) {
 513       Method* im = f.to_frame().interpreter_frame_method();
 514       _closure->do_method(im);
 515     } else if (f.is_compiled()) {
 516       nmethod* nm = f.cb()->as_nmethod();
 517       // The do_nmethod function takes care of having the right synchronization
 518       // when keeping the nmethod alive during concurrent execution.
 519       _closure->do_nmethod(nm);
 520     }
 521     return true;
 522   }
 523 };
 524 
 525 void InstanceStackChunkKlass::mark_methods(stackChunkOop chunk, OopIterateClosure* cl) {
 526   MarkMethodsStackClosure closure(cl);
 527   chunk->iterate_stack(&closure);
 528 }
 529 
 530 template <bool concurrent_gc, bool mixed, typename RegisterMapT>
 531 void InstanceStackChunkKlass::relativize_derived_pointers(const StackChunkFrameStream<mixed>& f, const RegisterMapT* map) {
 532   RelativizeDerivedPointers<concurrent_gc> derived_closure;
 533   f.iterate_derived_pointers(&derived_closure, map);
 534 }
 535 
 536 template <bool mixed, typename RegisterMapT>
 537 void InstanceStackChunkKlass::derelativize_derived_pointers(const StackChunkFrameStream<mixed>& f, const RegisterMapT* map) {
 538   DerelativizeDerivedPointers derived_closure;
 539   f.iterate_derived_pointers(&derived_closure, map);
 540 }
 541 
 542 template void InstanceStackChunkKlass::relativize_derived_pointers<false>(const StackChunkFrameStream<true >& f, const RegisterMap* map);
 543 template void InstanceStackChunkKlass::relativize_derived_pointers<true> (const StackChunkFrameStream<true >& f, const RegisterMap* map);
 544 template void InstanceStackChunkKlass::relativize_derived_pointers<false>(const StackChunkFrameStream<false>& f, const RegisterMap* map);
 545 template void InstanceStackChunkKlass::relativize_derived_pointers<true> (const StackChunkFrameStream<false>& f, const RegisterMap* map);
 546 template void InstanceStackChunkKlass::relativize_derived_pointers<false>(const StackChunkFrameStream<true >& f, const SmallRegisterMap* map);
 547 template void InstanceStackChunkKlass::relativize_derived_pointers<true> (const StackChunkFrameStream<true >& f, const SmallRegisterMap* map);
 548 template void InstanceStackChunkKlass::relativize_derived_pointers<false>(const StackChunkFrameStream<false>& f, const SmallRegisterMap* map);
 549 template void InstanceStackChunkKlass::relativize_derived_pointers<true> (const StackChunkFrameStream<false>& f, const SmallRegisterMap* map);
 550 
 551 
 552 template <bool store, bool mixed, typename RegisterMapT>
 553 void InstanceStackChunkKlass::do_barriers(stackChunkOop chunk, const StackChunkFrameStream<mixed>& f, const RegisterMapT* map) {
 554   // we need to invoke the write barriers so as not to miss oops in old chunks that haven't yet been concurrently scanned
 555   if (f.is_done()) return;
 556   log_develop_trace(jvmcont)("InstanceStackChunkKlass::invoke_barriers sp: " INTPTR_FORMAT " pc: " INTPTR_FORMAT, p2i(f.sp()), p2i(f.pc()));
 557 
 558   if (log_develop_is_enabled(Trace, jvmcont) && !mixed && f.is_interpreted()) f.cb()->print_value_on(tty);
 559 
 560   if (mixed) f.handle_deopted(); // we could freeze deopted frames in slow mode.
 561 
 562   if (f.is_interpreted()) {
 563     Method* m = f.to_frame().interpreter_frame_method();
 564     m->record_marking_cycle();
 565   } else if (f.is_compiled()) {
 566     nmethod* nm = f.cb()->as_nmethod();
 567     // The entry barrier takes care of having the right synchronization
 568     // when keeping the nmethod alive during concurrent execution.
 569     nm->run_nmethod_entry_barrier();
 570   }
 571 
 572   assert (!f.is_compiled() || f.oopmap()->has_derived_oops() == f.oopmap()->has_any(OopMapValue::derived_oop_value), "");
 573   bool has_derived = f.is_compiled() && f.oopmap()->has_derived_oops();
 574   if (has_derived) {
 575     if (UseZGC || UseShenandoahGC) {
 576       relativize_derived_pointers<true>(f, map);
 577     }
 578   }
 579 
 580   if (chunk->has_bitmap() && UseCompressedOops) {
 581     BarrierClosure<store, true> oops_closure(f.sp());
 582     f.iterate_oops(&oops_closure, map);
 583   } else {
 584     BarrierClosure<store, false> oops_closure(f.sp());
 585     f.iterate_oops(&oops_closure, map);
 586   }
 587   OrderAccess::loadload(); // observing the barriers will prevent derived pointers from being derelativized concurrently
 588 
 589   // if (has_derived) { // we do this in fix_thawed_frame
 590   //   derelativize_derived_pointers(f, map);
 591   // }
 592 }
 593 
 594 template void InstanceStackChunkKlass::do_barriers<false>(stackChunkOop chunk, const StackChunkFrameStream<true >& f, const RegisterMap* map);
 595 template void InstanceStackChunkKlass::do_barriers<true> (stackChunkOop chunk, const StackChunkFrameStream<true >& f, const RegisterMap* map);
 596 template void InstanceStackChunkKlass::do_barriers<false>(stackChunkOop chunk, const StackChunkFrameStream<false>& f, const RegisterMap* map);
 597 template void InstanceStackChunkKlass::do_barriers<true> (stackChunkOop chunk, const StackChunkFrameStream<false>& f, const RegisterMap* map);
 598 template void InstanceStackChunkKlass::do_barriers<false>(stackChunkOop chunk, const StackChunkFrameStream<true >& f, const SmallRegisterMap* map);
 599 template void InstanceStackChunkKlass::do_barriers<true> (stackChunkOop chunk, const StackChunkFrameStream<true >& f, const SmallRegisterMap* map);
 600 template void InstanceStackChunkKlass::do_barriers<false>(stackChunkOop chunk, const StackChunkFrameStream<false>& f, const SmallRegisterMap* map);
 601 template void InstanceStackChunkKlass::do_barriers<true> (stackChunkOop chunk, const StackChunkFrameStream<false>& f, const SmallRegisterMap* map);
 602 
 603 template void InstanceStackChunkKlass::fix_thawed_frame(stackChunkOop chunk, const frame& f, const RegisterMap* map);
 604 template void InstanceStackChunkKlass::fix_thawed_frame(stackChunkOop chunk, const frame& f, const SmallRegisterMap* map);
 605 
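      // Applies InstanceStackChunkKlass::do_barriers to every frame in the chunk.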
 606 template <bool store>
 607 class DoBarriersStackClosure {
 608   const stackChunkOop _chunk;
 609 public:
 610   DoBarriersStackClosure(stackChunkOop chunk) : _chunk(chunk) {}
 611 
 612   template <bool mixed, typename RegisterMapT>
 613   bool do_frame(const StackChunkFrameStream<mixed>& f, const RegisterMapT* map) {
 614     InstanceStackChunkKlass::do_barriers<store>(_chunk, f, map);
 615     return true;
 616   }
 617 };
 618 
 619 template void InstanceStackChunkKlass::do_barriers<false>(stackChunkOop chunk);
 620 template void InstanceStackChunkKlass::do_barriers<true>(stackChunkOop chunk);
 621 
 622 template <bool store>
 623 void InstanceStackChunkKlass::do_barriers(stackChunkOop chunk) {
 624   DoBarriersStackClosure<store> closure(chunk);
 625   chunk->iterate_stack(&closure);
 626 }
 627 
 628 #ifdef ASSERT
 629 template<class P>
 630 static inline oop safe_load(P *addr) {
 631   oop obj = (oop)RawAccess<>::oop_load(addr);
 632   obj = (oop)NativeAccess<>::oop_load(&obj);
 633   return obj;
 634 }
 635 
 636 // Returns true iff the address p is readable and *(intptr_t*)p != errvalue
 637 extern "C" bool dbg_is_safe(const void* p, intptr_t errvalue);
 638 static bool is_good_oop(oop o) { return dbg_is_safe(o, -1) && dbg_is_safe(o->klass(), -1) && oopDesc::is_oop(o) && o->klass()->is_klass(); }
 639 #endif
 640 
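      // Used by fix_thawed_frame when the chunk stores compressed oops: decodes the narrow oop found
      // at the slot and writes it back as a full-width oop.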
 641 class FixCompressedOopClosure : public OopClosure {
 642   void do_oop(oop* p) override {
 643     assert (UseCompressedOops, "");
 644     oop obj = CompressedOops::decode(*(narrowOop*)p);
 645     assert (obj == nullptr || is_good_oop(obj), "p: " INTPTR_FORMAT " obj: " INTPTR_FORMAT, p2i(p), p2i((oopDesc*)obj));
 646     *p = obj;
 647   }
 648 
 649   void do_oop(narrowOop* p) override {}
 650 };
 651 
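      // Sets the bitmap bit for every oop slot in a frame; with compressed oops, each full-width oop
      // is first re-encoded in place as a narrow oop.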
 652 template <bool compressedOops>
 653 class BuildBitmapOopClosure : public OopClosure {
 654   intptr_t* const _stack_start;
 655   const BitMap::idx_t _bit_offset;
 656   BitMapView _bm;
 657 public:
 658   BuildBitmapOopClosure(intptr_t* stack_start, BitMap::idx_t bit_offset, BitMapView bm) : _stack_start(stack_start), _bit_offset(bit_offset), _bm(bm) {}
 659 
 660   virtual void do_oop(oop* p) override {
 661     assert (p >= (oop*)_stack_start, "");
 662     if (compressedOops) {
 663       // Convert all oops to narrow before marking bit
 664       oop obj = *p;
 665       *p = nullptr;
 666       // assuming little endian
 667       *(narrowOop*)p = CompressedOops::encode(obj);
 668       do_oop((narrowOop*)p);
 669     } else {
 670       BitMap::idx_t index = _bit_offset + (p - (oop*)_stack_start);
 671       log_develop_trace(jvmcont)("Build bitmap wide oop p: " INTPTR_FORMAT " index: " SIZE_FORMAT " bit_offset: " SIZE_FORMAT, p2i(p), index, _bit_offset);
 672       assert (!_bm.at(index), "");
 673       _bm.set_bit(index);
 674     }
 675   }
 676 
 677   virtual void do_oop(narrowOop* p) override {
 678     assert (p >= (narrowOop*)_stack_start, "");
 679     BitMap::idx_t index = _bit_offset + (p - (narrowOop*)_stack_start);
 680     log_develop_trace(jvmcont)("Build bitmap narrow oop p: " INTPTR_FORMAT " index: " SIZE_FORMAT " bit_offset: " SIZE_FORMAT, p2i(p), index, _bit_offset);
 681     assert (!_bm.at(index), "");
 682     _bm.set_bit(index);
 683   }
 684 };
 685 
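      // Per-frame closure used by build_bitmap: relativizes derived pointers (unless the chunk is
      // already in GC mode) and marks the frame's oop slots in the chunk's bitmap when UseChunkBitmaps
      // is enabled.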
 686 template <bool compressedOops>
 687 class BuildBitmapStackClosure {
 688   stackChunkOop _chunk;
 689   const BitMap::idx_t _bit_offset;
 690 public:
 691   BuildBitmapStackClosure(stackChunkOop chunk) : _chunk(chunk), _bit_offset(chunk->bit_offset()) {}
 692 
 693   template <bool mixed, typename RegisterMapT>
 694   bool do_frame(const StackChunkFrameStream<mixed>& f, const RegisterMapT* map) {
 695     if (!_chunk->is_gc_mode() && f.is_compiled() && f.oopmap()->has_derived_oops()) {
 696       RelativizeDerivedPointers<false> derived_oops_closure;
 697       f.iterate_derived_pointers(&derived_oops_closure, map);
 698     }
 699 
 700     if (UseChunkBitmaps) {
 701       BuildBitmapOopClosure<compressedOops> oops_closure(_chunk->start_address(), _chunk->bit_offset(), _chunk->bitmap());
 702       f.iterate_oops(&oops_closure, map);
 703     }
 704 
 705     return true;
 706   }
 707 };
 708 
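      // Scans every frame to populate the chunk's oop bitmap (when UseChunkBitmaps) and to relativize
      // derived pointers, then flips the chunk into GC mode.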
 709 void InstanceStackChunkKlass::build_bitmap(stackChunkOop chunk) {
 710   assert (!chunk->has_bitmap(), "");
 711   if (UseChunkBitmaps) {
 712     chunk->set_has_bitmap(true);
 713     BitMapView bm = chunk->bitmap();
 714     bm.clear();
 715   }
 716 
 717   if (UseCompressedOops) {
 718     BuildBitmapStackClosure<true> closure(chunk);
 719     chunk->iterate_stack(&closure);
 720   } else {
 721     BuildBitmapStackClosure<false> closure(chunk);
 722     chunk->iterate_stack(&closure);
 723   }
 724 
 725   chunk->set_gc_mode(true); // must be set *after* the above closure
 726 }
 727 
 728 // template <bool store>
 729 // class BarriersIterateStackClosure {
 730 // public:
 731 //   template <bool mixed, typename RegisterMapT>
 732 //   bool do_frame(const StackChunkFrameStream<mixed>& f, const RegisterMapT* map) {
 733 //     InstanceStackChunkKlass::barriers_for_oops_in_frame<mixed, store>(f, map);
 734 //     return true;
 735 //   }
 736 // };
 737 
 738 // template <bool store>
 739 // void InstanceStackChunkKlass::barriers_for_oops_in_chunk(stackChunkOop chunk) {
 740 //   BarriersIterateStackClosure<store> frame_closure;
 741 //   chunk->iterate_stack(&frame_closure);
 742 // }
 743 
 744 // NOINLINE void InstanceStackChunkKlass::fix_chunk(stackChunkOop chunk) {
 745 //   log_develop_trace(jvmcont)("fix_stack_chunk young: %d", !chunk->requires_barriers());
 746 //   FixChunkIterateStackClosure frame_closure(chunk);
 747 //   chunk->iterate_stack(&frame_closure);
 748 //   log_develop_trace(jvmcont)("fix_stack_chunk ------- end -------");
 749 // }
 750 
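      // After a frame has been thawed back onto a thread stack: re-expands any narrow oops left behind
      // by the chunk's bitmap representation and turns relativized derived pointers back into real addresses.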
 751 template <typename RegisterMapT>
 752 void InstanceStackChunkKlass::fix_thawed_frame(stackChunkOop chunk, const frame& f, const RegisterMapT* map) {
 753   if (chunk->has_bitmap() && UseCompressedOops) {
 754     FixCompressedOopClosure oop_closure;
 755     if (f.is_interpreted_frame()) {
 756       f.oops_interpreted_do(&oop_closure, nullptr);
 757     } else {
 758       OopMapDo<OopClosure, DerelativizeDerivedPointers, SkipNullValue> visitor(&oop_closure, nullptr);
 759       visitor.oops_do(&f, map, f.oop_map());
 760     }
 761   }
 762 
 763   if (f.is_compiled_frame() && f.oop_map()->has_derived_oops()) {
 764     DerelativizeDerivedPointers derived_closure;
 765     OopMapDo<OopClosure, DerelativizeDerivedPointers, SkipNullValue> visitor(nullptr, &derived_closure);
 766     visitor.oops_do(&f, map, f.oop_map());
 767   }
 768 }
 769 
 770 #ifdef ASSERT
 771 
 772 template <typename OopT>
 773 class StackChunkVerifyBitmapClosure : public BitMapClosure {
 774   stackChunkOop _chunk;
 775   intptr_t* _top;
 776   intptr_t* _next;
 777 public:
 778   int _count;
 779   bool _exact; // whether or not the top, and therefore the count, is exact
 780   StackChunkVerifyBitmapClosure(stackChunkOop chunk) : _chunk(chunk), _count(0) {
 781     find_frame_top(StackChunkFrameStream<true>(chunk));
 782     log_develop_trace(jvmcont)("debug_verify_stack_chunk bitmap stack top: " INTPTR_FORMAT, p2i(_top));
 783   }
 784 
 785   bool do_bit(BitMap::idx_t index) override {
 786     OopT* p = _chunk->address_for_bit<OopT>(index);
 787 #if (defined(X86) || defined(AARCH64)) && !defined(ZERO)
 788     if ((intptr_t*)p < _top && (intptr_t*)p != _chunk->sp_address() - frame::sender_sp_offset) return true; // skip oops that are not seen by the oopmap scan
 789 #else
 790     Unimplemented();
 791 #endif
 792 
 793     log_develop_trace(jvmcont)("debug_verify_stack_chunk bitmap oop p: " INTPTR_FORMAT " index: " SIZE_FORMAT " bit_offset: " SIZE_FORMAT,
 794             p2i(p), index, _chunk->bit_offset());
 795     _count++;
 796     if (SafepointSynchronize::is_at_safepoint()) return true;
 797 
 798     oop obj = safe_load(p);
 799     assert ((!_exact && (intptr_t*)p < _next) || obj == nullptr || is_good_oop(obj),
 800             "p: " INTPTR_FORMAT " obj: " INTPTR_FORMAT " index: " SIZE_FORMAT " bit_offset: " SIZE_FORMAT,
 801             p2i(p), p2i((oopDesc*)obj), index, _chunk->bit_offset());
 802     return true; // continue processing
 803   }
 804 
 805   void find_frame_top(const StackChunkFrameStream<true>& f) {
  806     // We do this just so that the count is the same as the oopmap scan for verification purposes, but for GC traversal it's not important.
 807     _next = nullptr;
 808     _exact = true;
 809     if (f.is_interpreted()) {
 810       ResourceMark rm;
 811       InterpreterOopMap mask;
 812       frame fr = f.to_frame();
 813       fr.interpreted_frame_oop_map(&mask);
 814 #if (defined(X86) || defined(AARCH64)) && !defined(ZERO)
 815       _top = fr.addr_at(frame::interpreter_frame_initial_sp_offset) - mask.expression_stack_size();
 816 #else
 817       Unimplemented();
 818       _top = 0;
 819 #endif
 820     } else if (f.is_compiled()) {
 821       Method* callee = f.cb()->as_compiled_method()->attached_method_before_pc(f.pc());
 822       if (callee != nullptr) {
 823         int outgoing_args_words = (callee->num_stack_arg_slots() * VMRegImpl::stack_slot_size) >> LogBytesPerWord;
 824         _top = f.unextended_sp() + outgoing_args_words;
 825       } else {
 826         _top = f.unextended_sp();
 827 #if (defined(X86) || defined(AARCH64)) && !defined(ZERO)
 828         _next = _top + f.cb()->frame_size() - frame::sender_sp_offset;
 829 #else
 830         Unimplemented();
 831         _next = 0;
 832 #endif
 833         _exact = false;
 834       }
 835     } else {
 836       _top = f.unextended_sp();
 837     }
 838   }
 839 };
 840 
 841 class StackChunkVerifyOopsClosure : public OopClosure {
 842   stackChunkOop _chunk;
 843   intptr_t* _sp;
 844   int _count;
 845 public:
 846   StackChunkVerifyOopsClosure(stackChunkOop chunk, intptr_t* sp) : _chunk(chunk), _sp(sp), _count(0) {}
 847   int count() { return _count; }
 848   void do_oop(oop* p) override { (_chunk->has_bitmap() && UseCompressedOops) ? do_oop_work((narrowOop*)p) : do_oop_work(p); }
 849   void do_oop(narrowOop* p) override { do_oop_work(p); }
 850 
 851   template <class T> inline void do_oop_work(T* p) {
 852     log_develop_trace(jvmcont)("debug_verify_stack_chunk oop narrow: %d p: " INTPTR_FORMAT, sizeof(T) < sizeof(intptr_t), p2i(p));
 853      _count++;
 854     if (SafepointSynchronize::is_at_safepoint()) return;
 855 
 856     oop obj = safe_load(p);
 857     assert (obj == nullptr || is_good_oop(obj), "p: " INTPTR_FORMAT " obj: " INTPTR_FORMAT, p2i(p), p2i((oopDesc*)obj));
 858     if (_chunk->has_bitmap()) {
 859       BitMap::idx_t index = (p - (T*)_chunk->start_address()) + _chunk->bit_offset();
 860       assert (_chunk->bitmap().at(index), "Bit not set at index " SIZE_FORMAT " corresponding to " INTPTR_FORMAT, index, p2i(p));
 861     }
 862   }
 863 };
 864 
 865 class StackChunkVerifyDerivedPointersClosure : public DerivedOopClosure {
 866   stackChunkOop _chunk;
 867 public:
 868   StackChunkVerifyDerivedPointersClosure(stackChunkOop chunk) : _chunk(chunk) {}
 869 
 870   virtual void do_derived_oop(oop* base_loc, derived_pointer* derived_loc) override {
 871     log_develop_trace(jvmcont)("debug_verify_stack_chunk base: " INTPTR_FORMAT " derived: " INTPTR_FORMAT, p2i(base_loc), p2i(derived_loc));
 872     if (SafepointSynchronize::is_at_safepoint()) return;
 873 
 874     oop base = (_chunk->has_bitmap() && UseCompressedOops) ? CompressedOops::decode(Atomic::load((narrowOop*)base_loc)) : Atomic::load((oop*)base_loc);
 875     // (oop)NativeAccess<>::oop_load((oop*)base_loc); //
 876     if (base != nullptr) {
 877       ZGC_ONLY(if (UseZGC && !ZAddress::is_good(cast_from_oop<uintptr_t>(base))) return;)
 878       assert (!CompressedOops::is_base(base), "");
 879       assert (oopDesc::is_oop(base), "");
 880       ZGC_ONLY(assert (!UseZGC || ZAddress::is_good(cast_from_oop<uintptr_t>(base)), "");)
 881       OrderAccess::loadload();
 882       intptr_t offset = Atomic::load((intptr_t*)derived_loc);
 883       offset = offset < 0
 884                   ? -offset
 885                   : offset - cast_from_oop<intptr_t>(base);
 886 
 887       // The following assert fails on AArch64 for some reason
 888       // assert (offset >= 0 && offset <= (base->size() << LogHeapWordSize), "offset: %ld base->size: %d relative: %d", offset, base->size() << LogHeapWordSize, *(intptr_t*)derived_loc < 0);
 889     } else {
 890       assert (*derived_loc == derived_pointer(0), "");
 891     }
 892   }
 893 };
 894 
 895 class VerifyStackClosure {
 896   stackChunkOop _chunk;
 897 public:
 898   intptr_t* _sp;
 899   CodeBlob* _cb;
 900   bool _callee_interpreted;
 901   int _size;
 902   int _argsize;
 903   int _num_oops, _num_frames, _num_interpreted_frames, _num_i2c;
 904   VerifyStackClosure(stackChunkOop chunk, int num_frames, int size)
 905     : _chunk(chunk), _sp(nullptr), _cb(nullptr), _callee_interpreted(false),
 906       _size(size), _argsize(0), _num_oops(0), _num_frames(num_frames), _num_interpreted_frames(0), _num_i2c(0) {}
 907 
 908   template <bool mixed, typename RegisterMapT>
 909   bool do_frame(const StackChunkFrameStream<mixed>& f, const RegisterMapT* map) {
 910     _sp = f.sp();
 911     _cb = f.cb();
 912 
 913     int fsize = f.frame_size() - ((f.is_interpreted() == _callee_interpreted) ? _argsize : 0);
 914     int num_oops = f.num_oops();
 915     assert (num_oops >= 0, "");
 916     // tty->print_cr(">>> fsize: %d f.frame_size(): %d callee_interpreted: %d callee_argsize: %d", fsize, f.frame_size(), _callee_interpreted, _argsize);
 917 
 918     _argsize   = f.stack_argsize();
 919     _size     += fsize;
 920     _num_oops += num_oops;
 921     if (f.is_interpreted()) {
 922       _num_interpreted_frames++;
 923     }
 924 
 925     // assert (!chunk->requires_barriers() || num_frames <= chunk->numFrames(), "");
 926     log_develop_trace(jvmcont)("debug_verify_stack_chunk frame: %d sp: " INTPTR_FORMAT " pc: " INTPTR_FORMAT " interpreted: %d size: %d argsize: %d oops: %d", _num_frames, f.sp() - _chunk->start_address(), p2i(f.pc()), f.is_interpreted(), fsize, _argsize, num_oops);
 927     if (log_develop_is_enabled(Trace, jvmcont)) f.print_on(tty);
 928     assert (f.pc() != nullptr,
 929       "young: %d chunk->numFrames(): %d num_frames: %d sp: " INTPTR_FORMAT " start: " INTPTR_FORMAT " end: " INTPTR_FORMAT,
 930       !_chunk->requires_barriers(), _chunk->numFrames(), _num_frames, p2i(f.sp()), p2i(_chunk->start_address()), p2i(_chunk->bottom_address()));
 931 
 932     if (_num_frames == 0) {
 933       assert (f.pc() == _chunk->pc(), "");
 934     }
 935 
 936     if (_num_frames > 0 && !_callee_interpreted && f.is_interpreted()) {
 937       log_develop_trace(jvmcont)("debug_verify_stack_chunk i2c");
 938       _num_i2c++;
 939     }
 940 
 941     // if (_cb != nullptr && _cb->is_nmethod()) {
 942     //   nmethod* nm = cb->as_nmethod();
 943     //   if (check_deopt && nm->is_marked_for_deoptimization() && nm->is_not_entrant()) {
 944     //     tty->print_cr("-- FOUND NON ENTRANT NMETHOD IN CHUNK: ");
 945     //     if (nm->method() != nullptr) nm->method()->print_on(tty);
 946     //     nm->print_on(tty);
 947     //   }
 948     // }
 949 
 950     StackChunkVerifyOopsClosure oops_closure(_chunk, f.sp());
 951     f.iterate_oops(&oops_closure, map);
 952     assert (oops_closure.count() == num_oops, "oops: %d oopmap->num_oops(): %d", oops_closure.count(), num_oops);
 953 
 954     StackChunkVerifyDerivedPointersClosure derived_oops_closure(_chunk);
 955     f.iterate_derived_pointers(&derived_oops_closure, map);
 956 
 957     _callee_interpreted = f.is_interpreted();
 958     _num_frames++;
 959     return true;
 960   }
 961 };
 962 
 963 // verifies the consistency of the chunk's data
 964 bool InstanceStackChunkKlass::verify(oop obj, size_t* out_size, int* out_oops, int* out_frames, int* out_interpreted_frames) {
 965   DEBUG_ONLY(if (!VerifyContinuations) return true;)
 966 
 967   assert (oopDesc::is_oop(obj), "");
 968   assert (obj->is_stackChunk(), "");
 969   stackChunkOop chunk = (stackChunkOop)obj;
 970 
 971   log_develop_trace(jvmcont)("debug_verify_stack_chunk barriers: %d", chunk->requires_barriers());
 972   // chunk->print_on(true, tty);
 973 
 974   assert (chunk->is_stackChunk(), "");
 975   assert (chunk->stack_size() >= 0, "");
 976   assert (chunk->argsize() >= 0, "");
 977   assert (!chunk->has_bitmap() || chunk->is_gc_mode(), "");
 978 
 979   if (chunk->is_empty()) {
 980     assert (chunk->argsize() == 0, "");
 981     assert (chunk->max_size() == 0, "");
 982     assert (chunk->numFrames() == -1, "");
 983     assert (chunk->numOops() == -1, "");
 984   }
 985 
 986   if (!SafepointSynchronize::is_at_safepoint()) {
 987     assert (oopDesc::is_oop_or_null(chunk->parent()), "");
 988   }
 989 
 990   bool check_deopt = false;
 991   if (Thread::current()->is_Java_thread() && !SafepointSynchronize::is_at_safepoint()) {
 992     if (JavaThread::cast(Thread::current())->cont_fastpath_thread_state())
 993       check_deopt = true;
 994   }
 995 
 996   const bool concurrent = !SafepointSynchronize::is_at_safepoint() && !Thread::current()->is_Java_thread();
 997   const bool gc_mode = chunk->is_gc_mode();
 998   const bool is_last = chunk->parent() == nullptr;
 999   const bool mixed = chunk->has_mixed_frames();
1000 
1001   // if argsize == 0 and the chunk isn't mixed, the chunk contains the metadata (pc, fp -- frame::sender_sp_offset) for the top frame (below sp), and *not* for the bottom frame
1002   int size = chunk->stack_size() - chunk->argsize() - chunk->sp();
1003   // tty->print_cr("size: %d chunk->stack_size(): %d chunk->sp(): %d chunk->argsize(): %d", size, chunk->stack_size(), chunk->sp(), chunk->argsize());
1004   assert (size >= 0, "");
1005   assert ((size == 0) == chunk->is_empty(), "");
1006 
1007   const StackChunkFrameStream<true> first(chunk);
1008   const bool has_safepoint_stub_frame = first.is_stub();
1009 
1010   VerifyStackClosure closure(chunk,
1011     has_safepoint_stub_frame ? 1 : 0, // iterate_stack skips the safepoint stub
1012     has_safepoint_stub_frame ? first.frame_size() : 0);
1013   chunk->iterate_stack(&closure);
1014 
1015   assert (!chunk->is_empty() || closure._cb == nullptr, "");
1016   if (closure._cb != nullptr && closure._cb->is_compiled()) {
1017     assert (chunk->argsize() == (closure._cb->as_compiled_method()->method()->num_stack_arg_slots() * VMRegImpl::stack_slot_size) >> LogBytesPerWord,
1018       "chunk argsize: %d bottom frame argsize: %d", chunk->argsize(), (closure._cb->as_compiled_method()->method()->num_stack_arg_slots() * VMRegImpl::stack_slot_size) >> LogBytesPerWord);
1019   } 
1020   // else {
1021   //   assert (chunk->argsize() == 0, "");
1022   // }
1023   assert (closure._num_interpreted_frames == 0 || chunk->has_mixed_frames(), "");
1024 
1025   if (!concurrent) {
1026     assert (closure._size <= size + chunk->argsize() + metadata_words(), "size: %d argsize: %d closure.size: %d end sp: " PTR_FORMAT " start sp: %d chunk size: %d", size, chunk->argsize(), closure._size, closure._sp - chunk->start_address(), chunk->sp(), chunk->stack_size());
1027     assert (chunk->argsize() == closure._argsize, "chunk->argsize(): %d closure.argsize: %d closure.callee_interpreted: %d", chunk->argsize(), closure._argsize, closure._callee_interpreted);
1028 
1029     int max_size = closure._size + closure._num_i2c * align_wiggle();
1030     assert (chunk->max_size() == max_size, "max_size(): %d max_size: %d argsize: %d num_i2c: %d", chunk->max_size(), max_size, closure._argsize, closure._num_i2c);
1031 
1032     assert (chunk->numFrames() == -1 || closure._num_frames == chunk->numFrames(), "barriers: %d num_frames: %d chunk->numFrames(): %d", chunk->requires_barriers(), closure._num_frames, chunk->numFrames());
1033     assert (chunk->numOops()   == -1 || closure._num_oops   == chunk->numOops(),   "barriers: %d num_oops: %d chunk->numOops(): %d",     chunk->requires_barriers(), closure._num_oops,   chunk->numOops());
1034 
1035     if (out_size   != nullptr) *out_size   += size;
1036     if (out_oops   != nullptr) *out_oops   += closure._num_oops;
1037     if (out_frames != nullptr) *out_frames += closure._num_frames;
1038     if (out_interpreted_frames != nullptr) *out_interpreted_frames += closure._num_interpreted_frames;
1039   } else assert (out_size == nullptr && out_oops == nullptr && out_frames == nullptr && out_interpreted_frames == nullptr, "");
1040 
1041   if (chunk->has_bitmap()) {
1042     assert (chunk->bitmap().size() == chunk->bit_offset() + (size_t)(chunk->stack_size() << (UseCompressedOops ? 1 : 0)), "bitmap().size(): %zu bit_offset: %zu stack_size: %d", chunk->bitmap().size(), chunk->bit_offset(), chunk->stack_size());
1043     bool exact;
1044     int oop_count;
1045     if (UseCompressedOops) {
1046       StackChunkVerifyBitmapClosure<narrowOop> bitmap_closure(chunk);
1047       chunk->bitmap().iterate(&bitmap_closure, chunk->bit_index_for((narrowOop*)(chunk->sp_address() - metadata_words())), chunk->bit_index_for((narrowOop*)chunk->end_address()));
1048       exact = bitmap_closure._exact;
1049       oop_count = bitmap_closure._count;
1050     } else {
1051       StackChunkVerifyBitmapClosure<oop> bitmap_closure(chunk);
1052       chunk->bitmap().iterate(&bitmap_closure, chunk->bit_index_for((oop*)(chunk->sp_address() - metadata_words())), chunk->bit_index_for((oop*)chunk->end_address()));
1053       exact = bitmap_closure._exact;
1054       oop_count = bitmap_closure._count;
1055     }
1056     assert (exact ? oop_count == closure._num_oops : oop_count >= closure._num_oops, "bitmap_closure._count: %d closure._num_oops: %d", oop_count, closure._num_oops);
1057   }
1058 
1059   return true;
1060 }
1061 
1062 namespace {
1063 class DescribeStackChunkClosure {
1064   stackChunkOop _chunk;
1065   FrameValues _values;
1066   RegisterMap _map;
1067   int _frame_no;
1068 public:
1069   DescribeStackChunkClosure(stackChunkOop chunk) : _chunk(chunk), _map((JavaThread*)nullptr, true, false, true), _frame_no(0) {
1070     _map.set_include_argument_oops(false);
1071   }
1072 
1073   const RegisterMap* get_map(const RegisterMap* map,      intptr_t* sp) { return map; }
1074   const RegisterMap* get_map(const SmallRegisterMap* map, intptr_t* sp) { return map->copy_to_RegisterMap(&_map, sp); }
1075 
1076   template <bool mixed, typename RegisterMapT>
1077   bool do_frame(const StackChunkFrameStream<mixed>& f, const RegisterMapT* map) {
1078     ResetNoHandleMark rnhm;
1079     HandleMark hm(Thread::current());
1080 
1081     frame fr = f.to_frame();
1082     if (_frame_no == 0) fr.describe_top(_values);
1083     fr.template describe<true>(_values, _frame_no++, get_map(map, f.sp()));
1084     return true;
1085   }
1086 
1087   void describe_chunk() {
1088     // _values.describe(-1, _chunk->start_address(), "CHUNK START");
1089     _values.describe(-1, _chunk->sp_address(),      "CHUNK SP");
1090     _values.describe(-1, _chunk->bottom_address() - 1, "CHUNK ARGS");
1091     _values.describe(-1, _chunk->end_address() - 1, "CHUNK END");
1092   }
1093 
1094   void print_on(outputStream* out) {
1095     if (_frame_no > 0) {
1096       describe_chunk();
1097       _values.print_on(_chunk, out);
1098     } else {
1099       out->print_cr(" EMPTY");
1100     }
1101   }
1102 };
1103 }
1104 #endif
1105 
1106 namespace {
1107 class PrintStackChunkClosure {
1108   stackChunkOop _chunk;
1109   outputStream* _st;
1110 public:
1111   PrintStackChunkClosure(stackChunkOop chunk, outputStream* st) : _chunk(chunk), _st(st) {}
1112 
1113   template <bool mixed, typename RegisterMapT>
1114   bool do_frame(const StackChunkFrameStream<mixed>& fs, const RegisterMapT* map) {
1115     frame f = fs.to_frame();
1116     _st->print_cr("-- frame sp: " INTPTR_FORMAT " interpreted: %d size: %d argsize: %d", p2i(fs.sp()), fs.is_interpreted(), f.frame_size(), fs.is_interpreted() ? 0 : f.compiled_frame_stack_argsize());
1117     f.print_on<true>(_st);
1118     const ImmutableOopMap* oopmap = fs.oopmap();
1119     if (oopmap != nullptr) {
1120       oopmap->print_on(_st);
1121       _st->cr();
1122     }
1123     return true;
1124   }
1125 };
1126 }
1127 
1128 void InstanceStackChunkKlass::print_chunk(const stackChunkOop c, bool verbose, outputStream* st) {
1129   if (c == (oop)nullptr) {
1130     st->print_cr("CHUNK NULL");
1131     return;
1132   }
1133   assert(c->is_stackChunk(), "");
1134 
1135   // HeapRegion* hr = G1CollectedHeap::heap()->heap_region_containing(chunk);
1136   st->print_cr("CHUNK " INTPTR_FORMAT " " INTPTR_FORMAT " - " INTPTR_FORMAT " :: " INTPTR_FORMAT, p2i((oopDesc*)c), p2i(c->start_address()), p2i(c->end_address()), c->identity_hash());
1137   st->print_cr("       barriers: %d gc_mode: %d bitmap: %d parent: " INTPTR_FORMAT, c->requires_barriers(), c->is_gc_mode(), c->has_bitmap(), p2i((oopDesc*)c->parent()));
1138   st->print_cr("       flags mixed: %d", c->has_mixed_frames());
1139   st->print_cr("       size: %d argsize: %d max_size: %d sp: %d pc: " INTPTR_FORMAT " num_frames: %d num_oops: %d", c->stack_size(), c->argsize(), c->max_size(), c->sp(), p2i(c->pc()), c->numFrames(), c->numOops());
1140 
1141   if (verbose) {
1142     st->cr();
1143     st->print_cr("------ chunk frames end: " INTPTR_FORMAT, p2i(c->bottom_address()));
1144     PrintStackChunkClosure closure(c, st);
1145     c->iterate_stack(&closure);
1146     st->print_cr("------");
1147 
1148   #ifdef ASSERT
1149     ResourceMark rm;
1150     DescribeStackChunkClosure describe(c);
1151     c->iterate_stack(&describe);
1152     describe.print_on(st);
1153     st->print_cr("======");
1154   #endif
1155   }
1156 }
1157 
1158 #ifndef PRODUCT
1159 template void StackChunkFrameStream<true >::print_on(outputStream* st) const;
1160 template void StackChunkFrameStream<false>::print_on(outputStream* st) const;
1161 
1162 template <bool mixed>
1163 void StackChunkFrameStream<mixed>::print_on(outputStream* st) const {
1164   st->print_cr("chunk: " INTPTR_FORMAT " index: %d sp offset: %d stack size: %d", p2i(_chunk), _index, _chunk->to_offset(_sp), _chunk->stack_size());
1165   to_frame().template print_on<true>(st);
1166 }
1167 #endif