1 /* 2 * Copyright (c) 2019, 2024, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #ifndef CPU_AARCH64_CONTINUATIONFREEZETHAW_AARCH64_INLINE_HPP 26 #define CPU_AARCH64_CONTINUATIONFREEZETHAW_AARCH64_INLINE_HPP 27 28 #include "code/codeBlob.inline.hpp" 29 #include "oops/stackChunkOop.inline.hpp" 30 #include "runtime/frame.hpp" 31 #include "runtime/frame.inline.hpp" 32 33 34 inline void patch_callee_link(const frame& f, intptr_t* fp) { 35 DEBUG_ONLY(intptr_t* orig = *ContinuationHelper::Frame::callee_link_address(f)); 36 *ContinuationHelper::Frame::callee_link_address(f) = fp; 37 } 38 39 inline void patch_callee_link_relative(const frame& f, intptr_t* fp) { 40 intptr_t* la = (intptr_t*)ContinuationHelper::Frame::callee_link_address(f); 41 intptr_t new_value = fp - la; 42 *la = new_value; 43 } 44 45 ////// Freeze 46 47 // Fast path 48 49 inline void FreezeBase::patch_stack_pd(intptr_t* frame_sp, intptr_t* heap_sp) { 50 // copy the spilled fp from the heap to the stack 51 *(frame_sp - frame::sender_sp_offset) = *(heap_sp - frame::sender_sp_offset); 52 } 53 54 // Slow path 55 56 template<typename FKind> 57 inline frame FreezeBase::sender(const frame& f) { 58 assert(FKind::is_instance(f), ""); 59 if (FKind::interpreted) { 60 return frame(f.sender_sp(), f.interpreter_frame_sender_sp(), f.link(), f.sender_pc()); 61 } 62 intptr_t** link_addr = link_address<FKind>(f); 63 64 intptr_t* sender_sp = (intptr_t*)(link_addr + frame::sender_sp_offset); // f.unextended_sp() + (fsize/wordSize); // 65 address sender_pc = ContinuationHelper::return_address_at(sender_sp - 1); 66 assert(sender_sp != f.sp(), "must have changed"); 67 68 int slot = 0; 69 CodeBlob* sender_cb = CodeCache::find_blob_and_oopmap(sender_pc, slot); 70 return sender_cb != nullptr 71 ? frame(sender_sp, sender_sp, *link_addr, sender_pc, sender_cb, 72 slot == -1 ? nullptr : sender_cb->oop_map_for_slot(slot, sender_pc), 73 false /* on_heap ? */) 74 : frame(sender_sp, sender_sp, *link_addr, sender_pc); 75 } 76 77 template<typename FKind> 78 frame FreezeBase::new_heap_frame(frame& f, frame& caller) { 79 assert(FKind::is_instance(f), ""); 80 assert(!caller.is_interpreted_frame() 81 || caller.unextended_sp() == (intptr_t*)caller.at(frame::interpreter_frame_last_sp_offset), ""); 82 83 intptr_t *sp, *fp; // sp is really our unextended_sp 84 if (FKind::interpreted) { 85 assert((intptr_t*)f.at(frame::interpreter_frame_last_sp_offset) == nullptr 86 || f.unextended_sp() == (intptr_t*)f.at_relative(frame::interpreter_frame_last_sp_offset), ""); 87 intptr_t locals_offset = *f.addr_at(frame::interpreter_frame_locals_offset); 88 // If the caller.is_empty(), i.e. we're freezing into an empty chunk, then we set 89 // the chunk's argsize in finalize_freeze and make room for it above the unextended_sp 90 bool overlap_caller = caller.is_interpreted_frame() || caller.is_empty(); 91 fp = caller.unextended_sp() - 1 - locals_offset + (overlap_caller ? ContinuationHelper::InterpretedFrame::stack_argsize(f) : 0); 92 sp = fp - (f.fp() - f.unextended_sp()); 93 assert(sp <= fp, ""); 94 assert(fp <= caller.unextended_sp(), ""); 95 caller.set_sp(fp + frame::sender_sp_offset); 96 97 assert(_cont.tail()->is_in_chunk(sp), ""); 98 99 frame hf(sp, sp, fp, f.pc(), nullptr, nullptr, true /* on_heap */); 100 // copy relativized locals from the stack frame 101 *hf.addr_at(frame::interpreter_frame_locals_offset) = locals_offset; 102 return hf; 103 } else { 104 // We need to re-read fp out of the frame because it may be an oop and we might have 105 // had a safepoint in finalize_freeze, after constructing f. 106 fp = *(intptr_t**)(f.sp() - frame::sender_sp_offset); 107 108 int fsize = FKind::size(f); 109 sp = caller.unextended_sp() - fsize; 110 if (caller.is_interpreted_frame()) { 111 // If the caller is interpreted, our stackargs are not supposed to overlap with it 112 // so we make more room by moving sp down by argsize 113 int argsize = FKind::stack_argsize(f); 114 sp -= argsize; 115 } 116 caller.set_sp(sp + fsize); 117 118 assert(_cont.tail()->is_in_chunk(sp), ""); 119 120 return frame(sp, sp, fp, f.pc(), nullptr, nullptr, true /* on_heap */); 121 } 122 } 123 124 void FreezeBase::adjust_interpreted_frame_unextended_sp(frame& f) { 125 assert((f.at(frame::interpreter_frame_last_sp_offset) != 0) || (f.unextended_sp() == f.sp()), ""); 126 intptr_t* real_unextended_sp = (intptr_t*)f.at_relative_or_null(frame::interpreter_frame_last_sp_offset); 127 if (real_unextended_sp != nullptr) { 128 f.set_unextended_sp(real_unextended_sp); // can be null at a safepoint 129 } 130 } 131 132 inline void FreezeBase::prepare_freeze_interpreted_top_frame(const frame& f) { 133 assert(*f.addr_at(frame::interpreter_frame_last_sp_offset) == 0, "should be null for top frame"); 134 intptr_t* lspp = f.addr_at(frame::interpreter_frame_last_sp_offset); 135 *lspp = f.unextended_sp() - f.fp(); 136 } 137 138 inline void FreezeBase::relativize_interpreted_frame_metadata(const frame& f, const frame& hf) { 139 assert(hf.fp() == hf.unextended_sp() + (f.fp() - f.unextended_sp()), ""); 140 assert((f.at(frame::interpreter_frame_last_sp_offset) != 0) 141 || (f.unextended_sp() == f.sp()), ""); 142 assert(f.fp() > (intptr_t*)f.at_relative(frame::interpreter_frame_initial_sp_offset), ""); 143 144 // on AARCH64, we may insert padding between the locals and the rest of the frame 145 // (see TemplateInterpreterGenerator::generate_normal_entry, and AbstractInterpreter::layout_activation) 146 // because we freeze the padding word (see recurse_freeze_interpreted_frame) in order to keep the same relativized 147 // locals value, we don't need to change the locals value here. 148 149 // Make sure that last_sp is already relativized. 150 assert((intptr_t*)hf.at_relative(frame::interpreter_frame_last_sp_offset) == hf.unextended_sp(), ""); 151 152 // Make sure that monitor_block_top is already relativized. 153 assert(hf.at_absolute(frame::interpreter_frame_monitor_block_top_offset) <= frame::interpreter_frame_initial_sp_offset, ""); 154 155 // extended_sp is already relativized by TemplateInterpreterGenerator::generate_normal_entry or 156 // AbstractInterpreter::layout_activation 157 158 // The interpreter native wrapper code adds space in the stack equal to size_of_parameters() 159 // after the fixed part of the frame. For wait0 this is equal to 3 words (this + long parameter). 160 // We adjust by this size since otherwise the saved last sp will be less than the extended_sp. 161 DEBUG_ONLY(Method* m = hf.interpreter_frame_method();) 162 DEBUG_ONLY(int extra_space = m->is_object_wait0() ? m->size_of_parameters() : 0;) 163 164 assert((hf.fp() - hf.unextended_sp()) == (f.fp() - f.unextended_sp()), ""); 165 assert(hf.unextended_sp() == (intptr_t*)hf.at(frame::interpreter_frame_last_sp_offset), ""); 166 assert(hf.unextended_sp() <= (intptr_t*)hf.at(frame::interpreter_frame_initial_sp_offset), ""); 167 assert(hf.unextended_sp() + extra_space > (intptr_t*)hf.at(frame::interpreter_frame_extended_sp_offset), ""); 168 assert(hf.fp() > (intptr_t*)hf.at(frame::interpreter_frame_initial_sp_offset), ""); 169 assert(hf.fp() <= (intptr_t*)hf.at(frame::interpreter_frame_locals_offset), ""); 170 } 171 172 inline void FreezeBase::set_top_frame_metadata_pd(const frame& hf) { 173 stackChunkOop chunk = _cont.tail(); 174 assert(chunk->is_in_chunk(hf.sp() - 1), ""); 175 assert(chunk->is_in_chunk(hf.sp() - frame::sender_sp_offset), ""); 176 177 *(hf.sp() - 1) = (intptr_t)hf.pc(); 178 179 intptr_t* fp_addr = hf.sp() - frame::sender_sp_offset; 180 *fp_addr = hf.is_interpreted_frame() ? (intptr_t)(hf.fp() - fp_addr) 181 : (intptr_t)hf.fp(); 182 } 183 184 inline void FreezeBase::patch_pd(frame& hf, const frame& caller) { 185 if (caller.is_interpreted_frame()) { 186 assert(!caller.is_empty(), ""); 187 patch_callee_link_relative(caller, caller.fp()); 188 } else { 189 // If we're the bottom-most frame frozen in this freeze, the caller might have stayed frozen in the chunk, 190 // and its oop-containing fp fixed. We've now just overwritten it, so we must patch it back to its value 191 // as read from the chunk. 192 patch_callee_link(caller, caller.fp()); 193 } 194 } 195 196 //////// Thaw 197 198 // Fast path 199 200 inline void ThawBase::prefetch_chunk_pd(void* start, int size) { 201 size <<= LogBytesPerWord; 202 Prefetch::read(start, size); 203 Prefetch::read(start, size - 64); 204 } 205 206 template <typename ConfigT> 207 inline void Thaw<ConfigT>::patch_caller_links(intptr_t* sp, intptr_t* bottom) { 208 // Fast path depends on !PreserveFramePointer. See can_thaw_fast(). 209 assert(!PreserveFramePointer, "Frame pointers need to be fixed"); 210 } 211 212 // Slow path 213 214 inline frame ThawBase::new_entry_frame() { 215 intptr_t* sp = _cont.entrySP(); 216 return frame(sp, sp, _cont.entryFP(), _cont.entryPC()); // TODO PERF: This finds code blob and computes deopt state 217 } 218 219 template<typename FKind> frame ThawBase::new_stack_frame(const frame& hf, frame& caller, bool bottom) { 220 assert(FKind::is_instance(hf), ""); 221 // The values in the returned frame object will be written into the callee's stack in patch. 222 223 if (FKind::interpreted) { 224 intptr_t* heap_sp = hf.unextended_sp(); 225 // If caller is interpreted it already made room for the callee arguments 226 int overlap = caller.is_interpreted_frame() ? ContinuationHelper::InterpretedFrame::stack_argsize(hf) : 0; 227 const int fsize = (int)(ContinuationHelper::InterpretedFrame::frame_bottom(hf) - hf.unextended_sp() - overlap); 228 intptr_t* frame_sp = caller.unextended_sp() - fsize; 229 intptr_t* fp = frame_sp + (hf.fp() - heap_sp); 230 if ((intptr_t)fp % frame::frame_alignment != 0) { 231 fp--; 232 frame_sp--; 233 log_develop_trace(continuations)("Adding internal interpreted frame alignment"); 234 } 235 DEBUG_ONLY(intptr_t* unextended_sp = fp + *hf.addr_at(frame::interpreter_frame_last_sp_offset);) 236 assert(frame_sp == unextended_sp, ""); 237 caller.set_sp(fp + frame::sender_sp_offset); 238 frame f(frame_sp, frame_sp, fp, hf.pc()); 239 // we need to set the locals so that the caller of new_stack_frame() can call 240 // ContinuationHelper::InterpretedFrame::frame_bottom 241 // copy relativized locals from the heap frame 242 *f.addr_at(frame::interpreter_frame_locals_offset) = *hf.addr_at(frame::interpreter_frame_locals_offset); 243 assert((intptr_t)f.fp() % frame::frame_alignment == 0, ""); 244 return f; 245 } else { 246 int fsize = FKind::size(hf); 247 intptr_t* frame_sp = caller.unextended_sp() - fsize; 248 if (bottom || caller.is_interpreted_frame()) { 249 int argsize = FKind::stack_argsize(hf); 250 251 fsize += argsize; 252 frame_sp -= argsize; 253 caller.set_sp(caller.sp() - argsize); 254 assert(caller.sp() == frame_sp + (fsize-argsize), ""); 255 256 frame_sp = align(hf, frame_sp, caller, bottom); 257 } 258 259 assert(hf.cb() != nullptr, ""); 260 assert(hf.oop_map() != nullptr, ""); 261 intptr_t* fp; 262 if (PreserveFramePointer) { 263 // we need to recreate a "real" frame pointer, pointing into the stack 264 fp = frame_sp + FKind::size(hf) - frame::sender_sp_offset; 265 } else { 266 fp = FKind::stub || FKind::native 267 ? frame_sp + fsize - frame::sender_sp_offset // fp always points to the address below the pushed return pc. We need correct address. 268 : *(intptr_t**)(hf.sp() - frame::sender_sp_offset); // we need to re-read fp because it may be an oop and we might have fixed the frame. 269 } 270 return frame(frame_sp, frame_sp, fp, hf.pc(), hf.cb(), hf.oop_map(), false); // TODO PERF : this computes deopt state; is it necessary? 271 } 272 } 273 274 inline intptr_t* ThawBase::align(const frame& hf, intptr_t* frame_sp, frame& caller, bool bottom) { 275 #ifdef _LP64 276 if (((intptr_t)frame_sp & 0xf) != 0) { 277 assert(caller.is_interpreted_frame() || (bottom && hf.compiled_frame_stack_argsize() % 2 != 0), ""); 278 frame_sp--; 279 caller.set_sp(caller.sp() - 1); 280 } 281 assert(is_aligned(frame_sp, frame::frame_alignment), ""); 282 #endif 283 284 return frame_sp; 285 } 286 287 inline void ThawBase::patch_pd(frame& f, const frame& caller) { 288 patch_callee_link(caller, caller.fp()); 289 } 290 291 inline void ThawBase::patch_pd(frame& f, intptr_t* caller_sp) { 292 intptr_t* fp = caller_sp - frame::sender_sp_offset; 293 patch_callee_link(f, fp); 294 } 295 296 inline void ThawBase::fix_native_wrapper_return_pc_pd(frame& top) { 297 // Nothing to do since the last pc saved before making the call to 298 // JVM_MonitorWait() was already set to the correct resume pc. Just 299 // do some sanity check. 300 #ifdef ASSERT 301 Method* method = top.is_interpreted_frame() ? top.interpreter_frame_method() : CodeCache::find_blob(top.pc())->as_nmethod()->method(); 302 assert(method->is_object_wait0(), ""); 303 #endif 304 } 305 306 inline intptr_t* ThawBase::push_resume_adapter(frame& top) { 307 intptr_t* sp = top.sp(); 308 CodeBlob* cb = top.cb(); 309 310 #ifdef ASSERT 311 RegisterMap map(JavaThread::current(), 312 RegisterMap::UpdateMap::skip, 313 RegisterMap::ProcessFrames::skip, 314 RegisterMap::WalkContinuation::skip); 315 frame caller = top.sender(&map); 316 intptr_t link_addr = (intptr_t)ContinuationHelper::Frame::callee_link_address(caller); 317 assert(sp[-2] == link_addr, "wrong link address: " INTPTR_FORMAT " != " INTPTR_FORMAT, sp[-2], link_addr); 318 #endif 319 320 if (top.is_interpreted_frame()) { 321 intptr_t* fp = sp - frame::sender_sp_offset; 322 address pc = Interpreter::cont_resume_interpreter_adapter(); 323 324 sp -= frame::metadata_words; 325 *(address*)(sp - frame::sender_sp_ret_address_offset()) = pc; 326 *(intptr_t**)(sp - frame::sender_sp_offset) = fp; 327 328 log_develop_trace(continuations, preempt)("push_resume_adapter(): initial sp: " INTPTR_FORMAT " final sp: " INTPTR_FORMAT 329 " fp: " INTPTR_FORMAT, p2i(sp + frame::metadata_words), p2i(sp), p2i(fp)); 330 } else if (cb->frame_size() == 2) { 331 // C2 runtime stub case. For aarch64 the real size of the c2 runtime stub is 2 words bigger 332 // than what we think, i.e. size is 4. This is because the _last_Java_sp is not set to the 333 // sp right before making the call to the VM, but rather it is artificially set 2 words above 334 // this real sp so that we can store the return address at last_Java_sp[-1], and keep this 335 // property where we can retrieve the last_Java_pc from the last_Java_sp. But that means that 336 // once we return to the runtime stub, the code will adjust sp according to this real size. 337 // So we must adjust the frame size back here and we copy lr/rfp again. 338 sp -= 2; 339 sp[-2] = sp[0]; 340 sp[-1] = sp[1]; 341 342 log_develop_trace(continuations, preempt)("adjusted sp for c2 runtime stub, initial sp: " INTPTR_FORMAT " final sp: " INTPTR_FORMAT 343 " fp: " INTPTR_FORMAT, p2i(sp + frame::metadata_words), p2i(sp), sp[-2]); 344 } 345 return sp; 346 } 347 348 inline intptr_t* ThawBase::push_cleanup_continuation() { 349 frame enterSpecial = new_entry_frame(); 350 intptr_t* sp = enterSpecial.sp(); 351 352 sp[-1] = (intptr_t)ContinuationEntry::cleanup_pc(); 353 sp[-2] = (intptr_t)enterSpecial.fp(); 354 355 log_develop_trace(continuations, preempt)("push_cleanup_continuation initial sp: " INTPTR_FORMAT " final sp: " INTPTR_FORMAT, p2i(sp + 2 * frame::metadata_words), p2i(sp)); 356 return sp; 357 } 358 359 inline void ThawBase::derelativize_interpreted_frame_metadata(const frame& hf, const frame& f) { 360 // Make sure that last_sp is kept relativized. 361 assert((intptr_t*)f.at_relative(frame::interpreter_frame_last_sp_offset) == f.unextended_sp(), ""); 362 363 // Make sure that monitor_block_top is still relativized. 364 assert(f.at_absolute(frame::interpreter_frame_monitor_block_top_offset) <= frame::interpreter_frame_initial_sp_offset, ""); 365 366 // Make sure that extended_sp is kept relativized. 367 DEBUG_ONLY(Method* m = hf.interpreter_frame_method();) 368 DEBUG_ONLY(int extra_space = m->is_object_wait0() ? m->size_of_parameters() : 0;) // see comment in relativize_interpreted_frame_metadata() 369 assert((intptr_t*)f.at_relative(frame::interpreter_frame_extended_sp_offset) < f.unextended_sp() + extra_space, ""); 370 } 371 372 #endif // CPU_AARCH64_CONTINUATIONFREEZETHAW_AARCH64_INLINE_HPP