/*
 * Copyright (c) 2019, 2023, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef CPU_AARCH64_CONTINUATIONFREEZETHAW_AARCH64_INLINE_HPP
#define CPU_AARCH64_CONTINUATIONFREEZETHAW_AARCH64_INLINE_HPP

#include "code/codeBlob.inline.hpp"
#include "oops/stackChunkOop.inline.hpp"
#include "runtime/frame.hpp"
#include "runtime/frame.inline.hpp"

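// These helpers patch the "callee link", i.e. the saved-rfp slot in the frame of
// f's callee, which holds f's frame pointer. The first form stores an absolute
// address; the second stores an offset relative to the slot itself, as needed
// for frames stored in a heap chunk, which the GC may move.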
inline void patch_callee_link(const frame& f, intptr_t* fp) {
  DEBUG_ONLY(intptr_t* orig = *ContinuationHelper::Frame::callee_link_address(f));
  *ContinuationHelper::Frame::callee_link_address(f) = fp;
}

inline void patch_callee_link_relative(const frame& f, intptr_t* fp) {
  intptr_t* la = (intptr_t*)ContinuationHelper::Frame::callee_link_address(f);
  intptr_t new_value = fp - la;
  *la = new_value;
}

////// Freeze

// Fast path

inline void FreezeBase::patch_stack_pd(intptr_t* frame_sp, intptr_t* heap_sp) {
  // copy the spilled fp from the heap to the stack
  *(frame_sp - frame::sender_sp_offset) = *(heap_sp - frame::sender_sp_offset);
}

// Slow path
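// Computes the physical sender of frame f during freezing. For interpreted
// frames the sender's sp/fp/pc are read from the saved interpreter state; for
// compiled frames they are derived from the saved-link slot and the return
// address stored just below the sender's sp.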
template<typename FKind>
inline frame FreezeBase::sender(const frame& f) {
  assert(FKind::is_instance(f), "");
  if (FKind::interpreted) {
    return frame(f.sender_sp(), f.interpreter_frame_sender_sp(), f.link(), f.sender_pc());
  }
  intptr_t** link_addr = link_address<FKind>(f);

  intptr_t* sender_sp = (intptr_t*)(link_addr + frame::sender_sp_offset);
  address sender_pc = ContinuationHelper::return_address_at(sender_sp - 1);
  assert(sender_sp != f.sp(), "must have changed");

  int slot = 0;
  CodeBlob* sender_cb = CodeCache::find_blob_and_oopmap(sender_pc, slot);
  return sender_cb != nullptr
    ? frame(sender_sp, sender_sp, *link_addr, sender_pc, sender_cb,
            slot == -1 ? nullptr : sender_cb->oop_map_for_slot(slot, sender_pc),
            false /* on_heap */)
    : frame(sender_sp, sender_sp, *link_addr, sender_pc);
}

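// Creates the heap (chunk) copy of frame f on top of the frozen caller. The sp
// used here is really the frame's unextended_sp; interpreted frames keep their
// fp-relative metadata relativized so the frame stays valid if the chunk moves.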
template<typename FKind>
frame FreezeBase::new_heap_frame(frame& f, frame& caller) {
  assert(FKind::is_instance(f), "");
  assert(!caller.is_interpreted_frame()
    || caller.unextended_sp() == (intptr_t*)caller.at(frame::interpreter_frame_last_sp_offset), "");

  intptr_t *sp, *fp; // sp is really our unextended_sp
  if (FKind::interpreted) {
    assert((intptr_t*)f.at(frame::interpreter_frame_last_sp_offset) == nullptr
      || f.unextended_sp() == (intptr_t*)f.at_relative(frame::interpreter_frame_last_sp_offset), "");
    intptr_t locals_offset = *f.addr_at(frame::interpreter_frame_locals_offset);
    // If caller.is_empty(), i.e. we're freezing into an empty chunk, then we set
    // the chunk's argsize in finalize_freeze and make room for it above the unextended_sp
    bool overlap_caller = caller.is_interpreted_frame() || caller.is_empty();
    fp = caller.unextended_sp() - 1 - locals_offset + (overlap_caller ? ContinuationHelper::InterpretedFrame::stack_argsize(f) : 0);
    sp = fp - (f.fp() - f.unextended_sp());
    assert(sp <= fp, "");
    assert(fp <= caller.unextended_sp(), "");
    caller.set_sp(fp + frame::sender_sp_offset);

    assert(_cont.tail()->is_in_chunk(sp), "");

    frame hf(sp, sp, fp, f.pc(), nullptr, nullptr, true /* on_heap */);
    // copy relativized locals from the stack frame
    *hf.addr_at(frame::interpreter_frame_locals_offset) = locals_offset;
    return hf;
  } else {
    // We need to re-read fp out of the frame because it may be an oop and we might have
    // had a safepoint in finalize_freeze, after constructing f.
    fp = *(intptr_t**)(f.sp() - frame::sender_sp_offset);

    int fsize = FKind::size(f);
    sp = caller.unextended_sp() - fsize;
    if (caller.is_interpreted_frame()) {
      // If the caller is interpreted, our stack args are not supposed to overlap with it,
      // so we make more room by moving sp down by argsize
      int argsize = FKind::stack_argsize(f);
      sp -= argsize;
    }
    caller.set_sp(sp + fsize);

    assert(_cont.tail()->is_in_chunk(sp), "");

    return frame(sp, sp, fp, f.pc(), nullptr, nullptr, true /* on_heap */);
  }
}

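// Restores the true unextended_sp of an interpreted frame from the saved
// last_sp before freezing, since the frame object may have been constructed
// with unextended_sp == sp.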
void FreezeBase::adjust_interpreted_frame_unextended_sp(frame& f) {
  assert((f.at(frame::interpreter_frame_last_sp_offset) != 0) || (f.unextended_sp() == f.sp()), "");
  intptr_t* real_unextended_sp = (intptr_t*)f.at_relative_or_null(frame::interpreter_frame_last_sp_offset); // can be null at a safepoint
  if (real_unextended_sp != nullptr) {
    f.set_unextended_sp(real_unextended_sp);
  }
}

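// The top frame has a null last_sp; record the unextended_sp as an offset from
// fp so that the value survives the copy into the (movable) chunk.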
inline void FreezeBase::prepare_freeze_interpreted_top_frame(const frame& f) {
  assert(*f.addr_at(frame::interpreter_frame_last_sp_offset) == 0, "should be null for top frame");
  intptr_t* lspp = f.addr_at(frame::interpreter_frame_last_sp_offset);
  *lspp = f.unextended_sp() - f.fp();
}

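// Verifies that all fp-relative interpreter metadata (last_sp, monitor_block_top,
// extended_sp, locals) in the heap frame hf is already relativized; on aarch64
// no additional conversion is needed here.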
inline void FreezeBase::relativize_interpreted_frame_metadata(const frame& f, const frame& hf) {
  assert(hf.fp() == hf.unextended_sp() + (f.fp() - f.unextended_sp()), "");
  assert((f.at(frame::interpreter_frame_last_sp_offset) != 0)
    || (f.unextended_sp() == f.sp()), "");
  assert(f.fp() > (intptr_t*)f.at_relative(frame::interpreter_frame_initial_sp_offset), "");

  // On AArch64 we may insert padding between the locals and the rest of the frame
  // (see TemplateInterpreterGenerator::generate_normal_entry and AbstractInterpreter::layout_activation).
  // Because we freeze the padding word (see recurse_freeze_interpreted_frame) in order to keep
  // the same relativized locals value, we don't need to change the locals value here.

  // Make sure that last_sp is already relativized.
  assert((intptr_t*)hf.at_relative(frame::interpreter_frame_last_sp_offset) == hf.unextended_sp(), "");

  // Make sure that monitor_block_top is already relativized.
  assert(hf.at_absolute(frame::interpreter_frame_monitor_block_top_offset) <= frame::interpreter_frame_initial_sp_offset, "");

  // extended_sp is already relativized by TemplateInterpreterGenerator::generate_normal_entry or
  // AbstractInterpreter::layout_activation

  assert((hf.fp() - hf.unextended_sp()) == (f.fp() - f.unextended_sp()), "");
  assert(hf.unextended_sp() == (intptr_t*)hf.at(frame::interpreter_frame_last_sp_offset), "");
  assert(hf.unextended_sp() <= (intptr_t*)hf.at(frame::interpreter_frame_initial_sp_offset), "");
  assert(hf.unextended_sp() >  (intptr_t*)hf.at(frame::interpreter_frame_extended_sp_offset), "");
  assert(hf.fp()            >  (intptr_t*)hf.at(frame::interpreter_frame_initial_sp_offset), "");
  assert(hf.fp()            <= (intptr_t*)hf.at(frame::interpreter_frame_locals_offset), "");
}

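// Writes the metadata words just below the topmost heap frame's sp: the return
// pc, and the saved fp (stored relativized for interpreted frames, absolute for
// compiled ones).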
inline void FreezeBase::set_top_frame_metadata_pd(const frame& hf) {
  stackChunkOop chunk = _cont.tail();
  assert(chunk->is_in_chunk(hf.sp() - 1), "");
  assert(chunk->is_in_chunk(hf.sp() - frame::sender_sp_offset), "");

  *(hf.sp() - 1) = (intptr_t)hf.pc();

  intptr_t* fp_addr = hf.sp() - frame::sender_sp_offset;
  *fp_addr = hf.is_interpreted_frame() ? (intptr_t)(hf.fp() - fp_addr)
                                       : (intptr_t)hf.fp();
}

inline void FreezeBase::patch_pd(frame& hf, const frame& caller) {
  if (caller.is_interpreted_frame()) {
    assert(!caller.is_empty(), "");
    patch_callee_link_relative(caller, caller.fp());
  } else {
    // If we're the bottom-most frame frozen in this freeze, the caller might have stayed frozen
    // in the chunk, with its oop-containing fp already fixed. We've just overwritten that slot,
    // so we must patch it back to the value read from the chunk.
    patch_callee_link(caller, caller.fp());
  }
}

////// Thaw

// Fast path

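// size is given in words; Prefetch::read(loc, interval) touches the cache line
// at loc + interval, so these two calls warm the last two cache lines of the
// chunk region about to be copied.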
inline void ThawBase::prefetch_chunk_pd(void* start, int size) {
  size <<= LogBytesPerWord;
  Prefetch::read(start, size);
  Prefetch::read(start, size - 64);
}

template <typename ConfigT>
inline void Thaw<ConfigT>::patch_caller_links(intptr_t* sp, intptr_t* bottom) {
  // Fast path depends on !PreserveFramePointer. See can_thaw_fast().
  assert(!PreserveFramePointer, "Frame pointers need to be fixed");
}

// Slow path

inline frame ThawBase::new_entry_frame() {
  intptr_t* sp = _cont.entrySP();
  return frame(sp, sp, _cont.entryFP(), _cont.entryPC()); // TODO PERF: This finds code blob and computes deopt state
}

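// Mirror of new_heap_frame: materializes the stack copy of heap frame hf on top
// of the (already thawed) caller, undoing the overlap and alignment adjustments
// made at freeze time.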
template<typename FKind> frame ThawBase::new_stack_frame(const frame& hf, frame& caller, bool bottom) {
  assert(FKind::is_instance(hf), "");
  // The values in the returned frame object will be written into the callee's stack in patch.

  if (FKind::interpreted) {
    intptr_t* heap_sp = hf.unextended_sp();
    // If the caller is interpreted, it already made room for the callee arguments
    int overlap = caller.is_interpreted_frame() ? ContinuationHelper::InterpretedFrame::stack_argsize(hf) : 0;
    const int fsize = (int)(ContinuationHelper::InterpretedFrame::frame_bottom(hf) - hf.unextended_sp() - overlap);
    const int locals = hf.interpreter_frame_method()->max_locals();
    intptr_t* frame_sp = caller.unextended_sp() - fsize;
    intptr_t* fp = frame_sp + (hf.fp() - heap_sp);
    if ((intptr_t)fp % frame::frame_alignment != 0) {
      fp--;
      frame_sp--;
      log_develop_trace(continuations)("Adding internal interpreted frame alignment");
    }
    DEBUG_ONLY(intptr_t* unextended_sp = fp + *hf.addr_at(frame::interpreter_frame_last_sp_offset);)
    assert(frame_sp == unextended_sp, "");
    caller.set_sp(fp + frame::sender_sp_offset);
    frame f(frame_sp, frame_sp, fp, hf.pc());
    // We need to set the locals so that the caller of new_stack_frame() can call
    // ContinuationHelper::InterpretedFrame::frame_bottom.
    // Copy the relativized locals from the heap frame.
    *f.addr_at(frame::interpreter_frame_locals_offset) = *hf.addr_at(frame::interpreter_frame_locals_offset);
    assert((intptr_t)f.fp() % frame::frame_alignment == 0, "");
    return f;
  } else {
    int fsize = FKind::size(hf);
    intptr_t* frame_sp = caller.unextended_sp() - fsize;
    if (bottom || caller.is_interpreted_frame()) {
      int argsize = FKind::stack_argsize(hf);

      fsize += argsize;
      frame_sp -= argsize;
      caller.set_sp(caller.sp() - argsize);
      assert(caller.sp() == frame_sp + (fsize - argsize), "");

      frame_sp = align(hf, frame_sp, caller, bottom);
    }

    assert(hf.cb() != nullptr, "");
    assert(hf.oop_map() != nullptr, "");
    intptr_t* fp;
    if (PreserveFramePointer) {
      // we need to recreate a "real" frame pointer, pointing into the stack
      fp = frame_sp + FKind::size(hf) - frame::sender_sp_offset;
    } else {
      fp = FKind::stub
        ? frame_sp + fsize - frame::sender_sp_offset // fp always points just below the pushed return pc; we need the correct address
        : *(intptr_t**)(hf.sp() - frame::sender_sp_offset); // we need to re-read fp because it may be an oop and we might have fixed the frame
    }
    return frame(frame_sp, frame_sp, fp, hf.pc(), hf.cb(), hf.oop_map(), false); // TODO PERF: this computes deopt state; is it necessary?
  }
}

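// The copied frame may land on a 16-byte-misaligned sp (e.g. an odd number of
// argument slots at the bottom frame); push sp and the caller's sp down one
// word to restore the AArch64 stack alignment.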
inline intptr_t* ThawBase::align(const frame& hf, intptr_t* frame_sp, frame& caller, bool bottom) {
#ifdef _LP64
  if (((intptr_t)frame_sp & 0xf) != 0) {
    assert(caller.is_interpreted_frame() || (bottom && hf.compiled_frame_stack_argsize() % 2 != 0), "");
    frame_sp--;
    caller.set_sp(caller.sp() - 1);
  }
  assert(is_aligned(frame_sp, frame::frame_alignment), "");
#endif

  return frame_sp;
}

inline void ThawBase::patch_pd(frame& f, const frame& caller) {
  patch_callee_link(caller, caller.fp());
}

inline void ThawBase::patch_pd(frame& f, intptr_t* caller_sp) {
  Unimplemented();
}

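// Sets up the metadata (return pc + saved fp) below the preempted top frame so
// that execution resumes in the appropriate rerun adapter (interpreter or
// compiler) when the continuation is thawed again.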
inline intptr_t* ThawBase::push_preempt_rerun_adapter(frame top, bool is_interpreted_frame) {
  intptr_t* sp = top.sp();
  CodeBlob* cb = top.cb();
  if (!is_interpreted_frame && cb->frame_size() == 2) {
    // C2 runtime stub case. For aarch64 the real size of the C2 runtime stub is 2 words bigger
    // than what we think, i.e. 4 instead of 2. This is because _last_Java_sp is not set to the
    // sp right before making the call to the VM, but rather is artificially set 2 words above
    // the real sp, so that we can store the return address at last_Java_sp[-1] and keep the
    // property that last_Java_pc can be retrieved from last_Java_sp. But that means that once
    // we return to the runtime stub, the code will adjust sp according to the real size, so we
    // must adjust the frame size back here. We just copy lr/rfp again. These 2 top words will
    // be the ones popped in generate_cont_preempt_rerun_compiler_adapter(). The other 2 words
    // will just be discarded once back in the runtime stub (add sp, sp, #0x10).
    sp -= 2;
    sp[-2] = sp[0];
    sp[-1] = sp[1];
  }

  intptr_t* fp = sp - frame::sender_sp_offset;
  address pc = is_interpreted_frame ? Interpreter::cont_preempt_rerun_interpreter_adapter()
                                    : StubRoutines::cont_preempt_rerun_compiler_adapter();

  sp -= frame::metadata_words;
  *(address*)(sp - frame::sender_sp_ret_address_offset()) = pc;
  *(intptr_t**)(sp - frame::sender_sp_offset) = fp;

  log_develop_trace(continuations, preempt)("push_preempt_rerun_%s_adapter() initial sp: " INTPTR_FORMAT " final sp: " INTPTR_FORMAT " fp: " INTPTR_FORMAT,
    is_interpreted_frame ? "interpreter" : "safepointblob", p2i(sp + frame::metadata_words), p2i(sp), p2i(fp));
  return sp;
}

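// Pushes two metadata frames on top of the enterSpecial frame: first a return
// barrier frame, then a frame holding the ObjectMonitor* (duplicated for
// alignment) with a return pc into the monitorenter_redo stub, so the monitor
// acquisition is retried before the continuation is resumed.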
inline intptr_t* ThawBase::push_preempt_monitorenter_redo(stackChunkOop chunk) {
  frame enterSpecial = new_entry_frame();
  intptr_t* sp = enterSpecial.sp();

  // First push the return barrier frame
  sp -= frame::metadata_words;
  sp[1] = (intptr_t)StubRoutines::cont_returnBarrier();
  sp[0] = (intptr_t)enterSpecial.fp();

  // Now push the ObjectMonitor*
  sp -= frame::metadata_words;
  sp[1] = (intptr_t)chunk->objectMonitor(); // word repeated only for alignment
  sp[0] = (intptr_t)chunk->objectMonitor();

  // Finally arrange to return to the monitorenter_redo stub
  sp[-1] = (intptr_t)StubRoutines::cont_preempt_monitorenter_redo();
  sp[-2] = (intptr_t)enterSpecial.fp();
  log_develop_trace(continuations, preempt)("push_preempt_monitorenter_redo initial sp: " INTPTR_FORMAT " final sp: " INTPTR_FORMAT, p2i(sp + 2 * frame::metadata_words), p2i(sp));
  return sp;
}

inline void ThawBase::derelativize_interpreted_frame_metadata(const frame& hf, const frame& f) {
  // Make sure that last_sp is kept relativized.
  assert((intptr_t*)f.at_relative(frame::interpreter_frame_last_sp_offset) == f.unextended_sp(), "");

  // Make sure that monitor_block_top is still relativized.
  assert(f.at_absolute(frame::interpreter_frame_monitor_block_top_offset) <= frame::interpreter_frame_initial_sp_offset, "");

  // Make sure that extended_sp is kept relativized.
  assert((intptr_t*)f.at_relative(frame::interpreter_frame_extended_sp_offset) < f.unextended_sp(), "");
}

#endif // CPU_AARCH64_CONTINUATIONFREEZETHAW_AARCH64_INLINE_HPP