1 /*
   2  * Copyright (c) 2003, 2025, Oracle and/or its affiliates. All rights reserved.
   3  * Copyright (c) 2014, 2021, Red Hat Inc. All rights reserved.
   4  * Copyright (c) 2021, Azul Systems, Inc. All rights reserved.
   5  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   6  *
   7  * This code is free software; you can redistribute it and/or modify it
   8  * under the terms of the GNU General Public License version 2 only, as
   9  * published by the Free Software Foundation.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  *
  25  */
  26 
  27 #include "asm/macroAssembler.hpp"
  28 #include "asm/macroAssembler.inline.hpp"
  29 #include "code/codeCache.hpp"
  30 #include "code/compiledIC.hpp"
  31 #include "code/debugInfoRec.hpp"
  32 #include "code/vtableStubs.hpp"
  33 #include "compiler/oopMap.hpp"
  34 #include "gc/shared/barrierSetAssembler.hpp"
  35 #include "interpreter/interpreter.hpp"
  36 #include "interpreter/interp_masm.hpp"
  37 #include "logging/log.hpp"
  38 #include "memory/resourceArea.hpp"
  39 #include "nativeInst_aarch64.hpp"
  40 #include "oops/klass.inline.hpp"
  41 #include "oops/method.inline.hpp"
  42 #include "prims/methodHandles.hpp"
  43 #include "runtime/continuation.hpp"
  44 #include "runtime/continuationEntry.inline.hpp"
  45 #include "runtime/globals.hpp"
  46 #include "runtime/jniHandles.hpp"
  47 #include "runtime/safepointMechanism.hpp"
  48 #include "runtime/sharedRuntime.hpp"
  49 #include "runtime/signature.hpp"
  50 #include "runtime/stubRoutines.hpp"
  51 #include "runtime/timerTrace.hpp"
  52 #include "runtime/vframeArray.hpp"
  53 #include "utilities/align.hpp"
  54 #include "utilities/formatBuffer.hpp"
  55 #include "vmreg_aarch64.inline.hpp"
  56 #ifdef COMPILER1
  57 #include "c1/c1_Runtime1.hpp"
  58 #endif
  59 #ifdef COMPILER2
  60 #include "adfiles/ad_aarch64.hpp"
  61 #include "opto/runtime.hpp"
  62 #endif
  63 #if INCLUDE_JVMCI
  64 #include "jvmci/jvmciJavaClasses.hpp"
  65 #endif
  66 
  67 #define __ masm->
  68 
  69 #ifdef PRODUCT
  70 #define BLOCK_COMMENT(str) /* nothing */
  71 #else
  72 #define BLOCK_COMMENT(str) __ block_comment(str)
  73 #endif
  74 
  75 const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size;
  76 
  77 // FIXME -- this is used by C1
  78 class RegisterSaver {
  79   const bool _save_vectors;
  80  public:
  81   RegisterSaver(bool save_vectors) : _save_vectors(save_vectors) {}
  82 
  83   OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words);
  84   void restore_live_registers(MacroAssembler* masm);
  85 
  86   // Offsets into the register save area
  87   // Used by deoptimization when it is managing result register
  88   // values on its own
  89 
  90   int reg_offset_in_bytes(Register r);
  91   int r0_offset_in_bytes()    { return reg_offset_in_bytes(r0); }
  92   int rscratch1_offset_in_bytes()    { return reg_offset_in_bytes(rscratch1); }
  93   int v0_offset_in_bytes();
  94 
  // Total stack size in bytes for saving SVE predicate registers.
  96   int total_sve_predicate_in_bytes();
  97 
  98   // Capture info about frame layout
  99   // Note this is only correct when not saving full vectors.
 100   enum layout {
 101                 fpu_state_off = 0,
 102                 fpu_state_end = fpu_state_off + FPUStateSizeInWords - 1,
 103                 // The frame sender code expects that rfp will be in
 104                 // the "natural" place and will override any oopMap
 105                 // setting for it. We must therefore force the layout
 106                 // so that it agrees with the frame sender code.
 107                 r0_off = fpu_state_off + FPUStateSizeInWords,
 108                 rfp_off = r0_off + (Register::number_of_registers - 2) * Register::max_slots_per_register,
 109                 return_off = rfp_off + Register::max_slots_per_register,      // slot for return address
 110                 reg_save_size = return_off + Register::max_slots_per_register};
 111 
 112 };
 113 
 114 int RegisterSaver::reg_offset_in_bytes(Register r) {
 115   // The integer registers are located above the floating point
 116   // registers in the stack frame pushed by save_live_registers() so the
 117   // offset depends on whether we are saving full vectors, and whether
 118   // those vectors are NEON or SVE.
 119 
 120   int slots_per_vect = FloatRegister::save_slots_per_register;
 121 
 122 #if COMPILER2_OR_JVMCI
 123   if (_save_vectors) {
 124     slots_per_vect = FloatRegister::slots_per_neon_register;
 125 
 126 #ifdef COMPILER2
 127     if (Matcher::supports_scalable_vector()) {
 128       slots_per_vect = Matcher::scalable_vector_reg_size(T_FLOAT);
 129     }
 130 #endif
 131   }
 132 #endif
 133 
 134   int r0_offset = v0_offset_in_bytes() + (slots_per_vect * FloatRegister::number_of_registers) * BytesPerInt;
 135   return r0_offset + r->encoding() * wordSize;
 136 }
 137 
 138 int RegisterSaver::v0_offset_in_bytes() {
  // The floating point registers are located above the predicate registers, if
  // any are present in the stack frame pushed by save_live_registers(). So the
  // offset depends on the total size of the saved predicate registers.
 142   return (total_sve_predicate_in_bytes() / VMRegImpl::stack_slot_size) * BytesPerInt;
 143 }
 144 
 145 int RegisterSaver::total_sve_predicate_in_bytes() {
 146 #ifdef COMPILER2
 147   if (_save_vectors && Matcher::supports_scalable_vector()) {
 148     return (Matcher::scalable_vector_reg_size(T_BYTE) >> LogBitsPerByte) *
 149            PRegister::number_of_registers;
 150   }
 151 #endif
 152   return 0;
 153 }
 154 
 155 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) {
 156   bool use_sve = false;
 157   int sve_vector_size_in_bytes = 0;
 158   int sve_vector_size_in_slots = 0;
 159   int sve_predicate_size_in_slots = 0;
 160   int total_predicate_in_bytes = total_sve_predicate_in_bytes();
 161   int total_predicate_in_slots = total_predicate_in_bytes / VMRegImpl::stack_slot_size;
 162 
 163 #ifdef COMPILER2
 164   use_sve = Matcher::supports_scalable_vector();
 165   if (use_sve) {
 166     sve_vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE);
 167     sve_vector_size_in_slots = Matcher::scalable_vector_reg_size(T_FLOAT);
 168     sve_predicate_size_in_slots = Matcher::scalable_predicate_reg_slots();
 169   }
 170 #endif
 171 
 172 #if COMPILER2_OR_JVMCI
 173   if (_save_vectors) {
 174     int extra_save_slots_per_register = 0;
 175     // Save upper half of vector registers
 176     if (use_sve) {
 177       extra_save_slots_per_register = sve_vector_size_in_slots - FloatRegister::save_slots_per_register;
 178     } else {
 179       extra_save_slots_per_register = FloatRegister::extra_save_slots_per_neon_register;
 180     }
 181     int extra_vector_bytes = extra_save_slots_per_register *
 182                              VMRegImpl::stack_slot_size *
 183                              FloatRegister::number_of_registers;
 184     additional_frame_words += ((extra_vector_bytes + total_predicate_in_bytes) / wordSize);
 185   }
 186 #else
 187   assert(!_save_vectors, "vectors are generated only by C2 and JVMCI");
 188 #endif
 189 
 190   int frame_size_in_bytes = align_up(additional_frame_words * wordSize +
 191                                      reg_save_size * BytesPerInt, 16);
 192   // OopMap frame size is in compiler stack slots (jint's) not bytes or words
 193   int frame_size_in_slots = frame_size_in_bytes / BytesPerInt;
 194   // The caller will allocate additional_frame_words
 195   int additional_frame_slots = additional_frame_words * wordSize / BytesPerInt;
 196   // CodeBlob frame size is in words.
 197   int frame_size_in_words = frame_size_in_bytes / wordSize;
 198   *total_frame_words = frame_size_in_words;
 199 
 200   // Save Integer and Float registers.
 201   __ enter();
 202   __ push_CPU_state(_save_vectors, use_sve, sve_vector_size_in_bytes, total_predicate_in_bytes);
 203 
 204   // Set an oopmap for the call site.  This oopmap will map all
 205   // oop-registers and debug-info registers as callee-saved.  This
 206   // will allow deoptimization at this safepoint to find all possible
 207   // debug-info recordings, as well as let GC find all oops.
 208 
 209   OopMapSet *oop_maps = new OopMapSet();
 210   OopMap* oop_map = new OopMap(frame_size_in_slots, 0);
 211 
 212   for (int i = 0; i < Register::number_of_registers; i++) {
 213     Register r = as_Register(i);
 214     if (i <= rfp->encoding() && r != rscratch1 && r != rscratch2) {
 215       // SP offsets are in 4-byte words.
 216       // Register slots are 8 bytes wide, 32 floating-point registers.
 217       int sp_offset = Register::max_slots_per_register * i +
 218                       FloatRegister::save_slots_per_register * FloatRegister::number_of_registers;
 219       oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset + additional_frame_slots), r->as_VMReg());
 220     }
 221   }
 222 
 223   for (int i = 0; i < FloatRegister::number_of_registers; i++) {
 224     FloatRegister r = as_FloatRegister(i);
 225     int sp_offset = 0;
 226     if (_save_vectors) {
 227       sp_offset = use_sve ? (total_predicate_in_slots + sve_vector_size_in_slots * i) :
 228                             (FloatRegister::slots_per_neon_register * i);
 229     } else {
 230       sp_offset = FloatRegister::save_slots_per_register * i;
 231     }
 232     oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset), r->as_VMReg());
 233   }
 234 
 235   return oop_map;
 236 }
 237 
 238 void RegisterSaver::restore_live_registers(MacroAssembler* masm) {
 239 #ifdef COMPILER2
 240   __ pop_CPU_state(_save_vectors, Matcher::supports_scalable_vector(),
 241                    Matcher::scalable_vector_reg_size(T_BYTE), total_sve_predicate_in_bytes());
 242 #else
 243 #if !INCLUDE_JVMCI
 244   assert(!_save_vectors, "vectors are generated only by C2 and JVMCI");
 245 #endif
 246   __ pop_CPU_state(_save_vectors);
 247 #endif
 248   __ ldp(rfp, lr, Address(__ post(sp, 2 * wordSize)));
 249   __ authenticate_return_address();
 250 }
 251 
// Is the vector's size (in bytes) bigger than the size saved by default?
// 8-byte vector registers are saved by default on AArch64.
// The minimum vector size supported by SVE is 8 bytes, and we also need to
// save the predicate registers when the vector size is 8 bytes.
 256 bool SharedRuntime::is_wide_vector(int size) {
 257   return size > 8 || (UseSVE > 0 && size >= 8);
 258 }
 259 
 260 // ---------------------------------------------------------------------------
 261 // Read the array of BasicTypes from a signature, and compute where the
 262 // arguments should go.  Values in the VMRegPair regs array refer to 4-byte
 263 // quantities.  Values less than VMRegImpl::stack0 are registers, those above
 264 // refer to 4-byte stack slots.  All stack slots are based off of the stack pointer
 265 // as framesizes are fixed.
// VMRegImpl::stack0 refers to the first slot 0(sp),
// and VMRegImpl::stack0+1 refers to the memory word 4 bytes higher.
// Registers up to Register::number_of_registers are the 64-bit
 269 // integer registers.
 270 
 271 // Note: the INPUTS in sig_bt are in units of Java argument words,
 272 // which are 64-bit.  The OUTPUTS are in 32-bit units.
 273 
 274 // The Java calling convention is a "shifted" version of the C ABI.
// By skipping the first C ABI register we can call non-static JNI
// methods with small numbers of arguments without having to shuffle
// the arguments at all. Since we control the Java ABI we ought to at
 278 // least get some advantage out of it.
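//
// Illustrative example (not generated code, assuming the j_rarg*/j_farg*
// assignments used below): for the signature (int, long, Object, double)
// the loop below produces
//   int    -> j_rarg0 (set1)
//   long   -> j_rarg1 (set2); the following T_VOID half is set_bad()
//   Object -> j_rarg2 (set2)
//   double -> j_farg0 (set2); the following T_VOID half is set_bad()
// and returns stk_args == 0, since no stack slots are needed.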
 279 
 280 int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
 281                                            VMRegPair *regs,
 282                                            int total_args_passed) {
 283 
 284   // Create the mapping between argument positions and
 285   // registers.
 286   static const Register INT_ArgReg[Argument::n_int_register_parameters_j] = {
 287     j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5, j_rarg6, j_rarg7
 288   };
 289   static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters_j] = {
 290     j_farg0, j_farg1, j_farg2, j_farg3,
 291     j_farg4, j_farg5, j_farg6, j_farg7
 292   };
 293 
 294 
 295   uint int_args = 0;
 296   uint fp_args = 0;
 297   uint stk_args = 0;
 298 
 299   for (int i = 0; i < total_args_passed; i++) {
 300     switch (sig_bt[i]) {
 301     case T_BOOLEAN:
 302     case T_CHAR:
 303     case T_BYTE:
 304     case T_SHORT:
 305     case T_INT:
 306       if (int_args < Argument::n_int_register_parameters_j) {
 307         regs[i].set1(INT_ArgReg[int_args++]->as_VMReg());
 308       } else {
 309         stk_args = align_up(stk_args, 2);
 310         regs[i].set1(VMRegImpl::stack2reg(stk_args));
 311         stk_args += 1;
 312       }
 313       break;
 314     case T_VOID:
 315       // halves of T_LONG or T_DOUBLE
 316       assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
 317       regs[i].set_bad();
 318       break;
 319     case T_LONG:
 320       assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
 321       // fall through
 322     case T_OBJECT:
 323     case T_ARRAY:
 324     case T_ADDRESS:
 325       if (int_args < Argument::n_int_register_parameters_j) {
 326         regs[i].set2(INT_ArgReg[int_args++]->as_VMReg());
 327       } else {
 328         stk_args = align_up(stk_args, 2);
 329         regs[i].set2(VMRegImpl::stack2reg(stk_args));
 330         stk_args += 2;
 331       }
 332       break;
 333     case T_FLOAT:
 334       if (fp_args < Argument::n_float_register_parameters_j) {
 335         regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg());
 336       } else {
 337         stk_args = align_up(stk_args, 2);
 338         regs[i].set1(VMRegImpl::stack2reg(stk_args));
 339         stk_args += 1;
 340       }
 341       break;
 342     case T_DOUBLE:
 343       assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
 344       if (fp_args < Argument::n_float_register_parameters_j) {
 345         regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg());
 346       } else {
 347         stk_args = align_up(stk_args, 2);
 348         regs[i].set2(VMRegImpl::stack2reg(stk_args));
 349         stk_args += 2;
 350       }
 351       break;
 352     default:
 353       ShouldNotReachHere();
 354       break;
 355     }
 356   }
 357 
 358   return stk_args;
 359 }
 360 
// Patch the caller's call site with the entry to compiled code, if it exists.
 362 static void patch_callers_callsite(MacroAssembler *masm) {
 363   Label L;
 364   __ ldr(rscratch1, Address(rmethod, in_bytes(Method::code_offset())));
 365   __ cbz(rscratch1, L);
 366 
 367   __ enter();
 368   __ push_CPU_state();
 369 
 370   // VM needs caller's callsite
 371   // VM needs target method
 372   // This needs to be a long call since we will relocate this adapter to
 373   // the codeBuffer and it may not reach
 374 
 375 #ifndef PRODUCT
 376   assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area");
 377 #endif
 378 
 379   __ mov(c_rarg0, rmethod);
 380   __ mov(c_rarg1, lr);
 381   __ authenticate_return_address(c_rarg1);
 382   __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)));
 383   __ blr(rscratch1);
 384 
 385   // Explicit isb required because fixup_callers_callsite may change the code
 386   // stream.
 387   __ safepoint_isb();
 388 
 389   __ pop_CPU_state();
 390   // restore sp
 391   __ leave();
 392   __ bind(L);
 393 }
 394 
 395 static void gen_c2i_adapter(MacroAssembler *masm,
 396                             int total_args_passed,
 397                             int comp_args_on_stack,
 398                             const BasicType *sig_bt,
 399                             const VMRegPair *regs,
 400                             Label& skip_fixup) {
 401   // Before we get into the guts of the C2I adapter, see if we should be here
 402   // at all.  We've come from compiled code and are attempting to jump to the
 403   // interpreter, which means the caller made a static call to get here
 404   // (vcalls always get a compiled target if there is one).  Check for a
 405   // compiled target.  If there is one, we need to patch the caller's call.
 406   patch_callers_callsite(masm);
 407 
 408   __ bind(skip_fixup);
 409 
 410   int words_pushed = 0;
 411 
 412   // Since all args are passed on the stack, total_args_passed *
 413   // Interpreter::stackElementSize is the space we need.
 414 
 415   int extraspace = total_args_passed * Interpreter::stackElementSize;
 416 
 417   __ mov(r19_sender_sp, sp);
 418 
 419   // stack is aligned, keep it that way
 420   extraspace = align_up(extraspace, 2*wordSize);
 421 
 422   if (extraspace)
 423     __ sub(sp, sp, extraspace);
 424 
 425   // Now write the args into the outgoing interpreter space
 426   for (int i = 0; i < total_args_passed; i++) {
 427     if (sig_bt[i] == T_VOID) {
 428       assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
 429       continue;
 430     }
 431 
 432     // offset to start parameters
 433     int st_off   = (total_args_passed - i - 1) * Interpreter::stackElementSize;
 434     int next_off = st_off - Interpreter::stackElementSize;
 435 
    // Say 4 args:
    // i   st_off
    // 0   24 T_LONG
    // 1   16 T_VOID
    // 2    8 T_OBJECT
    // 3    0 T_BOOL
 443     //
    // However, to make things extra confusing: because we can fit a Java long/double in
    // a single slot on a 64-bit VM, and it would be silly to break them up, the interpreter
    // leaves one slot empty and only stores to a single slot. In this case the
    // slot that is occupied is the T_VOID slot. See, I said it was confusing.
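    //
    // Worked example (illustrative) for the T_LONG at i == 0 above: st_off is
    // the higher-addressed slot and next_off = st_off - stackElementSize is the
    // lower-addressed (T_VOID) slot. The 64-bit value is stored at next_off and,
    // in debug builds, the unused st_off slot is filled with known junk.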
 448 
 449     VMReg r_1 = regs[i].first();
 450     VMReg r_2 = regs[i].second();
 451     if (!r_1->is_valid()) {
 452       assert(!r_2->is_valid(), "");
 453       continue;
 454     }
 455     if (r_1->is_stack()) {
      // memory to memory; use rscratch1
 457       int ld_off = (r_1->reg2stack() * VMRegImpl::stack_slot_size
 458                     + extraspace
 459                     + words_pushed * wordSize);
 460       if (!r_2->is_valid()) {
 461         // sign extend??
 462         __ ldrw(rscratch1, Address(sp, ld_off));
 463         __ str(rscratch1, Address(sp, st_off));
 464 
 465       } else {
 466 
 467         __ ldr(rscratch1, Address(sp, ld_off));
 468 
        // Two VMRegs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG;
        // T_DOUBLE and T_LONG use two slots in the interpreter.
 471         if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
 472           // ld_off == LSW, ld_off+wordSize == MSW
 473           // st_off == MSW, next_off == LSW
 474           __ str(rscratch1, Address(sp, next_off));
 475 #ifdef ASSERT
 476           // Overwrite the unused slot with known junk
 477           __ mov(rscratch1, (uint64_t)0xdeadffffdeadaaaaull);
 478           __ str(rscratch1, Address(sp, st_off));
 479 #endif /* ASSERT */
 480         } else {
 481           __ str(rscratch1, Address(sp, st_off));
 482         }
 483       }
 484     } else if (r_1->is_Register()) {
 485       Register r = r_1->as_Register();
 486       if (!r_2->is_valid()) {
        // must be only an int (or smaller), so move only 32 bits to the slot
 488         // why not sign extend??
 489         __ str(r, Address(sp, st_off));
 490       } else {
        // Two VMRegs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG;
        // T_DOUBLE and T_LONG use two slots in the interpreter.
 493         if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
 494           // jlong/double in gpr
 495 #ifdef ASSERT
 496           // Overwrite the unused slot with known junk
 497           __ mov(rscratch1, (uint64_t)0xdeadffffdeadaaabull);
 498           __ str(rscratch1, Address(sp, st_off));
 499 #endif /* ASSERT */
 500           __ str(r, Address(sp, next_off));
 501         } else {
 502           __ str(r, Address(sp, st_off));
 503         }
 504       }
 505     } else {
 506       assert(r_1->is_FloatRegister(), "");
 507       if (!r_2->is_valid()) {
        // only a float; use just part of the slot
 509         __ strs(r_1->as_FloatRegister(), Address(sp, st_off));
 510       } else {
 511 #ifdef ASSERT
 512         // Overwrite the unused slot with known junk
 513         __ mov(rscratch1, (uint64_t)0xdeadffffdeadaaacull);
 514         __ str(rscratch1, Address(sp, st_off));
 515 #endif /* ASSERT */
 516         __ strd(r_1->as_FloatRegister(), Address(sp, next_off));
 517       }
 518     }
 519   }
 520 
 521   __ mov(esp, sp); // Interp expects args on caller's expression stack
 522 
 523   __ ldr(rscratch1, Address(rmethod, in_bytes(Method::interpreter_entry_offset())));
 524   __ br(rscratch1);
 525 }
 526 
 527 
 528 void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
 529                                     int total_args_passed,
 530                                     int comp_args_on_stack,
 531                                     const BasicType *sig_bt,
 532                                     const VMRegPair *regs) {
 533 
 534   // Note: r19_sender_sp contains the senderSP on entry. We must
  // preserve it since we may do an i2c -> c2i transition if we lose a
 536   // race where compiled code goes non-entrant while we get args
 537   // ready.
 538 
 539   // Adapters are frameless.
 540 
 541   // An i2c adapter is frameless because the *caller* frame, which is
 542   // interpreted, routinely repairs its own esp (from
 543   // interpreter_frame_last_sp), even if a callee has modified the
 544   // stack pointer.  It also recalculates and aligns sp.
 545 
 546   // A c2i adapter is frameless because the *callee* frame, which is
 547   // interpreted, routinely repairs its caller's sp (from sender_sp,
 548   // which is set up via the senderSP register).
 549 
 550   // In other words, if *either* the caller or callee is interpreted, we can
 551   // get the stack pointer repaired after a call.
 552 
 553   // This is why c2i and i2c adapters cannot be indefinitely composed.
 554   // In particular, if a c2i adapter were to somehow call an i2c adapter,
 555   // both caller and callee would be compiled methods, and neither would
 556   // clean up the stack pointer changes performed by the two adapters.
 557   // If this happens, control eventually transfers back to the compiled
 558   // caller, but with an uncorrected stack, causing delayed havoc.
 559 
 560   // Cut-out for having no stack args.
 561   int comp_words_on_stack = align_up(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord;
 562   if (comp_args_on_stack) {
 563     __ sub(rscratch1, sp, comp_words_on_stack * wordSize);
 564     __ andr(sp, rscratch1, -16);
 565   }
 566 
 567   // Will jump to the compiled code just as if compiled code was doing it.
 568   // Pre-load the register-jump target early, to schedule it better.
 569   __ ldr(rscratch1, Address(rmethod, in_bytes(Method::from_compiled_offset())));
 570 
 571 #if INCLUDE_JVMCI
 572   if (EnableJVMCI) {
 573     // check if this call should be routed towards a specific entry point
 574     __ ldr(rscratch2, Address(rthread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())));
 575     Label no_alternative_target;
 576     __ cbz(rscratch2, no_alternative_target);
 577     __ mov(rscratch1, rscratch2);
 578     __ str(zr, Address(rthread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())));
 579     __ bind(no_alternative_target);
 580   }
 581 #endif // INCLUDE_JVMCI
 582 
 583   // Now generate the shuffle code.
 584   for (int i = 0; i < total_args_passed; i++) {
 585     if (sig_bt[i] == T_VOID) {
 586       assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
 587       continue;
 588     }
 589 
 590     // Pick up 0, 1 or 2 words from SP+offset.
 591 
 592     assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
 593             "scrambled load targets?");
 594     // Load in argument order going down.
 595     int ld_off = (total_args_passed - i - 1)*Interpreter::stackElementSize;
 596     // Point to interpreter value (vs. tag)
 597     int next_off = ld_off - Interpreter::stackElementSize;
 601     VMReg r_1 = regs[i].first();
 602     VMReg r_2 = regs[i].second();
 603     if (!r_1->is_valid()) {
 604       assert(!r_2->is_valid(), "");
 605       continue;
 606     }
 607     if (r_1->is_stack()) {
      // Convert stack slot to an SP offset
 609       int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size;
 610       if (!r_2->is_valid()) {
 611         // sign extend???
 612         __ ldrsw(rscratch2, Address(esp, ld_off));
 613         __ str(rscratch2, Address(sp, st_off));
 614       } else {
 615         //
        // We are using two optoregs. This can be either T_OBJECT,
        // T_ADDRESS, T_LONG, or T_DOUBLE. The interpreter allocates
        // two slots but only uses one for the T_LONG or T_DOUBLE case,
        // so we must adjust where to pick up the data to match the
        // interpreter.
        //
        // Interpreter local[n] == MSW, local[n+1] == LSW; however, locals
        // are accessed with negative offsets, so the LSW is at the lower address.
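        //
        // Illustrative example (assuming Interpreter::stackElementSize == 8): for
        // a T_LONG argument, ld_off names the higher-addressed slot and next_off
        // the lower-addressed one; the interpreter stored the 64-bit value in the
        // lower-addressed slot, so for T_LONG/T_DOUBLE we load from next_off.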
 624 
 625         // ld_off is MSW so get LSW
 626         const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)?
 627                            next_off : ld_off;
 628         __ ldr(rscratch2, Address(esp, offset));
 629         // st_off is LSW (i.e. reg.first())
 630         __ str(rscratch2, Address(sp, st_off));
 631       }
 632     } else if (r_1->is_Register()) {  // Register argument
 633       Register r = r_1->as_Register();
 634       if (r_2->is_valid()) {
 635         //
 636         // We are using two VMRegs. This can be either T_OBJECT,
        // T_ADDRESS, T_LONG, or T_DOUBLE. The interpreter allocates
        // two slots but only uses one for the T_LONG or T_DOUBLE case,
        // so we must adjust where to pick up the data to match the
 640         // interpreter.
 641 
 642         const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)?
 643                            next_off : ld_off;
 644 
 645         // this can be a misaligned move
 646         __ ldr(r, Address(esp, offset));
 647       } else {
 648         // sign extend and use a full word?
 649         __ ldrw(r, Address(esp, ld_off));
 650       }
 651     } else {
 652       if (!r_2->is_valid()) {
 653         __ ldrs(r_1->as_FloatRegister(), Address(esp, ld_off));
 654       } else {
 655         __ ldrd(r_1->as_FloatRegister(), Address(esp, next_off));
 656       }
 657     }
 658   }
 659 
 660   __ mov(rscratch2, rscratch1);
 661   __ push_cont_fastpath(rthread); // Set JavaThread::_cont_fastpath to the sp of the oldest interpreted frame we know about; kills rscratch1
 662   __ mov(rscratch1, rscratch2);
 663 
 664   // 6243940 We might end up in handle_wrong_method if
  // the callee is deoptimized as we race through here. If that
 666   // happens we don't want to take a safepoint because the
 667   // caller frame will look interpreted and arguments are now
 668   // "compiled" so it is much better to make this transition
 669   // invisible to the stack walking code. Unfortunately if
  // we try to find the callee by normal means a safepoint
 671   // is possible. So we stash the desired callee in the thread
  // and the VM will find it there should this case occur.
 673 
 674   __ str(rmethod, Address(rthread, JavaThread::callee_target_offset()));
 675 
 676   __ br(rscratch1);
 677 }
 678 
 679 // ---------------------------------------------------------------
 680 void SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
 681                                             int total_args_passed,
 682                                             int comp_args_on_stack,
 683                                             const BasicType *sig_bt,
 684                                             const VMRegPair *regs,
 685                                             AdapterHandlerEntry* handler) {
 686   address i2c_entry = __ pc();
 687 
 688   gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);
 689 
 690   address c2i_unverified_entry = __ pc();
 691   Label skip_fixup;
 692 
 693   Register data = rscratch2;
 694   Register receiver = j_rarg0;
 695   Register tmp = r10;  // A call-clobbered register not used for arg passing
 696 
 697   // -------------------------------------------------------------------------
 698   // Generate a C2I adapter.  On entry we know rmethod holds the Method* during calls
 699   // to the interpreter.  The args start out packed in the compiled layout.  They
 700   // need to be unpacked into the interpreter layout.  This will almost always
 701   // require some stack space.  We grow the current (compiled) stack, then repack
  // the args.  We finally end with a jump to the generic interpreter entry point.
 703   // On exit from the interpreter, the interpreter will restore our SP (lest the
 704   // compiled code, which relies solely on SP and not FP, get sick).
 705 
 706   {
 707     __ block_comment("c2i_unverified_entry {");
 708     // Method might have been compiled since the call site was patched to
 709     // interpreted; if that is the case treat it as a miss so we can get
 710     // the call site corrected.
 711     __ ic_check(1 /* end_alignment */);
 712     __ ldr(rmethod, Address(data, CompiledICData::speculated_method_offset()));
 713 
 714     __ ldr(rscratch1, Address(rmethod, in_bytes(Method::code_offset())));
 715     __ cbz(rscratch1, skip_fixup);
 716     __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 717     __ block_comment("} c2i_unverified_entry");
 718   }
 719 
 720   address c2i_entry = __ pc();
 721 
 722   // Class initialization barrier for static methods
 723   address c2i_no_clinit_check_entry = nullptr;
 724   if (VM_Version::supports_fast_class_init_checks()) {
 725     Label L_skip_barrier;
 726 
 727     { // Bypass the barrier for non-static methods
 728       __ ldrh(rscratch1, Address(rmethod, Method::access_flags_offset()));
 729       __ andsw(zr, rscratch1, JVM_ACC_STATIC);
 730       __ br(Assembler::EQ, L_skip_barrier); // non-static
 731     }
 732 
 733     __ load_method_holder(rscratch2, rmethod);
 734     __ clinit_barrier(rscratch2, rscratch1, &L_skip_barrier);
 735     __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
 736 
 737     __ bind(L_skip_barrier);
 738     c2i_no_clinit_check_entry = __ pc();
 739   }
 740 
 741   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
 742   bs->c2i_entry_barrier(masm);
 743 
 744   gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);
 745 
 746   handler->set_entry_points(i2c_entry, c2i_entry, c2i_unverified_entry, c2i_no_clinit_check_entry);
 747   return;
 748 }
 749 
 750 static int c_calling_convention_priv(const BasicType *sig_bt,
 751                                          VMRegPair *regs,
 752                                          int total_args_passed) {
 753 
// We return the number of VMRegImpl stack slots we need to reserve for all
 755 // the arguments NOT counting out_preserve_stack_slots.
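//
// Illustrative example (not generated code): on Linux/AArch64, a signature of
// nine T_INT arguments places the first eight in c_rarg0..c_rarg7 and gives the
// ninth the stack slot at index 0 (stk_args advances by 2 per stack argument),
// so the function returns 2. On Apple platforms the same signature bails out
// with -1, because sub-word stack arguments are packed and not handled here.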
 756 
 757     static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = {
 758       c_rarg0, c_rarg1, c_rarg2, c_rarg3, c_rarg4, c_rarg5,  c_rarg6,  c_rarg7
 759     };
 760     static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters_c] = {
 761       c_farg0, c_farg1, c_farg2, c_farg3,
 762       c_farg4, c_farg5, c_farg6, c_farg7
 763     };
 764 
 765     uint int_args = 0;
 766     uint fp_args = 0;
 767     uint stk_args = 0; // inc by 2 each time
 768 
 769     for (int i = 0; i < total_args_passed; i++) {
 770       switch (sig_bt[i]) {
 771       case T_BOOLEAN:
 772       case T_CHAR:
 773       case T_BYTE:
 774       case T_SHORT:
 775       case T_INT:
 776         if (int_args < Argument::n_int_register_parameters_c) {
 777           regs[i].set1(INT_ArgReg[int_args++]->as_VMReg());
 778         } else {
 779 #ifdef __APPLE__
 780           // Less-than word types are stored one after another.
          // The code is unable to handle this, so bail out.
 782           return -1;
 783 #endif
 784           regs[i].set1(VMRegImpl::stack2reg(stk_args));
 785           stk_args += 2;
 786         }
 787         break;
 788       case T_LONG:
 789         assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
 790         // fall through
 791       case T_OBJECT:
 792       case T_ARRAY:
 793       case T_ADDRESS:
 794       case T_METADATA:
 795         if (int_args < Argument::n_int_register_parameters_c) {
 796           regs[i].set2(INT_ArgReg[int_args++]->as_VMReg());
 797         } else {
 798           regs[i].set2(VMRegImpl::stack2reg(stk_args));
 799           stk_args += 2;
 800         }
 801         break;
 802       case T_FLOAT:
 803         if (fp_args < Argument::n_float_register_parameters_c) {
 804           regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg());
 805         } else {
 806 #ifdef __APPLE__
 807           // Less-than word types are stored one after another.
          // The code is unable to handle this, so bail out.
 809           return -1;
 810 #endif
 811           regs[i].set1(VMRegImpl::stack2reg(stk_args));
 812           stk_args += 2;
 813         }
 814         break;
 815       case T_DOUBLE:
 816         assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
 817         if (fp_args < Argument::n_float_register_parameters_c) {
 818           regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg());
 819         } else {
 820           regs[i].set2(VMRegImpl::stack2reg(stk_args));
 821           stk_args += 2;
 822         }
 823         break;
 824       case T_VOID: // Halves of longs and doubles
 825         assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
 826         regs[i].set_bad();
 827         break;
 828       default:
 829         ShouldNotReachHere();
 830         break;
 831       }
 832     }
 833 
 834   return stk_args;
 835 }
 836 
 837 int SharedRuntime::vector_calling_convention(VMRegPair *regs,
 838                                              uint num_bits,
 839                                              uint total_args_passed) {
  // More than 8 vector arguments are not currently supported.
 841   assert(total_args_passed <= Argument::n_float_register_parameters_c, "unsupported");
 842   assert(num_bits >= 64 && num_bits <= 2048 && is_power_of_2(num_bits), "unsupported");
 843 
 844   static const FloatRegister VEC_ArgReg[Argument::n_float_register_parameters_c] = {
 845     v0, v1, v2, v3, v4, v5, v6, v7
 846   };
 847 
  // On SVE, we use the same vector registers as the 128-bit vector registers on NEON.
 849   int next_reg_val = num_bits == 64 ? 1 : 3;
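  // For example (illustrative): with num_bits == 128, argument i gets the VMReg
  // pair (v_i, v_i->next(3)), i.e. four 32-bit slots of register v_i; with
  // num_bits == 64 only two slots (v_i, v_i->next(1)) are used. No stack slots
  // are consumed, so the function returns 0.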
 850   for (uint i = 0; i < total_args_passed; i++) {
 851     VMReg vmreg = VEC_ArgReg[i]->as_VMReg();
 852     regs[i].set_pair(vmreg->next(next_reg_val), vmreg);
 853   }
 854   return 0;
 855 }
 856 
 857 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
 858                                          VMRegPair *regs,
 859                                          int total_args_passed)
 860 {
 861   int result = c_calling_convention_priv(sig_bt, regs, total_args_passed);
 862   guarantee(result >= 0, "Unsupported arguments configuration");
 863   return result;
 864 }
 865 
 866 
 867 void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
  // We always ignore the frame_slots arg and just use the space just below the
  // frame pointer, which by this time is free to use.
 870   switch (ret_type) {
 871   case T_FLOAT:
 872     __ strs(v0, Address(rfp, -wordSize));
 873     break;
 874   case T_DOUBLE:
 875     __ strd(v0, Address(rfp, -wordSize));
 876     break;
 877   case T_VOID:  break;
 878   default: {
 879     __ str(r0, Address(rfp, -wordSize));
 880     }
 881   }
 882 }
 883 
 884 void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
  // We always ignore the frame_slots arg and just use the space just below the
  // frame pointer, which by this time is free to use.
 887   switch (ret_type) {
 888   case T_FLOAT:
 889     __ ldrs(v0, Address(rfp, -wordSize));
 890     break;
 891   case T_DOUBLE:
 892     __ ldrd(v0, Address(rfp, -wordSize));
 893     break;
 894   case T_VOID:  break;
 895   default: {
 896     __ ldr(r0, Address(rfp, -wordSize));
 897     }
 898   }
 899 }
 900 static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) {
 901   RegSet x;
 902   for ( int i = first_arg ; i < arg_count ; i++ ) {
 903     if (args[i].first()->is_Register()) {
 904       x = x + args[i].first()->as_Register();
 905     } else if (args[i].first()->is_FloatRegister()) {
 906       __ strd(args[i].first()->as_FloatRegister(), Address(__ pre(sp, -2 * wordSize)));
 907     }
 908   }
 909   __ push(x, sp);
 910 }
 911 
 912 static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) {
 913   RegSet x;
 914   for ( int i = first_arg ; i < arg_count ; i++ ) {
 915     if (args[i].first()->is_Register()) {
 916       x = x + args[i].first()->as_Register();
 917     } else {
 918       ;
 919     }
 920   }
 921   __ pop(x, sp);
 922   for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) {
 923     if (args[i].first()->is_Register()) {
 924       ;
 925     } else if (args[i].first()->is_FloatRegister()) {
 926       __ ldrd(args[i].first()->as_FloatRegister(), Address(__ post(sp, 2 * wordSize)));
 927     }
 928   }
 929 }
 930 
 931 static void verify_oop_args(MacroAssembler* masm,
 932                             const methodHandle& method,
 933                             const BasicType* sig_bt,
 934                             const VMRegPair* regs) {
 935   Register temp_reg = r19;  // not part of any compiled calling seq
 936   if (VerifyOops) {
 937     for (int i = 0; i < method->size_of_parameters(); i++) {
 938       if (sig_bt[i] == T_OBJECT ||
 939           sig_bt[i] == T_ARRAY) {
 940         VMReg r = regs[i].first();
 941         assert(r->is_valid(), "bad oop arg");
 942         if (r->is_stack()) {
 943           __ ldr(temp_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size));
 944           __ verify_oop(temp_reg);
 945         } else {
 946           __ verify_oop(r->as_Register());
 947         }
 948       }
 949     }
 950   }
 951 }
 952 
 953 // on exit, sp points to the ContinuationEntry
 954 static OopMap* continuation_enter_setup(MacroAssembler* masm, int& stack_slots) {
 955   assert(ContinuationEntry::size() % VMRegImpl::stack_slot_size == 0, "");
 956   assert(in_bytes(ContinuationEntry::cont_offset())  % VMRegImpl::stack_slot_size == 0, "");
 957   assert(in_bytes(ContinuationEntry::chunk_offset()) % VMRegImpl::stack_slot_size == 0, "");
 958 
 959   stack_slots += (int)ContinuationEntry::size()/wordSize;
 960   __ sub(sp, sp, (int)ContinuationEntry::size()); // place Continuation metadata
 961 
 962   OopMap* map = new OopMap(((int)ContinuationEntry::size() + wordSize)/ VMRegImpl::stack_slot_size, 0 /* arg_slots*/);
 963 
 964   __ ldr(rscratch1, Address(rthread, JavaThread::cont_entry_offset()));
 965   __ str(rscratch1, Address(sp, ContinuationEntry::parent_offset()));
 966   __ mov(rscratch1, sp); // we can't use sp as the source in str
 967   __ str(rscratch1, Address(rthread, JavaThread::cont_entry_offset()));
 968 
 969   return map;
 970 }
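
// Illustrative frame sketch (assuming the callers below run __ enter() first):
// after continuation_enter_setup the stack looks like
//   sp                                 -> ContinuationEntry (ContinuationEntry::size() bytes)
//   sp + ContinuationEntry::size()     -> saved rfp
//   sp + ContinuationEntry::size() + 8 -> saved lr
// and JavaThread::_cont_entry points at the ContinuationEntry.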
 971 
 972 // on entry c_rarg1 points to the continuation
 973 //          sp points to ContinuationEntry
 974 //          c_rarg3 -- isVirtualThread
 975 static void fill_continuation_entry(MacroAssembler* masm) {
 976 #ifdef ASSERT
 977   __ movw(rscratch1, ContinuationEntry::cookie_value());
 978   __ strw(rscratch1, Address(sp, ContinuationEntry::cookie_offset()));
 979 #endif
 980 
 981   __ str (c_rarg1, Address(sp, ContinuationEntry::cont_offset()));
 982   __ strw(c_rarg3, Address(sp, ContinuationEntry::flags_offset()));
 983   __ str (zr,      Address(sp, ContinuationEntry::chunk_offset()));
 984   __ strw(zr,      Address(sp, ContinuationEntry::argsize_offset()));
 985   __ strw(zr,      Address(sp, ContinuationEntry::pin_count_offset()));
 986 
 987   __ ldr(rscratch1, Address(rthread, JavaThread::cont_fastpath_offset()));
 988   __ str(rscratch1, Address(sp, ContinuationEntry::parent_cont_fastpath_offset()));
 989   __ ldr(rscratch1, Address(rthread, JavaThread::held_monitor_count_offset()));
 990   __ str(rscratch1, Address(sp, ContinuationEntry::parent_held_monitor_count_offset()));
 991 
 992   __ str(zr, Address(rthread, JavaThread::cont_fastpath_offset()));
 993   __ str(zr, Address(rthread, JavaThread::held_monitor_count_offset()));
 994 }
 995 
 996 // on entry, sp points to the ContinuationEntry
 997 // on exit, rfp points to the spilled rfp in the entry frame
 998 static void continuation_enter_cleanup(MacroAssembler* masm) {
 999 #ifndef PRODUCT
1000   Label OK;
1001   __ ldr(rscratch1, Address(rthread, JavaThread::cont_entry_offset()));
1002   __ cmp(sp, rscratch1);
1003   __ br(Assembler::EQ, OK);
1004   __ stop("incorrect sp1");
1005   __ bind(OK);
1006 #endif
1007   __ ldr(rscratch1, Address(sp, ContinuationEntry::parent_cont_fastpath_offset()));
1008   __ str(rscratch1, Address(rthread, JavaThread::cont_fastpath_offset()));
1009 
1010   if (CheckJNICalls) {
1011     // Check if this is a virtual thread continuation
1012     Label L_skip_vthread_code;
1013     __ ldrw(rscratch1, Address(sp, ContinuationEntry::flags_offset()));
1014     __ cbzw(rscratch1, L_skip_vthread_code);
1015 
1016     // If the held monitor count is > 0 and this vthread is terminating then
1017     // it failed to release a JNI monitor. So we issue the same log message
1018     // that JavaThread::exit does.
1019     __ ldr(rscratch1, Address(rthread, JavaThread::jni_monitor_count_offset()));
1020     __ cbz(rscratch1, L_skip_vthread_code);
1021 
1022     // Save return value potentially containing the exception oop in callee-saved R19.
1023     __ mov(r19, r0);
1024     __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::log_jni_monitor_still_held));
1025     // Restore potential return value.
1026     __ mov(r0, r19);
1027 
1028     // For vthreads we have to explicitly zero the JNI monitor count of the carrier
1029     // on termination. The held count is implicitly zeroed below when we restore from
1030     // the parent held count (which has to be zero).
1031     __ str(zr, Address(rthread, JavaThread::jni_monitor_count_offset()));
1032 
1033     __ bind(L_skip_vthread_code);
1034   }
1035 #ifdef ASSERT
1036   else {
1037     // Check if this is a virtual thread continuation
1038     Label L_skip_vthread_code;
1039     __ ldrw(rscratch1, Address(sp, ContinuationEntry::flags_offset()));
1040     __ cbzw(rscratch1, L_skip_vthread_code);
1041 
    // See comment just above. If we are not checking JNI calls, the JNI count is only
1043     // needed for assertion checking.
1044     __ str(zr, Address(rthread, JavaThread::jni_monitor_count_offset()));
1045 
1046     __ bind(L_skip_vthread_code);
1047   }
1048 #endif
1049 
1050   __ ldr(rscratch1, Address(sp, ContinuationEntry::parent_held_monitor_count_offset()));
1051   __ str(rscratch1, Address(rthread, JavaThread::held_monitor_count_offset()));
1052 
1053   __ ldr(rscratch2, Address(sp, ContinuationEntry::parent_offset()));
1054   __ str(rscratch2, Address(rthread, JavaThread::cont_entry_offset()));
1055   __ add(rfp, sp, (int)ContinuationEntry::size());
1056 }
1057 
1058 // enterSpecial(Continuation c, boolean isContinue, boolean isVirtualThread)
1059 // On entry: c_rarg1 -- the continuation object
1060 //           c_rarg2 -- isContinue
1061 //           c_rarg3 -- isVirtualThread
1062 static void gen_continuation_enter(MacroAssembler* masm,
1063                                  const methodHandle& method,
1064                                  const BasicType* sig_bt,
1065                                  const VMRegPair* regs,
1066                                  int& exception_offset,
1067                                  OopMapSet*oop_maps,
1068                                  int& frame_complete,
1069                                  int& stack_slots,
1070                                  int& interpreted_entry_offset,
1071                                  int& compiled_entry_offset) {
1072   //verify_oop_args(masm, method, sig_bt, regs);
1073   Address resolve(SharedRuntime::get_resolve_static_call_stub(), relocInfo::static_call_type);
1074 
1075   address start = __ pc();
1076 
1077   Label call_thaw, exit;
1078 
1079   // i2i entry used at interp_only_mode only
1080   interpreted_entry_offset = __ pc() - start;
1081   {
1082 
1083 #ifdef ASSERT
1084     Label is_interp_only;
1085     __ ldrw(rscratch1, Address(rthread, JavaThread::interp_only_mode_offset()));
1086     __ cbnzw(rscratch1, is_interp_only);
1087     __ stop("enterSpecial interpreter entry called when not in interp_only_mode");
1088     __ bind(is_interp_only);
1089 #endif
1090 
1091     // Read interpreter arguments into registers (this is an ad-hoc i2c adapter)
1092     __ ldr(c_rarg1, Address(esp, Interpreter::stackElementSize*2));
1093     __ ldr(c_rarg2, Address(esp, Interpreter::stackElementSize*1));
1094     __ ldr(c_rarg3, Address(esp, Interpreter::stackElementSize*0));
1095     __ push_cont_fastpath(rthread);
1096 
1097     __ enter();
1098     stack_slots = 2; // will be adjusted in setup
1099     OopMap* map = continuation_enter_setup(masm, stack_slots);
    // The frame is complete here, but we only record it for the compiled entry, so the frame would appear unsafe.
    // That's okay: at the very worst we'll miss an async sample, and we're in interp_only_mode anyway.
1102 
1103     fill_continuation_entry(masm);
1104 
1105     __ cbnz(c_rarg2, call_thaw);
1106 
1107     const address tr_call = __ trampoline_call(resolve);
1108     if (tr_call == nullptr) {
1109       fatal("CodeCache is full at gen_continuation_enter");
1110     }
1111 
1112     oop_maps->add_gc_map(__ pc() - start, map);
1113     __ post_call_nop();
1114 
1115     __ b(exit);
1116 
1117     address stub = CompiledDirectCall::emit_to_interp_stub(masm, tr_call);
1118     if (stub == nullptr) {
1119       fatal("CodeCache is full at gen_continuation_enter");
1120     }
1121   }
1122 
1123   // compiled entry
1124   __ align(CodeEntryAlignment);
1125   compiled_entry_offset = __ pc() - start;
1126 
1127   __ enter();
1128   stack_slots = 2; // will be adjusted in setup
1129   OopMap* map = continuation_enter_setup(masm, stack_slots);
1130   frame_complete = __ pc() - start;
1131 
1132   fill_continuation_entry(masm);
1133 
1134   __ cbnz(c_rarg2, call_thaw);
1135 
1136   const address tr_call = __ trampoline_call(resolve);
1137   if (tr_call == nullptr) {
1138     fatal("CodeCache is full at gen_continuation_enter");
1139   }
1140 
1141   oop_maps->add_gc_map(__ pc() - start, map);
1142   __ post_call_nop();
1143 
1144   __ b(exit);
1145 
1146   __ bind(call_thaw);
1147 
1148   ContinuationEntry::_thaw_call_pc_offset = __ pc() - start;
1149   __ rt_call(CAST_FROM_FN_PTR(address, StubRoutines::cont_thaw()));
1150   oop_maps->add_gc_map(__ pc() - start, map->deep_copy());
1151   ContinuationEntry::_return_pc_offset = __ pc() - start;
1152   __ post_call_nop();
1153 
1154   __ bind(exit);
1155   ContinuationEntry::_cleanup_offset = __ pc() - start;
1156   continuation_enter_cleanup(masm);
1157   __ leave();
1158   __ ret(lr);
1159 
1160   /// exception handling
1161 
1162   exception_offset = __ pc() - start;
1163   {
      __ mov(r19, r0); // save return value containing the exception oop in callee-saved R19
1165 
1166       continuation_enter_cleanup(masm);
1167 
1168       __ ldr(c_rarg1, Address(rfp, wordSize)); // return address
1169       __ authenticate_return_address(c_rarg1);
1170       __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), rthread, c_rarg1);
1171 
1172       // see OptoRuntime::generate_exception_blob: r0 -- exception oop, r3 -- exception pc
1173 
1174       __ mov(r1, r0); // the exception handler
      __ mov(r0, r19); // restore return value containing the exception oop
1176       __ verify_oop(r0);
1177 
1178       __ leave();
1179       __ mov(r3, lr);
1180       __ br(r1); // the exception handler
1181   }
1182 
1183   address stub = CompiledDirectCall::emit_to_interp_stub(masm, tr_call);
1184   if (stub == nullptr) {
1185     fatal("CodeCache is full at gen_continuation_enter");
1186   }
1187 }
1188 
1189 static void gen_continuation_yield(MacroAssembler* masm,
1190                                    const methodHandle& method,
1191                                    const BasicType* sig_bt,
1192                                    const VMRegPair* regs,
1193                                    OopMapSet* oop_maps,
1194                                    int& frame_complete,
1195                                    int& stack_slots,
1196                                    int& compiled_entry_offset) {
1197     enum layout {
1198       rfp_off1,
1199       rfp_off2,
1200       lr_off,
1201       lr_off2,
1202       framesize // inclusive of return address
1203     };
1204     // assert(is_even(framesize/2), "sp not 16-byte aligned");
1205     stack_slots = framesize /  VMRegImpl::slots_per_word;
1206     assert(stack_slots == 2, "recheck layout");
1207 
1208     address start = __ pc();
1209 
1210     compiled_entry_offset = __ pc() - start;
1211     __ enter();
1212 
1213     __ mov(c_rarg1, sp);
1214 
1215     frame_complete = __ pc() - start;
1216     address the_pc = __ pc();
1217 
    __ post_call_nop(); // this must come exactly after the pc value that is pushed into the frame info; we use this nop for fast CodeBlob lookup
1219 
1220     __ mov(c_rarg0, rthread);
1221     __ set_last_Java_frame(sp, rfp, the_pc, rscratch1);
1222     __ call_VM_leaf(Continuation::freeze_entry(), 2);
1223     __ reset_last_Java_frame(true);
1224 
1225     Label pinned;
1226 
1227     __ cbnz(r0, pinned);
1228 
1229     // We've succeeded, set sp to the ContinuationEntry
1230     __ ldr(rscratch1, Address(rthread, JavaThread::cont_entry_offset()));
1231     __ mov(sp, rscratch1);
1232     continuation_enter_cleanup(masm);
1233 
1234     __ bind(pinned); // pinned -- return to caller
1235 
1236     // handle pending exception thrown by freeze
1237     __ ldr(rscratch1, Address(rthread, in_bytes(Thread::pending_exception_offset())));
1238     Label ok;
1239     __ cbz(rscratch1, ok);
1240     __ leave();
1241     __ lea(rscratch1, RuntimeAddress(StubRoutines::forward_exception_entry()));
1242     __ br(rscratch1);
1243     __ bind(ok);
1244 
1245     __ leave();
1246     __ ret(lr);
1247 
1248     OopMap* map = new OopMap(framesize, 1);
1249     oop_maps->add_gc_map(the_pc - start, map);
1250 }
1251 
1252 void SharedRuntime::continuation_enter_cleanup(MacroAssembler* masm) {
1253   ::continuation_enter_cleanup(masm);
1254 }
1255 
1256 static void gen_special_dispatch(MacroAssembler* masm,
1257                                  const methodHandle& method,
1258                                  const BasicType* sig_bt,
1259                                  const VMRegPair* regs) {
1260   verify_oop_args(masm, method, sig_bt, regs);
1261   vmIntrinsics::ID iid = method->intrinsic_id();
1262 
1263   // Now write the args into the outgoing interpreter space
1264   bool     has_receiver   = false;
1265   Register receiver_reg   = noreg;
1266   int      member_arg_pos = -1;
1267   Register member_reg     = noreg;
1268   int      ref_kind       = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid);
1269   if (ref_kind != 0) {
1270     member_arg_pos = method->size_of_parameters() - 1;  // trailing MemberName argument
1271     member_reg = r19;  // known to be free at this point
1272     has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind);
1273   } else if (iid == vmIntrinsics::_invokeBasic) {
1274     has_receiver = true;
1275   } else if (iid == vmIntrinsics::_linkToNative) {
1276     member_arg_pos = method->size_of_parameters() - 1;  // trailing NativeEntryPoint argument
1277     member_reg = r19;  // known to be free at this point
1278   } else {
1279     fatal("unexpected intrinsic id %d", vmIntrinsics::as_int(iid));
1280   }
1281 
1282   if (member_reg != noreg) {
1283     // Load the member_arg into register, if necessary.
1284     SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs);
1285     VMReg r = regs[member_arg_pos].first();
1286     if (r->is_stack()) {
1287       __ ldr(member_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size));
1288     } else {
1289       // no data motion is needed
1290       member_reg = r->as_Register();
1291     }
1292   }
1293 
1294   if (has_receiver) {
1295     // Make sure the receiver is loaded into a register.
1296     assert(method->size_of_parameters() > 0, "oob");
1297     assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object");
1298     VMReg r = regs[0].first();
1299     assert(r->is_valid(), "bad receiver arg");
1300     if (r->is_stack()) {
1301       // Porting note:  This assumes that compiled calling conventions always
1302       // pass the receiver oop in a register.  If this is not true on some
1303       // platform, pick a temp and load the receiver from stack.
1304       fatal("receiver always in a register");
1305       receiver_reg = r2;  // known to be free at this point
1306       __ ldr(receiver_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size));
1307     } else {
1308       // no data motion is needed
1309       receiver_reg = r->as_Register();
1310     }
1311   }
1312 
1313   // Figure out which address we are really jumping to:
1314   MethodHandles::generate_method_handle_dispatch(masm, iid,
1315                                                  receiver_reg, member_reg, /*for_compiler_entry:*/ true);
1316 }
1317 
1318 // ---------------------------------------------------------------------------
1319 // Generate a native wrapper for a given method.  The method takes arguments
1320 // in the Java compiled code convention, marshals them to the native
1321 // convention (handlizes oops, etc), transitions to native, makes the call,
// returns to Java state (possibly blocking), unhandlizes any result and
1323 // returns.
1324 //
1325 // Critical native functions are a shorthand for the use of
// GetPrimitiveArrayCritical and disallow the use of any other JNI
1327 // functions.  The wrapper is expected to unpack the arguments before
1328 // passing them to the callee. Critical native functions leave the state _in_Java,
1329 // since they block out GC.
// Some other parts of JNI setup are skipped, like the teardown of the JNI handle
// block and the check for pending exceptions, since it's impossible for them
// to be thrown.
1333 //
1334 nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
1335                                                 const methodHandle& method,
1336                                                 int compile_id,
1337                                                 BasicType* in_sig_bt,
1338                                                 VMRegPair* in_regs,
1339                                                 BasicType ret_type) {
1340   if (method->is_continuation_native_intrinsic()) {
1341     int exception_offset = -1;
1342     OopMapSet* oop_maps = new OopMapSet();
1343     int frame_complete = -1;
1344     int stack_slots = -1;
1345     int interpreted_entry_offset = -1;
1346     int vep_offset = -1;
1347     if (method->is_continuation_enter_intrinsic()) {
1348       gen_continuation_enter(masm,
1349                              method,
1350                              in_sig_bt,
1351                              in_regs,
1352                              exception_offset,
1353                              oop_maps,
1354                              frame_complete,
1355                              stack_slots,
1356                              interpreted_entry_offset,
1357                              vep_offset);
1358     } else if (method->is_continuation_yield_intrinsic()) {
1359       gen_continuation_yield(masm,
1360                              method,
1361                              in_sig_bt,
1362                              in_regs,
1363                              oop_maps,
1364                              frame_complete,
1365                              stack_slots,
1366                              vep_offset);
1367     } else {
1368       guarantee(false, "Unknown Continuation native intrinsic");
1369     }
1370 
1371 #ifdef ASSERT
1372     if (method->is_continuation_enter_intrinsic()) {
1373       assert(interpreted_entry_offset != -1, "Must be set");
1374       assert(exception_offset != -1,         "Must be set");
1375     } else {
1376       assert(interpreted_entry_offset == -1, "Must be unset");
1377       assert(exception_offset == -1,         "Must be unset");
1378     }
1379     assert(frame_complete != -1,    "Must be set");
1380     assert(stack_slots != -1,       "Must be set");
1381     assert(vep_offset != -1,        "Must be set");
1382 #endif
1383 
1384     __ flush();
1385     nmethod* nm = nmethod::new_native_nmethod(method,
1386                                               compile_id,
1387                                               masm->code(),
1388                                               vep_offset,
1389                                               frame_complete,
1390                                               stack_slots,
1391                                               in_ByteSize(-1),
1392                                               in_ByteSize(-1),
1393                                               oop_maps,
1394                                               exception_offset);
1395     if (nm == nullptr) return nm;
1396     if (method->is_continuation_enter_intrinsic()) {
1397       ContinuationEntry::set_enter_code(nm, interpreted_entry_offset);
1398     } else if (method->is_continuation_yield_intrinsic()) {
1399       _cont_doYield_stub = nm;
1400     } else {
1401       guarantee(false, "Unknown Continuation native intrinsic");
1402     }
1403     return nm;
1404   }
1405 
1406   if (method->is_method_handle_intrinsic()) {
1407     vmIntrinsics::ID iid = method->intrinsic_id();
1408     intptr_t start = (intptr_t)__ pc();
1409     int vep_offset = ((intptr_t)__ pc()) - start;
1410 
1411     // First instruction must be a nop as it may need to be patched on deoptimization
1412     __ nop();
1413     gen_special_dispatch(masm,
1414                          method,
1415                          in_sig_bt,
1416                          in_regs);
1417     int frame_complete = ((intptr_t)__ pc()) - start;  // not complete, period
1418     __ flush();
1419     int stack_slots = SharedRuntime::out_preserve_stack_slots();  // no out slots at all, actually
1420     return nmethod::new_native_nmethod(method,
1421                                        compile_id,
1422                                        masm->code(),
1423                                        vep_offset,
1424                                        frame_complete,
1425                                        stack_slots / VMRegImpl::slots_per_word,
1426                                        in_ByteSize(-1),
1427                                        in_ByteSize(-1),
1428                                        nullptr);
1429   }
1430   address native_func = method->native_function();
1431   assert(native_func != nullptr, "must have function");
1432 
1433   // An OopMap for lock (and class if static)
1434   OopMapSet *oop_maps = new OopMapSet();
1435   intptr_t start = (intptr_t)__ pc();
1436 
1437   // We have received a description of where all the java args are located
1438   // on entry to the wrapper. We need to convert these args to where
1439   // the jni function will expect them. To figure out where they go
1440   // we convert the java signature to a C signature by inserting
1441   // the hidden arguments as arg[0] and possibly arg[1] (static method)
1442 
1443   const int total_in_args = method->size_of_parameters();
1444   int total_c_args = total_in_args + (method->is_static() ? 2 : 1);
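       // The extra C args are the hidden JNIEnv* (always) and, for static methods,
       // the class mirror (jclass), since there is no receiver to pass.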
1445 
1446   BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
1447   VMRegPair* out_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);
1448 
1449   int argc = 0;
1450   out_sig_bt[argc++] = T_ADDRESS;
1451   if (method->is_static()) {
1452     out_sig_bt[argc++] = T_OBJECT;
1453   }
1454 
1455   for (int i = 0; i < total_in_args ; i++ ) {
1456     out_sig_bt[argc++] = in_sig_bt[i];
1457   }
1458 
1459   // Now figure out where the args must be stored and how much stack space
1460   // they require.
1461   int out_arg_slots;
1462   out_arg_slots = c_calling_convention_priv(out_sig_bt, out_regs, total_c_args);
1463 
1464   if (out_arg_slots < 0) {
1465     return nullptr;
1466   }
1467 
1468   // Compute framesize for the wrapper.  We need to handlize all oops in
1469   // incoming registers
1470 
1471   // Calculate the total number of stack slots we will need.
1472 
1473   // First count the abi requirement plus all of the outgoing args
1474   int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;
1475 
1476   // Now the space for the inbound oop handle area
1477   int total_save_slots = 8 * VMRegImpl::slots_per_word;  // 8 arguments passed in registers
1478 
1479   int oop_handle_offset = stack_slots;
1480   stack_slots += total_save_slots;
1481 
1482   // Now any space we need for handlizing a klass if static method
1483 
1484   int klass_slot_offset = 0;
1485   int klass_offset = -1;
1486   int lock_slot_offset = 0;
1487   bool is_static = false;
1488 
1489   if (method->is_static()) {
1490     klass_slot_offset = stack_slots;
1491     stack_slots += VMRegImpl::slots_per_word;
1492     klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size;
1493     is_static = true;
1494   }
1495 
1496   // Plus a lock if needed
1497 
1498   if (method->is_synchronized()) {
1499     lock_slot_offset = stack_slots;
1500     stack_slots += VMRegImpl::slots_per_word;
1501   }
1502 
1503   // Now a place (+2) to save return values or temp during shuffling
1504   // + 4 for return address (which we own) and saved rfp
1505   stack_slots += 6;
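       // That is 2 slots for the spill/temp area plus 4 slots (two 64-bit words)
       // for the return address and the saved rfp.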
1506 
1507   // Ok The space we have allocated will look like:
1508   //
1509   //
1510   // FP-> |                     |
1511   //      |---------------------|
1512   //      | 2 slots for moves   |
1513   //      |---------------------|
1514   //      | lock box (if sync)  |
1515   //      |---------------------| <- lock_slot_offset
1516   //      | klass (if static)   |
1517   //      |---------------------| <- klass_slot_offset
1518   //      | oopHandle area      |
1519   //      |---------------------| <- oop_handle_offset (8 java arg registers)
1520   //      | outbound memory     |
1521   //      | based arguments     |
1522   //      |                     |
1523   //      |---------------------|
1524   //      |                     |
1525   // SP-> | out_preserved_slots |
1526   //
1527   //
1528 
1529 
1530   // Now compute actual number of stack words we need rounding to make
1531   // stack properly aligned.
1532   stack_slots = align_up(stack_slots, StackAlignmentInSlots);
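       // This rounds the frame up to StackAlignmentInBytes (16 bytes), keeping the
       // stack pointer aligned as the AArch64 ABI requires.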
1533 
1534   int stack_size = stack_slots * VMRegImpl::stack_slot_size;
1535 
1536   // First thing make an ic check to see if we should even be here
1537 
1538   // We are free to use all registers as temps without saving them and
1539   // restoring them except rfp. rfp is the only callee save register
1540   // as far as the interpreter and the compiler(s) are concerned.
1541 
1542   const Register receiver = j_rarg0;
1543 
1544   Label exception_pending;
1545 
1546   assert_different_registers(receiver, rscratch1);
1547   __ verify_oop(receiver);
1548   __ ic_check(8 /* end_alignment */);
1549 
1550   // Verified entry point must be aligned
1551   int vep_offset = ((intptr_t)__ pc()) - start;
1552 
1553   // If we have to make this method not-entrant we'll overwrite its
1554   // first instruction with a jump.  For this action to be legal we
1555   // must ensure that this first instruction is a B, BL, NOP, BKPT,
1556   // SVC, HVC, or SMC.  Make it a NOP.
1557   __ nop();
1558 
1559   if (VM_Version::supports_fast_class_init_checks() && method->needs_clinit_barrier()) {
1560     Label L_skip_barrier;
1561     __ mov_metadata(rscratch2, method->method_holder()); // InstanceKlass*
1562     __ clinit_barrier(rscratch2, rscratch1, &L_skip_barrier);
1563     __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
1564 
1565     __ bind(L_skip_barrier);
1566   }
1567 
1568   // Generate stack overflow check
1569   __ bang_stack_with_offset(checked_cast<int>(StackOverflow::stack_shadow_zone_size()));
1570 
1571   // Generate a new frame for the wrapper.
1572   __ enter();
1573   // -2 because return address is already present and so is saved rfp
1574   __ sub(sp, sp, stack_size - 2*wordSize);
1575 
1576   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
1577   bs->nmethod_entry_barrier(masm, nullptr /* slow_path */, nullptr /* continuation */, nullptr /* guard */);
1578 
1579   // Frame is now completed as far as size and linkage.
1580   int frame_complete = ((intptr_t)__ pc()) - start;
1581 
1582   // We use r20 as the oop handle for the receiver/klass
1583   // It is callee save so it survives the call to native
1584 
1585   const Register oop_handle_reg = r20;
1586 
1587   //
1588   // We immediately shuffle the arguments so that any vm call we have to
1589   // make from here on out (sync slow path, jvmti, etc.) we will have
1590   // captured the oops from our caller and have a valid oopMap for
1591   // them.
1592 
1593   // -----------------
1594   // The Grand Shuffle
1595 
1596   // The Java calling convention is either equal (linux) or denser (win64) than the
1597   // c calling convention. However, because of the jni_env argument the c calling
1598   // convention always has at least one more (and two for static) arguments than Java.
1599   // Therefore if we move the args from java -> c backwards then we will never have
1600   // a register->register conflict and we don't have to build a dependency graph
1601   // and figure out how to break any cycles.
1602   //
1603 
1604   // Record esp-based slot for receiver on stack for non-static methods
1605   int receiver_offset = -1;
1606 
1607   // This is a trick. We double the stack slots so we can claim
1608   // the oops in the caller's frame. Since we are sure to have
1609   // more args than the caller, doubling is enough to make
1610   // sure we can capture all the incoming oop args from the
1611   // caller.
1612   //
1613   OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
1614 
1615   // Mark location of rfp (someday)
1616   // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, vmreg(rfp));
1617 
1618 
1619   int float_args = 0;
1620   int int_args = 0;
1621 
1622 #ifdef ASSERT
1623   bool reg_destroyed[Register::number_of_registers];
1624   bool freg_destroyed[FloatRegister::number_of_registers];
1625   for ( int r = 0 ; r < Register::number_of_registers ; r++ ) {
1626     reg_destroyed[r] = false;
1627   }
1628   for ( int f = 0 ; f < FloatRegister::number_of_registers ; f++ ) {
1629     freg_destroyed[f] = false;
1630   }
1631 
1632 #endif /* ASSERT */
1633 
1634   // For JNI natives the incoming and outgoing registers are offset upwards.
1635   GrowableArray<int> arg_order(2 * total_in_args);
1636 
1637   for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) {
1638     arg_order.push(i);
1639     arg_order.push(c_arg);
1640   }
1641 
1642   for (int ai = 0; ai < arg_order.length(); ai += 2) {
1643     int i = arg_order.at(ai);
1644     int c_arg = arg_order.at(ai + 1);
1645     __ block_comment(err_msg("move %d -> %d", i, c_arg));
1646     assert(c_arg != -1 && i != -1, "wrong order");
1647 #ifdef ASSERT
1648     if (in_regs[i].first()->is_Register()) {
1649       assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!");
1650     } else if (in_regs[i].first()->is_FloatRegister()) {
1651       assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding()], "destroyed reg!");
1652     }
1653     if (out_regs[c_arg].first()->is_Register()) {
1654       reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true;
1655     } else if (out_regs[c_arg].first()->is_FloatRegister()) {
1656       freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true;
1657     }
1658 #endif /* ASSERT */
1659     switch (in_sig_bt[i]) {
1660       case T_ARRAY:
1661       case T_OBJECT:
1662         __ object_move(map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg],
1663                        ((i == 0) && (!is_static)),
1664                        &receiver_offset);
1665         int_args++;
1666         break;
1667       case T_VOID:
1668         break;
1669 
1670       case T_FLOAT:
1671         __ float_move(in_regs[i], out_regs[c_arg]);
1672         float_args++;
1673         break;
1674 
1675       case T_DOUBLE:
1676         assert( i + 1 < total_in_args &&
1677                 in_sig_bt[i + 1] == T_VOID &&
1678                 out_sig_bt[c_arg+1] == T_VOID, "bad arg list");
1679         __ double_move(in_regs[i], out_regs[c_arg]);
1680         float_args++;
1681         break;
1682 
1683       case T_LONG :
1684         __ long_move(in_regs[i], out_regs[c_arg]);
1685         int_args++;
1686         break;
1687 
1688       case T_ADDRESS: assert(false, "found T_ADDRESS in java args");
1689 
1690       default:
1691         __ move32_64(in_regs[i], out_regs[c_arg]);
1692         int_args++;
1693     }
1694   }
1695 
1696   // point c_arg at the first arg that is already loaded in case we
1697   // need to spill before we call out
1698   int c_arg = total_c_args - total_in_args;
1699 
1700   // Pre-load a static method's oop into c_rarg1.
1701   if (method->is_static()) {
1702 
1703     //  load oop into a register
1704     __ movoop(c_rarg1,
1705               JNIHandles::make_local(method->method_holder()->java_mirror()));
1706 
1707     // Now handlize the static class mirror; it's known to be not-null.
1708     __ str(c_rarg1, Address(sp, klass_offset));
1709     map->set_oop(VMRegImpl::stack2reg(klass_slot_offset));
1710 
1711     // Now get the handle
1712     __ lea(c_rarg1, Address(sp, klass_offset));
1713     // and protect the arg if we must spill
1714     c_arg--;
1715   }
1716 
1717   // Change state to native (we save the return address in the thread, since it might not
1718   // be pushed on the stack when we do a stack traversal). It is enough that the pc()
1719   // points into the right code segment. It does not have to be the correct return pc.
1720   // We use the same pc/oopMap repeatedly when we call out.
1721 
1722   Label native_return;
1723   if (LockingMode != LM_LEGACY && method->is_object_wait0()) {
1724     // For convenience we use the pc we want to resume to in case of preemption on Object.wait.
1725     __ set_last_Java_frame(sp, noreg, native_return, rscratch1);
1726   } else {
1727     intptr_t the_pc = (intptr_t) __ pc();
1728     oop_maps->add_gc_map(the_pc - start, map);
1729 
1730     __ set_last_Java_frame(sp, noreg, __ pc(), rscratch1);
1731   }
1732 
1733   Label dtrace_method_entry, dtrace_method_entry_done;
1734   if (DTraceMethodProbes) {
1735     __ b(dtrace_method_entry);
1736     __ bind(dtrace_method_entry_done);
1737   }
1738 
1739   // RedefineClasses() tracing support for obsolete method entry
1740   if (log_is_enabled(Trace, redefine, class, obsolete)) {
1741     // protect the args we've loaded
1742     save_args(masm, total_c_args, c_arg, out_regs);
1743     __ mov_metadata(c_rarg1, method());
1744     __ call_VM_leaf(
1745       CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry),
1746       rthread, c_rarg1);
1747     restore_args(masm, total_c_args, c_arg, out_regs);
1748   }
1749 
1750   // Lock a synchronized method
1751 
1752   // Register definitions used by locking and unlocking
1753 
1754   const Register swap_reg = r0;
1755   const Register obj_reg  = r19;  // Will contain the oop
1756   const Register lock_reg = r13;  // Address of compiler lock object (BasicLock)
1757   const Register old_hdr  = r13;  // value of old header at unlock time
1758   const Register lock_tmp = r14;  // Temporary used by lightweight_lock/unlock
1759   const Register tmp = lr;
1760 
1761   Label slow_path_lock;
1762   Label lock_done;
1763 
1764   if (method->is_synchronized()) {
1765     Label count;
1766     const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes();
1767 
1768     // Get the handle (the 2nd argument)
1769     __ mov(oop_handle_reg, c_rarg1);
1770 
1771     // Get address of the box
1772 
1773     __ lea(lock_reg, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size));
1774 
1775     // Load the oop from the handle
1776     __ ldr(obj_reg, Address(oop_handle_reg, 0));
1777 
1778     if (LockingMode == LM_MONITOR) {
1779       __ b(slow_path_lock);
1780     } else if (LockingMode == LM_LEGACY) {
1781       // Load (object->mark() | 1) into swap_reg %r0
1782       __ ldr(rscratch1, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
1783       __ orr(swap_reg, rscratch1, 1);
1784 
1785       // Save (object->mark() | 1) into BasicLock's displaced header
1786       __ str(swap_reg, Address(lock_reg, mark_word_offset));
1787 
1788       // src -> dest iff dest == r0 else r0 <- dest
1789       __ cmpxchg_obj_header(r0, lock_reg, obj_reg, rscratch1, count, /*fallthrough*/nullptr);
1790 
1791       // Hmm should this move to the slow path code area???
1792 
1793       // Test if the oopMark is an obvious stack pointer, i.e.,
1794       //  1) (mark & 3) == 0, and
1795       //  2) sp <= mark < mark + os::pagesize()
1796       // These 3 tests can be done by evaluating the following
1797       // expression: ((mark - sp) & (3 - os::vm_page_size())),
1798       // assuming both stack pointer and pagesize have their
1799       // least significant 2 bits clear.
1800       // NOTE: the oopMark is in swap_reg %r0 as the result of cmpxchg
1801 
1802       __ sub(swap_reg, sp, swap_reg);
1803       __ neg(swap_reg, swap_reg);
1804       __ ands(swap_reg, swap_reg, 3 - (int)os::vm_page_size());
1805 
1806       // Save the test result, for recursive case, the result is zero
1807       __ str(swap_reg, Address(lock_reg, mark_word_offset));
1808       __ br(Assembler::NE, slow_path_lock);
1809 
1810       __ bind(count);
1811       __ inc_held_monitor_count(rscratch1);
1812     } else {
1813       assert(LockingMode == LM_LIGHTWEIGHT, "must be");
1814       __ lightweight_lock(lock_reg, obj_reg, swap_reg, tmp, lock_tmp, slow_path_lock);
1815     }
1816 
1817     // Slow path will re-enter here
1818     __ bind(lock_done);
1819   }
1820 
1821 
1822   // Finally just about ready to make the JNI call
1823 
1824   // get JNIEnv* which is first argument to native
1825   __ lea(c_rarg0, Address(rthread, in_bytes(JavaThread::jni_environment_offset())));
1826 
1827   // Now set thread in native
1828   __ mov(rscratch1, _thread_in_native);
1829   __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset()));
1830   __ stlrw(rscratch1, rscratch2);
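       // stlrw is a store-release: earlier stores (argument shuffling, last_Java_frame)
       // cannot be reordered past the thread-state change.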
1831 
1832   __ rt_call(native_func);
1833 
1834   // Verify or restore cpu control state after JNI call
1835   __ restore_cpu_control_state_after_jni(rscratch1, rscratch2);
1836 
1837   // Unpack native results.
1838   switch (ret_type) {
1839   case T_BOOLEAN: __ c2bool(r0);                     break;
1840   case T_CHAR   : __ ubfx(r0, r0, 0, 16);            break;
1841   case T_BYTE   : __ sbfx(r0, r0, 0, 8);             break;
1842   case T_SHORT  : __ sbfx(r0, r0, 0, 16);            break;
1843   case T_INT    : __ sbfx(r0, r0, 0, 32);            break;
1844   case T_DOUBLE :
1845   case T_FLOAT  :
1846     // Result is in v0; we'll save as needed
1847     break;
1848   case T_ARRAY:                 // Really a handle
1849   case T_OBJECT:                // Really a handle
1850       break; // can't de-handlize until after safepoint check
1851   case T_VOID: break;
1852   case T_LONG: break;
1853   default       : ShouldNotReachHere();
1854   }
1855 
1856   Label safepoint_in_progress, safepoint_in_progress_done;
1857 
1858   // Switch thread to "native transition" state before reading the synchronization state.
1859   // This additional state is necessary because reading and testing the synchronization
1860   // state is not atomic w.r.t. GC, as this scenario demonstrates:
1861   //     Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
1862   //     VM thread changes sync state to synchronizing and suspends threads for GC.
1863   //     Thread A is resumed to finish this native method, but doesn't block here since it
1864   //     didn't see any synchronization in progress, and escapes.
1865   __ mov(rscratch1, _thread_in_native_trans);
1866 
1867   __ strw(rscratch1, Address(rthread, JavaThread::thread_state_offset()));
1868 
1869   // Force this write out before the read below
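       // When UseSystemMemoryBarrier is enabled the VM issues a system-wide memory
       // barrier when it needs this ordering, so the local full fence can be skipped.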
1870   if (!UseSystemMemoryBarrier) {
1871     __ dmb(Assembler::ISH);
1872   }
1873 
1874   __ verify_sve_vector_length();
1875 
1876   // Check for safepoint operation in progress and/or pending suspend requests.
1877   {
1878     // No need for acquire as Java threads always disarm themselves.
1879     __ safepoint_poll(safepoint_in_progress, true /* at_return */, false /* acquire */, false /* in_nmethod */);
1880     __ ldrw(rscratch1, Address(rthread, JavaThread::suspend_flags_offset()));
1881     __ cbnzw(rscratch1, safepoint_in_progress);
1882     __ bind(safepoint_in_progress_done);
1883   }
1884 
1885   // change thread state
1886   __ mov(rscratch1, _thread_in_Java);
1887   __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset()));
1888   __ stlrw(rscratch1, rscratch2);
1889 
1890   if (LockingMode != LM_LEGACY && method->is_object_wait0()) {
1891     // Check preemption for Object.wait()
1892     __ ldr(rscratch1, Address(rthread, JavaThread::preempt_alternate_return_offset()));
1893     __ cbz(rscratch1, native_return);
1894     __ str(zr, Address(rthread, JavaThread::preempt_alternate_return_offset()));
1895     __ br(rscratch1);
1896     __ bind(native_return);
1897 
1898     intptr_t the_pc = (intptr_t) __ pc();
1899     oop_maps->add_gc_map(the_pc - start, map);
1900   }
1901 
1902   Label reguard;
1903   Label reguard_done;
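       // If the stack guard yellow zone was disabled (e.g. after a stack overflow
       // handled while in native), re-enable it before returning to Java code.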
1904   __ ldrb(rscratch1, Address(rthread, JavaThread::stack_guard_state_offset()));
1905   __ cmpw(rscratch1, StackOverflow::stack_guard_yellow_reserved_disabled);
1906   __ br(Assembler::EQ, reguard);
1907   __ bind(reguard_done);
1908 
1909   // native result if any is live
1910 
1911   // Unlock
1912   Label unlock_done;
1913   Label slow_path_unlock;
1914   if (method->is_synchronized()) {
1915 
1916     // Get locked oop from the handle we passed to jni
1917     __ ldr(obj_reg, Address(oop_handle_reg, 0));
1918 
1919     Label done, not_recursive;
1920 
1921     if (LockingMode == LM_LEGACY) {
1922       // Simple recursive lock?
1923       __ ldr(rscratch1, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size));
1924       __ cbnz(rscratch1, not_recursive);
1925       __ dec_held_monitor_count(rscratch1);
1926       __ b(done);
1927     }
1928 
1929     __ bind(not_recursive);
1930 
1931     // Must save r0 if it is live now because cmpxchg must use it
1932     if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) {
1933       save_native_result(masm, ret_type, stack_slots);
1934     }
1935 
1936     if (LockingMode == LM_MONITOR) {
1937       __ b(slow_path_unlock);
1938     } else if (LockingMode == LM_LEGACY) {
1939       // get address of the stack lock
1940       __ lea(r0, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size));
1941       //  get old displaced header
1942       __ ldr(old_hdr, Address(r0, 0));
1943 
1944       // Atomic swap old header if oop still contains the stack lock
1945       Label count;
1946       __ cmpxchg_obj_header(r0, old_hdr, obj_reg, rscratch1, count, &slow_path_unlock);
1947       __ bind(count);
1948       __ dec_held_monitor_count(rscratch1);
1949     } else {
1950       assert(LockingMode == LM_LIGHTWEIGHT, "");
1951       __ lightweight_unlock(obj_reg, old_hdr, swap_reg, lock_tmp, slow_path_unlock);
1952     }
1953 
1954     // slow path re-enters here
1955     __ bind(unlock_done);
1956     if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) {
1957       restore_native_result(masm, ret_type, stack_slots);
1958     }
1959 
1960     __ bind(done);
1961   }
1962 
1963   Label dtrace_method_exit, dtrace_method_exit_done;
1964   if (DTraceMethodProbes) {
1965     __ b(dtrace_method_exit);
1966     __ bind(dtrace_method_exit_done);
1967   }
1968 
1969   __ reset_last_Java_frame(false);
1970 
1971   // Unbox oop result, e.g. JNIHandles::resolve result.
1972   if (is_reference_type(ret_type)) {
1973     __ resolve_jobject(r0, r1, r2);
1974   }
1975 
1976   if (CheckJNICalls) {
1977     // clear_pending_jni_exception_check
1978     __ str(zr, Address(rthread, JavaThread::pending_jni_exception_check_fn_offset()));
1979   }
1980 
1981   // reset handle block
1982   __ ldr(r2, Address(rthread, JavaThread::active_handles_offset()));
1983   __ str(zr, Address(r2, JNIHandleBlock::top_offset()));
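       // Zeroing the block's top index effectively frees all JNI local handles
       // created during the native call.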
1984 
1985   __ leave();
1986 
1987   // Any exception pending?
1988   __ ldr(rscratch1, Address(rthread, in_bytes(Thread::pending_exception_offset())));
1989   __ cbnz(rscratch1, exception_pending);
1990 
1991   // We're done
1992   __ ret(lr);
1993 
1994   // Unexpected paths are out of line and go here
1995 
1996   // forward the exception
1997   __ bind(exception_pending);
1998 
1999   // and forward the exception
2000   __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
2001 
2002   // Slow path locking & unlocking
2003   if (method->is_synchronized()) {
2004 
2005     __ block_comment("Slow path lock {");
2006     __ bind(slow_path_lock);
2007 
2008     // last_Java_frame is already set up. No exceptions are possible, so do a vanilla call, not call_VM
2009     // args are (oop obj, BasicLock* lock, JavaThread* thread)
2010 
2011     // protect the args we've loaded
2012     save_args(masm, total_c_args, c_arg, out_regs);
2013 
2014     __ mov(c_rarg0, obj_reg);
2015     __ mov(c_rarg1, lock_reg);
2016     __ mov(c_rarg2, rthread);
2017 
2018     // Not a leaf but we have last_Java_frame setup as we want.
2019     // We don't want to unmount in case of contention since that would complicate preserving
2020     // the arguments that had already been marshalled into the native convention. So we force
2021     // the freeze slow path to find this native wrapper frame (see recurse_freeze_native_frame())
2022     // and pin the vthread. Otherwise the fast path won't find it since we don't walk the stack.
2023     __ push_cont_fastpath();
2024     __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), 3);
2025     __ pop_cont_fastpath();
2026     restore_args(masm, total_c_args, c_arg, out_regs);
2027 
2028 #ifdef ASSERT
2029     { Label L;
2030       __ ldr(rscratch1, Address(rthread, in_bytes(Thread::pending_exception_offset())));
2031       __ cbz(rscratch1, L);
2032       __ stop("no pending exception allowed on exit from monitorenter");
2033       __ bind(L);
2034     }
2035 #endif
2036     __ b(lock_done);
2037 
2038     __ block_comment("} Slow path lock");
2039 
2040     __ block_comment("Slow path unlock {");
2041     __ bind(slow_path_unlock);
2042 
2043     // If we haven't already saved the native result we must save it now, as the
2044     // result registers are still exposed across the runtime call below.
2045 
2046     if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) {
2047       save_native_result(masm, ret_type, stack_slots);
2048     }
2049 
2050     __ mov(c_rarg2, rthread);
2051     __ lea(c_rarg1, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size));
2052     __ mov(c_rarg0, obj_reg);
2053 
2054     // Save pending exception around call to VM (which contains an EXCEPTION_MARK)
2055     // NOTE that obj_reg == r19 currently
2056     __ ldr(r19, Address(rthread, in_bytes(Thread::pending_exception_offset())));
2057     __ str(zr, Address(rthread, in_bytes(Thread::pending_exception_offset())));
2058 
2059     __ rt_call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C));
2060 
2061 #ifdef ASSERT
2062     {
2063       Label L;
2064       __ ldr(rscratch1, Address(rthread, in_bytes(Thread::pending_exception_offset())));
2065       __ cbz(rscratch1, L);
2066       __ stop("no pending exception allowed on exit complete_monitor_unlocking_C");
2067       __ bind(L);
2068     }
2069 #endif /* ASSERT */
2070 
2071     __ str(r19, Address(rthread, in_bytes(Thread::pending_exception_offset())));
2072 
2073     if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) {
2074       restore_native_result(masm, ret_type, stack_slots);
2075     }
2076     __ b(unlock_done);
2077 
2078     __ block_comment("} Slow path unlock");
2079 
2080   } // synchronized
2081 
2082   // SLOW PATH Reguard the stack if needed
2083 
2084   __ bind(reguard);
2085   save_native_result(masm, ret_type, stack_slots);
2086   __ rt_call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages));
2087   restore_native_result(masm, ret_type, stack_slots);
2088   // and continue
2089   __ b(reguard_done);
2090 
2091   // SLOW PATH safepoint
2092   {
2093     __ block_comment("safepoint {");
2094     __ bind(safepoint_in_progress);
2095 
2096     // Don't use call_VM as it will see a possible pending exception and forward it
2097     // and never return here preventing us from clearing _last_native_pc down below.
2098     //
2099     save_native_result(masm, ret_type, stack_slots);
2100     __ mov(c_rarg0, rthread);
2101 #ifndef PRODUCT
2102   assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area");
2103 #endif
2104     __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)));
2105     __ blr(rscratch1);
2106 
2107     // Restore any method result value
2108     restore_native_result(masm, ret_type, stack_slots);
2109 
2110     __ b(safepoint_in_progress_done);
2111     __ block_comment("} safepoint");
2112   }
2113 
2114   // SLOW PATH dtrace support
2115   if (DTraceMethodProbes) {
2116     {
2117       __ block_comment("dtrace entry {");
2118       __ bind(dtrace_method_entry);
2119 
2120       // We have all of the arguments set up at this point. We must not touch any of the
2121       // argument registers here, so save/restore them around the call below.
2122 
2123       save_args(masm, total_c_args, c_arg, out_regs);
2124       __ mov_metadata(c_rarg1, method());
2125       __ call_VM_leaf(
2126         CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry),
2127         rthread, c_rarg1);
2128       restore_args(masm, total_c_args, c_arg, out_regs);
2129       __ b(dtrace_method_entry_done);
2130       __ block_comment("} dtrace entry");
2131     }
2132 
2133     {
2134       __ block_comment("dtrace exit {");
2135       __ bind(dtrace_method_exit);
2136       save_native_result(masm, ret_type, stack_slots);
2137       __ mov_metadata(c_rarg1, method());
2138       __ call_VM_leaf(
2139         CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit),
2140         rthread, c_rarg1);
2141       restore_native_result(masm, ret_type, stack_slots);
2142       __ b(dtrace_method_exit_done);
2143       __ block_comment("} dtrace exit");
2144     }
2145   }
2146 
2147   __ flush();
2148 
2149   nmethod *nm = nmethod::new_native_nmethod(method,
2150                                             compile_id,
2151                                             masm->code(),
2152                                             vep_offset,
2153                                             frame_complete,
2154                                             stack_slots / VMRegImpl::slots_per_word,
2155                                             (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
2156                                             in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size),
2157                                             oop_maps);
2158 
2159   return nm;
2160 }
2161 
2162 // This function returns the adjustment size (in number of words) to a c2i adapter
2163 // activation for use during deoptimization.
2164 int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
2165   assert(callee_locals >= callee_parameters,
2166           "test and remove; got more parms than locals");
2167   if (callee_locals < callee_parameters)
2168     return 0;                   // No adjustment for negative locals
2169   int diff = (callee_locals - callee_parameters) * Interpreter::stackElementWords;
2170   // diff is counted in stack words
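       // Round up to an even number of words so the extended interpreter frame
       // keeps the stack 16-byte aligned.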
2171   return align_up(diff, 2);
2172 }
2173 
2174 
2175 //------------------------------generate_deopt_blob----------------------------
2176 void SharedRuntime::generate_deopt_blob() {
2177   // Allocate space for the code
2178   ResourceMark rm;
2179   // Setup code generation tools
2180   int pad = 0;
2181 #if INCLUDE_JVMCI
2182   if (EnableJVMCI) {
2183     pad += 512; // Increase the buffer size when compiling for JVMCI
2184   }
2185 #endif
2186   const char* name = SharedRuntime::stub_name(SharedStubId::deopt_id);
2187   CodeBuffer buffer(name, 2048+pad, 1024);
2188   MacroAssembler* masm = new MacroAssembler(&buffer);
2189   int frame_size_in_words;
2190   OopMap* map = nullptr;
2191   OopMapSet *oop_maps = new OopMapSet();
2192   RegisterSaver reg_save(COMPILER2_OR_JVMCI != 0);
2193 
2194   // -------------
2195   // This code enters when returning to a de-optimized nmethod.  A return
2196   // address has been pushed on the stack, and return values are in
2197   // registers.
2198   // If we are doing a normal deopt then we were called from the patched
2199   // nmethod from the point we returned to the nmethod. So the return
2200   // address on the stack is wrong by NativeCall::instruction_size
2201   // We will adjust the value so it looks like we have the original return
2202   // address on the stack (like when we eagerly deoptimized).
2203   // In the case of an exception pending when deoptimizing, we enter
2204   // with a return address on the stack that points after the call we patched
2205   // into the exception handler. We have the following register state from,
2206   // e.g., the forward exception stub (see stubGenerator_aarch64.cpp).
2207   //    r0: exception oop
2208   //    r19: exception handler
2209   //    r3: throwing pc
2210   // So in this case we simply jam r3 into the useless return address and
2211   // the stack looks just like we want.
2212   //
2213   // At this point we need to de-opt.  We save the argument return
2214   // registers.  We call the first C routine, fetch_unroll_info().  This
2215   // routine captures the return values and returns a structure which
2216   // describes the current frame size and the sizes of all replacement frames.
2217   // The current frame is compiled code and may contain many inlined
2218   // functions, each with their own JVM state.  We pop the current frame, then
2219   // push all the new frames.  Then we call the C routine unpack_frames() to
2220   // populate these frames.  Finally unpack_frames() returns us the new target
2221   // address.  Notice that callee-save registers are BLOWN here; they have
2222   // already been captured in the vframeArray at the time the return PC was
2223   // patched.
2224   address start = __ pc();
2225   Label cont;
2226 
2227   // Prolog for non exception case!
2228 
2229   // Save everything in sight.
2230   map = reg_save.save_live_registers(masm, 0, &frame_size_in_words);
2231 
2232   // Normal deoptimization.  Save exec mode for unpack_frames.
2233   __ movw(rcpool, Deoptimization::Unpack_deopt); // callee-saved
2234   __ b(cont);
2235 
2236   int reexecute_offset = __ pc() - start;
2237 #if INCLUDE_JVMCI && !defined(COMPILER1)
2238   if (UseJVMCICompiler) {
2239     // JVMCI does not use this kind of deoptimization
2240     __ should_not_reach_here();
2241   }
2242 #endif
2243 
2244   // Reexecute case
2245   // return address is the pc that describes what bci to re-execute at
2246 
2247   // No need to update map as each call to save_live_registers will produce identical oopmap
2248   (void) reg_save.save_live_registers(masm, 0, &frame_size_in_words);
2249 
2250   __ movw(rcpool, Deoptimization::Unpack_reexecute); // callee-saved
2251   __ b(cont);
2252 
2253 #if INCLUDE_JVMCI
2254   Label after_fetch_unroll_info_call;
2255   int implicit_exception_uncommon_trap_offset = 0;
2256   int uncommon_trap_offset = 0;
2257 
2258   if (EnableJVMCI) {
2259     implicit_exception_uncommon_trap_offset = __ pc() - start;
2260 
2261     __ ldr(lr, Address(rthread, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset())));
2262     __ str(zr, Address(rthread, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset())));
2263 
2264     uncommon_trap_offset = __ pc() - start;
2265 
2266     // Save everything in sight.
2267     reg_save.save_live_registers(masm, 0, &frame_size_in_words);
2268     // fetch_unroll_info needs to call last_java_frame()
2269     Label retaddr;
2270     __ set_last_Java_frame(sp, noreg, retaddr, rscratch1);
2271 
2272     __ ldrw(c_rarg1, Address(rthread, in_bytes(JavaThread::pending_deoptimization_offset())));
2273     __ movw(rscratch1, -1);
2274     __ strw(rscratch1, Address(rthread, in_bytes(JavaThread::pending_deoptimization_offset())));
2275 
2276     __ movw(rcpool, (int32_t)Deoptimization::Unpack_reexecute);
2277     __ mov(c_rarg0, rthread);
2278     __ movw(c_rarg2, rcpool); // exec mode
2279     __ lea(rscratch1,
2280            RuntimeAddress(CAST_FROM_FN_PTR(address,
2281                                            Deoptimization::uncommon_trap)));
2282     __ blr(rscratch1);
2283     __ bind(retaddr);
2284     oop_maps->add_gc_map( __ pc()-start, map->deep_copy());
2285 
2286     __ reset_last_Java_frame(false);
2287 
2288     __ b(after_fetch_unroll_info_call);
2289   } // EnableJVMCI
2290 #endif // INCLUDE_JVMCI
2291 
2292   int exception_offset = __ pc() - start;
2293 
2294   // Prolog for exception case
2295 
2296   // all registers are dead at this entry point, except for r0, and
2297   // r3 which contain the exception oop and exception pc
2298   // respectively.  Set them in TLS and fall thru to the
2299   // unpack_with_exception_in_tls entry point.
2300 
2301   __ str(r3, Address(rthread, JavaThread::exception_pc_offset()));
2302   __ str(r0, Address(rthread, JavaThread::exception_oop_offset()));
2303 
2304   int exception_in_tls_offset = __ pc() - start;
2305 
2306   // new implementation because exception oop is now passed in JavaThread
2307 
2308   // Prolog for exception case
2309   // All registers must be preserved because they might be used by LinearScan
2310   // Exception oop and throwing PC are passed in JavaThread
2311   // tos: stack at point of call to method that threw the exception (i.e. only
2312   // args are on the stack, no return address)
2313 
2314   // The return address pushed by save_live_registers will be patched
2315   // later with the throwing pc. The correct value is not available
2316   // now because loading it from memory would destroy registers.
2317 
2318   // NB: The SP at this point must be the SP of the method that is
2319   // being deoptimized.  Deoptimization assumes that the frame created
2320   // here by save_live_registers is immediately below the method's SP.
2321   // This is a somewhat fragile mechanism.
2322 
2323   // Save everything in sight.
2324   map = reg_save.save_live_registers(masm, 0, &frame_size_in_words);
2325 
2326   // Now it is safe to overwrite any register
2327 
2328   // Deopt during an exception.  Save exec mode for unpack_frames.
2329   __ mov(rcpool, Deoptimization::Unpack_exception); // callee-saved
2330 
2331   // load throwing pc from JavaThread and patch it as the return address
2332   // of the current frame. Then clear the field in JavaThread
2333   __ ldr(r3, Address(rthread, JavaThread::exception_pc_offset()));
2334   __ protect_return_address(r3);
2335   __ str(r3, Address(rfp, wordSize));
2336   __ str(zr, Address(rthread, JavaThread::exception_pc_offset()));
2337 
2338 #ifdef ASSERT
2339   // verify that there is really an exception oop in JavaThread
2340   __ ldr(r0, Address(rthread, JavaThread::exception_oop_offset()));
2341   __ verify_oop(r0);
2342 
2343   // verify that there is no pending exception
2344   Label no_pending_exception;
2345   __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset()));
2346   __ cbz(rscratch1, no_pending_exception);
2347   __ stop("must not have pending exception here");
2348   __ bind(no_pending_exception);
2349 #endif
2350 
2351   __ bind(cont);
2352 
2353   // Call C code.  Need thread and this frame, but NOT official VM entry
2354   // crud.  We cannot block on this call, no GC can happen.
2355   //
2356   // UnrollBlock* fetch_unroll_info(JavaThread* thread)
2357 
2358   // fetch_unroll_info needs to call last_java_frame().
2359 
2360   Label retaddr;
2361   __ set_last_Java_frame(sp, noreg, retaddr, rscratch1);
2362 #ifdef ASSERT
2363   { Label L;
2364     __ ldr(rscratch1, Address(rthread, JavaThread::last_Java_fp_offset()));
2365     __ cbz(rscratch1, L);
2366     __ stop("SharedRuntime::generate_deopt_blob: last_Java_fp not cleared");
2367     __ bind(L);
2368   }
2369 #endif // ASSERT
2370   __ mov(c_rarg0, rthread);
2371   __ mov(c_rarg1, rcpool);
2372   __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info)));
2373   __ blr(rscratch1);
2374   __ bind(retaddr);
2375 
2376   // Need to have an oopmap that tells fetch_unroll_info where to
2377   // find any register it might need.
2378   oop_maps->add_gc_map(__ pc() - start, map);
2379 
2380   __ reset_last_Java_frame(false);
2381 
2382 #if INCLUDE_JVMCI
2383   if (EnableJVMCI) {
2384     __ bind(after_fetch_unroll_info_call);
2385   }
2386 #endif
2387 
2388   // Load UnrollBlock* into r5
2389   __ mov(r5, r0);
2390 
2391   __ ldrw(rcpool, Address(r5, Deoptimization::UnrollBlock::unpack_kind_offset()));
2392   Label noException;
2393   __ cmpw(rcpool, Deoptimization::Unpack_exception);   // Was exception pending?
2394   __ br(Assembler::NE, noException);
2395   __ ldr(r0, Address(rthread, JavaThread::exception_oop_offset()));
2396   // QQQ this is useless it was null above
2397   __ ldr(r3, Address(rthread, JavaThread::exception_pc_offset()));
2398   __ str(zr, Address(rthread, JavaThread::exception_oop_offset()));
2399   __ str(zr, Address(rthread, JavaThread::exception_pc_offset()));
2400 
2401   __ verify_oop(r0);
2402 
2403   // Overwrite the result registers with the exception results.
2404   __ str(r0, Address(sp, reg_save.r0_offset_in_bytes()));
2405   // I think this is useless
2406   // __ str(r3, Address(sp, RegisterSaver::r3_offset_in_bytes()));
2407 
2408   __ bind(noException);
2409 
2410   // Only register save data is on the stack.
2411   // Now restore the result registers.  Everything else is either dead
2412   // or captured in the vframeArray.
2413 
2414   // Restore fp result register
2415   __ ldrd(v0, Address(sp, reg_save.v0_offset_in_bytes()));
2416   // Restore integer result register
2417   __ ldr(r0, Address(sp, reg_save.r0_offset_in_bytes()));
2418 
2419   // Pop all of the register save area off the stack
2420   __ add(sp, sp, frame_size_in_words * wordSize);
2421 
2422   // All of the register save area has been popped off the stack. Only the
2423   // return address remains.
2424 
2425   // Pop all the frames we must move/replace.
2426   //
2427   // Frame picture (youngest to oldest)
2428   // 1: self-frame (no frame link)
2429   // 2: deopting frame  (no frame link)
2430   // 3: caller of deopting frame (could be compiled/interpreted).
2431   //
2432   // Note: by leaving the return address of self-frame on the stack
2433   // and using the size of frame 2 to adjust the stack
2434   // when we are done the return to frame 3 will still be on the stack.
2435 
2436   // Pop deoptimized frame
2437   __ ldrw(r2, Address(r5, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset()));
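       // The recorded frame size includes the two words for the saved rfp and the
       // return address; leave those on the stack so the ldp below can restore rfp
       // and discard the stale return pc.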
2438   __ sub(r2, r2, 2 * wordSize);
2439   __ add(sp, sp, r2);
2440   __ ldp(rfp, zr, __ post(sp, 2 * wordSize));
2441 
2442 #ifdef ASSERT
2443   // Compilers generate code that bang the stack by as much as the
2444   // interpreter would need. So this stack banging should never
2445   // trigger a fault. Verify that it does not on non product builds.
2446   __ ldrw(r19, Address(r5, Deoptimization::UnrollBlock::total_frame_sizes_offset()));
2447   __ bang_stack_size(r19, r2);
2448 #endif
2449   // Load address of array of frame pcs into r2
2450   __ ldr(r2, Address(r5, Deoptimization::UnrollBlock::frame_pcs_offset()));
2451 
2452   // Trash the old pc
2453   // __ addptr(sp, wordSize);  FIXME ????
2454 
2455   // Load address of array of frame sizes into r4
2456   __ ldr(r4, Address(r5, Deoptimization::UnrollBlock::frame_sizes_offset()));
2457 
2458   // Load counter into r3
2459   __ ldrw(r3, Address(r5, Deoptimization::UnrollBlock::number_of_frames_offset()));
2460 
2461   // Now adjust the caller's stack to make up for the extra locals
2462   // but record the original sp so that we can save it in the skeletal interpreter
2463   // frame and the stack walking of interpreter_sender will get the unextended sp
2464   // value and not the "real" sp value.
2465 
2466   const Register sender_sp = r6;
2467 
2468   __ mov(sender_sp, sp);
2469   __ ldrw(r19, Address(r5,
2470                        Deoptimization::UnrollBlock::
2471                        caller_adjustment_offset()));
2472   __ sub(sp, sp, r19);
2473 
2474   // Push interpreter frames in a loop
2475   __ mov(rscratch1, (uint64_t)0xDEADDEAD);        // Make a recognizable pattern
2476   __ mov(rscratch2, rscratch1);
2477   Label loop;
2478   __ bind(loop);
2479   __ ldr(r19, Address(__ post(r4, wordSize)));          // Load frame size
2480   __ sub(r19, r19, 2*wordSize);           // We'll push pc and fp by hand
2481   __ ldr(lr, Address(__ post(r2, wordSize)));  // Load pc
2482   __ enter();                           // Save old & set new fp
2483   __ sub(sp, sp, r19);                  // Prolog
2484   // This value is corrected by layout_activation_impl
2485   __ str(zr, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize));
2486   __ str(sender_sp, Address(rfp, frame::interpreter_frame_sender_sp_offset * wordSize)); // Make it walkable
2487   __ mov(sender_sp, sp);               // Pass sender_sp to next frame
2488   __ sub(r3, r3, 1);                   // Decrement counter
2489   __ cbnz(r3, loop);
2490 
2491   // Re-push self-frame
2492   __ ldr(lr, Address(r2));
2493   __ enter();
2494 
2495   // Allocate a full sized register save area.  We subtract 2 because
2496   // enter() just pushed 2 words
2497   __ sub(sp, sp, (frame_size_in_words - 2) * wordSize);
2498 
2499   // Restore frame locals after moving the frame
2500   __ strd(v0, Address(sp, reg_save.v0_offset_in_bytes()));
2501   __ str(r0, Address(sp, reg_save.r0_offset_in_bytes()));
2502 
2503   // Call C code.  Need thread but NOT official VM entry
2504   // crud.  We cannot block on this call, no GC can happen.  Call should
2505   // restore return values to their stack-slots with the new SP.
2506   //
2507   // void Deoptimization::unpack_frames(JavaThread* thread, int exec_mode)
2508 
2509   // Use rfp because the frames look interpreted now
2510   // Don't need the precise return PC here, just precise enough to point into this code blob.
2511   address the_pc = __ pc();
2512   __ set_last_Java_frame(sp, rfp, the_pc, rscratch1);
2513 
2514   __ mov(c_rarg0, rthread);
2515   __ movw(c_rarg1, rcpool); // second arg: exec_mode
2516   __ lea(rscratch1, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)));
2517   __ blr(rscratch1);
2518 
2519   // Set an oopmap for the call site
2520   // Use the same PC we used for the last java frame
2521   oop_maps->add_gc_map(the_pc - start,
2522                        new OopMap( frame_size_in_words, 0 ));
2523 
2524   // Clear fp AND pc
2525   __ reset_last_Java_frame(true);
2526 
2527   // Collect return values
2528   __ ldrd(v0, Address(sp, reg_save.v0_offset_in_bytes()));
2529   __ ldr(r0, Address(sp, reg_save.r0_offset_in_bytes()));
2530   // I think this is useless (throwing pc?)
2531   // __ ldr(r3, Address(sp, RegisterSaver::r3_offset_in_bytes()));
2532 
2533   // Pop self-frame.
2534   __ leave();                           // Epilog
2535 
2536   // Jump to interpreter
2537   __ ret(lr);
2538 
2539   // Make sure all code is generated
2540   masm->flush();
2541 
2542   _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words);
2543   _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
2544 #if INCLUDE_JVMCI
2545   if (EnableJVMCI) {
2546     _deopt_blob->set_uncommon_trap_offset(uncommon_trap_offset);
2547     _deopt_blob->set_implicit_exception_uncommon_trap_offset(implicit_exception_uncommon_trap_offset);
2548   }
2549 #endif
2550 }
2551 
2552 // Number of stack slots between incoming argument block and the start of
2553 // a new frame.  The PROLOG must add this many slots to the stack.  The
2554 // EPILOG must remove this many slots. aarch64 needs two words (four
2555 // 32-bit slots) for the return address and fp.
2556 // TODO think this is correct but check
2557 uint SharedRuntime::in_preserve_stack_slots() {
2558   return 4;
2559 }
2560 
2561 uint SharedRuntime::out_preserve_stack_slots() {
2562   return 0;
2563 }
2564 
2565 
2566 VMReg SharedRuntime::thread_register() {
2567   return rthread->as_VMReg();
2568 }
2569 
2570 //------------------------------generate_handler_blob------
2571 //
2572 // Generate a special Compile2Runtime blob that saves all registers,
2573 // and setup oopmap.
2574 //
2575 SafepointBlob* SharedRuntime::generate_handler_blob(SharedStubId id, address call_ptr) {
2576   assert(is_polling_page_id(id), "expected a polling page stub id");
2577 
2578   ResourceMark rm;
2579   OopMapSet *oop_maps = new OopMapSet();
2580   OopMap* map;
2581 
2582   // Allocate space for the code.  Setup code generation tools.
2583   const char* name = SharedRuntime::stub_name(id);
2584   CodeBuffer buffer(name, 2048, 1024);
2585   MacroAssembler* masm = new MacroAssembler(&buffer);
2586 
2587   address start   = __ pc();
2588   address call_pc = nullptr;
2589   int frame_size_in_words;
2590   bool cause_return = (id == SharedStubId::polling_page_return_handler_id);
2591   RegisterSaver reg_save(id == SharedStubId::polling_page_vectors_safepoint_handler_id /* save_vectors */);
2592 
2593   // When the signal occurred, the LR was either signed and stored on the stack (in which
2594   // case it will be restored from the stack before being used) or unsigned and not stored
2595   // on the stack. Stripping ensures we get the right value.
2596   __ strip_return_address();
2597 
2598   // Save Integer and Float registers.
2599   map = reg_save.save_live_registers(masm, 0, &frame_size_in_words);
2600 
2601   // The following is basically a call_VM.  However, we need the precise
2602   // address of the call in order to generate an oopmap. Hence, we do all the
2603   // work ourselves.
2604 
2605   Label retaddr;
2606   __ set_last_Java_frame(sp, noreg, retaddr, rscratch1);
2607 
2608   // The return address must always be correct so that frame constructor never
2609   // sees an invalid pc.
2610 
2611   if (!cause_return) {
2612     // overwrite the return address pushed by save_live_registers
2613     // Additionally, r20 is a callee-saved register so we can look at
2614     // it later to determine if someone changed the return address for
2615     // us!
2616     __ ldr(r20, Address(rthread, JavaThread::saved_exception_pc_offset()));
2617     __ protect_return_address(r20);
2618     __ str(r20, Address(rfp, wordSize));
2619   }
2620 
2621   // Do the call
2622   __ mov(c_rarg0, rthread);
2623   __ lea(rscratch1, RuntimeAddress(call_ptr));
2624   __ blr(rscratch1);
2625   __ bind(retaddr);
2626 
2627   // Set an oopmap for the call site.  This oopmap will map all
2628   // oop-registers and debug-info registers as callee-saved.  This
2629   // will allow deoptimization at this safepoint to find all possible
2630   // debug-info recordings, as well as let GC find all oops.
2631 
2632   oop_maps->add_gc_map( __ pc() - start, map);
2633 
2634   Label noException;
2635 
2636   __ reset_last_Java_frame(false);
2637 
2638   __ membar(Assembler::LoadLoad | Assembler::LoadStore);
2639 
2640   __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset()));
2641   __ cbz(rscratch1, noException);
2642 
2643   // Exception pending
2644 
2645   reg_save.restore_live_registers(masm);
2646 
2647   __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
2648 
2649   // No exception case
2650   __ bind(noException);
2651 
2652   Label no_adjust, bail;
2653   if (!cause_return) {
2654     // If our stashed return pc was modified by the runtime we avoid touching it
2655     __ ldr(rscratch1, Address(rfp, wordSize));
2656     __ cmp(r20, rscratch1);
2657     __ br(Assembler::NE, no_adjust);
2658     __ authenticate_return_address(r20);
2659 
2660 #ifdef ASSERT
2661     // Verify the correct encoding of the poll we're about to skip.
2662     // See NativeInstruction::is_ldrw_to_zr()
2663     __ ldrw(rscratch1, Address(r20));
2664     __ ubfx(rscratch2, rscratch1, 22, 10);
2665     __ cmpw(rscratch2, 0b1011100101);
2666     __ br(Assembler::NE, bail);
2667     __ ubfx(rscratch2, rscratch1, 0, 5);
2668     __ cmpw(rscratch2, 0b11111);
2669     __ br(Assembler::NE, bail);
2670 #endif
2671     // Adjust return pc forward to step over the safepoint poll instruction
2672     __ add(r20, r20, NativeInstruction::instruction_size);
2673     __ protect_return_address(r20);
2674     __ str(r20, Address(rfp, wordSize));
2675   }
2676 
2677   __ bind(no_adjust);
2678   // Normal exit, restore registers and exit.
2679   reg_save.restore_live_registers(masm);
2680 
2681   __ ret(lr);
2682 
2683 #ifdef ASSERT
2684   __ bind(bail);
2685   __ stop("Attempting to adjust pc to skip safepoint poll but the return point is not what we expected");
2686 #endif
2687 
2688   // Make sure all code is generated
2689   masm->flush();
2690 
2691   // Fill-out other meta info
2692   return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words);
2693 }
2694 
2695 //
2696 // generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss)
2697 //
2698 // Generate a stub that calls into vm to find out the proper destination
2699 // of a java call. All the argument registers are live at this point
2700 // but since this is generic code we don't know what they are and the caller
2701 // must do any gc of the args.
2702 //
2703 RuntimeStub* SharedRuntime::generate_resolve_blob(SharedStubId id, address destination) {
2704   assert (StubRoutines::forward_exception_entry() != nullptr, "must be generated before");
2705   assert(is_resolve_id(id), "expected a resolve stub id");

  // allocate space for the code
  ResourceMark rm;

  const char* name = SharedRuntime::stub_name(id);
  CodeBuffer buffer(name, 1000, 512);
  MacroAssembler* masm                = new MacroAssembler(&buffer);

  int frame_size_in_words;
  RegisterSaver reg_save(false /* save_vectors */);

  OopMapSet *oop_maps = new OopMapSet();
  OopMap* map = nullptr;

  int start = __ offset();

  map = reg_save.save_live_registers(masm, 0, &frame_size_in_words);
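  // The returned OopMap records where each register was saved, so a GC at this
  // call site can find and update oops held in the (still live) argument registers.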

  int frame_complete = __ offset();

  {
    Label retaddr;
    __ set_last_Java_frame(sp, noreg, retaddr, rscratch1);
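    // set_last_Java_frame publishes a walkable frame (current sp and the pc
    // bound at retaddr) so the runtime can traverse this stub frame; fp is not
    // needed here (noreg).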

    __ mov(c_rarg0, rthread);
    __ lea(rscratch1, RuntimeAddress(destination));

    __ blr(rscratch1);
    __ bind(retaddr);
  }

  // Set an oopmap for the call site.
  // We need this not only for callee-saved registers, but also for volatile
  // registers that the compiler might be keeping live across a safepoint.

  oop_maps->add_gc_map( __ offset() - start, map);

  // r0 contains the address we are going to jump to, assuming no exception was installed

  // clear last_Java_sp
  __ reset_last_Java_frame(false);
  // check for pending exceptions
  Label pending;
  __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset()));
  __ cbnz(rscratch1, pending);

  // get the returned Method*
  __ get_vm_result_2(rmethod, rthread);
  __ str(rmethod, Address(sp, reg_save.reg_offset_in_bytes(rmethod)));
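  // The Method* came back in the thread's vm_result_2 slot; writing it into
  // rmethod's save slot makes restore_live_registers below reload it into rmethod.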

  // r0 is where we want to jump; stash it in rscratch1's save slot so that
  // restore_live_registers reloads it into rscratch1 (which is otherwise scratch)
  __ str(r0, Address(sp, reg_save.rscratch1_offset_in_bytes()));
  reg_save.restore_live_registers(masm);

  // We are back to the original state on entry and ready to go.

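  // rscratch1 now holds the destination we stored above; tail-call it.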
  __ br(rscratch1);

  // Pending exception after the safepoint

  __ bind(pending);

  reg_save.restore_live_registers(masm);

  // exception pending => remove activation and forward to exception handler

  __ str(zr, Address(rthread, JavaThread::vm_result_offset()));
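  // (vm_result is cleared so any result oop the call may have left there is
  // not carried into the exception path)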

  __ ldr(r0, Address(rthread, Thread::pending_exception_offset()));
  __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry()));

  // -------------
  // make sure all code is generated
  masm->flush();

  // return the blob
  // frame_size_in_words is already in words, which is what RuntimeStub expects
  return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_words, oop_maps, true);
}

// Continuation point for throwing of implicit exceptions that are
// not handled in the current activation. Fabricates an exception
// oop and initiates normal exception dispatching in this
// frame. Since we need to preserve callee-saved values (currently
// only for C2, but done for C1 as well) we need a callee-saved oop
// map and therefore have to make these stubs into RuntimeStubs
// rather than BufferBlobs.  If the compiler needs all registers to
// be preserved between the fault point and the exception handler
// then it must assume responsibility for that in
// AbstractCompiler::continuation_for_implicit_null_exception or
// continuation_for_implicit_division_by_zero_exception. All other
// implicit exceptions (e.g., NullPointerException or
// AbstractMethodError on entry) either occur at call sites or
// can assume that stack unwinding will be initiated, so
// caller-saved registers are treated as volatile by the compiler.

RuntimeStub* SharedRuntime::generate_throw_exception(SharedStubId id, address runtime_entry) {
  assert(is_throw_id(id), "expected a throw stub id");

  const char* name = SharedRuntime::stub_name(id);

  // Information about frame layout at time of blocking runtime call.
  // Note that we only have to preserve callee-saved registers since
  // the compilers are responsible for supplying a continuation point
  // if they expect all registers to be preserved.
  // n.b. aarch64 asserts that frame::arg_reg_save_area_bytes == 0
  enum layout {
    rfp_off = 0,
    rfp_off2,
    return_off,
    return_off2,
    framesize // inclusive of return address
  };
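  // Two stack words (the saved rfp and the return address), each described as a
  // pair of 32-bit VMReg slots, so framesize is 4 slots = 16 bytes and sp stays
  // 16-byte aligned.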

  int insts_size = 512;
  int locs_size  = 64;

  ResourceMark rm;
  const char* timer_msg = "SharedRuntime generate_throw_exception";
  TraceTime timer(timer_msg, TRACETIME_LOG(Info, startuptime));

  CodeBuffer code(name, insts_size, locs_size);
  OopMapSet* oop_maps  = new OopMapSet();
  MacroAssembler* masm = new MacroAssembler(&code);

  address start = __ pc();

  // This is an inlined and slightly modified version of call_VM
  // which has the ability to fetch the return PC out of
  // thread-local storage and also sets up last_Java_sp slightly
  // differently than the real call_VM

  __ enter(); // Save FP and LR before call

  assert(is_even(framesize/2), "sp not 16-byte aligned");

  // lr and fp are already in place
  __ sub(sp, rfp, ((uint64_t)framesize-4) << LogBytesPerInt); // prolog
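  // With the layout above, framesize - 4 == 0, so sp stays at rfp: enter() has
  // already pushed the two words (saved fp and lr) that make up this frame.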

  int frame_complete = __ pc() - start;

  // Set up last_Java_sp and last_Java_fp
  address the_pc = __ pc();
  __ set_last_Java_frame(sp, rfp, the_pc, rscratch1);

  __ mov(c_rarg0, rthread);
  BLOCK_COMMENT("call runtime_entry");
  __ mov(rscratch1, runtime_entry);
  __ blr(rscratch1);

  // Generate oop map
  OopMap* map = new OopMap(framesize, 0);
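  // The map records no oops: nothing live across this call holds an oop, but
  // the map must still be registered so the frame can be walked at the call site.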

  oop_maps->add_gc_map(the_pc - start, map);

  __ reset_last_Java_frame(true);

  // Reinitialize the ptrue predicate register, in case the external runtime
  // call clobbers ptrue reg, as we may return to SVE compiled code.
  __ reinitialize_ptrue();

  __ leave();

  // check for pending exceptions
#ifdef ASSERT
  Label L;
  __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset()));
  __ cbnz(rscratch1, L);
  __ should_not_reach_here();
  __ bind(L);
#endif // ASSERT
  __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry()));

  // codeBlob framesize is in words (not VMRegImpl::slot_size)
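  // (the layout enum counts 32-bit slots, hence the shift by
  // LogBytesPerWord - LogBytesPerInt to convert slots to words)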
  RuntimeStub* stub =
    RuntimeStub::new_runtime_stub(name,
                                  &code,
                                  frame_complete,
                                  (framesize >> (LogBytesPerWord - LogBytesPerInt)),
                                  oop_maps, false);
  return stub;
}

#if INCLUDE_JFR

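// Helpers shared by the JFR stubs below: bracket a leaf call into the JFR
// runtime with set-up and tear-down of the last Java frame.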
static void jfr_prologue(address the_pc, MacroAssembler* masm, Register thread) {
  __ set_last_Java_frame(sp, rfp, the_pc, rscratch1);
  __ mov(c_rarg0, thread);
}

// The handle is dereferenced through a load barrier.
static void jfr_epilogue(MacroAssembler* masm) {
  __ reset_last_Java_frame(true);
}

// For C2: c_rarg0 is junk; call into the runtime to write a checkpoint.
// It returns a jobject handle to the event writer.
// The handle is dereferenced and the return value is the event writer oop.
RuntimeStub* SharedRuntime::generate_jfr_write_checkpoint() {
  enum layout {
    rbp_off,
    rbpH_off,
    return_off,
    return_off2,
    framesize // inclusive of return address
  };

  int insts_size = 1024;
  int locs_size = 64;
  const char* name = SharedRuntime::stub_name(SharedStubId::jfr_write_checkpoint_id);
  CodeBuffer code(name, insts_size, locs_size);
  OopMapSet* oop_maps = new OopMapSet();
  MacroAssembler* masm = new MacroAssembler(&code);

  address start = __ pc();
  __ enter();
  int frame_complete = __ pc() - start;
  address the_pc = __ pc();
  jfr_prologue(the_pc, masm, rthread);
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, JfrIntrinsicSupport::write_checkpoint), 1);
  jfr_epilogue(masm);
  __ resolve_global_jobject(r0, rscratch1, rscratch2);
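  // The call returns a global JNI handle to the event writer; resolve it
  // (through the GC load barrier) so r0 holds the event writer oop on return.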
  __ leave();
  __ ret(lr);

  OopMap* map = new OopMap(framesize, 1); // rfp
  oop_maps->add_gc_map(the_pc - start, map);

  RuntimeStub* stub = // codeBlob framesize is in words (not VMRegImpl::slot_size)
    RuntimeStub::new_runtime_stub(name, &code, frame_complete,
                                  (framesize >> (LogBytesPerWord - LogBytesPerInt)),
                                  oop_maps, false);
  return stub;
}

// For C2: call into the runtime to return a leased buffer.
RuntimeStub* SharedRuntime::generate_jfr_return_lease() {
  enum layout {
    rbp_off,
    rbpH_off,
    return_off,
    return_off2,
    framesize // inclusive of return address
  };

  int insts_size = 1024;
  int locs_size = 64;

  const char* name = SharedRuntime::stub_name(SharedStubId::jfr_return_lease_id);
  CodeBuffer code(name, insts_size, locs_size);
  OopMapSet* oop_maps = new OopMapSet();
  MacroAssembler* masm = new MacroAssembler(&code);

  address start = __ pc();
  __ enter();
  int frame_complete = __ pc() - start;
  address the_pc = __ pc();
  jfr_prologue(the_pc, masm, rthread);
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, JfrIntrinsicSupport::return_lease), 1);
  jfr_epilogue(masm);

  __ leave();
  __ ret(lr);

  OopMap* map = new OopMap(framesize, 1); // rfp
  oop_maps->add_gc_map(the_pc - start, map);

  RuntimeStub* stub = // codeBlob framesize is in words (not VMRegImpl::slot_size)
    RuntimeStub::new_runtime_stub(name, &code, frame_complete,
                                  (framesize >> (LogBytesPerWord - LogBytesPerInt)),
                                  oop_maps, false);
  return stub;
}

#endif // INCLUDE_JFR