1 /*
   2  * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
   3  * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
   4  * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
   5  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   6  *
   7  * This code is free software; you can redistribute it and/or modify it
   8  * under the terms of the GNU General Public License version 2 only, as
   9  * published by the Free Software Foundation.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  *
  25  */
  26 
  27 #include "precompiled.hpp"
  28 #include "asm/macroAssembler.hpp"
  29 #include "asm/macroAssembler.inline.hpp"
  30 #include "code/debugInfoRec.hpp"
  31 #include "code/icBuffer.hpp"
  32 #include "code/vtableStubs.hpp"
  33 #include "compiler/oopMap.hpp"
  34 #include "gc/shared/barrierSetAssembler.hpp"
  35 #include "interpreter/interp_masm.hpp"
  36 #include "interpreter/interpreter.hpp"
  37 #include "logging/log.hpp"
  38 #include "memory/resourceArea.hpp"
  39 #include "nativeInst_riscv.hpp"
  40 #include "oops/compiledICHolder.hpp"
  41 #include "oops/klass.inline.hpp"
  42 #include "prims/methodHandles.hpp"
  43 #include "runtime/jniHandles.hpp"
  44 #include "runtime/safepointMechanism.hpp"
  45 #include "runtime/sharedRuntime.hpp"
  46 #include "runtime/signature.hpp"
  47 #include "runtime/stubRoutines.hpp"
  48 #include "runtime/vframeArray.hpp"
  49 #include "utilities/align.hpp"
  50 #include "utilities/formatBuffer.hpp"
  51 #include "vmreg_riscv.inline.hpp"
  52 #ifdef COMPILER1
  53 #include "c1/c1_Runtime1.hpp"
  54 #endif
  55 #ifdef COMPILER2
  56 #include "adfiles/ad_riscv.hpp"
  57 #include "opto/runtime.hpp"
  58 #endif
  59 
  60 #define __ masm->
  61 
  62 const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size;
  63 
  64 class SimpleRuntimeFrame {
  65 public:
  66 
  67   // Most of the runtime stubs have this simple frame layout.
  68   // This class exists to make the layout shared in one place.
  69   // Offsets are for compiler stack slots, which are jints.
  70   enum layout {
  71     // The frame sender code expects that fp will be in the "natural" place and
  72     // will override any oopMap setting for it. We must therefore force the layout
  73     // so that it agrees with the frame sender code.
  74     // we don't expect any arg reg save area so riscv asserts that
  75     // frame::arg_reg_save_area_bytes == 0
  76     fp_off = 0, fp_off2,
  77     return_off, return_off2,
  78     framesize
  79   };
  80 };
  81 
  82 class RegisterSaver {
  83   const bool _save_vectors;
  84  public:
  85   RegisterSaver(bool save_vectors) : _save_vectors(UseRVV && save_vectors) {}
  86   ~RegisterSaver() {}
  87   OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words);
  88   void restore_live_registers(MacroAssembler* masm);
  89 
  90   // Offsets into the register save area
  91   // Used by deoptimization when it is managing result register
  92   // values on its own
  93   // gregs: 28, float_registers: 32; excluded: x1 (ra), x2 (sp), x3 (gp) and x4 (tp)
  94   // |---v0---|<---SP
  95   // |---v1---|save vectors only in generate_handler_blob
  96   // |-- .. --|
  97   // |---v31--|-----
  98   // |---f0---|
  99   // |---f1---|
 100   // |   ..   |
 101   // |---f31--|
 102   // |---reserved slot for stack alignment---|
 103   // |---x5---|
 104   // |   x6   |
 105   // |---.. --|
 106   // |---x31--|
 107   // |---fp---|
 108   // |---ra---|
 109   int v0_offset_in_bytes(void) { return 0; }
 110   int f0_offset_in_bytes(void) {
 111     int f0_offset = 0;
 112 #ifdef COMPILER2
 113     if (_save_vectors) {
 114       f0_offset += Matcher::scalable_vector_reg_size(T_INT) * VectorRegisterImpl::number_of_registers *
 115                    BytesPerInt;
 116     }
 117 #endif
 118     return f0_offset;
 119   }
 120   int reserved_slot_offset_in_bytes(void) {
 121     return f0_offset_in_bytes() +
 122            FloatRegisterImpl::max_slots_per_register *
 123            FloatRegisterImpl::number_of_registers *
 124            BytesPerInt;
 125   }
 126 
 127   int reg_offset_in_bytes(Register r) {
 128     assert (r->encoding() > 4, "ra, sp, gp and tp not saved");
 129     return reserved_slot_offset_in_bytes() + (r->encoding() - 4 /* x1, x2, x3, x4 */) * wordSize;
 130   }
 131 
 132   int freg_offset_in_bytes(FloatRegister f) {
 133     return f0_offset_in_bytes() + f->encoding() * wordSize;
 134   }
 135 
 136   int ra_offset_in_bytes(void) {
 137     return reserved_slot_offset_in_bytes() +
 138            (RegisterImpl::number_of_registers - 3) *
 139            RegisterImpl::max_slots_per_register *
 140            BytesPerInt;
 141   }
 142 };
 143 
 144 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) {
 145   int vector_size_in_bytes = 0;
 146   int vector_size_in_slots = 0;
 147 #ifdef COMPILER2
 148   if (_save_vectors) {
 149     vector_size_in_bytes += Matcher::scalable_vector_reg_size(T_BYTE);
 150     vector_size_in_slots += Matcher::scalable_vector_reg_size(T_INT);
 151   }
 152 #endif
 153 
 154   assert_cond(masm != NULL && total_frame_words != NULL);
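       // ra_offset_in_bytes() is the offset of the last saved word (ra, see the layout
       // diagram above), so adding one more wordSize gives the size of the register save
       // area; additional_frame_words is caller-requested extra space, and the total is
       // rounded up to 16 bytes.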
 155   int frame_size_in_bytes = align_up(additional_frame_words * wordSize + ra_offset_in_bytes() + wordSize, 16);
 156   // OopMap frame size is in compiler stack slots (jint's) not bytes or words
 157   int frame_size_in_slots = frame_size_in_bytes / BytesPerInt;
 158   // The caller will allocate additional_frame_words
 159   int additional_frame_slots = additional_frame_words * wordSize / BytesPerInt;
 160   // CodeBlob frame size is in words.
 161   int frame_size_in_words = frame_size_in_bytes / wordSize;
 162   *total_frame_words = frame_size_in_words;
 163 
 164   // Save Integer, Float and Vector registers.
 165   __ enter();
 166   __ push_CPU_state(_save_vectors, vector_size_in_bytes);
 167 
 168   // Set an oopmap for the call site.  This oopmap will map all
 169   // oop-registers and debug-info registers as callee-saved.  This
 170   // will allow deoptimization at this safepoint to find all possible
 171   // debug-info recordings, as well as let GC find all oops.
 172 
 173   OopMapSet *oop_maps = new OopMapSet();
 174   OopMap* oop_map = new OopMap(frame_size_in_slots, 0);
 175   assert_cond(oop_maps != NULL && oop_map != NULL);
 176 
 177   int sp_offset_in_slots = 0;
 178   int step_in_slots = 0;
 179   if (_save_vectors) {
 180     step_in_slots = vector_size_in_slots;
 181     for (int i = 0; i < VectorRegisterImpl::number_of_registers; i++, sp_offset_in_slots += step_in_slots) {
 182       VectorRegister r = as_VectorRegister(i);
 183       oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset_in_slots), r->as_VMReg());
 184     }
 185   }
 186 
 187   step_in_slots = FloatRegisterImpl::max_slots_per_register;
 188   for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++, sp_offset_in_slots += step_in_slots) {
 189     FloatRegister r = as_FloatRegister(i);
 190     oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset_in_slots), r->as_VMReg());
 191   }
 192 
 193   step_in_slots = RegisterImpl::max_slots_per_register;
 194   // skip the slot reserved for alignment, see MacroAssembler::push_reg;
 195   // also skip x5 ~ x6 on the stack because they are caller-saved registers.
 196   sp_offset_in_slots += RegisterImpl::max_slots_per_register * 3;
 197   // besides, we ignore x0 ~ x4 because push_CPU_state won't push them on the stack.
 198   for (int i = 7; i < RegisterImpl::number_of_registers; i++, sp_offset_in_slots += step_in_slots) {
 199     Register r = as_Register(i);
 200     if (r != xthread) {
 201       oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset_in_slots + additional_frame_slots), r->as_VMReg());
 202     }
 203   }
 204 
 205   return oop_map;
 206 }
 207 
 208 void RegisterSaver::restore_live_registers(MacroAssembler* masm) {
 209   assert_cond(masm != NULL);
 210 #ifdef COMPILER2
 211   __ pop_CPU_state(_save_vectors, Matcher::scalable_vector_reg_size(T_BYTE));
 212 #else
 213   __ pop_CPU_state(_save_vectors);
 214 #endif
 215   __ leave();
 216 }
 217 
 218 // Is the vector's size (in bytes) bigger than the size saved by default?
 219 // Unlike aarch64, riscv does not overlay the floating-point registers on the vector registers.
 220 bool SharedRuntime::is_wide_vector(int size) {
 221   return UseRVV;
 222 }
 223 
 224 // The java_calling_convention describes stack locations as ideal slots on
 225 // a frame with no abi restrictions. Since we must observe abi restrictions
 226 // (like the placement of the register window) the slots must be biased by
 227 // the following value.
 228 static int reg2offset_in(VMReg r) {
 229   // Account for saved fp and ra
 230   // This should really be in_preserve_stack_slots
 231   return r->reg2stack() * VMRegImpl::stack_slot_size;
 232 }
 233 
 234 static int reg2offset_out(VMReg r) {
 235   return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
 236 }
 237 
 238 // ---------------------------------------------------------------------------
 239 // Read the array of BasicTypes from a signature, and compute where the
 240 // arguments should go.  Values in the VMRegPair regs array refer to 4-byte
 241 // quantities.  Values less than VMRegImpl::stack0 are registers, those above
 242 // refer to 4-byte stack slots.  All stack slots are based off of the stack pointer
 243 // as framesizes are fixed.
 244 // VMRegImpl::stack0 refers to the first slot 0(sp),
 245 // and VMRegImpl::stack0+1 refers to the memory word 4 bytes higher.  Registers
 246 // 0 up to RegisterImpl::number_of_registers are the 64-bit
 247 // integer registers.
 248 
 249 // Note: the INPUTS in sig_bt are in units of Java argument words,
 250 // which are 64-bit.  The OUTPUTS are in 32-bit units.
 251 
 252 // The Java calling convention is a "shifted" version of the C ABI.
 253 // By skipping the first C ABI register we can call non-static jni
 254 // methods with small numbers of arguments without having to shuffle
 255 // the arguments at all. Since we control the java ABI we ought to at
 256 // least get some advantage out of it.
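     //
     // For example (purely illustrative), a signature (int, long, Object, float) maps to:
     //   int    -> j_rarg0   (set1: one 32-bit slot)
     //   long   -> j_rarg1   (set2: the trailing T_VOID half gets no register)
     //   Object -> j_rarg2   (set2)
     //   float  -> j_farg0   (set1)
     // Arguments only spill to the stack (two 32-bit slots at a time) once the eight
     // integer or eight float argument registers are exhausted.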
 257 
 258 int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
 259                                            VMRegPair *regs,
 260                                            int total_args_passed) {
 261   // Create the mapping between argument positions and
 262   // registers.
 263   static const Register INT_ArgReg[Argument::n_int_register_parameters_j] = {
 264     j_rarg0, j_rarg1, j_rarg2, j_rarg3,
 265     j_rarg4, j_rarg5, j_rarg6, j_rarg7
 266   };
 267   static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters_j] = {
 268     j_farg0, j_farg1, j_farg2, j_farg3,
 269     j_farg4, j_farg5, j_farg6, j_farg7
 270   };
 271 
 272   uint int_args = 0;
 273   uint fp_args = 0;
 274   uint stk_args = 0; // inc by 2 each time
 275 
 276   for (int i = 0; i < total_args_passed; i++) {
 277     switch (sig_bt[i]) {
 278       case T_BOOLEAN: // fall through
 279       case T_CHAR:    // fall through
 280       case T_BYTE:    // fall through
 281       case T_SHORT:   // fall through
 282       case T_INT:
 283         if (int_args < Argument::n_int_register_parameters_j) {
 284           regs[i].set1(INT_ArgReg[int_args++]->as_VMReg());
 285         } else {
 286           regs[i].set1(VMRegImpl::stack2reg(stk_args));
 287           stk_args += 2;
 288         }
 289         break;
 290       case T_VOID:
 291         // halves of T_LONG or T_DOUBLE
 292         assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
 293         regs[i].set_bad();
 294         break;
 295       case T_LONG:      // fall through
 296         assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
 297       case T_OBJECT:    // fall through
 298       case T_ARRAY:     // fall through
 299       case T_ADDRESS:
 300         if (int_args < Argument::n_int_register_parameters_j) {
 301           regs[i].set2(INT_ArgReg[int_args++]->as_VMReg());
 302         } else {
 303           regs[i].set2(VMRegImpl::stack2reg(stk_args));
 304           stk_args += 2;
 305         }
 306         break;
 307       case T_FLOAT:
 308         if (fp_args < Argument::n_float_register_parameters_j) {
 309           regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg());
 310         } else {
 311           regs[i].set1(VMRegImpl::stack2reg(stk_args));
 312           stk_args += 2;
 313         }
 314         break;
 315       case T_DOUBLE:
 316         assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
 317         if (fp_args < Argument::n_float_register_parameters_j) {
 318           regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg());
 319         } else {
 320           regs[i].set2(VMRegImpl::stack2reg(stk_args));
 321           stk_args += 2;
 322         }
 323         break;
 324       default:
 325         ShouldNotReachHere();
 326     }
 327   }
 328 
 329   return align_up(stk_args, 2);
 330 }
 331 
 332 // Patch the caller's callsite with the entry to compiled code if it exists.
 333 static void patch_callers_callsite(MacroAssembler *masm) {
 334   assert_cond(masm != NULL);
 335   Label L;
 336   __ ld(t0, Address(xmethod, in_bytes(Method::code_offset())));
 337   __ beqz(t0, L);
 338 
 339   __ enter();
 340   __ push_CPU_state();
 341 
 342   // VM needs caller's callsite
 343   // VM needs target method
 344   // This needs to be a long call since we will relocate this adapter to
 345   // the codeBuffer and it may not reach
 346 
 347 #ifndef PRODUCT
 348   assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area");
 349 #endif
 350 
 351   __ mv(c_rarg0, xmethod);
 352   __ mv(c_rarg1, ra);
 353   int32_t offset = 0;
 354   __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite)), offset);
 355   __ jalr(x1, t0, offset);
 356 
 357   // Explicit fence.i required because fixup_callers_callsite may change the code
 358   // stream.
 359   __ safepoint_ifence();
 360 
 361   __ pop_CPU_state();
 362   // restore sp
 363   __ leave();
 364   __ bind(L);
 365 }
 366 
 367 static void gen_c2i_adapter(MacroAssembler *masm,
 368                             int total_args_passed,
 369                             int comp_args_on_stack,
 370                             const BasicType *sig_bt,
 371                             const VMRegPair *regs,
 372                             Label& skip_fixup) {
 373   // Before we get into the guts of the C2I adapter, see if we should be here
 374   // at all.  We've come from compiled code and are attempting to jump to the
 375   // interpreter, which means the caller made a static call to get here
 376   // (vcalls always get a compiled target if there is one).  Check for a
 377   // compiled target.  If there is one, we need to patch the caller's call.
 378   patch_callers_callsite(masm);
 379 
 380   __ bind(skip_fixup);
 381 
 382   int words_pushed = 0;
 383 
 384   // Since all args are passed on the stack, total_args_passed *
 385   // Interpreter::stackElementSize is the space we need.
 386 
 387   int extraspace = total_args_passed * Interpreter::stackElementSize;
 388 
 389   __ mv(x30, sp);
 390 
 391   // stack is aligned, keep it that way
 392   extraspace = align_up(extraspace, 2 * wordSize);
 393 
 394   if (extraspace) {
 395     __ sub(sp, sp, extraspace);
 396   }
 397 
 398   // Now write the args into the outgoing interpreter space
 399   for (int i = 0; i < total_args_passed; i++) {
 400     if (sig_bt[i] == T_VOID) {
 401       assert(i > 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "missing half");
 402       continue;
 403     }
 404 
 405     // offset to start parameters
 406     int st_off   = (total_args_passed - i - 1) * Interpreter::stackElementSize;
 407     int next_off = st_off - Interpreter::stackElementSize;
 408 
 409     // Say 4 args:
 410     // i   st_off
 411     // 0   32 T_LONG
 412     // 1   24 T_VOID
 413     // 2   16 T_OBJECT
 414     // 3    8 T_BOOL
 415     // -    0 return address
 416     //
 417     // However, to make things extra confusing: because we can fit a Java long/double in
 418     // a single slot on a 64-bit VM and it would be silly to break them up, the interpreter
 419     // leaves one slot empty and only stores to a single slot. In this case the
 420     // slot that is occupied is the T_VOID slot. See, I said it was confusing.
 421 
 422     VMReg r_1 = regs[i].first();
 423     VMReg r_2 = regs[i].second();
 424     if (!r_1->is_valid()) {
 425       assert(!r_2->is_valid(), "");
 426       continue;
 427     }
 428     if (r_1->is_stack()) {
 429       // memory to memory, use t0
 430       int ld_off = (r_1->reg2stack() * VMRegImpl::stack_slot_size
 431                     + extraspace
 432                     + words_pushed * wordSize);
 433       if (!r_2->is_valid()) {
 434         __ lwu(t0, Address(sp, ld_off));
 435         __ sd(t0, Address(sp, st_off), /*temp register*/esp);
 436       } else {
 437         __ ld(t0, Address(sp, ld_off), /*temp register*/esp);
 438 
 439         // Two VMRegs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG
 440         // T_DOUBLE and T_LONG use two slots in the interpreter
 441         if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
 442           // ld_off == LSW, ld_off+wordSize == MSW
 443           // st_off == MSW, next_off == LSW
 444           __ sd(t0, Address(sp, next_off), /*temp register*/esp);
 445 #ifdef ASSERT
 446           // Overwrite the unused slot with known junk
 447           __ li(t0, 0xdeadffffdeadaaaaul);
 448           __ sd(t0, Address(sp, st_off), /*temp register*/esp);
 449 #endif /* ASSERT */
 450         } else {
 451           __ sd(t0, Address(sp, st_off), /*temp register*/esp);
 452         }
 453       }
 454     } else if (r_1->is_Register()) {
 455       Register r = r_1->as_Register();
 456       if (!r_2->is_valid()) {
 457         // must be only an int (or less), so move only 32 bits to the slot
 458         __ sd(r, Address(sp, st_off));
 459       } else {
 460         // Two VMRegs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG
 461         // T_DOUBLE and T_LONG use two slots in the interpreter
 462         if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
 463           // long/double in gpr
 464 #ifdef ASSERT
 465           // Overwrite the unused slot with known junk
 466           __ li(t0, 0xdeadffffdeadaaabul);
 467           __ sd(t0, Address(sp, st_off), /*temp register*/esp);
 468 #endif /* ASSERT */
 469           __ sd(r, Address(sp, next_off));
 470         } else {
 471           __ sd(r, Address(sp, st_off));
 472         }
 473       }
 474     } else {
 475       assert(r_1->is_FloatRegister(), "");
 476       if (!r_2->is_valid()) {
 477         // only a float, use just part of the slot
 478         __ fsw(r_1->as_FloatRegister(), Address(sp, st_off));
 479       } else {
 480 #ifdef ASSERT
 481         // Overwrite the unused slot with known junk
 482         __ li(t0, 0xdeadffffdeadaaacul);
 483         __ sd(t0, Address(sp, st_off), /*temp register*/esp);
 484 #endif /* ASSERT */
 485         __ fsd(r_1->as_FloatRegister(), Address(sp, next_off));
 486       }
 487     }
 488   }
 489 
 490   __ mv(esp, sp); // Interp expects args on caller's expression stack
 491 
 492   __ ld(t0, Address(xmethod, in_bytes(Method::interpreter_entry_offset())));
 493   __ jr(t0);
 494 }
 495 
 496 void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
 497                                     int total_args_passed,
 498                                     int comp_args_on_stack,
 499                                     const BasicType *sig_bt,
 500                                     const VMRegPair *regs) {
 501   // Cut-out for having no stack args.
 502   int comp_words_on_stack = align_up(comp_args_on_stack * VMRegImpl::stack_slot_size, wordSize) >> LogBytesPerWord;
 503   if (comp_args_on_stack != 0) {
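         // Carve out space for the outgoing stack args and keep SP 16-byte aligned,
         // as required by the RISC-V ABI (the andi with -16 rounds down).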
 504     __ sub(t0, sp, comp_words_on_stack * wordSize);
 505     __ andi(sp, t0, -16);
 506   }
 507 
 508   // Will jump to the compiled code just as if compiled code was doing it.
 509   // Pre-load the register-jump target early, to schedule it better.
 510   __ ld(t1, Address(xmethod, in_bytes(Method::from_compiled_offset())));
 511 
 512   // Now generate the shuffle code.
 513   for (int i = 0; i < total_args_passed; i++) {
 514     if (sig_bt[i] == T_VOID) {
 515       assert(i > 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "missing half");
 516       continue;
 517     }
 518 
 519     // Pick up 0, 1 or 2 words from SP+offset.
 520 
 521     assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
 522            "scrambled load targets?");
 523     // Load in argument order going down.
 524     int ld_off = (total_args_passed - i - 1) * Interpreter::stackElementSize;
 525     // Point to interpreter value (vs. tag)
 526     int next_off = ld_off - Interpreter::stackElementSize;
 527 
 528     VMReg r_1 = regs[i].first();
 529     VMReg r_2 = regs[i].second();
 530     if (!r_1->is_valid()) {
 531       assert(!r_2->is_valid(), "");
 532       continue;
 533     }
 534     if (r_1->is_stack()) {
 535       // Convert stack slot to an SP offset (+ wordSize to account for return address )
 536       int st_off = regs[i].first()->reg2stack() * VMRegImpl::stack_slot_size;
 537       if (!r_2->is_valid()) {
 538         __ lw(t0, Address(esp, ld_off));
 539         __ sd(t0, Address(sp, st_off), /*temp register*/t2);
 540       } else {
 541         //
 542         // We are using two optoregs. This can be either T_OBJECT,
 543         // T_ADDRESS, T_LONG, or T_DOUBLE; the interpreter allocates
 544         // two slots but only uses one for the T_LONG or T_DOUBLE case.
 545         // So we must adjust where to pick up the data to match the
 546         // interpreter.
 547         //
 548         // Interpreter local[n] == MSW, local[n+1] == LSW; however, locals
 549         // are accessed at negative offsets so the LSW is at the LOW address
 550 
 551         // ld_off is MSW so get LSW
 552         const int offset = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ?
 553                            next_off : ld_off;
 554         __ ld(t0, Address(esp, offset));
 555         // st_off is LSW (i.e. reg.first())
 556         __ sd(t0, Address(sp, st_off), /*temp register*/t2);
 557       }
 558     } else if (r_1->is_Register()) {  // Register argument
 559       Register r = r_1->as_Register();
 560       if (r_2->is_valid()) {
 561         //
 562         // We are using two VMRegs. This can be either T_OBJECT,
 563         // T_ADDRESS, T_LONG, or T_DOUBLE; the interpreter allocates
 564         // two slots but only uses one for the T_LONG or T_DOUBLE case.
 565         // So we must adjust where to pick up the data to match the
 566         // interpreter.
 567 
 568         const int offset = (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ?
 569                            next_off : ld_off;
 570 
 571         // this can be a misaligned move
 572         __ ld(r, Address(esp, offset));
 573       } else {
 574         // sign extend and use a full word?
 575         __ lw(r, Address(esp, ld_off));
 576       }
 577     } else {
 578       if (!r_2->is_valid()) {
 579         __ flw(r_1->as_FloatRegister(), Address(esp, ld_off));
 580       } else {
 581         __ fld(r_1->as_FloatRegister(), Address(esp, next_off));
 582       }
 583     }
 584   }
 585 
 586   // 6243940 We might end up in handle_wrong_method if
 587   // the callee is deoptimized as we race thru here. If that
 588   // happens we don't want to take a safepoint because the
 589   // caller frame will look interpreted and arguments are now
 590   // "compiled" so it is much better to make this transition
 591   // invisible to the stack walking code. Unfortunately if
 592   // we try and find the callee by normal means a safepoint
 593   // is possible. So we stash the desired callee in the thread
 594   // and the VM will find it there should this case occur.
 595 
 596   __ sd(xmethod, Address(xthread, JavaThread::callee_target_offset()));
 597 
 598   __ jr(t1);
 599 }
 600 
 601 // ---------------------------------------------------------------
 602 AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
 603                                                             int total_args_passed,
 604                                                             int comp_args_on_stack,
 605                                                             const BasicType *sig_bt,
 606                                                             const VMRegPair *regs,
 607                                                             AdapterFingerPrint* fingerprint) {
 608   address i2c_entry = __ pc();
 609   gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);
 610 
 611   address c2i_unverified_entry = __ pc();
 612   Label skip_fixup;
 613 
 614   Label ok;
 615 
 616   const Register holder = t1;
 617   const Register receiver = j_rarg0;
 618   const Register tmp = t2;  // A call-clobbered register not used for arg passing
 619 
 620   // -------------------------------------------------------------------------
 621   // Generate a C2I adapter.  On entry we know xmethod holds the Method* during calls
 622   // to the interpreter.  The args start out packed in the compiled layout.  They
 623   // need to be unpacked into the interpreter layout.  This will almost always
 624   // require some stack space.  We grow the current (compiled) stack, then repack
 625   // the args.  We finally end in a jump to the generic interpreter entry point.
 626   // On exit from the interpreter, the interpreter will restore our SP (lest the
 627   // compiled code, which relies solely on SP and not FP, get sick).
 628 
 629   {
 630     __ block_comment("c2i_unverified_entry {");
 631     __ load_klass(t0, receiver);
 632     __ ld(tmp, Address(holder, CompiledICHolder::holder_klass_offset()));
 633     __ ld(xmethod, Address(holder, CompiledICHolder::holder_metadata_offset()));
 634     __ beq(t0, tmp, ok);
 635     __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 636 
 637     __ bind(ok);
 638     // Method might have been compiled since the call site was patched to
 639     // interpreted; if that is the case treat it as a miss so we can get
 640     // the call site corrected.
 641     __ ld(t0, Address(xmethod, in_bytes(Method::code_offset())));
 642     __ beqz(t0, skip_fixup);
 643     __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
 644     __ block_comment("} c2i_unverified_entry");
 645   }
 646 
 647   address c2i_entry = __ pc();
 648 
 649   // Class initialization barrier for static methods
 650   address c2i_no_clinit_check_entry = NULL;
 651   if (VM_Version::supports_fast_class_init_checks()) {
 652     Label L_skip_barrier;
 653 
 654     { // Bypass the barrier for non-static methods
 655       __ lwu(t0, Address(xmethod, Method::access_flags_offset()));
 656       __ andi(t1, t0, JVM_ACC_STATIC);
 657       __ beqz(t1, L_skip_barrier); // non-static
 658     }
 659 
 660     __ load_method_holder(t1, xmethod);
 661     __ clinit_barrier(t1, t0, &L_skip_barrier);
 662     __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
 663 
 664     __ bind(L_skip_barrier);
 665     c2i_no_clinit_check_entry = __ pc();
 666   }
 667 
 668   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
 669   bs->c2i_entry_barrier(masm);
 670 
 671   gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);
 672 
 673   __ flush();
 674   return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry, c2i_no_clinit_check_entry);
 675 }
 676 
 677 int SharedRuntime::vector_calling_convention(VMRegPair *regs,
 678                                              uint num_bits,
 679                                              uint total_args_passed) {
 680   Unimplemented();
 681   return 0;
 682 }
 683 
 684 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
 685                                          VMRegPair *regs,
 686                                          VMRegPair *regs2,
 687                                          int total_args_passed) {
 688   assert(regs2 == NULL, "not needed on riscv");
 689 
 690   // We return the amount of VMRegImpl stack slots we need to reserve for all
 691   // the arguments NOT counting out_preserve_stack_slots.
 692 
 693   static const Register INT_ArgReg[Argument::n_int_register_parameters_c] = {
 694     c_rarg0, c_rarg1, c_rarg2, c_rarg3,
 695     c_rarg4, c_rarg5,  c_rarg6,  c_rarg7
 696   };
 697   static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters_c] = {
 698     c_farg0, c_farg1, c_farg2, c_farg3,
 699     c_farg4, c_farg5, c_farg6, c_farg7
 700   };
 701 
 702   uint int_args = 0;
 703   uint fp_args = 0;
 704   uint stk_args = 0; // inc by 2 each time
 705 
 706   for (int i = 0; i < total_args_passed; i++) {
 707     switch (sig_bt[i]) {
 708       case T_BOOLEAN:  // fall through
 709       case T_CHAR:     // fall through
 710       case T_BYTE:     // fall through
 711       case T_SHORT:    // fall through
 712       case T_INT:
 713         if (int_args < Argument::n_int_register_parameters_c) {
 714           regs[i].set1(INT_ArgReg[int_args++]->as_VMReg());
 715         } else {
 716           regs[i].set1(VMRegImpl::stack2reg(stk_args));
 717           stk_args += 2;
 718         }
 719         break;
 720       case T_LONG:      // fall through
 721         assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
 722       case T_OBJECT:    // fall through
 723       case T_ARRAY:     // fall through
 724       case T_ADDRESS:   // fall through
 725       case T_METADATA:
 726         if (int_args < Argument::n_int_register_parameters_c) {
 727           regs[i].set2(INT_ArgReg[int_args++]->as_VMReg());
 728         } else {
 729           regs[i].set2(VMRegImpl::stack2reg(stk_args));
 730           stk_args += 2;
 731         }
 732         break;
 733       case T_FLOAT:
 734         if (fp_args < Argument::n_float_register_parameters_c) {
 735           regs[i].set1(FP_ArgReg[fp_args++]->as_VMReg());
 736         } else if (int_args < Argument::n_int_register_parameters_c) {
 737           regs[i].set1(INT_ArgReg[int_args++]->as_VMReg());
 738         } else {
 739           regs[i].set1(VMRegImpl::stack2reg(stk_args));
 740           stk_args += 2;
 741         }
 742         break;
 743       case T_DOUBLE:
 744         assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half");
 745         if (fp_args < Argument::n_float_register_parameters_c) {
 746           regs[i].set2(FP_ArgReg[fp_args++]->as_VMReg());
 747         } else if (int_args < Argument::n_int_register_parameters_c) {
 748           regs[i].set2(INT_ArgReg[int_args++]->as_VMReg());
 749         } else {
 750           regs[i].set2(VMRegImpl::stack2reg(stk_args));
 751           stk_args += 2;
 752         }
 753         break;
 754       case T_VOID: // Halves of longs and doubles
 755         assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
 756         regs[i].set_bad();
 757         break;
 758       default:
 759         ShouldNotReachHere();
 760     }
 761   }
 762 
 763   return stk_args;
 764 }
 765 
 766 // On 64-bit we will store integer-like items to the stack as
 767 // 64-bit items (riscv64 abi) even though java would only store
 768 // 32 bits for a parameter. On 32-bit it would simply be 32 bits,
 769 // so this routine does 32->32 on 32-bit and 32->64 on 64-bit.
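     // (The riscv port is 64-bit only, so in practice this is always the sign-extending
     // 32->64 case; for the register-to-register variant the addw below performs the
     // sign extension.)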
 770 static void move32_64(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
 771   assert_cond(masm != NULL);
 772   if (src.first()->is_stack()) {
 773     if (dst.first()->is_stack()) {
 774       // stack to stack
 775       __ ld(t0, Address(fp, reg2offset_in(src.first())));
 776       __ sd(t0, Address(sp, reg2offset_out(dst.first())));
 777     } else {
 778       // stack to reg
 779       __ lw(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first())));
 780     }
 781   } else if (dst.first()->is_stack()) {
 782     // reg to stack
 783     __ sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first())));
 784   } else {
 785     if (dst.first() != src.first()) {
 786       // sign-extend the 32-bit value
 787       __ addw(dst.first()->as_Register(), src.first()->as_Register(), zr);
 788     }
 789   }
 790 }
 791 
 792 // An oop arg. Must pass a handle, not the oop itself
 793 static void object_move(MacroAssembler* masm,
 794                         OopMap* map,
 795                         int oop_handle_offset,
 796                         int framesize_in_slots,
 797                         VMRegPair src,
 798                         VMRegPair dst,
 799                         bool is_receiver,
 800                         int* receiver_offset) {
 801   assert_cond(masm != NULL && map != NULL && receiver_offset != NULL);
 802   // must pass a handle. First figure out the location we use as a handle
 803   Register rHandle = dst.first()->is_stack() ? t1 : dst.first()->as_Register();
 804 
 805   // See if the oop is NULL; if it is we need no handle
 806 
 807   if (src.first()->is_stack()) {
 808 
 809     // Oop is already on the stack as an argument
 810     int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
 811     map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots));
 812     if (is_receiver) {
 813       *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size;
 814     }
 815 
 816     __ ld(t0, Address(fp, reg2offset_in(src.first())));
 817     __ la(rHandle, Address(fp, reg2offset_in(src.first())));
 818     // conditionally move a NULL
 819     Label notZero1;
 820     __ bnez(t0, notZero1);
 821     __ mv(rHandle, zr);
 822     __ bind(notZero1);
 823   } else {
 824 
 825     // Oop is in a register; we must store it to the space we reserve
 826     // on the stack for oop_handles and pass a handle if the oop is non-NULL
 827 
 828     const Register rOop = src.first()->as_Register();
 829     int oop_slot = -1;
 830     if (rOop == j_rarg0) {
 831       oop_slot = 0;
 832     } else if (rOop == j_rarg1) {
 833       oop_slot = 1;
 834     } else if (rOop == j_rarg2) {
 835       oop_slot = 2;
 836     } else if (rOop == j_rarg3) {
 837       oop_slot = 3;
 838     } else if (rOop == j_rarg4) {
 839       oop_slot = 4;
 840     } else if (rOop == j_rarg5) {
 841       oop_slot = 5;
 842     } else if (rOop == j_rarg6) {
 843       oop_slot = 6;
 844     } else {
 845       assert(rOop == j_rarg7, "wrong register");
 846       oop_slot = 7;
 847     }
 848 
 849     oop_slot = oop_slot * VMRegImpl::slots_per_word + oop_handle_offset;
 850     int offset = oop_slot * VMRegImpl::stack_slot_size;
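         // oop_slot is now an absolute VMReg slot index inside this frame's oop handle
         // area, and offset is the matching byte offset from SP.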
 851 
 852     map->set_oop(VMRegImpl::stack2reg(oop_slot));
 853     // Store oop in handle area, may be NULL
 854     __ sd(rOop, Address(sp, offset));
 855     if (is_receiver) {
 856       *receiver_offset = offset;
 857     }
 858 
 859     // rOop may be the same as rHandle
 860     if (rOop == rHandle) {
 861       Label isZero;
 862       __ beqz(rOop, isZero);
 863       __ la(rHandle, Address(sp, offset));
 864       __ bind(isZero);
 865     } else {
 866       Label notZero2;
 867       __ la(rHandle, Address(sp, offset));
 868       __ bnez(rOop, notZero2);
 869       __ mv(rHandle, zr);
 870       __ bind(notZero2);
 871     }
 872   }
 873 
 874   // If the arg is on the stack then place it, otherwise it is already in the correct reg.
 875   if (dst.first()->is_stack()) {
 876     __ sd(rHandle, Address(sp, reg2offset_out(dst.first())));
 877   }
 878 }
 879 
 880 // A float arg may have to do float reg to int reg conversion
 881 static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
 882   assert(src.first()->is_stack() && dst.first()->is_stack() ||
 883          src.first()->is_reg() && dst.first()->is_reg() || src.first()->is_stack() && dst.first()->is_reg(), "Unexpected error");
 884   assert_cond(masm != NULL);
 885   if (src.first()->is_stack()) {
 886     if (dst.first()->is_stack()) {
 887       __ lwu(t0, Address(fp, reg2offset_in(src.first())));
 888       __ sw(t0, Address(sp, reg2offset_out(dst.first())));
 889     } else if (dst.first()->is_Register()) {
 890       __ lwu(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first())));
 891     } else {
 892       ShouldNotReachHere();
 893     }
 894   } else if (src.first() != dst.first()) {
 895     if (src.is_single_phys_reg() && dst.is_single_phys_reg()) {
 896       __ fmv_s(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister());
 897     } else {
 898       ShouldNotReachHere();
 899     }
 900   }
 901 }
 902 
 903 // A long move
 904 static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
 905   assert_cond(masm != NULL);
 906   if (src.first()->is_stack()) {
 907     if (dst.first()->is_stack()) {
 908       // stack to stack
 909       __ ld(t0, Address(fp, reg2offset_in(src.first())));
 910       __ sd(t0, Address(sp, reg2offset_out(dst.first())));
 911     } else {
 912       // stack to reg
 913       __ ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first())));
 914     }
 915   } else if (dst.first()->is_stack()) {
 916     // reg to stack
 917     __ sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first())));
 918   } else {
 919     if (dst.first() != src.first()) {
 920       __ mv(dst.first()->as_Register(), src.first()->as_Register());
 921     }
 922   }
 923 }
 924 
 925 // A double move
 926 static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) {
 927   assert(src.first()->is_stack() && dst.first()->is_stack() ||
 928          src.first()->is_reg() && dst.first()->is_reg() || src.first()->is_stack() && dst.first()->is_reg(), "Unexpected error");
 929   assert_cond(masm != NULL);
 930   if (src.first()->is_stack()) {
 931     if (dst.first()->is_stack()) {
 932       __ ld(t0, Address(fp, reg2offset_in(src.first())));
 933       __ sd(t0, Address(sp, reg2offset_out(dst.first())));
 934     } else if (dst.first()->is_Register()) {
 935       __ ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first())));
 936     } else {
 937       ShouldNotReachHere();
 938     }
 939   } else if (src.first() != dst.first()) {
 940     if (src.is_single_phys_reg() && dst.is_single_phys_reg()) {
 941       __ fmv_d(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister());
 942     } else {
 943       ShouldNotReachHere();
 944     }
 945   }
 946 }
 947 
 948 void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
 949   assert_cond(masm != NULL);
 950   // We always ignore the frame_slots arg and just use the space just below the frame pointer
 951   // which by this time is free to use
 952   switch (ret_type) {
 953     case T_FLOAT:
 954       __ fsw(f10, Address(fp, -3 * wordSize));
 955       break;
 956     case T_DOUBLE:
 957       __ fsd(f10, Address(fp, -3 * wordSize));
 958       break;
 959     case T_VOID:  break;
 960     default: {
 961       __ sd(x10, Address(fp, -3 * wordSize));
 962     }
 963   }
 964 }
 965 
 966 void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
 967   assert_cond(masm != NULL);
 968   // We always ignore the frame_slots arg and just use the space just below the frame pointer
 969   // which by this time is free to use
 970   switch (ret_type) {
 971     case T_FLOAT:
 972       __ flw(f10, Address(fp, -3 * wordSize));
 973       break;
 974     case T_DOUBLE:
 975       __ fld(f10, Address(fp, -3 * wordSize));
 976       break;
 977     case T_VOID:  break;
 978     default: {
 979       __ ld(x10, Address(fp, -3 * wordSize));
 980     }
 981   }
 982 }
 983 
 984 static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) {
 985   assert_cond(masm != NULL && args != NULL);
 986   RegSet x;
 987   for ( int i = first_arg ; i < arg_count ; i++ ) {
 988     if (args[i].first()->is_Register()) {
 989       x = x + args[i].first()->as_Register();
 990     } else if (args[i].first()->is_FloatRegister()) {
 991       __ addi(sp, sp, -2 * wordSize);
 992       __ fsd(args[i].first()->as_FloatRegister(), Address(sp, 0));
 993     }
 994   }
 995   __ push_reg(x, sp);
 996 }
 997 
 998 static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) {
 999   assert_cond(masm != NULL && args != NULL);
1000   RegSet x;
1001   for ( int i = first_arg ; i < arg_count ; i++ ) {
1002     if (args[i].first()->is_Register()) {
1003       x = x + args[i].first()->as_Register();
1004     } else {
1005       ;
1006     }
1007   }
1008   __ pop_reg(x, sp);
1009   for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) {
1010     if (args[i].first()->is_Register()) {
1011       ;
1012     } else if (args[i].first()->is_FloatRegister()) {
1013       __ fld(args[i].first()->as_FloatRegister(), Address(sp, 0));
1014       __ add(sp, sp, 2 * wordSize);
1015     }
1016   }
1017 }
1018 
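     // Emit a call to 'dest': use a far_call when the destination is a CodeBlob in the
     // code cache, otherwise materialize the absolute address with la_patchable and
     // call through t0.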
1019 static void rt_call(MacroAssembler* masm, address dest) {
1020   assert_cond(masm != NULL);
1021   CodeBlob *cb = CodeCache::find_blob(dest);
1022   if (cb) {
1023     __ far_call(RuntimeAddress(dest));
1024   } else {
1025     int32_t offset = 0;
1026     __ la_patchable(t0, RuntimeAddress(dest), offset);
1027     __ jalr(x1, t0, offset);
1028   }
1029 }
1030 
1031 static void verify_oop_args(MacroAssembler* masm,
1032                             const methodHandle& method,
1033                             const BasicType* sig_bt,
1034                             const VMRegPair* regs) {
1035   const Register temp_reg = x9;  // not part of any compiled calling seq
1036   if (VerifyOops) {
1037     for (int i = 0; i < method->size_of_parameters(); i++) {
1038       if (sig_bt[i] == T_OBJECT ||
1039           sig_bt[i] == T_ARRAY) {
1040         VMReg r = regs[i].first();
1041         assert(r->is_valid(), "bad oop arg");
1042         if (r->is_stack()) {
1043           __ ld(temp_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size));
1044           __ verify_oop(temp_reg);
1045         } else {
1046           __ verify_oop(r->as_Register());
1047         }
1048       }
1049     }
1050   }
1051 }
1052 
1053 static void gen_special_dispatch(MacroAssembler* masm,
1054                                  const methodHandle& method,
1055                                  const BasicType* sig_bt,
1056                                  const VMRegPair* regs) {
1057   verify_oop_args(masm, method, sig_bt, regs);
1058   vmIntrinsics::ID iid = method->intrinsic_id();
1059 
1060   // Figure out which arguments carry the receiver and the MemberName, if any.
1061   bool     has_receiver   = false;
1062   Register receiver_reg   = noreg;
1063   int      member_arg_pos = -1;
1064   Register member_reg     = noreg;
1065   int      ref_kind       = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid);
1066   if (ref_kind != 0) {
1067     member_arg_pos = method->size_of_parameters() - 1;  // trailing MemberName argument
1068     member_reg = x9;  // known to be free at this point
1069     has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind);
1070   } else if (iid == vmIntrinsics::_invokeBasic || iid == vmIntrinsics::_linkToNative) {
1071     has_receiver = true;
1072   } else {
1073     fatal("unexpected intrinsic id %d", vmIntrinsics::as_int(iid));
1074   }
1075 
1076   if (member_reg != noreg) {
1077     // Load the member_arg into register, if necessary.
1078     SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs);
1079     VMReg r = regs[member_arg_pos].first();
1080     if (r->is_stack()) {
1081       __ ld(member_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size));
1082     } else {
1083       // no data motion is needed
1084       member_reg = r->as_Register();
1085     }
1086   }
1087 
1088   if (has_receiver) {
1089     // Make sure the receiver is loaded into a register.
1090     assert(method->size_of_parameters() > 0, "oob");
1091     assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object");
1092     VMReg r = regs[0].first();
1093     assert(r->is_valid(), "bad receiver arg");
1094     if (r->is_stack()) {
1095       // Porting note:  This assumes that compiled calling conventions always
1096       // pass the receiver oop in a register.  If this is not true on some
1097       // platform, pick a temp and load the receiver from stack.
1098       fatal("receiver always in a register");
1099       receiver_reg = x12;  // known to be free at this point
1100       __ ld(receiver_reg, Address(sp, r->reg2stack() * VMRegImpl::stack_slot_size));
1101     } else {
1102       // no data motion is needed
1103       receiver_reg = r->as_Register();
1104     }
1105   }
1106 
1107   // Figure out which address we are really jumping to:
1108   MethodHandles::generate_method_handle_dispatch(masm, iid,
1109                                                  receiver_reg, member_reg, /*for_compiler_entry:*/ true);
1110 }
1111 
1112 // ---------------------------------------------------------------------------
1113 // Generate a native wrapper for a given method.  The method takes arguments
1114 // in the Java compiled code convention, marshals them to the native
1115 // convention (handlizes oops, etc), transitions to native, makes the call,
1116 // returns to java state (possibly blocking), unhandlizes any result and
1117 // returns.
1118 //
1119 // Critical native functions are a shorthand for the use of
1120 // GetPrimitiveArrayCritical and disallow the use of any other JNI
1121 // functions.  The wrapper is expected to unpack the arguments before
1122 // passing them to the callee and perform checks before and after the
1123 // native call to ensure that the GCLocker
1124 // lock_critical/unlock_critical semantics are followed.  Some other
1125 // parts of JNI setup are skipped like the tear down of the JNI handle
1126 // block and the check for pending exceptions, since it's impossible for them
1127 // to be thrown.
1128 //
1129 // They are roughly structured like this:
1130 //    if (GCLocker::needs_gc()) SharedRuntime::block_for_jni_critical()
1131 //    transition to thread_in_native
1132 //    unpack array arguments and call native entry point
1133 //    check for safepoint in progress
1134 //    check if any thread suspend flags are set
1135 //      call into JVM and possibly unlock the JNI critical
1136 //      if a GC was suppressed while in the critical native.
1137 //    transition back to thread_in_Java
1138 //    return to caller
1139 //
1140 nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
1141                                                 const methodHandle& method,
1142                                                 int compile_id,
1143                                                 BasicType* in_sig_bt,
1144                                                 VMRegPair* in_regs,
1145                                                 BasicType ret_type) {
1146   if (method->is_method_handle_intrinsic()) {
1147     vmIntrinsics::ID iid = method->intrinsic_id();
1148     intptr_t start = (intptr_t)__ pc();
1149     int vep_offset = ((intptr_t)__ pc()) - start;
1150 
1151     // First instruction must be a nop as it may need to be patched on deoptimisation
1152     __ nop();
1153     gen_special_dispatch(masm,
1154                          method,
1155                          in_sig_bt,
1156                          in_regs);
1157     int frame_complete = ((intptr_t)__ pc()) - start;  // not complete, period
1158     __ flush();
1159     int stack_slots = SharedRuntime::out_preserve_stack_slots();  // no out slots at all, actually
1160     return nmethod::new_native_nmethod(method,
1161                                        compile_id,
1162                                        masm->code(),
1163                                        vep_offset,
1164                                        frame_complete,
1165                                        stack_slots / VMRegImpl::slots_per_word,
1166                                        in_ByteSize(-1),
1167                                        in_ByteSize(-1),
1168                                        (OopMapSet*)NULL);
1169   }
1170   address native_func = method->native_function();
1171   assert(native_func != NULL, "must have function");
1172 
1173   // An OopMap for lock (and class if static)
1174   OopMapSet *oop_maps = new OopMapSet();
1175   assert_cond(oop_maps != NULL);
1176   intptr_t start = (intptr_t)__ pc();
1177 
1178   // We have received a description of where all the java args are located
1179   // on entry to the wrapper. We need to convert these args to where
1180   // the jni function will expect them. To figure out where they go
1181   // we convert the java signature to a C signature by inserting
1182   // the hidden arguments as arg[0] and possibly arg[1] (static method)
1183 
1184   const int total_in_args = method->size_of_parameters();
1185   int total_c_args = total_in_args + (method->is_static() ? 2 : 1);
1186 
1187   BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
1188   VMRegPair* out_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);
1189   BasicType* in_elem_bt = NULL;
1190 
1191   int argc = 0;
1192   out_sig_bt[argc++] = T_ADDRESS;
1193   if (method->is_static()) {
1194     out_sig_bt[argc++] = T_OBJECT;
1195   }
1196 
1197   for (int i = 0; i < total_in_args ; i++) {
1198     out_sig_bt[argc++] = in_sig_bt[i];
1199   }
1200 
1201   // Now figure out where the args must be stored and how much stack space
1202   // they require.
1203   int out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args);
1204 
1205   // Compute framesize for the wrapper.  We need to handlize all oops in
1206   // incoming registers
1207 
1208   // Calculate the total number of stack slots we will need.
1209 
1210   // First count the abi requirement plus all of the outgoing args
1211   int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;
1212 
1213   // Now the space for the inbound oop handle area
1214   int total_save_slots = 8 * VMRegImpl::slots_per_word;  // 8 arguments passed in registers
1215 
1216   int oop_handle_offset = stack_slots;
1217   stack_slots += total_save_slots;
1218 
1219   // Now any space we need for handlizing a klass if static method
1220 
1221   int klass_slot_offset = 0;
1222   int klass_offset = -1;
1223   int lock_slot_offset = 0;
1224   bool is_static = false;
1225 
1226   if (method->is_static()) {
1227     klass_slot_offset = stack_slots;
1228     stack_slots += VMRegImpl::slots_per_word;
1229     klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size;
1230     is_static = true;
1231   }
1232 
1233   // Plus a lock if needed
1234 
1235   if (method->is_synchronized()) {
1236     lock_slot_offset = stack_slots;
1237     stack_slots += VMRegImpl::slots_per_word;
1238   }
1239 
1240   // Now a place (+2) to save return values or temp during shuffling
1241   // + 4 for return address (which we own) and saved fp
1242   stack_slots += 6;
1243 
1244   // Ok The space we have allocated will look like:
1245   //
1246   //
1247   // FP-> |                     |
1248   //      | 2 slots (ra)        |
1249   //      | 2 slots (fp)        |
1250   //      |---------------------|
1251   //      | 2 slots for moves   |
1252   //      |---------------------|
1253   //      | lock box (if sync)  |
1254   //      |---------------------| <- lock_slot_offset
1255   //      | klass (if static)   |
1256   //      |---------------------| <- klass_slot_offset
1257   //      | oopHandle area      |
1258   //      |---------------------| <- oop_handle_offset (8 java arg registers)
1259   //      | outbound memory     |
1260   //      | based arguments     |
1261   //      |                     |
1262   //      |---------------------|
1263   //      |                     |
1264   // SP-> | out_preserved_slots |
1265   //
1266   //
1267 
1268 
1269   // Now compute the actual number of stack words we need, rounding to keep
1270   // the stack properly aligned.
1271   stack_slots = align_up(stack_slots, StackAlignmentInSlots);
1272 
1273   int stack_size = stack_slots * VMRegImpl::stack_slot_size;
1274 
1275   // First thing make an ic check to see if we should even be here
1276 
1277   // We are free to use all registers as temps without saving them and
1278   // restoring them except fp. fp is the only callee save register
1279   // as far as the interpreter and the compiler(s) are concerned.
1280 
1281 
1282   const Register ic_reg = t1;
1283   const Register receiver = j_rarg0;
1284 
1285   Label hit;
1286   Label exception_pending;
1287 
1288   assert_different_registers(ic_reg, receiver, t0);
1289   __ verify_oop(receiver);
1290   __ cmp_klass(receiver, ic_reg, t0, hit);
1291 
1292   __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1293 
1294   // Verified entry point must be aligned
1295   __ align(8);
1296 
1297   __ bind(hit);
1298 
1299   int vep_offset = ((intptr_t)__ pc()) - start;
1300 
1301   // If we have to make this method not-entrant we'll overwrite its
1302   // first instruction with a jump.
1303   __ nop();
1304 
1305   if (VM_Version::supports_fast_class_init_checks() && method->needs_clinit_barrier()) {
1306     Label L_skip_barrier;
1307     __ mov_metadata(t1, method->method_holder()); // InstanceKlass*
1308     __ clinit_barrier(t1, t0, &L_skip_barrier);
1309     __ far_jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub()));
1310 
1311     __ bind(L_skip_barrier);
1312   }
1313 
1314   // Generate stack overflow check
1315   __ bang_stack_with_offset(checked_cast<int>(StackOverflow::stack_shadow_zone_size()));
1316 
1317   // Generate a new frame for the wrapper.
1318   __ enter();
1319   // -2 because return address is already present and so is saved fp
1320   __ sub(sp, sp, stack_size - 2 * wordSize);
1321 
1322   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
1323   assert_cond(bs != NULL);
1324   bs->nmethod_entry_barrier(masm);
1325 
1326   // Frame is now completed as far as size and linkage.
1327   int frame_complete = ((intptr_t)__ pc()) - start;
1328 
1329   // We use x18 as the oop handle for the receiver/klass
1330   // It is callee save so it survives the call to native
1331 
1332   const Register oop_handle_reg = x18;
1333 
1334   //
1335   // We immediately shuffle the arguments so that for any vm call we have to
1336   // make from here on out (sync slow path, jvmti, etc.) we will have
1337   // captured the oops from our caller and have a valid oopMap for
1338   // them.
1339 
1340   // -----------------
1341   // The Grand Shuffle
1342 
1343   // The Java calling convention is either equal to (linux) or denser than (win64) the
1344   // c calling convention. However, because of the jni_env argument the c calling
1345   // convention always has at least one more (and two for static) arguments than Java.
1346   // Therefore if we move the args from java -> c backwards then we will never have
1347   // a register->register conflict and we don't have to build a dependency graph
1348   // and figure out how to break any cycles.
1349   //
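  // Illustrative (hypothetical) example for a non-static method with one int
  // argument: Java passes the receiver in j_rarg0 and the int in j_rarg1,
  // while the C call receives JNIEnv* in c_rarg0, the receiver handle in
  // c_rarg1 and the int in c_rarg2. Shuffling backwards copies j_rarg1 ->
  // c_rarg2 first and j_rarg0 -> c_rarg1 second, so a source register is
  // never clobbered before it has been read.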
1350 
1351   // Record esp-based slot for receiver on stack for non-static methods
1352   int receiver_offset = -1;
1353 
  // This is a trick. We double the stack slots so we can claim
  // the oops in the caller's frame. Since we are sure to have
  // more args than the caller, doubling is enough to make
  // sure we can capture all the incoming oop args from the
  // caller.
1359   //
1360   OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
1361   assert_cond(map != NULL);
1362 
1363   int float_args = 0;
1364   int int_args = 0;
1365 
1366 #ifdef ASSERT
1367   bool reg_destroyed[RegisterImpl::number_of_registers];
1368   bool freg_destroyed[FloatRegisterImpl::number_of_registers];
1369   for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) {
1370     reg_destroyed[r] = false;
1371   }
1372   for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) {
1373     freg_destroyed[f] = false;
1374   }
1375 
1376 #endif /* ASSERT */
1377 
1378   // For JNI natives the incoming and outgoing registers are offset upwards.
1379   GrowableArray<int> arg_order(2 * total_in_args);
1380   VMRegPair tmp_vmreg;
1381   tmp_vmreg.set2(x9->as_VMReg());
1382 
1383   for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) {
1384     arg_order.push(i);
1385     arg_order.push(c_arg);
1386   }
1387 
1388   int temploc = -1;
1389   for (int ai = 0; ai < arg_order.length(); ai += 2) {
1390     int i = arg_order.at(ai);
1391     int c_arg = arg_order.at(ai + 1);
1392     __ block_comment(err_msg("mv %d -> %d", i, c_arg));
1393     assert(c_arg != -1 && i != -1, "wrong order");
1394 #ifdef ASSERT
1395     if (in_regs[i].first()->is_Register()) {
1396       assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!");
1397     } else if (in_regs[i].first()->is_FloatRegister()) {
1398       assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding()], "destroyed reg!");
1399     }
1400     if (out_regs[c_arg].first()->is_Register()) {
1401       reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true;
1402     } else if (out_regs[c_arg].first()->is_FloatRegister()) {
1403       freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true;
1404     }
1405 #endif /* ASSERT */
1406     switch (in_sig_bt[i]) {
1407       case T_ARRAY:
1408       case T_OBJECT:
1409         object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg],
1410                     ((i == 0) && (!is_static)),
1411                     &receiver_offset);
1412         int_args++;
1413         break;
1414       case T_VOID:
1415         break;
1416 
1417       case T_FLOAT:
1418         float_move(masm, in_regs[i], out_regs[c_arg]);
1419         float_args++;
1420         break;
1421 
1422       case T_DOUBLE:
1423         assert( i + 1 < total_in_args &&
1424                 in_sig_bt[i + 1] == T_VOID &&
1425                 out_sig_bt[c_arg + 1] == T_VOID, "bad arg list");
1426         double_move(masm, in_regs[i], out_regs[c_arg]);
1427         float_args++;
1428         break;
1429 
1430       case T_LONG :
1431         long_move(masm, in_regs[i], out_regs[c_arg]);
1432         int_args++;
1433         break;
1434 
1435       case T_ADDRESS:
1436         assert(false, "found T_ADDRESS in java args");
1437         break;
1438 
1439       default:
1440         move32_64(masm, in_regs[i], out_regs[c_arg]);
1441         int_args++;
1442     }
1443   }
1444 
1445   // point c_arg at the first arg that is already loaded in case we
1446   // need to spill before we call out
1447   int c_arg = total_c_args - total_in_args;
1448 
1449   // Pre-load a static method's oop into c_rarg1.
1450   if (method->is_static()) {
1451 
1452     //  load oop into a register
1453     __ movoop(c_rarg1,
1454               JNIHandles::make_local(method->method_holder()->java_mirror()),
1455               /*immediate*/true);
1456 
    // Now handlize the static class mirror; it's known to be non-null.
1458     __ sd(c_rarg1, Address(sp, klass_offset));
1459     map->set_oop(VMRegImpl::stack2reg(klass_slot_offset));
1460 
1461     // Now get the handle
1462     __ la(c_rarg1, Address(sp, klass_offset));
1463     // and protect the arg if we must spill
1464     c_arg--;
1465   }
1466 
1467   // Change state to native (we save the return address in the thread, since it might not
1468   // be pushed on the stack when we do a stack traversal).
1469   // We use the same pc/oopMap repeatedly when we call out
1470 
1471   Label native_return;
1472   __ set_last_Java_frame(sp, noreg, native_return, t0);
1473 
1474   Label dtrace_method_entry, dtrace_method_entry_done;
1475   {
1476     int32_t offset = 0;
1477     __ la_patchable(t0, ExternalAddress((address)&DTraceMethodProbes), offset);
1478     __ lbu(t0, Address(t0, offset));
1479     __ addw(t0, t0, zr);
1480     __ bnez(t0, dtrace_method_entry);
1481     __ bind(dtrace_method_entry_done);
1482   }
1483 
1484   // RedefineClasses() tracing support for obsolete method entry
1485   if (log_is_enabled(Trace, redefine, class, obsolete)) {
1486     // protect the args we've loaded
1487     save_args(masm, total_c_args, c_arg, out_regs);
1488     __ mov_metadata(c_rarg1, method());
1489     __ call_VM_leaf(
1490       CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry),
1491       xthread, c_rarg1);
1492     restore_args(masm, total_c_args, c_arg, out_regs);
1493   }
1494 
1495   // Lock a synchronized method
1496 
1497   // Register definitions used by locking and unlocking
1498 
1499   const Register swap_reg = x10;
1500   const Register obj_reg  = x9;  // Will contain the oop
1501   const Register lock_reg = x30;  // Address of compiler lock object (BasicLock)
1502   const Register old_hdr  = x30;  // value of old header at unlock time
1503   const Register tmp      = ra;
1504 
1505   Label slow_path_lock;
1506   Label lock_done;
1507 
1508   if (method->is_synchronized()) {
1509 
1510     const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes();
1511 
1512     // Get the handle (the 2nd argument)
1513     __ mv(oop_handle_reg, c_rarg1);
1514 
1515     // Get address of the box
1516 
1517     __ la(lock_reg, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size));
1518 
1519     // Load the oop from the handle
1520     __ ld(obj_reg, Address(oop_handle_reg, 0));
1521 
1522     if (!UseHeavyMonitors) {
      // Load (object->mark() | 1) into swap_reg (x10)
1524       __ ld(t0, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
1525       __ ori(swap_reg, t0, 1);
1526 
1527       // Save (object->mark() | 1) into BasicLock's displaced header
1528       __ sd(swap_reg, Address(lock_reg, mark_word_offset));
1529 
1530       // src -> dest if dest == x10 else x10 <- dest
1531       {
1532         Label here;
1533         __ cmpxchg_obj_header(x10, lock_reg, obj_reg, t0, lock_done, /*fallthrough*/NULL);
1534       }
1535 
      // Test if the oopMark is an obvious stack pointer, i.e.,
      //  1) (mark & 3) == 0, and
      //  2) sp <= mark < sp + os::vm_page_size()
      // These tests can be done with a single expression:
      //   ((mark - sp) & (3 - os::vm_page_size())) == 0,
      // assuming both the stack pointer and the page size have their
      // least significant 2 bits clear.
      // NOTE: the oopMark is in swap_reg (x10) as the result of cmpxchg
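      // Worked example (assuming a 4 KiB page size): 3 - 4096 == -4093, i.e.
      // a mask of ~0x0ffc. (mark - sp) & mask == 0 therefore requires the low
      // two bits of (mark - sp) to be clear and every bit from bit 12 upwards
      // to be clear, i.e. sp <= mark < sp + 4096 with mark 4-byte aligned,
      // which is exactly the conditions above.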
1544 
1545       __ sub(swap_reg, swap_reg, sp);
1546       __ andi(swap_reg, swap_reg, 3 - os::vm_page_size());
1547 
1548       // Save the test result, for recursive case, the result is zero
1549       __ sd(swap_reg, Address(lock_reg, mark_word_offset));
1550       __ bnez(swap_reg, slow_path_lock);
1551     } else {
1552       __ j(slow_path_lock);
1553     }
1554 
1555     // Slow path will re-enter here
1556     __ bind(lock_done);
1557   }
1558 
1559 
1560   // Finally just about ready to make the JNI call
1561 
1562   // get JNIEnv* which is first argument to native
1563   __ la(c_rarg0, Address(xthread, in_bytes(JavaThread::jni_environment_offset())));
1564 
1565   // Now set thread in native
1566   __ la(t1, Address(xthread, JavaThread::thread_state_offset()));
1567   __ mv(t0, _thread_in_native);
1568   __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore);
1569   __ sw(t0, Address(t1));
1570 
1571   rt_call(masm, native_func);
1572 
1573   __ bind(native_return);
1574 
1575   intptr_t return_pc = (intptr_t) __ pc();
1576   oop_maps->add_gc_map(return_pc - start, map);
1577 
1578   // Unpack native results.
1579   if (ret_type != T_OBJECT && ret_type != T_ARRAY) {
1580     __ cast_primitive_type(ret_type, x10);
1581   }
1582 
1583   Label safepoint_in_progress, safepoint_in_progress_done;
1584   Label after_transition;
1585 
1586   // Switch thread to "native transition" state before reading the synchronization state.
1587   // This additional state is necessary because reading and testing the synchronization
1588   // state is not atomic w.r.t. GC, as this scenario demonstrates:
1589   //     Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
1590   //     VM thread changes sync state to synchronizing and suspends threads for GC.
1591   //     Thread A is resumed to finish this native method, but doesn't block here since it
  //     didn't see any synchronization in progress, and escapes.
1593   __ mv(t0, _thread_in_native_trans);
1594 
1595   __ sw(t0, Address(xthread, JavaThread::thread_state_offset()));
1596 
1597   // Force this write out before the read below
1598   __ membar(MacroAssembler::AnyAny);
1599 
1600   // check for safepoint operation in progress and/or pending suspend requests
1601   {
1602     // We need an acquire here to ensure that any subsequent load of the
1603     // global SafepointSynchronize::_state flag is ordered after this load
1604     // of the thread-local polling word. We don't want this poll to
1605     // return false (i.e. not safepointing) and a later poll of the global
  // SafepointSynchronize::_state to spuriously return true.
1607     // This is to avoid a race when we're in a native->Java transition
1608     // racing the code which wakes up from a safepoint.
1609 
1610     __ safepoint_poll(safepoint_in_progress, true /* at_return */, true /* acquire */, false /* in_nmethod */);
1611     __ lwu(t0, Address(xthread, JavaThread::suspend_flags_offset()));
1612     __ bnez(t0, safepoint_in_progress);
1613     __ bind(safepoint_in_progress_done);
1614   }
1615 
1616   // change thread state
1617   __ la(t1, Address(xthread, JavaThread::thread_state_offset()));
1618   __ mv(t0, _thread_in_Java);
1619   __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore);
1620   __ sw(t0, Address(t1));
1621   __ bind(after_transition);
1622 
1623   Label reguard;
1624   Label reguard_done;
1625   __ lbu(t0, Address(xthread, JavaThread::stack_guard_state_offset()));
1626   __ mv(t1, StackOverflow::stack_guard_yellow_reserved_disabled);
1627   __ beq(t0, t1, reguard);
1628   __ bind(reguard_done);
1629 
1630   // native result if any is live
1631 
1632   // Unlock
1633   Label unlock_done;
1634   Label slow_path_unlock;
1635   if (method->is_synchronized()) {
1636 
1637     // Get locked oop from the handle we passed to jni
1638     __ ld(obj_reg, Address(oop_handle_reg, 0));
1639 
1640     Label done;
1641 
1642     if (!UseHeavyMonitors) {
1643       // Simple recursive lock?
1644       __ ld(t0, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size));
1645       __ beqz(t0, done);
1646     }
1647 
1648 
    // Must save x10 if it is live now, because cmpxchg must use it
1650     if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) {
1651       save_native_result(masm, ret_type, stack_slots);
1652     }
1653 
1654     if (!UseHeavyMonitors) {
1655       // get address of the stack lock
1656       __ la(x10, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size));
1657       //  get old displaced header
1658       __ ld(old_hdr, Address(x10, 0));
1659 
1660       // Atomic swap old header if oop still contains the stack lock
1661       Label succeed;
1662       __ cmpxchg_obj_header(x10, old_hdr, obj_reg, t0, succeed, &slow_path_unlock);
1663       __ bind(succeed);
1664     } else {
1665       __ j(slow_path_unlock);
1666     }
1667 
1668     // slow path re-enters here
1669     __ bind(unlock_done);
1670     if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) {
1671       restore_native_result(masm, ret_type, stack_slots);
1672     }
1673 
1674     __ bind(done);
1675   }
1676 
1677   Label dtrace_method_exit, dtrace_method_exit_done;
1678   {
1679     int32_t offset = 0;
1680     __ la_patchable(t0, ExternalAddress((address)&DTraceMethodProbes), offset);
1681     __ lbu(t0, Address(t0, offset));
1682     __ bnez(t0, dtrace_method_exit);
1683     __ bind(dtrace_method_exit_done);
1684   }
1685 
1686   __ reset_last_Java_frame(false);
1687 
1688   // Unbox oop result, e.g. JNIHandles::resolve result.
1689   if (is_reference_type(ret_type)) {
1690     __ resolve_jobject(x10, xthread, t1);
1691   }
1692 
1693   if (CheckJNICalls) {
1694     // clear_pending_jni_exception_check
1695     __ sd(zr, Address(xthread, JavaThread::pending_jni_exception_check_fn_offset()));
1696   }
1697 
1698   // reset handle block
1699   __ ld(x12, Address(xthread, JavaThread::active_handles_offset()));
1700   __ sd(zr, Address(x12, JNIHandleBlock::top_offset_in_bytes()));
1701 
1702   __ leave();
1703 
1704   // Any exception pending?
1705   __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset())));
1706   __ bnez(t0, exception_pending);
1707 
1708   // We're done
1709   __ ret();
1710 
1711   // Unexpected paths are out of line and go here
1712 
  __ bind(exception_pending);

  // forward the exception
1717   __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
1718 
1719   // Slow path locking & unlocking
1720   if (method->is_synchronized()) {
1721 
1722     __ block_comment("Slow path lock {");
1723     __ bind(slow_path_lock);
1724 
    // We have last_Java_frame setup. No exceptions, so do a vanilla call, not call_VM.
1726     // args are (oop obj, BasicLock* lock, JavaThread* thread)
1727 
1728     // protect the args we've loaded
1729     save_args(masm, total_c_args, c_arg, out_regs);
1730 
1731     __ mv(c_rarg0, obj_reg);
1732     __ mv(c_rarg1, lock_reg);
1733     __ mv(c_rarg2, xthread);
1734 
1735     // Not a leaf but we have last_Java_frame setup as we want
1736     __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), 3);
1737     restore_args(masm, total_c_args, c_arg, out_regs);
1738 
1739 #ifdef ASSERT
1740     { Label L;
1741       __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset())));
1742       __ beqz(t0, L);
1743       __ stop("no pending exception allowed on exit from monitorenter");
1744       __ bind(L);
1745     }
1746 #endif
1747     __ j(lock_done);
1748 
1749     __ block_comment("} Slow path lock");
1750 
1751     __ block_comment("Slow path unlock {");
1752     __ bind(slow_path_unlock);
1753 
1754     if (ret_type == T_FLOAT || ret_type == T_DOUBLE) {
1755       save_native_result(masm, ret_type, stack_slots);
1756     }
1757 
1758     __ mv(c_rarg2, xthread);
1759     __ la(c_rarg1, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size));
1760     __ mv(c_rarg0, obj_reg);
1761 
1762     // Save pending exception around call to VM (which contains an EXCEPTION_MARK)
1763     // NOTE that obj_reg == x9 currently
1764     __ ld(x9, Address(xthread, in_bytes(Thread::pending_exception_offset())));
1765     __ sd(zr, Address(xthread, in_bytes(Thread::pending_exception_offset())));
1766 
1767     rt_call(masm, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C));
1768 
1769 #ifdef ASSERT
1770     {
1771       Label L;
1772       __ ld(t0, Address(xthread, in_bytes(Thread::pending_exception_offset())));
1773       __ beqz(t0, L);
1774       __ stop("no pending exception allowed on exit complete_monitor_unlocking_C");
1775       __ bind(L);
1776     }
1777 #endif /* ASSERT */
1778 
1779     __ sd(x9, Address(xthread, in_bytes(Thread::pending_exception_offset())));
1780 
1781     if (ret_type == T_FLOAT || ret_type == T_DOUBLE) {
1782       restore_native_result(masm, ret_type, stack_slots);
1783     }
1784     __ j(unlock_done);
1785 
1786     __ block_comment("} Slow path unlock");
1787 
1788   } // synchronized
1789 
1790   // SLOW PATH Reguard the stack if needed
1791 
1792   __ bind(reguard);
1793   save_native_result(masm, ret_type, stack_slots);
1794   rt_call(masm, CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages));
1795   restore_native_result(masm, ret_type, stack_slots);
1796   // and continue
1797   __ j(reguard_done);
1798 
1799   // SLOW PATH safepoint
1800   {
1801     __ block_comment("safepoint {");
1802     __ bind(safepoint_in_progress);
1803 
    // Don't use call_VM, as it will see a possible pending exception, forward it,
    // and never return here, preventing us from clearing _last_native_pc down below.
1806     //
1807     save_native_result(masm, ret_type, stack_slots);
1808     __ mv(c_rarg0, xthread);
1809 #ifndef PRODUCT
1810     assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area");
1811 #endif
1812     int32_t offset = 0;
1813     __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans)), offset);
1814     __ jalr(x1, t0, offset);
1815 
1816     // Restore any method result value
1817     restore_native_result(masm, ret_type, stack_slots);
1818 
1819     __ j(safepoint_in_progress_done);
1820     __ block_comment("} safepoint");
1821   }
1822 
1823   // SLOW PATH dtrace support
1824   {
1825     __ block_comment("dtrace entry {");
1826     __ bind(dtrace_method_entry);
1827 
    // We have all of the arguments set up at this point. We must not touch any
    // argument registers at this point (what if we save/restore them when there are no oops?).
1830 
1831     save_args(masm, total_c_args, c_arg, out_regs);
1832     __ mov_metadata(c_rarg1, method());
1833     __ call_VM_leaf(
1834       CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry),
1835       xthread, c_rarg1);
1836     restore_args(masm, total_c_args, c_arg, out_regs);
1837     __ j(dtrace_method_entry_done);
1838     __ block_comment("} dtrace entry");
1839   }
1840 
1841   {
1842     __ block_comment("dtrace exit {");
1843     __ bind(dtrace_method_exit);
1844     save_native_result(masm, ret_type, stack_slots);
1845     __ mov_metadata(c_rarg1, method());
1846     __ call_VM_leaf(
1847          CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit),
1848          xthread, c_rarg1);
1849     restore_native_result(masm, ret_type, stack_slots);
1850     __ j(dtrace_method_exit_done);
1851     __ block_comment("} dtrace exit");
1852   }
1853 
1854   __ flush();
1855 
1856   nmethod *nm = nmethod::new_native_nmethod(method,
1857                                             compile_id,
1858                                             masm->code(),
1859                                             vep_offset,
1860                                             frame_complete,
1861                                             stack_slots / VMRegImpl::slots_per_word,
1862                                             (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
1863                                             in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size),
1864                                             oop_maps);
1865   assert(nm != NULL, "create native nmethod fail!");
1866   return nm;
1867 }
1868 
// This function returns the adjustment size (in number of words) to a c2i adapter
// activation for use during deoptimization.
1871 int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
1872   assert(callee_locals >= callee_parameters,
1873          "test and remove; got more parms than locals");
1874   if (callee_locals < callee_parameters) {
1875     return 0;                   // No adjustment for negative locals
1876   }
1877   int diff = (callee_locals - callee_parameters) * Interpreter::stackElementWords;
1878   // diff is counted in stack words
1879   return align_up(diff, 2);
1880 }
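// For example (hypothetical values): with callee_parameters == 2,
// callee_locals == 5 and one stack word per interpreter stack element,
// diff is 3 words, which align_up rounds to 4 so the adjusted area stays
// 2-word (16-byte) aligned.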
1881 
1882 //------------------------------generate_deopt_blob----------------------------
1883 void SharedRuntime::generate_deopt_blob() {
1884   // Allocate space for the code
1885   ResourceMark rm;
1886   // Setup code generation tools
1887   int pad = 0;
1888   CodeBuffer buffer("deopt_blob", 2048 + pad, 1024);
1889   MacroAssembler* masm = new MacroAssembler(&buffer);
1890   int frame_size_in_words = -1;
1891   OopMap* map = NULL;
1892   OopMapSet *oop_maps = new OopMapSet();
1893   assert_cond(masm != NULL && oop_maps != NULL);
1894   RegisterSaver reg_saver(COMPILER2_OR_JVMCI != 0);
1895 
1896   // -------------
1897   // This code enters when returning to a de-optimized nmethod.  A return
  // address has been pushed on the stack, and return values are in
1899   // registers.
1900   // If we are doing a normal deopt then we were called from the patched
1901   // nmethod from the point we returned to the nmethod. So the return
  // address on the stack is wrong by NativeCall::instruction_size.
1903   // We will adjust the value so it looks like we have the original return
1904   // address on the stack (like when we eagerly deoptimized).
1905   // In the case of an exception pending when deoptimizing, we enter
1906   // with a return address on the stack that points after the call we patched
1907   // into the exception handler. We have the following register state from,
1908   // e.g., the forward exception stub (see stubGenerator_riscv.cpp).
1909   //    x10: exception oop
1910   //    x9: exception handler
1911   //    x13: throwing pc
1912   // So in this case we simply jam x13 into the useless return address and
1913   // the stack looks just like we want.
1914   //
1915   // At this point we need to de-opt.  We save the argument return
1916   // registers.  We call the first C routine, fetch_unroll_info().  This
1917   // routine captures the return values and returns a structure which
1918   // describes the current frame size and the sizes of all replacement frames.
1919   // The current frame is compiled code and may contain many inlined
1920   // functions, each with their own JVM state.  We pop the current frame, then
1921   // push all the new frames.  Then we call the C routine unpack_frames() to
1922   // populate these frames.  Finally unpack_frames() returns us the new target
1923   // address.  Notice that callee-save registers are BLOWN here; they have
1924   // already been captured in the vframeArray at the time the return PC was
1925   // patched.
1926   address start = __ pc();
1927   Label cont;
1928 
1929   // Prolog for non exception case!
1930 
1931   // Save everything in sight.
1932   map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words);
1933 
1934   // Normal deoptimization.  Save exec mode for unpack_frames.
1935   __ mvw(xcpool, Deoptimization::Unpack_deopt); // callee-saved
1936   __ j(cont);
1937 
1938   int reexecute_offset = __ pc() - start;
1939 
1940   // Reexecute case
  // The return address is the pc that describes which bci to re-execute at.
1942 
1943   // No need to update map as each call to save_live_registers will produce identical oopmap
1944   (void) reg_saver.save_live_registers(masm, 0, &frame_size_in_words);
1945 
1946   __ mvw(xcpool, Deoptimization::Unpack_reexecute); // callee-saved
1947   __ j(cont);
1948 
1949   int exception_offset = __ pc() - start;
1950 
1951   // Prolog for exception case
1952 
  // All registers are dead at this entry point, except for x10 and
  // x13, which contain the exception oop and exception pc
  // respectively.  Set them in TLS and fall thru to the
1956   // unpack_with_exception_in_tls entry point.
1957 
1958   __ sd(x13, Address(xthread, JavaThread::exception_pc_offset()));
1959   __ sd(x10, Address(xthread, JavaThread::exception_oop_offset()));
1960 
1961   int exception_in_tls_offset = __ pc() - start;
1962 
1963   // new implementation because exception oop is now passed in JavaThread
1964 
1965   // Prolog for exception case
1966   // All registers must be preserved because they might be used by LinearScan
  // Exception oop and throwing PC are passed in JavaThread
1968   // tos: stack at point of call to method that threw the exception (i.e. only
1969   // args are on the stack, no return address)
1970 
1971   // The return address pushed by save_live_registers will be patched
1972   // later with the throwing pc. The correct value is not available
1973   // now because loading it from memory would destroy registers.
1974 
1975   // NB: The SP at this point must be the SP of the method that is
1976   // being deoptimized.  Deoptimization assumes that the frame created
1977   // here by save_live_registers is immediately below the method's SP.
1978   // This is a somewhat fragile mechanism.
1979 
1980   // Save everything in sight.
1981   map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words);
1982 
1983   // Now it is safe to overwrite any register
1984 
1985   // Deopt during an exception.  Save exec mode for unpack_frames.
1986   __ li(xcpool, Deoptimization::Unpack_exception); // callee-saved
1987 
1988   // load throwing pc from JavaThread and patch it as the return address
1989   // of the current frame. Then clear the field in JavaThread
1990 
1991   __ ld(x13, Address(xthread, JavaThread::exception_pc_offset()));
1992   __ sd(x13, Address(fp, frame::return_addr_offset * wordSize));
1993   __ sd(zr, Address(xthread, JavaThread::exception_pc_offset()));
1994 
1995 #ifdef ASSERT
1996   // verify that there is really an exception oop in JavaThread
1997   __ ld(x10, Address(xthread, JavaThread::exception_oop_offset()));
1998   __ verify_oop(x10);
1999 
2000   // verify that there is no pending exception
2001   Label no_pending_exception;
2002   __ ld(t0, Address(xthread, Thread::pending_exception_offset()));
2003   __ beqz(t0, no_pending_exception);
2004   __ stop("must not have pending exception here");
2005   __ bind(no_pending_exception);
2006 #endif
2007 
2008   __ bind(cont);
2009 
2010   // Call C code.  Need thread and this frame, but NOT official VM entry
2011   // crud.  We cannot block on this call, no GC can happen.
2012   //
2013   // UnrollBlock* fetch_unroll_info(JavaThread* thread)
2014 
2015   // fetch_unroll_info needs to call last_java_frame().
2016 
2017   Label retaddr;
2018   __ set_last_Java_frame(sp, noreg, retaddr, t0);
2019 #ifdef ASSERT
2020   {
2021     Label L;
2022     __ ld(t0, Address(xthread,
2023                               JavaThread::last_Java_fp_offset()));
2024     __ beqz(t0, L);
2025     __ stop("SharedRuntime::generate_deopt_blob: last_Java_fp not cleared");
2026     __ bind(L);
2027   }
2028 #endif // ASSERT
2029   __ mv(c_rarg0, xthread);
2030   __ mv(c_rarg1, xcpool);
2031   int32_t offset = 0;
2032   __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info)), offset);
2033   __ jalr(x1, t0, offset);
2034   __ bind(retaddr);
2035 
2036   // Need to have an oopmap that tells fetch_unroll_info where to
2037   // find any register it might need.
2038   oop_maps->add_gc_map(__ pc() - start, map);
2039 
2040   __ reset_last_Java_frame(false);
2041 
2042   // Load UnrollBlock* into x15
2043   __ mv(x15, x10);
2044 
2045   __ lwu(xcpool, Address(x15, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));
2046   Label noException;
2047   __ li(t0, Deoptimization::Unpack_exception);
2048   __ bne(xcpool, t0, noException); // Was exception pending?
2049   __ ld(x10, Address(xthread, JavaThread::exception_oop_offset()));
2050   __ ld(x13, Address(xthread, JavaThread::exception_pc_offset()));
2051   __ sd(zr, Address(xthread, JavaThread::exception_oop_offset()));
2052   __ sd(zr, Address(xthread, JavaThread::exception_pc_offset()));
2053 
2054   __ verify_oop(x10);
2055 
2056   // Overwrite the result registers with the exception results.
2057   __ sd(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10)));
2058 
2059   __ bind(noException);
2060 
2061   // Only register save data is on the stack.
2062   // Now restore the result registers.  Everything else is either dead
2063   // or captured in the vframeArray.
2064 
2065   // Restore fp result register
2066   __ fld(f10, Address(sp, reg_saver.freg_offset_in_bytes(f10)));
2067   // Restore integer result register
2068   __ ld(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10)));
2069 
2070   // Pop all of the register save area off the stack
2071   __ add(sp, sp, frame_size_in_words * wordSize);
2072 
  // All of the register save area has been popped off the stack. Only the
2074   // return address remains.
2075 
2076   // Pop all the frames we must move/replace.
2077   //
2078   // Frame picture (youngest to oldest)
2079   // 1: self-frame (no frame link)
2080   // 2: deopting frame  (no frame link)
2081   // 3: caller of deopting frame (could be compiled/interpreted).
2082   //
2083   // Note: by leaving the return address of self-frame on the stack
2084   // and using the size of frame 2 to adjust the stack
2085   // when we are done the return to frame 3 will still be on the stack.
2086 
2087   // Pop deoptimized frame
2088   __ lwu(x12, Address(x15, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()));
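  // The deoptimized frame size includes the fp/ra pair pushed by its prolog;
  // keep those two words on the stack for now so fp and ra can be reloaded
  // by hand just below.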
2089   __ sub(x12, x12, 2 * wordSize);
2090   __ add(sp, sp, x12);
2091   __ ld(fp, Address(sp, 0));
2092   __ ld(ra, Address(sp, wordSize));
2093   __ addi(sp, sp, 2 * wordSize);
2094   // RA should now be the return address to the caller (3)
2095 
2096 #ifdef ASSERT
  // Compilers generate code that bangs the stack by as much as the
  // interpreter would need. So this stack banging should never
  // trigger a fault. Verify that it does not on non-product builds.
2100   __ lwu(x9, Address(x15, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()));
2101   __ bang_stack_size(x9, x12);
2102 #endif
2103   // Load address of array of frame pcs into x12
2104   __ ld(x12, Address(x15, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
2105 
2106   // Load address of array of frame sizes into x14
2107   __ ld(x14, Address(x15, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()));
2108 
2109   // Load counter into x13
2110   __ lwu(x13, Address(x15, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()));
2111 
  // Now adjust the caller's stack to make up for the extra locals,
  // but record the original sp so that we can save it in the skeletal interpreter
  // frame; the stack walking of interpreter_sender will then get the unextended sp
  // value and not the "real" sp value.
2116 
2117   const Register sender_sp = x16;
2118 
2119   __ mv(sender_sp, sp);
2120   __ lwu(x9, Address(x15,
2121                      Deoptimization::UnrollBlock::
2122                      caller_adjustment_offset_in_bytes()));
2123   __ sub(sp, sp, x9);
2124 
2125   // Push interpreter frames in a loop
2126   __ li(t0, 0xDEADDEAD);               // Make a recognizable pattern
2127   __ mv(t1, t0);
2128   Label loop;
2129   __ bind(loop);
2130   __ ld(x9, Address(x14, 0));          // Load frame size
2131   __ addi(x14, x14, wordSize);
2132   __ sub(x9, x9, 2 * wordSize);        // We'll push pc and fp by hand
2133   __ ld(ra, Address(x12, 0));          // Load pc
2134   __ addi(x12, x12, wordSize);
2135   __ enter();                          // Save old & set new fp
2136   __ sub(sp, sp, x9);                  // Prolog
2137   // This value is corrected by layout_activation_impl
2138   __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize));
2139   __ sd(sender_sp, Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); // Make it walkable
2140   __ mv(sender_sp, sp);                // Pass sender_sp to next frame
2141   __ addi(x13, x13, -1);               // Decrement counter
2142   __ bnez(x13, loop);
2143 
  // Re-push self-frame
2145   __ ld(ra, Address(x12));
2146   __ enter();
2147 
2148   // Allocate a full sized register save area.  We subtract 2 because
2149   // enter() just pushed 2 words
2150   __ sub(sp, sp, (frame_size_in_words - 2) * wordSize);
2151 
2152   // Restore frame locals after moving the frame
2153   __ fsd(f10, Address(sp, reg_saver.freg_offset_in_bytes(f10)));
2154   __ sd(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10)));
2155 
2156   // Call C code.  Need thread but NOT official VM entry
2157   // crud.  We cannot block on this call, no GC can happen.  Call should
2158   // restore return values to their stack-slots with the new SP.
2159   //
2160   // void Deoptimization::unpack_frames(JavaThread* thread, int exec_mode)
2161 
2162   // Use fp because the frames look interpreted now
2163   // Don't need the precise return PC here, just precise enough to point into this code blob.
2164   address the_pc = __ pc();
2165   __ set_last_Java_frame(sp, fp, the_pc, t0);
2166 
2167   __ mv(c_rarg0, xthread);
2168   __ mv(c_rarg1, xcpool); // second arg: exec_mode
2169   offset = 0;
2170   __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)), offset);
2171   __ jalr(x1, t0, offset);
2172 
2173   // Set an oopmap for the call site
2174   // Use the same PC we used for the last java frame
2175   oop_maps->add_gc_map(the_pc - start,
2176                        new OopMap(frame_size_in_words, 0));
2177 
2178   // Clear fp AND pc
2179   __ reset_last_Java_frame(true);
2180 
2181   // Collect return values
2182   __ fld(f10, Address(sp, reg_saver.freg_offset_in_bytes(f10)));
2183   __ ld(x10, Address(sp, reg_saver.reg_offset_in_bytes(x10)));
2184 
2185   // Pop self-frame.
2186   __ leave();                           // Epilog
2187 
2188   // Jump to interpreter
2189   __ ret();
2190 
2191   // Make sure all code is generated
2192   masm->flush();
2193 
2194   _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words);
2195   assert(_deopt_blob != NULL, "create deoptimization blob fail!");
2196   _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
2197 }
2198 
2199 // Number of stack slots between incoming argument block and the start of
2200 // a new frame. The PROLOG must add this many slots to the stack. The
2201 // EPILOG must remove this many slots.
2202 // RISCV needs two words for RA (return address) and FP (frame pointer).
2203 uint SharedRuntime::in_preserve_stack_slots() {
2204   return 2 * VMRegImpl::slots_per_word;
2205 }
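// With 8-byte machine words and 4-byte VM stack slots this amounts to 4
// slots, covering exactly the saved RA/FP pair.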
2206 
2207 uint SharedRuntime::out_preserve_stack_slots() {
2208   return 0;
2209 }
2210 
2211 #ifdef COMPILER2
2212 //------------------------------generate_uncommon_trap_blob--------------------
2213 void SharedRuntime::generate_uncommon_trap_blob() {
2214   // Allocate space for the code
2215   ResourceMark rm;
2216   // Setup code generation tools
2217   CodeBuffer buffer("uncommon_trap_blob", 2048, 1024);
2218   MacroAssembler* masm = new MacroAssembler(&buffer);
2219   assert_cond(masm != NULL);
2220 
2221   assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned");
2222 
2223   address start = __ pc();
2224 
2225   // Push self-frame.  We get here with a return address in RA
2226   // and sp should be 16 byte aligned
2227   // push fp and retaddr by hand
2228   __ addi(sp, sp, -2 * wordSize);
2229   __ sd(ra, Address(sp, wordSize));
2230   __ sd(fp, Address(sp, 0));
2231   // we don't expect an arg reg save area
2232 #ifndef PRODUCT
2233   assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area");
2234 #endif
  // The compiler left unloaded_class_index in j_rarg0; move it to where the
  // runtime expects it.
2237   __ addiw(c_rarg1, j_rarg0, 0);
2238 
  // We need to set the last SP to the stack pointer of the stub frame
  // and the pc to the address where this runtime call will return
  // (although actually any pc in this code blob will do).
2242   Label retaddr;
2243   __ set_last_Java_frame(sp, noreg, retaddr, t0);
2244 
2245   // Call C code.  Need thread but NOT official VM entry
2246   // crud.  We cannot block on this call, no GC can happen.  Call should
2247   // capture callee-saved registers as well as return values.
2248   //
2249   // UnrollBlock* uncommon_trap(JavaThread* thread, jint unloaded_class_index, jint exec_mode)
2250   //
2251   // n.b. 3 gp args, 0 fp args, integral return type
2252 
2253   __ mv(c_rarg0, xthread);
2254   __ mvw(c_rarg2, (unsigned)Deoptimization::Unpack_uncommon_trap);
2255   int32_t offset = 0;
2256   __ la_patchable(t0,
2257         RuntimeAddress(CAST_FROM_FN_PTR(address,
2258                                         Deoptimization::uncommon_trap)), offset);
2259   __ jalr(x1, t0, offset);
2260   __ bind(retaddr);
2261 
2262   // Set an oopmap for the call site
2263   OopMapSet* oop_maps = new OopMapSet();
2264   OopMap* map = new OopMap(SimpleRuntimeFrame::framesize, 0);
2265   assert_cond(oop_maps != NULL && map != NULL);
2266 
2267   // location of fp is known implicitly by the frame sender code
2268 
2269   oop_maps->add_gc_map(__ pc() - start, map);
2270 
2271   __ reset_last_Java_frame(false);
2272 
2273   // move UnrollBlock* into x14
2274   __ mv(x14, x10);
2275 
2276 #ifdef ASSERT
2277   { Label L;
2278     __ lwu(t0, Address(x14, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));
2279     __ mvw(t1, Deoptimization::Unpack_uncommon_trap);
2280     __ beq(t0, t1, L);
2281     __ stop("SharedRuntime::generate_deopt_blob: last_Java_fp not cleared");
2282     __ bind(L);
2283   }
2284 #endif
2285 
2286   // Pop all the frames we must move/replace.
2287   //
2288   // Frame picture (youngest to oldest)
2289   // 1: self-frame (no frame link)
2290   // 2: deopting frame  (no frame link)
2291   // 3: caller of deopting frame (could be compiled/interpreted).
2292 
2293   __ add(sp, sp, (SimpleRuntimeFrame::framesize) << LogBytesPerInt); // Epilog!
2294 
2295   // Pop deoptimized frame (int)
2296   __ lwu(x12, Address(x14,
2297                       Deoptimization::UnrollBlock::
2298                       size_of_deoptimized_frame_offset_in_bytes()));
2299   __ sub(x12, x12, 2 * wordSize);
2300   __ add(sp, sp, x12);
2301   __ ld(fp, sp, 0);
2302   __ ld(ra, sp, wordSize);
2303   __ addi(sp, sp, 2 * wordSize);
2304   // RA should now be the return address to the caller (3) frame
2305 
2306 #ifdef ASSERT
  // Compilers generate code that bangs the stack by as much as the
  // interpreter would need. So this stack banging should never
  // trigger a fault. Verify that it does not on non-product builds.
2310   __ lwu(x11, Address(x14,
2311                       Deoptimization::UnrollBlock::
2312                       total_frame_sizes_offset_in_bytes()));
2313   __ bang_stack_size(x11, x12);
2314 #endif
2315 
2316   // Load address of array of frame pcs into x12 (address*)
2317   __ ld(x12, Address(x14,
2318                      Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
2319 
2320   // Load address of array of frame sizes into x15 (intptr_t*)
2321   __ ld(x15, Address(x14,
2322                      Deoptimization::UnrollBlock::
2323                      frame_sizes_offset_in_bytes()));
2324 
2325   // Counter
2326   __ lwu(x13, Address(x14,
2327                       Deoptimization::UnrollBlock::
2328                       number_of_frames_offset_in_bytes())); // (int)
2329 
  // Now adjust the caller's stack to make up for the extra locals, but
  // record the original sp so that we can save it in the skeletal
  // interpreter frame; the stack walking of interpreter_sender
  // will then get the unextended sp value and not the "real" sp value.
2334 
2335   const Register sender_sp = t1; // temporary register
2336 
2337   __ lwu(x11, Address(x14,
2338                       Deoptimization::UnrollBlock::
2339                       caller_adjustment_offset_in_bytes())); // (int)
2340   __ mv(sender_sp, sp);
2341   __ sub(sp, sp, x11);
2342 
2343   // Push interpreter frames in a loop
2344   Label loop;
2345   __ bind(loop);
2346   __ ld(x11, Address(x15, 0));       // Load frame size
2347   __ sub(x11, x11, 2 * wordSize);    // We'll push pc and fp by hand
2348   __ ld(ra, Address(x12, 0));        // Save return address
2349   __ enter();                        // and old fp & set new fp
2350   __ sub(sp, sp, x11);               // Prolog
2351   __ sd(sender_sp, Address(fp, frame::interpreter_frame_sender_sp_offset * wordSize)); // Make it walkable
2352   // This value is corrected by layout_activation_impl
2353   __ sd(zr, Address(fp, frame::interpreter_frame_last_sp_offset * wordSize));
2354   __ mv(sender_sp, sp);              // Pass sender_sp to next frame
2355   __ add(x15, x15, wordSize);        // Bump array pointer (sizes)
2356   __ add(x12, x12, wordSize);        // Bump array pointer (pcs)
2357   __ subw(x13, x13, 1);              // Decrement counter
2358   __ bgtz(x13, loop);
2359   __ ld(ra, Address(x12, 0));        // save final return address
2360   // Re-push self-frame
2361   __ enter();                        // & old fp & set new fp
2362 
2363   // Use fp because the frames look interpreted now
2364   // Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP.
2365   // Don't need the precise return PC here, just precise enough to point into this code blob.
2366   address the_pc = __ pc();
2367   __ set_last_Java_frame(sp, fp, the_pc, t0);
2368 
2369   // Call C code.  Need thread but NOT official VM entry
2370   // crud.  We cannot block on this call, no GC can happen.  Call should
2371   // restore return values to their stack-slots with the new SP.
2372   //
2373   // BasicType unpack_frames(JavaThread* thread, int exec_mode)
2374   //
2375 
2376   // n.b. 2 gp args, 0 fp args, integral return type
2377 
2378   // sp should already be aligned
2379   __ mv(c_rarg0, xthread);
2380   __ mvw(c_rarg1, (unsigned)Deoptimization::Unpack_uncommon_trap);
2381   offset = 0;
2382   __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)), offset);
2383   __ jalr(x1, t0, offset);
2384 
2385   // Set an oopmap for the call site
2386   // Use the same PC we used for the last java frame
2387   oop_maps->add_gc_map(the_pc - start, new OopMap(SimpleRuntimeFrame::framesize, 0));
2388 
2389   // Clear fp AND pc
2390   __ reset_last_Java_frame(true);
2391 
2392   // Pop self-frame.
2393   __ leave();                 // Epilog
2394 
2395   // Jump to interpreter
2396   __ ret();
2397 
2398   // Make sure all code is generated
2399   masm->flush();
2400 
2401   _uncommon_trap_blob =  UncommonTrapBlob::create(&buffer, oop_maps,
2402                                                   SimpleRuntimeFrame::framesize >> 1);
2403 }
2404 #endif // COMPILER2
2405 
2406 //------------------------------generate_handler_blob------
2407 //
// Generate a special Compile2Runtime blob that saves all registers
// and sets up the oopmap.
2410 //
2411 SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) {
2412   ResourceMark rm;
2413   OopMapSet *oop_maps = new OopMapSet();
2414   assert_cond(oop_maps != NULL);
2415   OopMap* map = NULL;
2416 
2417   // Allocate space for the code.  Setup code generation tools.
2418   CodeBuffer buffer("handler_blob", 2048, 1024);
2419   MacroAssembler* masm = new MacroAssembler(&buffer);
2420   assert_cond(masm != NULL);
2421 
2422   address start   = __ pc();
2423   address call_pc = NULL;
2424   int frame_size_in_words = -1;
2425   bool cause_return = (poll_type == POLL_AT_RETURN);
2426   RegisterSaver reg_saver(poll_type == POLL_AT_VECTOR_LOOP /* save_vectors */);
2427 
2428   // Save Integer and Float registers.
2429   map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words);
2430 
2431   // The following is basically a call_VM.  However, we need the precise
2432   // address of the call in order to generate an oopmap. Hence, we do all the
  // work ourselves.
2434 
2435   Label retaddr;
2436   __ set_last_Java_frame(sp, noreg, retaddr, t0);
2437 
  // The return address must always be correct so that the frame constructor never
2439   // sees an invalid pc.
2440 
2441   if (!cause_return) {
2442     // overwrite the return address pushed by save_live_registers
2443     // Additionally, x18 is a callee-saved register so we can look at
2444     // it later to determine if someone changed the return address for
2445     // us!
2446     __ ld(x18, Address(xthread, JavaThread::saved_exception_pc_offset()));
2447     __ sd(x18, Address(fp, frame::return_addr_offset * wordSize));
2448   }
2449 
2450   // Do the call
2451   __ mv(c_rarg0, xthread);
2452   int32_t offset = 0;
2453   __ la_patchable(t0, RuntimeAddress(call_ptr), offset);
2454   __ jalr(x1, t0, offset);
2455   __ bind(retaddr);
2456 
2457   // Set an oopmap for the call site.  This oopmap will map all
2458   // oop-registers and debug-info registers as callee-saved.  This
2459   // will allow deoptimization at this safepoint to find all possible
2460   // debug-info recordings, as well as let GC find all oops.
2461 
2462   oop_maps->add_gc_map( __ pc() - start, map);
2463 
2464   Label noException;
2465 
2466   __ reset_last_Java_frame(false);
2467 
2468   __ membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
2469 
2470   __ ld(t0, Address(xthread, Thread::pending_exception_offset()));
2471   __ beqz(t0, noException);
2472 
2473   // Exception pending
2474 
2475   reg_saver.restore_live_registers(masm);
2476 
2477   __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
2478 
2479   // No exception case
2480   __ bind(noException);
2481 
2482   Label no_adjust, bail;
2483   if (!cause_return) {
2484     // If our stashed return pc was modified by the runtime we avoid touching it
2485     __ ld(t0, Address(fp, frame::return_addr_offset * wordSize));
2486     __ bne(x18, t0, no_adjust);
2487 
2488 #ifdef ASSERT
2489     // Verify the correct encoding of the poll we're about to skip.
2490     // See NativeInstruction::is_lwu_to_zr()
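    // An "lwu zr, offset(rs1)" is an I-type load: bits 0-6 hold the LOAD
    // opcode 0b0000011, bits 7-11 the destination register x0 (0b00000),
    // and bits 12-14 the LWU funct3 0b110 -- the three fields checked below.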
2491     __ lwu(t0, Address(x18));
2492     __ andi(t1, t0, 0b0000011);
2493     __ mv(t2, 0b0000011);
2494     __ bne(t1, t2, bail); // 0-6:0b0000011
2495     __ srli(t1, t0, 7);
2496     __ andi(t1, t1, 0b00000);
2497     __ bnez(t1, bail);    // 7-11:0b00000
2498     __ srli(t1, t0, 12);
2499     __ andi(t1, t1, 0b110);
2500     __ mv(t2, 0b110);
2501     __ bne(t1, t2, bail); // 12-14:0b110
2502 #endif
2503     // Adjust return pc forward to step over the safepoint poll instruction
2504     __ add(x18, x18, NativeInstruction::instruction_size);
2505     __ sd(x18, Address(fp, frame::return_addr_offset * wordSize));
2506   }
2507 
2508   __ bind(no_adjust);
2509   // Normal exit, restore registers and exit.
2510 
2511   reg_saver.restore_live_registers(masm);
2512   __ ret();
2513 
2514 #ifdef ASSERT
2515   __ bind(bail);
2516   __ stop("Attempting to adjust pc to skip safepoint poll but the return point is not what we expected");
2517 #endif
2518 
2519   // Make sure all code is generated
2520   masm->flush();
2521 
2522   // Fill-out other meta info
2523   return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words);
2524 }
2525 
2526 //
// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss)
2528 //
2529 // Generate a stub that calls into vm to find out the proper destination
2530 // of a java call. All the argument registers are live at this point
2531 // but since this is generic code we don't know what they are and the caller
2532 // must do any gc of the args.
2533 //
2534 RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) {
2535   assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before");
2536 
2537   // allocate space for the code
2538   ResourceMark rm;
2539 
2540   CodeBuffer buffer(name, 1000, 512);
2541   MacroAssembler* masm = new MacroAssembler(&buffer);
2542   assert_cond(masm != NULL);
2543 
2544   int frame_size_in_words = -1;
2545   RegisterSaver reg_saver(false /* save_vectors */);
2546 
2547   OopMapSet *oop_maps = new OopMapSet();
2548   assert_cond(oop_maps != NULL);
2549   OopMap* map = NULL;
2550 
2551   int start = __ offset();
2552 
2553   map = reg_saver.save_live_registers(masm, 0, &frame_size_in_words);
2554 
2555   int frame_complete = __ offset();
2556 
2557   {
2558     Label retaddr;
2559     __ set_last_Java_frame(sp, noreg, retaddr, t0);
2560 
2561     __ mv(c_rarg0, xthread);
2562     int32_t offset = 0;
2563     __ la_patchable(t0, RuntimeAddress(destination), offset);
2564     __ jalr(x1, t0, offset);
2565     __ bind(retaddr);
2566   }
2567 
2568   // Set an oopmap for the call site.
2569   // We need this not only for callee-saved registers, but also for volatile
2570   // registers that the compiler might be keeping live across a safepoint.
2571 
2572   oop_maps->add_gc_map( __ offset() - start, map);
2573 
2574   // x10 contains the address we are going to jump to assuming no exception got installed
2575 
2576   // clear last_Java_sp
2577   __ reset_last_Java_frame(false);
2578   // check for pending exceptions
2579   Label pending;
2580   __ ld(t0, Address(xthread, Thread::pending_exception_offset()));
2581   __ bnez(t0, pending);
2582 
2583   // get the returned Method*
2584   __ get_vm_result_2(xmethod, xthread);
2585   __ sd(xmethod, Address(sp, reg_saver.reg_offset_in_bytes(xmethod)));
2586 
2587   // x10 is where we want to jump, overwrite t0 which is saved and temporary
2588   __ sd(x10, Address(sp, reg_saver.reg_offset_in_bytes(t0)));
2589   reg_saver.restore_live_registers(masm);
2590 
  // We are back to the original state on entry and ready to go.
2592 
2593   __ jr(t0);
2594 
2595   // Pending exception after the safepoint
2596 
2597   __ bind(pending);
2598 
2599   reg_saver.restore_live_registers(masm);
2600 
2601   // exception pending => remove activation and forward to exception handler
2602 
2603   __ sd(zr, Address(xthread, JavaThread::vm_result_offset()));
2604 
2605   __ ld(x10, Address(xthread, Thread::pending_exception_offset()));
2606   __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
2607 
2608   // -------------
2609   // make sure all code is generated
2610   masm->flush();
2611 
  // return the blob
2613   return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_words, oop_maps, true);
2614 }
2615 
2616 #ifdef COMPILER2
2617 RuntimeStub* SharedRuntime::make_native_invoker(address call_target,
2618                                                 int shadow_space_bytes,
2619                                                 const GrowableArray<VMReg>& input_registers,
2620                                                 const GrowableArray<VMReg>& output_registers) {
2621   Unimplemented();
2622   return nullptr;
2623 }
2624 
2625 //------------------------------generate_exception_blob---------------------------
// Creates the exception blob at the end.
// Using the exception blob, this code is jumped to from a compiled method
// (see emit_exception_handler in the riscv.ad file).
2629 //
2630 // Given an exception pc at a call we call into the runtime for the
2631 // handler in this method. This handler might merely restore state
2632 // (i.e. callee save registers) unwind the frame and jump to the
2633 // exception handler for the nmethod if there is no Java level handler
2634 // for the nmethod.
2635 //
2636 // This code is entered with a jmp.
2637 //
2638 // Arguments:
2639 //   x10: exception oop
2640 //   x13: exception pc
2641 //
2642 // Results:
2643 //   x10: exception oop
2644 //   x13: exception pc in caller
2645 //   destination: exception handler of caller
2646 //
2647 // Note: the exception pc MUST be at a call (precise debug information)
2648 //       Registers x10, x13, x12, x14, x15, t0 are not callee saved.
2649 //
2650 
2651 void OptoRuntime::generate_exception_blob() {
2652   assert(!OptoRuntime::is_callee_saved_register(R13_num), "");
2653   assert(!OptoRuntime::is_callee_saved_register(R10_num), "");
2654   assert(!OptoRuntime::is_callee_saved_register(R12_num), "");
2655 
2656   assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned");
2657 
2658   // Allocate space for the code
2659   ResourceMark rm;
2660   // Setup code generation tools
2661   CodeBuffer buffer("exception_blob", 2048, 1024);
2662   MacroAssembler* masm = new MacroAssembler(&buffer);
2663   assert_cond(masm != NULL);
2664 
2665   // TODO check various assumptions made here
2666   //
2667   // make sure we do so before running this
2668 
2669   address start = __ pc();
2670 
2671   // push fp and retaddr by hand
2672   // Exception pc is 'return address' for stack walker
2673   __ addi(sp, sp, -2 * wordSize);
2674   __ sd(ra, Address(sp, wordSize));
2675   __ sd(fp, Address(sp));
2676   // there are no callee save registers and we don't expect an
2677   // arg reg save area
2678 #ifndef PRODUCT
2679   assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area");
2680 #endif
2681   // Store exception in Thread object. We cannot pass any arguments to the
2682   // handle_exception call, since we do not want to make any assumption
  // about the size of the frame in which the exception happened.
2684   __ sd(x10, Address(xthread, JavaThread::exception_oop_offset()));
2685   __ sd(x13, Address(xthread, JavaThread::exception_pc_offset()));
2686 
2687   // This call does all the hard work.  It checks if an exception handler
2688   // exists in the method.
2689   // If so, it returns the handler address.
2690   // If not, it prepares for stack-unwinding, restoring the callee-save
2691   // registers of the frame being removed.
2692   //
2693   // address OptoRuntime::handle_exception_C(JavaThread* thread)
2694   //
2695   // n.b. 1 gp arg, 0 fp args, integral return type
2696 
2697   // the stack should always be aligned
2698   address the_pc = __ pc();
2699   __ set_last_Java_frame(sp, noreg, the_pc, t0);
2700   __ mv(c_rarg0, xthread);
2701   int32_t offset = 0;
2702   __ la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, OptoRuntime::handle_exception_C)), offset);
2703   __ jalr(x1, t0, offset);
2704 
2705 
2706   // handle_exception_C is a special VM call which does not require an explicit
2707   // instruction sync afterwards.
2708 
2709   // Set an oopmap for the call site.  This oopmap will only be used if we
2710   // are unwinding the stack.  Hence, all locations will be dead.
2711   // Callee-saved registers will be the same as the frame above (i.e.,
2712   // handle_exception_stub), since they were restored when we got the
2713   // exception.
2714 
2715   OopMapSet* oop_maps = new OopMapSet();
2716   assert_cond(oop_maps != NULL);
2717 
2718   oop_maps->add_gc_map(the_pc - start, new OopMap(SimpleRuntimeFrame::framesize, 0));
2719 
2720   __ reset_last_Java_frame(false);
2721 
2722   // Restore callee-saved registers
2723 
  // fp is an implicitly saved callee-saved register (i.e. the calling
  // convention will save/restore it in the prolog/epilog). Other than that
  // there are no callee-saved registers now that adapter frames are gone,
  // and we don't expect an arg reg save area.
2728   __ ld(fp, Address(sp));
2729   __ ld(x13, Address(sp, wordSize));
2730   __ addi(sp, sp , 2 * wordSize);
2731 
2732   // x10: exception handler
2733 
2734   // We have a handler in x10 (could be deopt blob).
2735   __ mv(t0, x10);
2736 
2737   // Get the exception oop
2738   __ ld(x10, Address(xthread, JavaThread::exception_oop_offset()));
2739   // Get the exception pc in case we are deoptimized
2740   __ ld(x14, Address(xthread, JavaThread::exception_pc_offset()));
2741 #ifdef ASSERT
2742   __ sd(zr, Address(xthread, JavaThread::exception_handler_pc_offset()));
2743   __ sd(zr, Address(xthread, JavaThread::exception_pc_offset()));
2744 #endif
2745   // Clear the exception oop so GC no longer processes it as a root.
2746   __ sd(zr, Address(xthread, JavaThread::exception_oop_offset()));
2747 
2748   // x10: exception oop
2749   // t0:  exception handler
2750   // x14: exception pc
2751   // Jump to handler
2752 
2753   __ jr(t0);
2754 
2755   // Make sure all code is generated
2756   masm->flush();
2757 
2758   // Set exception blob
2759   _exception_blob =  ExceptionBlob::create(&buffer, oop_maps, SimpleRuntimeFrame::framesize >> 1);
2760 }
2761 #endif // COMPILER2