1 /*
   2  * Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved.
   3  * Copyright (c) 2012, 2026 SAP SE. All rights reserved.
   4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5  *
   6  * This code is free software; you can redistribute it and/or modify it
   7  * under the terms of the GNU General Public License version 2 only, as
   8  * published by the Free Software Foundation.
   9  *
  10  * This code is distributed in the hope that it will be useful, but WITHOUT
  11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13  * version 2 for more details (a copy is included in the LICENSE file that
  14  * accompanied this code).
  15  *
  16  * You should have received a copy of the GNU General Public License version
  17  * 2 along with this work; if not, write to the Free Software Foundation,
  18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19  *
  20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21  * or visit www.oracle.com if you need additional information or have any
  22  * questions.
  23  *
  24  */
  25 
  26 #include "asm/macroAssembler.inline.hpp"
  27 #include "code/debugInfoRec.hpp"
  28 #include "code/compiledIC.hpp"
  29 #include "code/vtableStubs.hpp"
  30 #include "frame_ppc.hpp"
  31 #include "compiler/oopMap.hpp"
  32 #include "gc/shared/gcLocker.hpp"
  33 #include "interpreter/interpreter.hpp"
  34 #include "interpreter/interp_masm.hpp"
  35 #include "memory/resourceArea.hpp"
  36 #include "oops/klass.inline.hpp"
  37 #include "prims/methodHandles.hpp"
  38 #include "runtime/continuation.hpp"
  39 #include "runtime/continuationEntry.inline.hpp"
  40 #include "runtime/jniHandles.hpp"
  41 #include "runtime/os.inline.hpp"
  42 #include "runtime/safepointMechanism.hpp"
  43 #include "runtime/sharedRuntime.hpp"
  44 #include "runtime/signature.hpp"
  45 #include "runtime/stubRoutines.hpp"
  46 #include "runtime/timerTrace.hpp"
  47 #include "runtime/vframeArray.hpp"
  48 #include "utilities/align.hpp"
  49 #include "utilities/macros.hpp"
  50 #include "vmreg_ppc.inline.hpp"
  51 #ifdef COMPILER1
  52 #include "c1/c1_Runtime1.hpp"
  53 #endif
  54 #ifdef COMPILER2
  55 #include "opto/ad.hpp"
  56 #include "opto/runtime.hpp"
  57 #endif
  58 
  59 #include <alloca.h>
  60 
  61 #define __ masm->
  62 
  63 #ifdef PRODUCT
  64 #define BLOCK_COMMENT(str) // nothing
  65 #else
  66 #define BLOCK_COMMENT(str) __ block_comment(str)
  67 #endif
  68 
  69 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
  70 
  71 
  72 class RegisterSaver {
  73  // Used for saving volatile registers.
  74  public:
  75 
  76   // Support different return pc locations.
  77   enum ReturnPCLocation {
  78     return_pc_is_lr,
  79     return_pc_is_pre_saved,
  80     return_pc_is_thread_saved_exception_pc
  81   };
  82 
  83   static OopMap* push_frame_reg_args_and_save_live_registers(MacroAssembler* masm,
  84                          int* out_frame_size_in_bytes,
  85                          bool generate_oop_map,
  86                          ReturnPCLocation return_pc_location,
  87                          bool save_vectors = false);
  88   static void    restore_live_registers_and_pop_frame(MacroAssembler* masm,
  89                          int frame_size_in_bytes,
  90                          bool restore_ctr,
  91                          bool save_vectors = false);
  92 
  93   static void push_frame_and_save_argument_registers(MacroAssembler* masm,
  94                          Register r_temp,
  95                          int frame_size,
  96                          int total_args,
  97                          const VMRegPair *regs, const VMRegPair *regs2 = nullptr);
  98   static void restore_argument_registers_and_pop_frame(MacroAssembler*masm,
  99                          int frame_size,
 100                          int total_args,
 101                          const VMRegPair *regs, const VMRegPair *regs2 = nullptr);
 102 
 103   // During deoptimization only the result registers need to be restored
 104   // all the other values have already been extracted.
 105   static void restore_result_registers(MacroAssembler* masm, int frame_size_in_bytes, bool save_vectors);
 106 
 107   // Constants and data structures:
 108 
 109   typedef enum {
 110     int_reg,
 111     float_reg,
 112     special_reg,
 113     vec_reg
 114   } RegisterType;
 115 
 116   typedef enum {
 117     reg_size          = 8,
 118     half_reg_size     = reg_size / 2,
 119     vec_reg_size      = 16
 120   } RegisterConstants;
 121 
 122   typedef struct {
 123     RegisterType        reg_type;
 124     int                 reg_num;
 125     VMReg               vmreg;
 126   } LiveRegType;
 127 };
 128 
 129 
 130 #define RegisterSaver_LiveIntReg(regname) \
 131   { RegisterSaver::int_reg,     regname->encoding(), regname->as_VMReg() }
 132 
 133 #define RegisterSaver_LiveFloatReg(regname) \
 134   { RegisterSaver::float_reg,   regname->encoding(), regname->as_VMReg() }
 135 
 136 #define RegisterSaver_LiveSpecialReg(regname) \
 137   { RegisterSaver::special_reg, regname->encoding(), regname->as_VMReg() }
 138 
 139 #define RegisterSaver_LiveVecReg(regname) \
 140   { RegisterSaver::vec_reg,      regname->encoding(), regname->as_VMReg() }
 141 
 142 static const RegisterSaver::LiveRegType RegisterSaver_LiveRegs[] = {
 143   // Live registers which get spilled to the stack. Register
 144   // positions in this array correspond directly to the stack layout.
 145 
 146   //
 147   // live special registers:
 148   //
 149   RegisterSaver_LiveSpecialReg(SR_CTR),
 150   //
 151   // live float registers:
 152   //
 153   RegisterSaver_LiveFloatReg( F0  ),
 154   RegisterSaver_LiveFloatReg( F1  ),
 155   RegisterSaver_LiveFloatReg( F2  ),
 156   RegisterSaver_LiveFloatReg( F3  ),
 157   RegisterSaver_LiveFloatReg( F4  ),
 158   RegisterSaver_LiveFloatReg( F5  ),
 159   RegisterSaver_LiveFloatReg( F6  ),
 160   RegisterSaver_LiveFloatReg( F7  ),
 161   RegisterSaver_LiveFloatReg( F8  ),
 162   RegisterSaver_LiveFloatReg( F9  ),
 163   RegisterSaver_LiveFloatReg( F10 ),
 164   RegisterSaver_LiveFloatReg( F11 ),
 165   RegisterSaver_LiveFloatReg( F12 ),
 166   RegisterSaver_LiveFloatReg( F13 ),
 167   RegisterSaver_LiveFloatReg( F14 ),
 168   RegisterSaver_LiveFloatReg( F15 ),
 169   RegisterSaver_LiveFloatReg( F16 ),
 170   RegisterSaver_LiveFloatReg( F17 ),
 171   RegisterSaver_LiveFloatReg( F18 ),
 172   RegisterSaver_LiveFloatReg( F19 ),
 173   RegisterSaver_LiveFloatReg( F20 ),
 174   RegisterSaver_LiveFloatReg( F21 ),
 175   RegisterSaver_LiveFloatReg( F22 ),
 176   RegisterSaver_LiveFloatReg( F23 ),
 177   RegisterSaver_LiveFloatReg( F24 ),
 178   RegisterSaver_LiveFloatReg( F25 ),
 179   RegisterSaver_LiveFloatReg( F26 ),
 180   RegisterSaver_LiveFloatReg( F27 ),
 181   RegisterSaver_LiveFloatReg( F28 ),
 182   RegisterSaver_LiveFloatReg( F29 ),
 183   RegisterSaver_LiveFloatReg( F30 ),
 184   RegisterSaver_LiveFloatReg( F31 ),
 185   //
 186   // live integer registers:
 187   //
 188   RegisterSaver_LiveIntReg(   R0  ),
 189   //RegisterSaver_LiveIntReg( R1  ), // stack pointer
 190   RegisterSaver_LiveIntReg(   R2  ),
 191   RegisterSaver_LiveIntReg(   R3  ),
 192   RegisterSaver_LiveIntReg(   R4  ),
 193   RegisterSaver_LiveIntReg(   R5  ),
 194   RegisterSaver_LiveIntReg(   R6  ),
 195   RegisterSaver_LiveIntReg(   R7  ),
 196   RegisterSaver_LiveIntReg(   R8  ),
 197   RegisterSaver_LiveIntReg(   R9  ),
 198   RegisterSaver_LiveIntReg(   R10 ),
 199   RegisterSaver_LiveIntReg(   R11 ),
 200   RegisterSaver_LiveIntReg(   R12 ),
 201   //RegisterSaver_LiveIntReg( R13 ), // system thread id
 202   RegisterSaver_LiveIntReg(   R14 ),
 203   RegisterSaver_LiveIntReg(   R15 ),
 204   RegisterSaver_LiveIntReg(   R16 ),
 205   RegisterSaver_LiveIntReg(   R17 ),
 206   RegisterSaver_LiveIntReg(   R18 ),
 207   RegisterSaver_LiveIntReg(   R19 ),
 208   RegisterSaver_LiveIntReg(   R20 ),
 209   RegisterSaver_LiveIntReg(   R21 ),
 210   RegisterSaver_LiveIntReg(   R22 ),
 211   RegisterSaver_LiveIntReg(   R23 ),
 212   RegisterSaver_LiveIntReg(   R24 ),
 213   RegisterSaver_LiveIntReg(   R25 ),
 214   RegisterSaver_LiveIntReg(   R26 ),
 215   RegisterSaver_LiveIntReg(   R27 ),
 216   RegisterSaver_LiveIntReg(   R28 ),
 217   RegisterSaver_LiveIntReg(   R29 ),
 218   RegisterSaver_LiveIntReg(   R30 ),
 219   RegisterSaver_LiveIntReg(   R31 )  // must be the last register (see save/restore functions below)
 220 };
 221 
 222 static const RegisterSaver::LiveRegType RegisterSaver_LiveVecRegs[] = {
 223   //
 224   // live vector registers (optional, only these ones are used by C2):
 225   //
 226   RegisterSaver_LiveVecReg( VR0 ),
 227   RegisterSaver_LiveVecReg( VR1 ),
 228   RegisterSaver_LiveVecReg( VR2 ),
 229   RegisterSaver_LiveVecReg( VR3 ),
 230   RegisterSaver_LiveVecReg( VR4 ),
 231   RegisterSaver_LiveVecReg( VR5 ),
 232   RegisterSaver_LiveVecReg( VR6 ),
 233   RegisterSaver_LiveVecReg( VR7 ),
 234   RegisterSaver_LiveVecReg( VR8 ),
 235   RegisterSaver_LiveVecReg( VR9 ),
 236   RegisterSaver_LiveVecReg( VR10 ),
 237   RegisterSaver_LiveVecReg( VR11 ),
 238   RegisterSaver_LiveVecReg( VR12 ),
 239   RegisterSaver_LiveVecReg( VR13 ),
 240   RegisterSaver_LiveVecReg( VR14 ),
 241   RegisterSaver_LiveVecReg( VR15 ),
 242   RegisterSaver_LiveVecReg( VR16 ),
 243   RegisterSaver_LiveVecReg( VR17 ),
 244   RegisterSaver_LiveVecReg( VR18 ),
 245   RegisterSaver_LiveVecReg( VR19 ),
 246   RegisterSaver_LiveVecReg( VR20 ),
 247   RegisterSaver_LiveVecReg( VR21 ),
 248   RegisterSaver_LiveVecReg( VR22 ),
 249   RegisterSaver_LiveVecReg( VR23 ),
 250   RegisterSaver_LiveVecReg( VR24 ),
 251   RegisterSaver_LiveVecReg( VR25 ),
 252   RegisterSaver_LiveVecReg( VR26 ),
 253   RegisterSaver_LiveVecReg( VR27 ),
 254   RegisterSaver_LiveVecReg( VR28 ),
 255   RegisterSaver_LiveVecReg( VR29 ),
 256   RegisterSaver_LiveVecReg( VR30 ),
 257   RegisterSaver_LiveVecReg( VR31 )
 258 };
 259 
 260 
 261 OopMap* RegisterSaver::push_frame_reg_args_and_save_live_registers(MacroAssembler* masm,
 262                          int* out_frame_size_in_bytes,
 263                          bool generate_oop_map,
 264                          ReturnPCLocation return_pc_location,
 265                          bool save_vectors) {
 266   // Push an abi_reg_args-frame and store all registers which may be live.
 267   // If requested, create an OopMap: Record volatile registers as
 268   // callee-save values in an OopMap so their save locations will be
 269   // propagated to the RegisterMap of the caller frame during
 270   // StackFrameStream construction (needed for deoptimization; see
 271   // compiledVFrame::create_stack_value).
 272   // Updated return pc is returned in R31 (if not return_pc_is_pre_saved).
 273 
 274   // calculate frame size
 275   const int regstosave_num       = sizeof(RegisterSaver_LiveRegs) /
 276                                    sizeof(RegisterSaver::LiveRegType);
 277   const int vecregstosave_num    = save_vectors ? (sizeof(RegisterSaver_LiveVecRegs) /
 278                                                    sizeof(RegisterSaver::LiveRegType))
 279                                                 : 0;
 280   const int register_save_size   = regstosave_num * reg_size + vecregstosave_num * vec_reg_size;
 281   const int frame_size_in_bytes  = align_up(register_save_size, frame::alignment_in_bytes)
 282                                    + frame::native_abi_reg_args_size;
 283 
 284   *out_frame_size_in_bytes       = frame_size_in_bytes;
 285   const int frame_size_in_slots  = frame_size_in_bytes / sizeof(jint);
 286   const int register_save_offset = frame_size_in_bytes - register_save_size;
 287 
 288   // OopMap frame size is in c2 stack slots (sizeof(jint)) not bytes or words.
 289   OopMap* map = generate_oop_map ? new OopMap(frame_size_in_slots, 0) : nullptr;
 290 
 291   BLOCK_COMMENT("push_frame_reg_args_and_save_live_registers {");
 292 
 293   // push a new frame
 294   __ push_frame(frame_size_in_bytes, noreg);
 295 
 296   // Save some registers in the last (non-vector) slots of the new frame so we
 297   // can use them as scratch regs or to determine the return pc.
 298   __ std(R31, frame_size_in_bytes -   reg_size - vecregstosave_num * vec_reg_size, R1_SP);
 299   __ std(R30, frame_size_in_bytes - 2*reg_size - vecregstosave_num * vec_reg_size, R1_SP);
 300 
 301   // save the flags
 302   // Do the save_LR by hand and adjust the return pc if requested.
 303   switch (return_pc_location) {
 304     case return_pc_is_lr: __ mflr(R31); break;
 305     case return_pc_is_pre_saved: break;
 306     case return_pc_is_thread_saved_exception_pc: __ ld(R31, thread_(saved_exception_pc)); break;
 307     default: ShouldNotReachHere();
 308   }
 309   if (return_pc_location != return_pc_is_pre_saved) {
 310     __ std(R31, frame_size_in_bytes + _abi0(lr), R1_SP);
 311   }
 312 
 313   // save all registers (ints and floats)
 314   int offset = register_save_offset;
 315 
 316   for (int i = 0; i < regstosave_num; i++) {
 317     int reg_num  = RegisterSaver_LiveRegs[i].reg_num;
 318     int reg_type = RegisterSaver_LiveRegs[i].reg_type;
 319 
 320     switch (reg_type) {
 321       case RegisterSaver::int_reg: {
 322         if (reg_num < 30) { // We spilled R30-31 right at the beginning.
 323           __ std(as_Register(reg_num), offset, R1_SP);
 324         }
 325         break;
 326       }
 327       case RegisterSaver::float_reg: {
 328         __ stfd(as_FloatRegister(reg_num), offset, R1_SP);
 329         break;
 330       }
 331       case RegisterSaver::special_reg: {
 332         if (reg_num == SR_CTR.encoding()) {
 333           __ mfctr(R30);
 334           __ std(R30, offset, R1_SP);
 335         } else {
 336           Unimplemented();
 337         }
 338         break;
 339       }
 340       default:
 341         ShouldNotReachHere();
 342     }
 343 
 344     if (generate_oop_map) {
 345       map->set_callee_saved(VMRegImpl::stack2reg(offset >> 2),
 346                             RegisterSaver_LiveRegs[i].vmreg);
 347     }
 348     offset += reg_size;
 349   }
 350 
 351   // Note that generate_oop_map in the following loop is only used for the
 352   // polling_page_vectors_safepoint_handler_blob and the deopt_blob.
 353   // The order in which the vector contents are stored depends on Endianess and
 354   // the utilized instructions (PowerArchitecturePPC64).
 355   assert(is_aligned(offset, StackAlignmentInBytes), "should be");
 356   if (PowerArchitecturePPC64 >= 10) {
 357     assert(is_even(vecregstosave_num), "expectation");
 358     for (int i = 0; i < vecregstosave_num; i += 2) {
 359       int reg_num = RegisterSaver_LiveVecRegs[i].reg_num;
 360       assert(RegisterSaver_LiveVecRegs[i + 1].reg_num == reg_num + 1, "or use other instructions!");
 361 
 362       __ stxvp(as_VectorRegister(reg_num).to_vsr(), offset, R1_SP);
 363       // Note: The contents were read in the same order (see loadV16 node in ppc.ad).
 364       // RegisterMap::pd_location only uses the first VMReg for each VectorRegister.
 365       if (generate_oop_map) {
 366         map->set_callee_saved(VMRegImpl::stack2reg(offset >> 2),
 367                               RegisterSaver_LiveVecRegs[i LITTLE_ENDIAN_ONLY(+1) ].vmreg);
 368         map->set_callee_saved(VMRegImpl::stack2reg((offset + vec_reg_size) >> 2),
 369                               RegisterSaver_LiveVecRegs[i BIG_ENDIAN_ONLY(+1) ].vmreg);
 370       }
 371       offset += (2 * vec_reg_size);
 372     }
 373   } else {
 374     for (int i = 0; i < vecregstosave_num; i++) {
 375       int reg_num = RegisterSaver_LiveVecRegs[i].reg_num;
 376 
 377       __ stxv(as_VectorRegister(reg_num)->to_vsr(), offset, R1_SP);
 378       // Note: The contents were read in the same order (see loadV16 node in ppc.ad).
 379       // RegisterMap::pd_location only uses the first VMReg for each VectorRegister.
 380       if (generate_oop_map) {
 381         VMReg vsr = RegisterSaver_LiveVecRegs[i].vmreg;
 382         map->set_callee_saved(VMRegImpl::stack2reg(offset >> 2), vsr);
 383       }
 384       offset += vec_reg_size;
 385     }
 386   }
 387 
 388   assert(offset == frame_size_in_bytes, "consistency check");
 389 
 390   BLOCK_COMMENT("} push_frame_reg_args_and_save_live_registers");
 391 
 392   // And we're done.
 393   return map;
 394 }
 395 
 396 
 397 // Pop the current frame and restore all the registers that we
 398 // saved.
 399 void RegisterSaver::restore_live_registers_and_pop_frame(MacroAssembler* masm,
 400                                                          int frame_size_in_bytes,
 401                                                          bool restore_ctr,
 402                                                          bool save_vectors) {
 403   const int regstosave_num       = sizeof(RegisterSaver_LiveRegs) /
 404                                    sizeof(RegisterSaver::LiveRegType);
 405   const int vecregstosave_num    = save_vectors ? (sizeof(RegisterSaver_LiveVecRegs) /
 406                                                    sizeof(RegisterSaver::LiveRegType))
 407                                                 : 0;
 408   const int register_save_size   = regstosave_num * reg_size + vecregstosave_num * vec_reg_size;
 409 
 410   const int register_save_offset = frame_size_in_bytes - register_save_size;
 411 
 412   BLOCK_COMMENT("restore_live_registers_and_pop_frame {");
 413 
 414   // restore all registers (ints and floats)
 415   int offset = register_save_offset;
 416 
 417   for (int i = 0; i < regstosave_num; i++) {
 418     int reg_num  = RegisterSaver_LiveRegs[i].reg_num;
 419     int reg_type = RegisterSaver_LiveRegs[i].reg_type;
 420 
 421     switch (reg_type) {
 422       case RegisterSaver::int_reg: {
 423         if (reg_num != 31) // R31 restored at the end, it's the tmp reg!
 424           __ ld(as_Register(reg_num), offset, R1_SP);
 425         break;
 426       }
 427       case RegisterSaver::float_reg: {
 428         __ lfd(as_FloatRegister(reg_num), offset, R1_SP);
 429         break;
 430       }
 431       case RegisterSaver::special_reg: {
 432         if (reg_num == SR_CTR.encoding()) {
 433           if (restore_ctr) { // Nothing to do here if ctr already contains the next address.
 434             __ ld(R31, offset, R1_SP);
 435             __ mtctr(R31);
 436           }
 437         } else {
 438           Unimplemented();
 439         }
 440         break;
 441       }
 442       default:
 443         ShouldNotReachHere();
 444     }
 445     offset += reg_size;
 446   }
 447 
 448   assert(is_aligned(offset, StackAlignmentInBytes), "should be");
 449   if (PowerArchitecturePPC64 >= 10) {
 450     for (int i = 0; i < vecregstosave_num; i += 2) {
 451       int reg_num  = RegisterSaver_LiveVecRegs[i].reg_num;
 452       assert(RegisterSaver_LiveVecRegs[i + 1].reg_num == reg_num + 1, "or use other instructions!");
 453 
 454       __ lxvp(as_VectorRegister(reg_num).to_vsr(), offset, R1_SP);
 455 
 456       offset += (2 * vec_reg_size);
 457     }
 458   } else {
 459     for (int i = 0; i < vecregstosave_num; i++) {
 460       int reg_num  = RegisterSaver_LiveVecRegs[i].reg_num;
 461 
 462       __ lxv(as_VectorRegister(reg_num).to_vsr(), offset, R1_SP);
 463 
 464       offset += vec_reg_size;
 465     }
 466   }
 467 
 468   assert(offset == frame_size_in_bytes, "consistency check");
 469 
 470   // restore link and the flags
 471   __ ld(R31, frame_size_in_bytes + _abi0(lr), R1_SP);
 472   __ mtlr(R31);
 473 
 474   // restore scratch register's value
 475   __ ld(R31, frame_size_in_bytes - reg_size - vecregstosave_num * vec_reg_size, R1_SP);
 476 
 477   // pop the frame
 478   __ addi(R1_SP, R1_SP, frame_size_in_bytes);
 479 
 480   BLOCK_COMMENT("} restore_live_registers_and_pop_frame");
 481 }
 482 
 483 void RegisterSaver::push_frame_and_save_argument_registers(MacroAssembler* masm, Register r_temp,
 484                                                            int frame_size,int total_args, const VMRegPair *regs,
 485                                                            const VMRegPair *regs2) {
 486   __ push_frame(frame_size, r_temp);
 487   int st_off = frame_size - wordSize;
 488   for (int i = 0; i < total_args; i++) {
 489     VMReg r_1 = regs[i].first();
 490     VMReg r_2 = regs[i].second();
 491     if (!r_1->is_valid()) {
 492       assert(!r_2->is_valid(), "");
 493       continue;
 494     }
 495     if (r_1->is_Register()) {
 496       Register r = r_1->as_Register();
 497       __ std(r, st_off, R1_SP);
 498       st_off -= wordSize;
 499     } else if (r_1->is_FloatRegister()) {
 500       FloatRegister f = r_1->as_FloatRegister();
 501       __ stfd(f, st_off, R1_SP);
 502       st_off -= wordSize;
 503     }
 504   }
 505   if (regs2 != nullptr) {
 506     for (int i = 0; i < total_args; i++) {
 507       VMReg r_1 = regs2[i].first();
 508       VMReg r_2 = regs2[i].second();
 509       if (!r_1->is_valid()) {
 510         assert(!r_2->is_valid(), "");
 511         continue;
 512       }
 513       if (r_1->is_Register()) {
 514         Register r = r_1->as_Register();
 515         __ std(r, st_off, R1_SP);
 516         st_off -= wordSize;
 517       } else if (r_1->is_FloatRegister()) {
 518         FloatRegister f = r_1->as_FloatRegister();
 519         __ stfd(f, st_off, R1_SP);
 520         st_off -= wordSize;
 521       }
 522     }
 523   }
 524 }
 525 
 526 void RegisterSaver::restore_argument_registers_and_pop_frame(MacroAssembler*masm, int frame_size,
 527                                                              int total_args, const VMRegPair *regs,
 528                                                              const VMRegPair *regs2) {
 529   int st_off = frame_size - wordSize;
 530   for (int i = 0; i < total_args; i++) {
 531     VMReg r_1 = regs[i].first();
 532     VMReg r_2 = regs[i].second();
 533     if (r_1->is_Register()) {
 534       Register r = r_1->as_Register();
 535       __ ld(r, st_off, R1_SP);
 536       st_off -= wordSize;
 537     } else if (r_1->is_FloatRegister()) {
 538       FloatRegister f = r_1->as_FloatRegister();
 539       __ lfd(f, st_off, R1_SP);
 540       st_off -= wordSize;
 541     }
 542   }
 543   if (regs2 != nullptr)
 544     for (int i = 0; i < total_args; i++) {
 545       VMReg r_1 = regs2[i].first();
 546       VMReg r_2 = regs2[i].second();
 547       if (r_1->is_Register()) {
 548         Register r = r_1->as_Register();
 549         __ ld(r, st_off, R1_SP);
 550         st_off -= wordSize;
 551       } else if (r_1->is_FloatRegister()) {
 552         FloatRegister f = r_1->as_FloatRegister();
 553         __ lfd(f, st_off, R1_SP);
 554         st_off -= wordSize;
 555       }
 556     }
 557   __ pop_frame();
 558 }
 559 
 560 // Restore the registers that might be holding a result.
 561 void RegisterSaver::restore_result_registers(MacroAssembler* masm, int frame_size_in_bytes, bool save_vectors) {
 562   const int regstosave_num       = sizeof(RegisterSaver_LiveRegs) /
 563                                    sizeof(RegisterSaver::LiveRegType);
 564   const int vecregstosave_num    = save_vectors ? (sizeof(RegisterSaver_LiveVecRegs) /
 565                                                    sizeof(RegisterSaver::LiveRegType))
 566                                                 : 0;
 567   const int register_save_size   = regstosave_num * reg_size + vecregstosave_num * vec_reg_size;
 568 
 569   const int register_save_offset = frame_size_in_bytes - register_save_size;
 570 
 571   // restore all result registers (ints and floats)
 572   int offset = register_save_offset;
 573   for (int i = 0; i < regstosave_num; i++) {
 574     int reg_num  = RegisterSaver_LiveRegs[i].reg_num;
 575     int reg_type = RegisterSaver_LiveRegs[i].reg_type;
 576     switch (reg_type) {
 577       case RegisterSaver::int_reg: {
 578         if (as_Register(reg_num)==R3_RET) // int result_reg
 579           __ ld(as_Register(reg_num), offset, R1_SP);
 580         break;
 581       }
 582       case RegisterSaver::float_reg: {
 583         if (as_FloatRegister(reg_num)==F1_RET) // float result_reg
 584           __ lfd(as_FloatRegister(reg_num), offset, R1_SP);
 585         break;
 586       }
 587       case RegisterSaver::special_reg: {
 588         // Special registers don't hold a result.
 589         break;
 590       }
 591       default:
 592         ShouldNotReachHere();
 593     }
 594     offset += reg_size;
 595   }
 596 
 597   assert(offset == frame_size_in_bytes - (save_vectors ? vecregstosave_num * vec_reg_size : 0), "consistency check");
 598 }
 599 
 600 // Is vector's size (in bytes) bigger than a size saved by default?
 601 bool SharedRuntime::is_wide_vector(int size) {
 602   // Note, MaxVectorSize == 8/16 on PPC64.
 603   assert(size <= (SuperwordUseVSX ? 16 : 8), "%d bytes vectors are not supported", size);
 604   return size > 8;
 605 }
 606 
 607 static int reg2slot(VMReg r) {
 608   return r->reg2stack() + SharedRuntime::out_preserve_stack_slots();
 609 }
 610 
 611 static int reg2offset(VMReg r) {
 612   return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
 613 }
 614 
 615 // ---------------------------------------------------------------------------
 616 // Read the array of BasicTypes from a signature, and compute where the
 617 // arguments should go. Values in the VMRegPair regs array refer to 4-byte
 618 // quantities. Values less than VMRegImpl::stack0 are registers, those above
 619 // refer to 4-byte stack slots. All stack slots are based off of the stack pointer
 620 // as framesizes are fixed.
// VMRegImpl::stack0 refers to the first slot, 0(sp), and VMRegImpl::stack0+1
// refers to the memory word 4 bytes higher. Registers up to
// Register::number_of_registers are the 64-bit integer registers.
 625 
 626 // Note: the INPUTS in sig_bt are in units of Java argument words, which are
 627 // either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit
 628 // units regardless of build. Of course for i486 there is no 64 bit build
 629 
 630 // In contrast to other platforms the Java calling convention is *NOT* a
 631 // "shifted" version of the C ABI.
 632 
 633 const VMReg java_iarg_reg[8] = {
 634   R3->as_VMReg(),
 635   R4->as_VMReg(),
 636   R5->as_VMReg(),
 637   R6->as_VMReg(),
 638   R7->as_VMReg(),
 639   R8->as_VMReg(),
 640   R9->as_VMReg(),
 641   R10->as_VMReg()
 642 };
 643 
 644 const VMReg java_farg_reg[13] = {
 645   F1->as_VMReg(),
 646   F2->as_VMReg(),
 647   F3->as_VMReg(),
 648   F4->as_VMReg(),
 649   F5->as_VMReg(),
 650   F6->as_VMReg(),
 651   F7->as_VMReg(),
 652   F8->as_VMReg(),
 653   F9->as_VMReg(),
 654   F10->as_VMReg(),
 655   F11->as_VMReg(),
 656   F12->as_VMReg(),
 657   F13->as_VMReg()
 658 };
 659 
 660 const int num_java_iarg_registers = sizeof(java_iarg_reg) / sizeof(java_iarg_reg[0]);
 661 const int num_java_farg_registers = sizeof(java_farg_reg) / sizeof(java_farg_reg[0]);
 662 
 663 STATIC_ASSERT(num_java_iarg_registers == Argument::n_int_register_parameters_j);
 664 STATIC_ASSERT(num_java_farg_registers == Argument::n_float_register_parameters_j);
 665 
 666 int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
 667                                            VMRegPair *regs,
 668                                            int total_args_passed) {
 669   // C2c calling conventions for compiled-compiled calls.
 670   // Put 8 ints/longs into registers _AND_ 13 float/doubles into
 671   // registers _AND_ put the rest on the stack.
 672 
 673   const int inc_stk_for_intfloat   = 1; // 1 slots for ints and floats
 674   const int inc_stk_for_longdouble = 2; // 2 slots for longs and doubles
 675 
 676   int i;
 677   VMReg reg;
 678   int stk = 0;
 679   int ireg = 0;
 680   int freg = 0;
 681 
 682   // We put the first 8 arguments into registers and the rest on the
 683   // stack, float arguments are already in their argument registers
 684   // due to c2c calling conventions (see calling_convention).
 685   for (int i = 0; i < total_args_passed; ++i) {
 686     switch(sig_bt[i]) {
 687     case T_BOOLEAN:
 688     case T_CHAR:
 689     case T_BYTE:
 690     case T_SHORT:
 691     case T_INT:
 692       if (ireg < num_java_iarg_registers) {
 693         // Put int/ptr in register
 694         reg = java_iarg_reg[ireg];
 695         ++ireg;
 696       } else {
 697         // Put int/ptr on stack.
 698         reg = VMRegImpl::stack2reg(stk);
 699         stk += inc_stk_for_intfloat;
 700       }
 701       regs[i].set1(reg);
 702       break;
 703     case T_LONG:
 704       assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half");
 705       if (ireg < num_java_iarg_registers) {
 706         // Put long in register.
 707         reg = java_iarg_reg[ireg];
 708         ++ireg;
 709       } else {
 710         // Put long on stack. They must be aligned to 2 slots.
 711         if (stk & 0x1) ++stk;
 712         reg = VMRegImpl::stack2reg(stk);
 713         stk += inc_stk_for_longdouble;
 714       }
 715       regs[i].set2(reg);
 716       break;
 717     case T_OBJECT:
 718     case T_ARRAY:
 719     case T_ADDRESS:
 720       if (ireg < num_java_iarg_registers) {
 721         // Put ptr in register.
 722         reg = java_iarg_reg[ireg];
 723         ++ireg;
 724       } else {
 725         // Put ptr on stack. Objects must be aligned to 2 slots too,
 726         // because "64-bit pointers record oop-ishness on 2 aligned
 727         // adjacent registers." (see OopFlow::build_oop_map).
 728         if (stk & 0x1) ++stk;
 729         reg = VMRegImpl::stack2reg(stk);
 730         stk += inc_stk_for_longdouble;
 731       }
 732       regs[i].set2(reg);
 733       break;
 734     case T_FLOAT:
 735       if (freg < num_java_farg_registers) {
 736         // Put float in register.
 737         reg = java_farg_reg[freg];
 738         ++freg;
 739       } else {
 740         // Put float on stack.
 741         reg = VMRegImpl::stack2reg(stk);
 742         stk += inc_stk_for_intfloat;
 743       }
 744       regs[i].set1(reg);
 745       break;
 746     case T_DOUBLE:
 747       assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half");
 748       if (freg < num_java_farg_registers) {
 749         // Put double in register.
 750         reg = java_farg_reg[freg];
 751         ++freg;
 752       } else {
 753         // Put double on stack. They must be aligned to 2 slots.
 754         if (stk & 0x1) ++stk;
 755         reg = VMRegImpl::stack2reg(stk);
 756         stk += inc_stk_for_longdouble;
 757       }
 758       regs[i].set2(reg);
 759       break;
 760     case T_VOID:
 761       // Do not count halves.
 762       regs[i].set_bad();
 763       break;
 764     default:
 765       ShouldNotReachHere();
 766     }
 767   }
 768   return stk;
 769 }
 770 
 771 // Similar to java_calling_convention() but for multiple return
 772 // values. There's no way to store them on the stack so if we don't
 773 // have enough registers, multiple values can't be returned.
 774 const uint SharedRuntime::java_return_convention_max_int = Argument::n_int_register_parameters_j;
 775 const uint SharedRuntime::java_return_convention_max_float = Argument::n_float_register_parameters_j;
 776 int SharedRuntime::java_return_convention(const BasicType *sig_bt,
 777                                           VMRegPair *regs,
 778                                           int total_args_passed) {
 779   // Create the mapping between argument positions and
 780   // registers.
 781   static const Register INT_ArgReg[java_return_convention_max_int] = {
 782     R3_RET, R10_ARG8, R9_ARG7, R8_ARG6, R7_ARG5, R6_ARG4, R5_ARG3, R4_ARG2
 783   };
 784   static const FloatRegister FP_ArgReg[java_return_convention_max_float] = {
 785     F1_RET, F2_ARG2, F3_ARG3, F4_ARG4, F5_ARG5, F6_ARG6, F7_ARG7, F8_ARG8,
 786     F9_ARG9, F10_ARG10, F11_ARG11, F12_ARG12, F13_ARG13
 787   };
 788 
 789 
 790   uint int_args = 0;
 791   uint fp_args = 0;
 792 
 793   for (int i = 0; i < total_args_passed; i++) {
 794     switch (sig_bt[i]) {
 795     case T_BOOLEAN:
 796     case T_CHAR:
 797     case T_BYTE:
 798     case T_SHORT:
 799     case T_INT:
 800       if (int_args < java_return_convention_max_int) {
 801         regs[i].set1(INT_ArgReg[int_args]->as_VMReg());
 802         int_args++;
 803       } else {
 804         return -1;
 805       }
 806       break;
 807     case T_VOID:
 808       // halves of T_LONG or T_DOUBLE
 809       assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
 810       regs[i].set_bad();
 811       break;
 812     case T_LONG:
 813       assert(sig_bt[i + 1] == T_VOID, "expecting half");
 814       // fall through
 815     case T_OBJECT:
 816     case T_ARRAY:
 817     case T_ADDRESS:
 818     case T_METADATA:
 819       if (int_args < java_return_convention_max_int) {
 820         regs[i].set2(INT_ArgReg[int_args]->as_VMReg());
 821         int_args++;
 822       } else {
 823         return -1;
 824       }
 825       break;
 826     case T_FLOAT:
 827       if (fp_args < java_return_convention_max_float) {
 828         regs[i].set1(FP_ArgReg[fp_args]->as_VMReg());
 829         fp_args++;
 830       } else {
 831         return -1;
 832       }
 833       break;
 834     case T_DOUBLE:
 835       assert(sig_bt[i + 1] == T_VOID, "expecting half");
 836       if (fp_args < java_return_convention_max_float) {
 837         regs[i].set2(FP_ArgReg[fp_args]->as_VMReg());
 838         fp_args++;
 839       } else {
 840         return -1;
 841       }
 842       break;
 843     default:
 844       ShouldNotReachHere();
 845       break;
 846     }
 847   }
 848 
 849   return int_args + fp_args;
 850 }
 851 
 852 // Calling convention for calling C code.
 853 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
 854                                         VMRegPair *regs,
 855                                         int total_args_passed) {
 856   // Calling conventions for C runtime calls and calls to JNI native methods.
 857   //
 858   // PPC64 convention: Hoist the first 8 int/ptr/long's in the first 8
 859   // int regs, leaving int regs undefined if the arg is flt/dbl. Hoist
 860   // the first 13 flt/dbl's in the first 13 fp regs but additionally
 861   // copy flt/dbl to the stack if they are beyond the 8th argument.
 862 
 863   const VMReg iarg_reg[8] = {
 864     R3->as_VMReg(),
 865     R4->as_VMReg(),
 866     R5->as_VMReg(),
 867     R6->as_VMReg(),
 868     R7->as_VMReg(),
 869     R8->as_VMReg(),
 870     R9->as_VMReg(),
 871     R10->as_VMReg()
 872   };
 873 
 874   const VMReg farg_reg[13] = {
 875     F1->as_VMReg(),
 876     F2->as_VMReg(),
 877     F3->as_VMReg(),
 878     F4->as_VMReg(),
 879     F5->as_VMReg(),
 880     F6->as_VMReg(),
 881     F7->as_VMReg(),
 882     F8->as_VMReg(),
 883     F9->as_VMReg(),
 884     F10->as_VMReg(),
 885     F11->as_VMReg(),
 886     F12->as_VMReg(),
 887     F13->as_VMReg()
 888   };
 889 
 890   // Check calling conventions consistency.
 891   assert(sizeof(iarg_reg) / sizeof(iarg_reg[0]) == Argument::n_int_register_parameters_c &&
 892          sizeof(farg_reg) / sizeof(farg_reg[0]) == Argument::n_float_register_parameters_c,
 893          "consistency");
 894 
 895   const int additional_frame_header_slots = ((frame::native_abi_minframe_size - frame::jit_out_preserve_size)
 896                                             / VMRegImpl::stack_slot_size);
 897   const int float_offset_in_slots = Argument::float_on_stack_offset_in_bytes_c / VMRegImpl::stack_slot_size;
 898 
 899   VMReg reg;
 900   int arg = 0;
 901   int freg = 0;
 902   bool stack_used = false;
 903 
 904   for (int i = 0; i < total_args_passed; ++i, ++arg) {
 905     // Each argument corresponds to a slot in the Parameter Save Area (if not omitted)
 906     int stk = (arg * 2) + additional_frame_header_slots;
 907 
 908     switch(sig_bt[i]) {
 909     //
 910     // If arguments 0-7 are integers, they are passed in integer registers.
 911     // Argument i is placed in iarg_reg[i].
 912     //
 913     case T_BOOLEAN:
 914     case T_CHAR:
 915     case T_BYTE:
 916     case T_SHORT:
 917     case T_INT:
 918       // We must cast ints to longs and use full 64 bit stack slots
 919       // here.  Thus fall through, handle as long.
 920     case T_LONG:
 921     case T_OBJECT:
 922     case T_ARRAY:
 923     case T_ADDRESS:
 924     case T_METADATA:
 925       // Oops are already boxed if required (JNI).
 926       if (arg < Argument::n_int_register_parameters_c) {
 927         reg = iarg_reg[arg];
 928       } else {
 929         reg = VMRegImpl::stack2reg(stk);
 930         stack_used = true;
 931       }
 932       regs[i].set2(reg);
 933       break;
 934 
 935     //
 936     // Floats are treated differently from int regs:  The first 13 float arguments
 937     // are passed in registers (not the float args among the first 13 args).
 938     // Thus argument i is NOT passed in farg_reg[i] if it is float.  It is passed
 939     // in farg_reg[j] if argument i is the j-th float argument of this call.
 940     //
 941     case T_FLOAT:
 942       if (freg < Argument::n_float_register_parameters_c) {
 943         // Put float in register ...
 944         reg = farg_reg[freg];
 945         ++freg;
 946       } else {
 947         // Put float on stack.
 948         reg = VMRegImpl::stack2reg(stk + float_offset_in_slots);
 949         stack_used = true;
 950       }
 951       regs[i].set1(reg);
 952       break;
 953     case T_DOUBLE:
 954       assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half");
 955       if (freg < Argument::n_float_register_parameters_c) {
 956         // Put double in register ...
 957         reg = farg_reg[freg];
 958         ++freg;
 959       } else {
 960         // Put double on stack.
 961         reg = VMRegImpl::stack2reg(stk);
 962         stack_used = true;
 963       }
 964       regs[i].set2(reg);
 965       break;
 966 
 967     case T_VOID:
 968       // Do not count halves.
 969       regs[i].set_bad();
 970       --arg;
 971       break;
 972     default:
 973       ShouldNotReachHere();
 974     }
 975   }
 976 
 977   // Return size of the stack frame excluding the jit_out_preserve part in single-word slots.
 978 #if defined(ABI_ELFv2)
 979   assert(additional_frame_header_slots == 0, "ABIv2 shouldn't use extra slots");
 980   // ABIv2 allows omitting the Parameter Save Area if the callee's prototype
 981   // indicates that all parameters can be passed in registers.
 982   return stack_used ? (arg * 2) : 0;
 983 #else
 984   // The Parameter Save Area needs to be at least 8 double-word slots for ABIv1.
 985   // We have to add extra slots because ABIv1 uses a larger header.
 986   return MAX2(arg, 8) * 2 + additional_frame_header_slots;
 987 #endif
 988 }
 989 
 990 int SharedRuntime::vector_calling_convention(VMRegPair *regs,
 991                                              uint num_bits,
 992                                              uint total_args_passed) {
 993   Unimplemented();
 994   return 0;
 995 }
 996 
 997 // Patch the callers callsite with entry to compiled code if it exists.
 998 static void patch_callers_callsite(MacroAssembler *masm, int adapter_size, int total_args_passed, const VMRegPair *regs) {
 999   Label L;
1000   __ ld(R0, in_bytes(Method::code_offset()), R19_method);
1001   __ cmpdi(CR0, R0, 0);
1002   __ beq(CR0, L);
1003 
1004   // Patch caller's callsite, method_(code) was not null which means that
1005   // compiled code exists.
1006   const Register return_pc = R11_scratch1;
1007   const Register tmp       = R12_scratch2;
1008   __ mflr(return_pc);
1009   __ std(return_pc, _abi0(lr), R1_SP);
1010   RegisterSaver::push_frame_and_save_argument_registers(masm, tmp, adapter_size, total_args_passed, regs);
1011 
1012   __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), R19_method, return_pc);
1013 
1014   RegisterSaver::restore_argument_registers_and_pop_frame(masm, adapter_size, total_args_passed, regs);
1015   __ ld(return_pc, _abi0(lr), R1_SP);
1016   __ mtlr(return_pc);
1017 
1018   // callsite->set_to_clean() uses icache flush including isync
1019 
1020   __ bind(L);
1021 }
1022 
1023 // For each inline type argument, sig includes the list of fields of
1024 // the inline type. This utility function computes the number of
1025 // arguments for the call if inline types are passed by reference (the
1026 // calling convention the interpreter expects).
1027 static int compute_total_args_passed_int(const GrowableArray<SigEntry>* sig_extended) {
1028   int total_args_passed = 0;
1029   if (InlineTypePassFieldsAsArgs) {
1030     for (int i = 0; i < sig_extended->length(); i++) {
1031       BasicType bt = sig_extended->at(i)._bt;
1032       if (bt == T_METADATA) {
1033         // In sig_extended, an inline type argument starts with:
1034         // T_METADATA, followed by the types of the fields of the
1035         // inline type and T_VOID to mark the end of the value
1036         // type. Inline types are flattened so, for instance, in the
1037         // case of an inline type with an int field and an inline type
1038         // field that itself has 2 fields, an int and a long:
1039         // T_METADATA T_INT T_METADATA T_INT T_LONG T_VOID (second
1040         // slot for the T_LONG) T_VOID (inner inline type) T_VOID
1041         // (outer inline type)
1042         total_args_passed++;
1043         int vt = 1;
1044         do {
1045           i++;
1046           BasicType bt = sig_extended->at(i)._bt;
1047           BasicType prev_bt = sig_extended->at(i-1)._bt;
1048           if (bt == T_METADATA) {
1049             vt++;
1050           } else if (bt == T_VOID &&
1051                      prev_bt != T_LONG &&
1052                      prev_bt != T_DOUBLE) {
1053             vt--;
1054           }
1055         } while (vt != 0);
1056       } else {
1057         total_args_passed++;
1058       }
1059     }
1060   } else {
1061     total_args_passed = sig_extended->length();
1062   }
1063   return total_args_passed;
1064 }
1065 
1066 static void gen_c2i_adapter(MacroAssembler *masm,
1067                             const GrowableArray<SigEntry>* sig_extended,
1068                             const VMRegPair *regs,
1069                             bool requires_clinit_barrier,
1070                             address& c2i_no_clinit_check_entry,
1071                             Label& skip_fixup,
1072                             address start,
1073                             OopMapSet* oop_maps,
1074                             int& frame_complete,
1075                             int& frame_size_in_words,
1076                             bool alloc_inline_receiver) {
1077   if (requires_clinit_barrier) {
1078     assert(VM_Version::supports_fast_class_init_checks(), "sanity");
1079     Label L_skip_barrier;
1080 
1081     // Bypass the barrier for non-static methods
1082     __ lhz(R0, in_bytes(Method::access_flags_offset()), R19_method);
1083     __ andi_(R0, R0, JVM_ACC_STATIC);
1084     __ beq(CR0, L_skip_barrier); // non-static
1085 
1086     Register klass = R11_scratch1;
1087     __ load_method_holder(klass, R19_method);
1088     __ clinit_barrier(klass, R16_thread, &L_skip_barrier /*L_fast_path*/);
1089 
1090     __ load_const_optimized(klass, SharedRuntime::get_handle_wrong_method_stub(), R0);
1091     __ mtctr(klass);
1092     __ bctr();
1093 
1094     __ bind(L_skip_barrier);
1095     c2i_no_clinit_check_entry = __ pc();
1096   }
1097 
1098   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
1099   bs->c2i_entry_barrier(masm, R11_scratch1, R12_scratch2, R21_tmp1);
1100 
1101   // Since all args are passed on the stack, total_args_passed *
1102   // Interpreter::stackElementSize is the space we need.
1103   int total_args_passed = compute_total_args_passed_int(sig_extended);
1104   assert(total_args_passed >= 0, "total_args_passed is %d", total_args_passed);
1105 
1106     // Adapter needs TOP_IJAVA_FRAME_ABI.
1107   const int adapter_size = frame::top_ijava_frame_abi_size +
1108                            align_up(total_args_passed * wordSize, frame::alignment_in_bytes);
1109 
1110   // Before we get into the guts of the C2I adapter, see if we should be here
1111   // at all.  We've come from compiled code and are attempting to jump to the
1112   // interpreter, which means the caller made a static call to get here
1113   // (vcalls always get a compiled target if there is one).  Check for a
1114   // compiled target.  If there is one, we need to patch the caller's call.
1115   patch_callers_callsite(masm, adapter_size, total_args_passed, regs);
1116 
1117   __ bind(skip_fixup);
1118 
1119   if (InlineTypePassFieldsAsArgs) {
1120     // Is there an inline type argument?
1121     bool has_inline_argument = false;
1122     for (int i = 0; i < sig_extended->length() && !has_inline_argument; i++) {
1123       has_inline_argument = (sig_extended->at(i)._bt == T_METADATA);
1124     }
1125     if (has_inline_argument) {
1126       __ unimplemented("c2i has_inline_argument");
1127     }
1128   }
1129 
1130   // Call the interpreter.
1131   const Register tmp = R22_tmp2, ientry = R23_tmp3;
1132   const Register value_regs[] = { R24_tmp4, R25_tmp5, R26_tmp6 };
1133   const int num_value_regs = sizeof(value_regs) / sizeof(Register);
1134   int value_regs_index = 0;
1135 
1136   __ ld(ientry, method_(interpreter_entry)); // preloaded
1137   __ mtctr(ientry);
1138 
1139   // Get a copy of the current SP for loading caller's arguments.
1140   __ mr(R21_sender_SP, R1_SP);
1141 
1142   // Add space for the adapter.
1143   __ resize_frame(-adapter_size, R12_scratch2);
1144 
1145   int st_off = adapter_size - wordSize;
1146 
1147   // Write the args into the outgoing interpreter space.
1148   // TODO: support for InlineTypePassFieldsAsArgs
1149   for (int i = 0; i < total_args_passed; i++) {
1150     BasicType bt = sig_extended->at(i)._bt;
1151 
1152     VMReg r_1 = regs[i].first();
1153     VMReg r_2 = regs[i].second();
1154     if (!r_1->is_valid()) {
1155       assert(!r_2->is_valid(), "");
1156       continue;
1157     }
1158     if (r_1->is_stack()) {
1159       Register tmp_reg = value_regs[value_regs_index];
1160       value_regs_index = (value_regs_index + 1) % num_value_regs;
1161       // The calling convention produces OptoRegs that ignore the out
1162       // preserve area (JIT's ABI). We must account for it here.
1163       int ld_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
1164       if (!r_2->is_valid()) {
1165         __ lwz(tmp_reg, ld_off, R21_sender_SP);
1166       } else {
1167         __ ld(tmp_reg, ld_off, R21_sender_SP);
1168       }
1169       // Pretend stack targets were loaded into tmp_reg.
1170       r_1 = tmp_reg->as_VMReg();
1171     }
1172 
1173     if (r_1->is_Register()) {
1174       Register r = r_1->as_Register();
1175       if (!r_2->is_valid()) {
1176         __ stw(r, st_off, R1_SP);
1177         st_off-=wordSize;
1178       } else {
1179         // Longs are given 2 64-bit slots in the interpreter, but the
1180         // data is passed in only 1 slot.
1181         if (bt == T_LONG || bt == T_DOUBLE) {
1182           DEBUG_ONLY( __ li(tmp, 0); __ std(tmp, st_off, R1_SP); )
1183           st_off-=wordSize;
1184         }
1185         __ std(r, st_off, R1_SP);
1186         st_off-=wordSize;
1187       }
1188     } else {
1189       assert(r_1->is_FloatRegister(), "");
1190       FloatRegister f = r_1->as_FloatRegister();
1191       if (!r_2->is_valid()) {
1192         __ stfs(f, st_off, R1_SP);
1193         st_off-=wordSize;
1194       } else {
1195         // In 64bit, doubles are given 2 64-bit slots in the interpreter, but the
1196         // data is passed in only 1 slot.
1197         // One of these should get known junk...
1198         DEBUG_ONLY( __ li(tmp, 0); __ std(tmp, st_off, R1_SP); )
1199         st_off-=wordSize;
1200         __ stfd(f, st_off, R1_SP);
1201         st_off-=wordSize;
1202       }
1203     }
1204   }
1205 
1206   // Jump to the interpreter just as if interpreter was doing it.
1207 
1208   __ load_const_optimized(R25_templateTableBase, (address)Interpreter::dispatch_table((TosState)0), R11_scratch1);
1209 
1210   // load TOS
1211   __ addi(R15_esp, R1_SP, st_off);
1212 
1213   __ bctr();
1214 }
1215 
1216 void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
1217                                     int comp_args_on_stack,
1218                                     const GrowableArray<SigEntry>* sig,
1219                                     const VMRegPair *regs) {
1220 
1221   // Load method's entry-point from method.
1222   __ ld(R12_scratch2, in_bytes(Method::from_compiled_offset()), R19_method);
1223   __ mtctr(R12_scratch2);
1224 
1225   // We will only enter here from an interpreted frame and never from after
1226   // passing thru a c2i. Azul allowed this but we do not. If we lose the
1227   // race and use a c2i we will remain interpreted for the race loser(s).
1228   // This removes all sorts of headaches on the x86 side and also eliminates
1229   // the possibility of having c2i -> i2c -> c2i -> ... endless transitions.
1230 
1231   // Note: r13 contains the senderSP on entry. We must preserve it since
1232   // we may do a i2c -> c2i transition if we lose a race where compiled
1233   // code goes non-entrant while we get args ready.
1234   // In addition we use r13 to locate all the interpreter args as
1235   // we must align the stack to 16 bytes on an i2c entry else we
1236   // lose alignment we expect in all compiled code and register
1237   // save code can segv when fxsave instructions find improperly
1238   // aligned stack pointer.
1239 
1240   const Register ld_ptr = R15_esp;
1241   const Register value_regs[] = { R22_tmp2, R23_tmp3, R24_tmp4, R25_tmp5, R26_tmp6 };
1242   const int num_value_regs = sizeof(value_regs) / sizeof(Register);
1243   int value_regs_index = 0;
1244 
1245   int total_args_passed = sig->length();
1246   int ld_offset = total_args_passed*wordSize;
1247 
1248   // Cut-out for having no stack args. Since up to 2 int/oop args are passed
1249   // in registers, we will occasionally have no stack args.
1250   int comp_words_on_stack = 0;
1251   if (comp_args_on_stack) {
1252     // Sig words on the stack are greater-than VMRegImpl::stack0. Those in
1253     // registers are below. By subtracting stack0, we either get a negative
1254     // number (all values in registers) or the maximum stack slot accessed.
1255 
1256     // Convert 4-byte c2 stack slots to words.
1257     comp_words_on_stack = align_up(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord;
1258     // Round up to miminum stack alignment, in wordSize.
1259     comp_words_on_stack = align_up(comp_words_on_stack, 2);
1260     __ resize_frame(-comp_words_on_stack * wordSize, R11_scratch1);
1261   }
1262 
1263   // Now generate the shuffle code.  Pick up all register args and move the
1264   // rest through register value=Z_R12.
1265   BLOCK_COMMENT("Shuffle arguments");
1266 
1267   for (int i = 0; i < total_args_passed; i++) {
1268     BasicType bt = sig->at(i)._bt;
1269     if (bt == T_VOID) {
1270       assert(i > 0 && (sig->at(i - 1)._bt == T_LONG || sig->at(i - 1)._bt == T_DOUBLE), "missing half");
1271       continue;
1272     }
1273 
1274     // Pick up 0, 1 or 2 words from ld_ptr.
1275     assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
1276             "scrambled load targets?");
1277     VMReg r_1 = regs[i].first();
1278     VMReg r_2 = regs[i].second();
1279     if (!r_1->is_valid()) {
1280       assert(!r_2->is_valid(), "");
1281       continue;
1282     }
1283     if (r_1->is_FloatRegister()) {
1284       if (!r_2->is_valid()) {
1285         __ lfs(r_1->as_FloatRegister(), ld_offset, ld_ptr);
1286         ld_offset-=wordSize;
1287       } else {
1288         // Skip the unused interpreter slot.
1289         __ lfd(r_1->as_FloatRegister(), ld_offset-wordSize, ld_ptr);
1290         ld_offset-=2*wordSize;
1291       }
1292     } else {
1293       Register r;
1294       if (r_1->is_stack()) {
1295         // Must do a memory to memory move thru "value".
1296         r = value_regs[value_regs_index];
1297         value_regs_index = (value_regs_index + 1) % num_value_regs;
1298       } else {
1299         r = r_1->as_Register();
1300       }
1301       if (!r_2->is_valid()) {
1302         // Not sure we need to do this but it shouldn't hurt.
1303         if (is_reference_type(bt) || bt == T_ADDRESS) {
1304           __ ld(r, ld_offset, ld_ptr);
1305           ld_offset-=wordSize;
1306         } else {
1307           __ lwz(r, ld_offset, ld_ptr);
1308           ld_offset-=wordSize;
1309         }
1310       } else {
1311         // In 64bit, longs are given 2 64-bit slots in the interpreter, but the
1312         // data is passed in only 1 slot.
1313         if (bt == T_LONG || bt == T_DOUBLE) {
1314           ld_offset-=wordSize;
1315         }
1316         __ ld(r, ld_offset, ld_ptr);
1317         ld_offset-=wordSize;
1318       }
1319 
1320       if (r_1->is_stack()) {
1321         // Now store value where the compiler expects it
1322         int st_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots())*VMRegImpl::stack_slot_size;
1323 
1324         if (bt == T_INT   || bt == T_FLOAT || bt == T_BOOLEAN ||
1325             bt == T_SHORT || bt == T_CHAR  || bt == T_BYTE) {
1326           __ stw(r, st_off, R1_SP);
1327         } else {
1328           __ std(r, st_off, R1_SP);
1329         }
1330       }
1331     }
1332   }
1333 
1334   __ push_cont_fastpath(); // Set JavaThread::_cont_fastpath to the sp of the oldest interpreted frame we know about
1335 
1336   BLOCK_COMMENT("Store method");
1337   // Store method into thread->callee_target.
1338   // We might end up in handle_wrong_method if the callee is
1339   // deoptimized as we race thru here. If that happens we don't want
1340   // to take a safepoint because the caller frame will look
1341   // interpreted and arguments are now "compiled" so it is much better
1342   // to make this transition invisible to the stack walking
1343   // code. Unfortunately if we try and find the callee by normal means
1344   // a safepoint is possible. So we stash the desired callee in the
1345   // thread and the vm will find there should this case occur.
1346   __ std(R19_method, thread_(callee_target));
1347 
1348   // Jump to the compiled code just as if compiled code was doing it.
1349   __ bctr();
1350 }
1351 
1352 static void gen_inline_cache_check(MacroAssembler *masm, Label& skip_fixup) {
1353   __ ic_check(BytesPerInstWord /* end_alignment */);
1354   __ ld(R19_method, CompiledICData::speculated_method_offset(), R19_inline_cache_reg);
1355 
1356   // Method might have been compiled since the call site was patched to
1357   // interpreted; if that is the case treat it as a miss so we can get
1358   // the call site corrected.
1359   __ ld(R0, method_(code));
1360   __ cmpdi(CR0, R0, 0);
1361   __ beq_predict_taken(CR0, skip_fixup);
1362 
1363   // Branch to ic_miss_stub.
1364   __ b64_patchable((address)SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type);
1365 }
1366 
1367 void SharedRuntime::generate_i2c2i_adapters(MacroAssembler* masm,
1368                                             int comp_args_on_stack,
1369                                             const GrowableArray<SigEntry>* sig,
1370                                             const VMRegPair* regs,
1371                                             const GrowableArray<SigEntry>* sig_cc,
1372                                             const VMRegPair* regs_cc,
1373                                             const GrowableArray<SigEntry>* sig_cc_ro,
1374                                             const VMRegPair* regs_cc_ro,
1375                                             address entry_address[AdapterBlob::ENTRY_COUNT],
1376                                             AdapterBlob*& new_adapter,
1377                                             bool allocate_code_blob) {
1378 
1379   entry_address[AdapterBlob::I2C] = __ pc();
1380   gen_i2c_adapter(masm, comp_args_on_stack, sig, regs);
1381 
1382   // -------------------------------------------------------------------------
1383   // Generate a C2I adapter.  On entry we know rmethod holds the Method* during calls
1384   // to the interpreter.  The args start out packed in the compiled layout.  They
1385   // need to be unpacked into the interpreter layout.  This will almost always
1386   // require some stack space.  We grow the current (compiled) stack, then repack
1387   // the args.  We  finally end in a jump to the generic interpreter entry point.
1388   // On exit from the interpreter, the interpreter will restore our SP (lest the
1389   // compiled code, which relies solely on SP and not FP, get sick).
1390 
1391   entry_address[AdapterBlob::C2I_Unverified] = __ pc();
1392   entry_address[AdapterBlob::C2I_Unverified_Inline] = __ pc();
1393   Label skip_fixup;
1394 
1395   gen_inline_cache_check(masm, skip_fixup);
1396 
1397   OopMapSet* oop_maps = new OopMapSet();
1398   int frame_complete = CodeOffsets::frame_never_safe;
1399   int frame_size_in_words = 0;
1400 
1401   // Scalarized c2i adapter with non-scalarized receiver (i.e., don't pack receiver)
1402   entry_address[AdapterBlob::C2I_No_Clinit_Check] = nullptr;
1403   entry_address[AdapterBlob::C2I_Inline_RO] = __ pc();
1404   if (regs_cc != regs_cc_ro) {
1405     // No class init barrier needed because method is guaranteed to be non-static
1406     __ unimplemented("C2I_Inline_RO");
1407 #if 0
1408     gen_c2i_adapter(masm, sig_cc_ro, regs_cc_ro, /* requires_clinit_barrier = */ false, entry_address[AdapterBlob::C2I_No_Clinit_Check],
1409                     skip_fixup, entry_address[AdapterBlob::I2C], oop_maps, frame_complete, frame_size_in_words, /* alloc_inline_receiver = */ false);
1410 #endif
1411     skip_fixup.reset();
1412   }
1413 
1414   // Scalarized c2i adapter
1415   entry_address[AdapterBlob::C2I]        = __ pc();
1416   entry_address[AdapterBlob::C2I_Inline] = __ pc();
1417   gen_c2i_adapter(masm, sig_cc, regs_cc, /* requires_clinit_barrier = */ true, entry_address[AdapterBlob::C2I_No_Clinit_Check],
1418                   skip_fixup, entry_address[AdapterBlob::I2C], oop_maps, frame_complete, frame_size_in_words, /* alloc_inline_receiver = */ true);
1419 
1420   // Non-scalarized c2i adapter
1421   if (regs != regs_cc) {
1422     entry_address[AdapterBlob::C2I_Unverified_Inline] = __ pc();
1423     Label inline_entry_skip_fixup;
1424     __ unimplemented("C2I_Unverified_Inline");
1425 #if 0
1426     gen_inline_cache_check(masm, inline_entry_skip_fixup);
1427 #endif
1428 
1429     entry_address[AdapterBlob::C2I_Inline] = __ pc();
1430     __ unimplemented("C2I_Inline2");
1431 #if 0
1432     gen_c2i_adapter(masm, sig, regs, /* requires_clinit_barrier = */ true, entry_address[AdapterBlob::C2I_No_Clinit_Check],
1433                     inline_entry_skip_fixup, entry_address[AdapterBlob::I2C], oop_maps, frame_complete, frame_size_in_words, /* alloc_inline_receiver = */ false);
1434 #endif
1435   }
1436   // The c2i adapters might safepoint and trigger a GC. The caller must make sure that
1437   // the GC knows about the location of oop argument locations passed to the c2i adapter.
1438   if (allocate_code_blob) {
1439     bool caller_must_gc_arguments = (regs != regs_cc);
1440     int entry_offset[AdapterHandlerEntry::ENTRIES_COUNT];
1441     assert(AdapterHandlerEntry::ENTRIES_COUNT == 7, "sanity");
1442     AdapterHandlerLibrary::address_to_offset(entry_address, entry_offset);
1443     new_adapter = AdapterBlob::create(masm->code(), entry_offset, frame_complete, frame_size_in_words, oop_maps, caller_must_gc_arguments);
1444   }
1445 }
1446 
1447 // An oop arg. Must pass a handle not the oop itself.
1448 static void object_move(MacroAssembler* masm,
1449                         int frame_size_in_slots,
1450                         OopMap* oop_map, int oop_handle_offset,
1451                         bool is_receiver, int* receiver_offset,
1452                         VMRegPair src, VMRegPair dst,
1453                         Register r_caller_sp, Register r_temp_1, Register r_temp_2) {
1454   assert(!is_receiver || (is_receiver && (*receiver_offset == -1)),
1455          "receiver has already been moved");
1456 
1457   // We must pass a handle. First figure out the location we use as a handle.
1458 
1459   if (src.first()->is_stack()) {
1460     // stack to stack or reg
1461 
1462     const Register r_handle = dst.first()->is_stack() ? r_temp_1 : dst.first()->as_Register();
1463     Label skip;
1464     const int oop_slot_in_callers_frame = reg2slot(src.first());
1465 
1466     guarantee(!is_receiver, "expecting receiver in register");
1467     oop_map->set_oop(VMRegImpl::stack2reg(oop_slot_in_callers_frame + frame_size_in_slots));
1468 
1469     __ addi(r_handle, r_caller_sp, reg2offset(src.first()));
1470     __ ld(  r_temp_2, reg2offset(src.first()), r_caller_sp);
1471     __ cmpdi(CR0, r_temp_2, 0);
1472     __ bne(CR0, skip);
1473     // Use a null handle if oop is null.
1474     __ li(r_handle, 0);
1475     __ bind(skip);
1476 
1477     if (dst.first()->is_stack()) {
1478       // stack to stack
1479       __ std(r_handle, reg2offset(dst.first()), R1_SP);
1480     } else {
1481       // stack to reg
1482       // Nothing to do, r_handle is already the dst register.
1483     }
1484   } else {
1485     // reg to stack or reg
1486     const Register r_oop      = src.first()->as_Register();
1487     const Register r_handle   = dst.first()->is_stack() ? r_temp_1 : dst.first()->as_Register();
1488     const int oop_slot        = (r_oop->encoding()-R3_ARG1->encoding()) * VMRegImpl::slots_per_word
1489                                 + oop_handle_offset; // in slots
1490     const int oop_offset = oop_slot * VMRegImpl::stack_slot_size;
1491     Label skip;
1492 
1493     if (is_receiver) {
1494       *receiver_offset = oop_offset;
1495     }
1496     oop_map->set_oop(VMRegImpl::stack2reg(oop_slot));
1497 
1498     __ std( r_oop,    oop_offset, R1_SP);
1499     __ addi(r_handle, R1_SP, oop_offset);
1500 
1501     __ cmpdi(CR0, r_oop, 0);
1502     __ bne(CR0, skip);
1503     // Use a null handle if oop is null.
1504     __ li(r_handle, 0);
1505     __ bind(skip);
1506 
1507     if (dst.first()->is_stack()) {
1508       // reg to stack
1509       __ std(r_handle, reg2offset(dst.first()), R1_SP);
1510     } else {
1511       // reg to reg
1512       // Nothing to do, r_handle is already the dst register.
1513     }
1514   }
1515 }
1516 
1517 static void int_move(MacroAssembler*masm,
1518                      VMRegPair src, VMRegPair dst,
1519                      Register r_caller_sp, Register r_temp) {
1520   assert(src.first()->is_valid(), "incoming must be int");
1521   assert(dst.first()->is_valid() && dst.second() == dst.first()->next(), "outgoing must be long");
1522 
1523   if (src.first()->is_stack()) {
1524     if (dst.first()->is_stack()) {
1525       // stack to stack
1526       __ lwa(r_temp, reg2offset(src.first()), r_caller_sp);
1527       __ std(r_temp, reg2offset(dst.first()), R1_SP);
1528     } else {
1529       // stack to reg
1530       __ lwa(dst.first()->as_Register(), reg2offset(src.first()), r_caller_sp);
1531     }
1532   } else if (dst.first()->is_stack()) {
1533     // reg to stack
1534     __ extsw(r_temp, src.first()->as_Register());
1535     __ std(r_temp, reg2offset(dst.first()), R1_SP);
1536   } else {
1537     // reg to reg
1538     __ extsw(dst.first()->as_Register(), src.first()->as_Register());
1539   }
1540 }
1541 
1542 static void long_move(MacroAssembler*masm,
1543                       VMRegPair src, VMRegPair dst,
1544                       Register r_caller_sp, Register r_temp) {
1545   assert(src.first()->is_valid() && src.second() == src.first()->next(), "incoming must be long");
1546   assert(dst.first()->is_valid() && dst.second() == dst.first()->next(), "outgoing must be long");
1547 
1548   if (src.first()->is_stack()) {
1549     if (dst.first()->is_stack()) {
1550       // stack to stack
1551       __ ld( r_temp, reg2offset(src.first()), r_caller_sp);
1552       __ std(r_temp, reg2offset(dst.first()), R1_SP);
1553     } else {
1554       // stack to reg
1555       __ ld(dst.first()->as_Register(), reg2offset(src.first()), r_caller_sp);
1556     }
1557   } else if (dst.first()->is_stack()) {
1558     // reg to stack
1559     __ std(src.first()->as_Register(), reg2offset(dst.first()), R1_SP);
1560   } else {
1561     // reg to reg
1562     if (dst.first()->as_Register() != src.first()->as_Register())
1563       __ mr(dst.first()->as_Register(), src.first()->as_Register());
1564   }
1565 }
1566 
1567 static void float_move(MacroAssembler*masm,
1568                        VMRegPair src, VMRegPair dst,
1569                        Register r_caller_sp, Register r_temp) {
1570   assert(src.first()->is_valid() && !src.second()->is_valid(), "incoming must be float");
1571   assert(dst.first()->is_valid() && !dst.second()->is_valid(), "outgoing must be float");
1572 
1573   if (src.first()->is_stack()) {
1574     if (dst.first()->is_stack()) {
1575       // stack to stack
1576       __ lwz(r_temp, reg2offset(src.first()), r_caller_sp);
1577       __ stw(r_temp, reg2offset(dst.first()), R1_SP);
1578     } else {
1579       // stack to reg
1580       __ lfs(dst.first()->as_FloatRegister(), reg2offset(src.first()), r_caller_sp);
1581     }
1582   } else if (dst.first()->is_stack()) {
1583     // reg to stack
1584     __ stfs(src.first()->as_FloatRegister(), reg2offset(dst.first()), R1_SP);
1585   } else {
1586     // reg to reg
1587     if (dst.first()->as_FloatRegister() != src.first()->as_FloatRegister())
1588       __ fmr(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister());
1589   }
1590 }
1591 
1592 static void double_move(MacroAssembler*masm,
1593                         VMRegPair src, VMRegPair dst,
1594                         Register r_caller_sp, Register r_temp) {
1595   assert(src.first()->is_valid() && src.second() == src.first()->next(), "incoming must be double");
1596   assert(dst.first()->is_valid() && dst.second() == dst.first()->next(), "outgoing must be double");
1597 
1598   if (src.first()->is_stack()) {
1599     if (dst.first()->is_stack()) {
1600       // stack to stack
1601       __ ld( r_temp, reg2offset(src.first()), r_caller_sp);
1602       __ std(r_temp, reg2offset(dst.first()), R1_SP);
1603     } else {
1604       // stack to reg
1605       __ lfd(dst.first()->as_FloatRegister(), reg2offset(src.first()), r_caller_sp);
1606     }
1607   } else if (dst.first()->is_stack()) {
1608     // reg to stack
1609     __ stfd(src.first()->as_FloatRegister(), reg2offset(dst.first()), R1_SP);
1610   } else {
1611     // reg to reg
1612     if (dst.first()->as_FloatRegister() != src.first()->as_FloatRegister())
1613       __ fmr(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister());
1614   }
1615 }
1616 
1617 void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
1618   switch (ret_type) {
1619     case T_BOOLEAN:
1620     case T_CHAR:
1621     case T_BYTE:
1622     case T_SHORT:
1623     case T_INT:
1624       __ stw (R3_RET,  frame_slots*VMRegImpl::stack_slot_size, R1_SP);
1625       break;
1626     case T_ARRAY:
1627     case T_OBJECT:
1628     case T_LONG:
1629       __ std (R3_RET,  frame_slots*VMRegImpl::stack_slot_size, R1_SP);
1630       break;
1631     case T_FLOAT:
1632       __ stfs(F1_RET, frame_slots*VMRegImpl::stack_slot_size, R1_SP);
1633       break;
1634     case T_DOUBLE:
1635       __ stfd(F1_RET, frame_slots*VMRegImpl::stack_slot_size, R1_SP);
1636       break;
1637     case T_VOID:
1638       break;
1639     default:
1640       ShouldNotReachHere();
1641       break;
1642   }
1643 }
1644 
1645 void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {
1646   switch (ret_type) {
1647     case T_BOOLEAN:
1648     case T_CHAR:
1649     case T_BYTE:
1650     case T_SHORT:
1651     case T_INT:
1652       __ lwz(R3_RET,  frame_slots*VMRegImpl::stack_slot_size, R1_SP);
1653       break;
1654     case T_ARRAY:
1655     case T_OBJECT:
1656     case T_LONG:
1657       __ ld (R3_RET,  frame_slots*VMRegImpl::stack_slot_size, R1_SP);
1658       break;
1659     case T_FLOAT:
1660       __ lfs(F1_RET, frame_slots*VMRegImpl::stack_slot_size, R1_SP);
1661       break;
1662     case T_DOUBLE:
1663       __ lfd(F1_RET, frame_slots*VMRegImpl::stack_slot_size, R1_SP);
1664       break;
1665     case T_VOID:
1666       break;
1667     default:
1668       ShouldNotReachHere();
1669       break;
1670   }
1671 }
1672 
1673 static void verify_oop_args(MacroAssembler* masm,
1674                             const methodHandle& method,
1675                             const BasicType* sig_bt,
1676                             const VMRegPair* regs) {
1677   Register temp_reg = R19_method;  // not part of any compiled calling seq
1678   if (VerifyOops) {
1679     for (int i = 0; i < method->size_of_parameters(); i++) {
1680       if (is_reference_type(sig_bt[i])) {
1681         VMReg r = regs[i].first();
1682         assert(r->is_valid(), "bad oop arg");
1683         if (r->is_stack()) {
1684           __ ld(temp_reg, reg2offset(r), R1_SP);
1685           __ verify_oop(temp_reg, FILE_AND_LINE);
1686         } else {
1687           __ verify_oop(r->as_Register(), FILE_AND_LINE);
1688         }
1689       }
1690     }
1691   }
1692 }
1693 
1694 static void gen_special_dispatch(MacroAssembler* masm,
1695                                  const methodHandle& method,
1696                                  const BasicType* sig_bt,
1697                                  const VMRegPair* regs) {
1698   verify_oop_args(masm, method, sig_bt, regs);
1699   vmIntrinsics::ID iid = method->intrinsic_id();
1700 
1701   // Now write the args into the outgoing interpreter space
1702   bool     has_receiver   = false;
1703   Register receiver_reg   = noreg;
1704   int      member_arg_pos = -1;
1705   Register member_reg     = noreg;
1706   int      ref_kind       = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid);
1707   if (ref_kind != 0) {
1708     member_arg_pos = method->size_of_parameters() - 1;  // trailing MemberName argument
1709     member_reg = R19_method;  // known to be free at this point
1710     has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind);
1711   } else if (iid == vmIntrinsics::_invokeBasic) {
1712     has_receiver = true;
1713   } else if (iid == vmIntrinsics::_linkToNative) {
1714     member_arg_pos = method->size_of_parameters() - 1;  // trailing NativeEntryPoint argument
1715     member_reg = R19_method;  // known to be free at this point
1716   } else {
1717     fatal("unexpected intrinsic id %d", vmIntrinsics::as_int(iid));
1718   }
1719 
1720   if (member_reg != noreg) {
1721     // Load the member_arg into register, if necessary.
1722     SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs);
1723     VMReg r = regs[member_arg_pos].first();
1724     if (r->is_stack()) {
1725       __ ld(member_reg, reg2offset(r), R1_SP);
1726     } else {
1727       // no data motion is needed
1728       member_reg = r->as_Register();
1729     }
1730   }
1731 
1732   if (has_receiver) {
1733     // Make sure the receiver is loaded into a register.
1734     assert(method->size_of_parameters() > 0, "oob");
1735     assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object");
1736     VMReg r = regs[0].first();
1737     assert(r->is_valid(), "bad receiver arg");
1738     if (r->is_stack()) {
1739       // Porting note:  This assumes that compiled calling conventions always
1740       // pass the receiver oop in a register.  If this is not true on some
1741       // platform, pick a temp and load the receiver from stack.
1742       fatal("receiver always in a register");
1743       receiver_reg = R11_scratch1;  // TODO (hs24): is R11_scratch1 really free at this point?
1744       __ ld(receiver_reg, reg2offset(r), R1_SP);
1745     } else {
1746       // no data motion is needed
1747       receiver_reg = r->as_Register();
1748     }
1749   }
1750 
1751   // Figure out which address we are really jumping to:
1752   MethodHandles::generate_method_handle_dispatch(masm, iid,
1753                                                  receiver_reg, member_reg, /*for_compiler_entry:*/ true);
1754 }
1755 
1756 //---------------------------- continuation_enter_setup ---------------------------
1757 //
1758 // Frame setup.
1759 //
1760 // Arguments:
1761 //   None.
1762 //
1763 // Results:
1764 //   R1_SP: pointer to blank ContinuationEntry in the pushed frame.
1765 //
1766 // Kills:
1767 //   R0, R20
1768 //
1769 static OopMap* continuation_enter_setup(MacroAssembler* masm, int& framesize_words) {
1770   assert(ContinuationEntry::size() % VMRegImpl::stack_slot_size == 0, "");
1771   assert(in_bytes(ContinuationEntry::cont_offset())  % VMRegImpl::stack_slot_size == 0, "");
1772   assert(in_bytes(ContinuationEntry::chunk_offset()) % VMRegImpl::stack_slot_size == 0, "");
1773 
1774   const int frame_size_in_bytes = (int)ContinuationEntry::size();
1775   assert(is_aligned(frame_size_in_bytes, frame::alignment_in_bytes), "alignment error");
1776 
1777   framesize_words = frame_size_in_bytes / wordSize;
1778 
1779   DEBUG_ONLY(__ block_comment("setup {"));
1780   // Save return pc and push entry frame
1781   const Register return_pc = R20;
1782   __ mflr(return_pc);
1783   __ std(return_pc, _abi0(lr), R1_SP);     // SP->lr = return_pc
1784   __ push_frame(frame_size_in_bytes , R0); // SP -= frame_size_in_bytes
1785 
1786   OopMap* map = new OopMap((int)frame_size_in_bytes / VMRegImpl::stack_slot_size, 0 /* arg_slots*/);
1787 
1788   __ ld_ptr(R0, JavaThread::cont_entry_offset(), R16_thread);
1789   __ st_ptr(R1_SP, JavaThread::cont_entry_offset(), R16_thread);
1790   __ st_ptr(R0, ContinuationEntry::parent_offset(), R1_SP);
1791   DEBUG_ONLY(__ block_comment("} setup"));
1792 
1793   return map;
1794 }
1795 
1796 //---------------------------- fill_continuation_entry ---------------------------
1797 //
1798 // Initialize the new ContinuationEntry.
1799 //
1800 // Arguments:
1801 //   R1_SP: pointer to blank Continuation entry
1802 //   reg_cont_obj: pointer to the continuation
1803 //   reg_flags: flags
1804 //
1805 // Results:
1806 //   R1_SP: pointer to filled out ContinuationEntry
1807 //
1808 // Kills:
1809 //   R8_ARG6, R9_ARG7, R10_ARG8
1810 //
1811 static void fill_continuation_entry(MacroAssembler* masm, Register reg_cont_obj, Register reg_flags) {
1812   assert_different_registers(reg_cont_obj, reg_flags);
1813   Register zero = R8_ARG6;
1814   Register tmp2 = R9_ARG7;
1815 
1816   DEBUG_ONLY(__ block_comment("fill {"));
1817 #ifdef ASSERT
1818   __ load_const_optimized(tmp2, ContinuationEntry::cookie_value());
1819   __ stw(tmp2, in_bytes(ContinuationEntry::cookie_offset()), R1_SP);
1820 #endif //ASSERT
1821 
1822   __ li(zero, 0);
1823   __ st_ptr(reg_cont_obj, ContinuationEntry::cont_offset(), R1_SP);
1824   __ stw(reg_flags, in_bytes(ContinuationEntry::flags_offset()), R1_SP);
1825   __ st_ptr(zero, ContinuationEntry::chunk_offset(), R1_SP);
1826   __ stw(zero, in_bytes(ContinuationEntry::argsize_offset()), R1_SP);
1827   __ stw(zero, in_bytes(ContinuationEntry::pin_count_offset()), R1_SP);
1828 
1829   __ ld_ptr(tmp2, JavaThread::cont_fastpath_offset(), R16_thread);
1830   __ st_ptr(tmp2, ContinuationEntry::parent_cont_fastpath_offset(), R1_SP);
1831 
1832   __ st_ptr(zero, JavaThread::cont_fastpath_offset(), R16_thread);
1833   DEBUG_ONLY(__ block_comment("} fill"));
1834 }
1835 
1836 //---------------------------- continuation_enter_cleanup ---------------------------
1837 //
1838 // Copy corresponding attributes from the top ContinuationEntry to the JavaThread
1839 // before deleting it.
1840 //
1841 // Arguments:
1842 //   R1_SP: pointer to the ContinuationEntry
1843 //
1844 // Results:
1845 //   None.
1846 //
1847 // Kills:
1848 //   R8_ARG6, R9_ARG7, R10_ARG8, R15_esp
1849 //
1850 static void continuation_enter_cleanup(MacroAssembler* masm) {
1851   Register tmp1 = R8_ARG6;
1852   Register tmp2 = R9_ARG7;
1853 
1854 #ifdef ASSERT
1855   __ block_comment("clean {");
1856   __ ld_ptr(tmp1, JavaThread::cont_entry_offset(), R16_thread);
1857   __ cmpd(CR0, R1_SP, tmp1);
1858   __ asm_assert_eq(FILE_AND_LINE ": incorrect R1_SP");
1859 #endif
1860 
1861   __ ld_ptr(tmp1, ContinuationEntry::parent_cont_fastpath_offset(), R1_SP);
1862   __ st_ptr(tmp1, JavaThread::cont_fastpath_offset(), R16_thread);
1863   __ ld_ptr(tmp2, ContinuationEntry::parent_offset(), R1_SP);
1864   __ st_ptr(tmp2, JavaThread::cont_entry_offset(), R16_thread);
1865   DEBUG_ONLY(__ block_comment("} clean"));
1866 }
1867 
// Sanity check that an enterSpecial argument is passed in the register the
// generated stub code expects. Consists of asserts only; no code is emitted.
//
//   actual_vmreg - location assigned to the argument by the calling convention
//   expected_reg - register the stub reads the argument from
//   name         - argument name, used in the assertion messages
static void check_continuation_enter_argument(VMReg actual_vmreg,
                                              Register expected_reg,
                                              const char* name) {
  // The argument must be in a register at all ...
  assert(!actual_vmreg->is_stack(), "%s cannot be on stack", name);
  // ... and in exactly the expected one.
  assert(actual_vmreg->as_Register() == expected_reg,
         "%s is in unexpected register: %s instead of %s",
         name, actual_vmreg->as_Register()->name(), expected_reg->name());
}
1876 
// Emits the code for the Continuation.enterSpecial intrinsic.
//
// The generated code consists of, in emission order:
//   - an interpreter entry that loads the arguments from the expression stack,
//   - a compiled entry (aligned to CodeEntryAlignment),
//   - the thaw path, shared by both entries (L_thaw),
//   - the normal exit, shared by both entries (L_exit),
//   - the exception handling path,
//   - the static call stub for the compiled entry's call.
//
// Parameters:
//   masm                     - assembler to emit into
//   regs                     - argument locations; checked to be R3_ARG1, R4_ARG2, R5_ARG3
//   exception_offset         - out: offset of the exception handling path
//   oop_maps                 - receives a GC map for each call emitted here
//   frame_complete           - out: offset just after the compiled entry's frame setup
//   framesize_words          - out: frame size in words (set by continuation_enter_setup)
//   interpreted_entry_offset - out: offset of the interpreter entry
//   compiled_entry_offset    - out: offset of the compiled entry
//
// All offsets are relative to the pc at which this function starts emitting.
// Additionally records ContinuationEntry::_thaw_call_pc_offset,
// ContinuationEntry::_return_pc_offset and ContinuationEntry::_cleanup_offset.
static void gen_continuation_enter(MacroAssembler* masm,
                                   const VMRegPair* regs,
                                   int& exception_offset,
                                   OopMapSet* oop_maps,
                                   int& frame_complete,
                                   int& framesize_words,
                                   int& interpreted_entry_offset,
                                   int& compiled_entry_offset) {

  // enterSpecial(Continuation c, boolean isContinue, boolean isVirtualThread)
  int pos_cont_obj   = 0;
  int pos_is_cont    = 1;
  int pos_is_virtual = 2;

  // The platform-specific calling convention may present the arguments in various registers.
  // To simplify the rest of the code, we expect the arguments to reside at these known
  // registers, and we additionally check the placement here in case calling convention ever
  // changes.
  Register reg_cont_obj   = R3_ARG1;
  Register reg_is_cont    = R4_ARG2;
  Register reg_is_virtual = R5_ARG3;

  check_continuation_enter_argument(regs[pos_cont_obj].first(),   reg_cont_obj,   "Continuation object");
  check_continuation_enter_argument(regs[pos_is_cont].first(),    reg_is_cont,    "isContinue");
  check_continuation_enter_argument(regs[pos_is_virtual].first(), reg_is_virtual, "isVirtualThread");

  // Call target for both static calls below: the resolve-static-call stub.
  AddressLiteral resolve(SharedRuntime::get_resolve_static_call_stub(), relocInfo::static_call_type);
  address start = __ pc();
  Label L_thaw, L_exit;

  // i2i entry used at interp_only_mode only
  // Nothing has been emitted since 'start' was taken, so this offset is 0.
  interpreted_entry_offset = __ pc() - start;
  {
#ifdef ASSERT
    // Stop if this entry is reached while the thread is not in interp_only_mode.
    Label is_interp_only;
    __ lwz(R0, in_bytes(JavaThread::interp_only_mode_offset()), R16_thread);
    __ cmpwi(CR0, R0, 0);
    __ bne(CR0, is_interp_only);
    __ stop("enterSpecial interpreter entry called when not in interp_only_mode");
    __ bind(is_interp_only);
#endif

    // Read interpreter arguments into registers (this is an ad-hoc i2c adapter)
    __ ld(reg_cont_obj,    Interpreter::stackElementSize*3, R15_esp);
    __ lwz(reg_is_cont,    Interpreter::stackElementSize*2, R15_esp);
    __ lwz(reg_is_virtual, Interpreter::stackElementSize*1, R15_esp);

    __ push_cont_fastpath();

    // 'map' in this scope belongs to the interpreter entry only; the compiled
    // entry below creates its own.
    OopMap* map = continuation_enter_setup(masm, framesize_words);

    // The frame is complete here, but we only record it for the compiled entry, so the frame would appear unsafe,
    // but that's okay because at the very worst we'll miss an async sample, but we're in interp_only_mode anyway.

    // Note: isVirtualThread is what gets stored as the entry's flags.
    fill_continuation_entry(masm, reg_cont_obj, reg_is_virtual);

    // If isContinue, call to thaw. Otherwise, call Continuation.enter(Continuation c, boolean isContinue)
    __ cmpwi(CR0, reg_is_cont, 0);
    __ bne(CR0, L_thaw);

    // --- call Continuation.enter(Continuation c, boolean isContinue)

    // Emit compiled static call. The call will be always resolved to the c2i
    // entry of Continuation.enter(Continuation c, boolean isContinue).
    address c2i_call_pc = __ trampoline_call(resolve);
    guarantee(c2i_call_pc != nullptr, "CodeCache is full at gen_continuation_enter");

    // Emit stub for static call
    address stub = CompiledDirectCall::emit_to_interp_stub(masm, c2i_call_pc);
    guarantee(stub != nullptr, "CodeCache is full at gen_continuation_enter");

    // GC map is registered at the current pc, i.e. after the call and its stub were emitted.
    oop_maps->add_gc_map(__ pc() - start, map);
    __ post_call_nop();

    __ b(L_exit);
  }

  // compiled entry
  __ align(CodeEntryAlignment);
  compiled_entry_offset = __ pc() - start;

  OopMap* map = continuation_enter_setup(masm, framesize_words);

  // Frame is now completed as far as size and linkage.
  frame_complete =__ pc() - start;

  // Note: isVirtualThread is what gets stored as the entry's flags.
  fill_continuation_entry(masm, reg_cont_obj, reg_is_virtual);

  // If isContinue, call to thaw. Otherwise, call Continuation.enter(Continuation c, boolean isContinue)
  __ cmpwi(CR0, reg_is_cont, 0);
  __ bne(CR0, L_thaw);

  // --- call Continuation.enter(Continuation c, boolean isContinue)

  // Emit compiled static call
  // The call needs to be resolved. There's a special case for this in
  // SharedRuntime::find_callee_info_helper() which calls
  // LinkResolver::resolve_continuation_enter() which resolves the call to
  // Continuation.enter(Continuation c, boolean isContinue).
  address call_pc = __ trampoline_call(resolve);
  guarantee(call_pc != nullptr, "CodeCache is full at gen_continuation_enter");

  oop_maps->add_gc_map(__ pc() - start, map);
  __ post_call_nop();

  __ b(L_exit);

  // --- Thawing path

  __ bind(L_thaw);
  ContinuationEntry::_thaw_call_pc_offset = __ pc() - start;
  // Indirect call to the thaw stub; its address is formed relative to the global TOC in R29_TOC.
  __ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(StubRoutines::cont_thaw()));
  __ mtctr(R0);
  __ bctrl();
  // 'map' was already added for the static call above; a copy is registered for this call site.
  oop_maps->add_gc_map(__ pc() - start, map->deep_copy());
  // Recorded right after the bctrl, i.e. the address the thaw call returns to.
  ContinuationEntry::_return_pc_offset = __ pc() - start;
  __ post_call_nop();

  // --- Normal exit (resolve/thawing)

  __ bind(L_exit);
  ContinuationEntry::_cleanup_offset = __ pc() - start;
  continuation_enter_cleanup(masm);

  // Pop frame and return
  // Debug builds compare the popped SP against the back chain word loaded from 0(R1_SP).
  // NOTE(review): asm_assert_eq is not wrapped in DEBUG_ONLY like its neighbours;
  // presumably it emits nothing in product builds -- confirm.
  DEBUG_ONLY(__ ld_ptr(R0, 0, R1_SP));
  __ addi(R1_SP, R1_SP, framesize_words*wordSize);
  DEBUG_ONLY(__ cmpd(CR0, R0, R1_SP));
  __ asm_assert_eq(FILE_AND_LINE ": inconsistent frame size");
  __ ld(R0, _abi0(lr), R1_SP); // Return pc
  __ mtlr(R0);
  __ blr();

  // --- Exception handling path

  exception_offset = __ pc() - start;

  continuation_enter_cleanup(masm);
  Register ex_pc  = R17_tos;   // nonvolatile register
  Register ex_oop = R15_esp;   // nonvolatile register
  // Two-step load: first the caller's SP into ex_pc, then the lr slot of that frame.
  __ ld(ex_pc, _abi0(callers_sp), R1_SP); // Load caller's return pc
  __ ld(ex_pc, _abi0(lr), ex_pc);
  __ mr(ex_oop, R3_RET);                  // save return value containing the exception oop
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), R16_thread, ex_pc);
  __ mtlr(R3_RET);                        // the exception handler
  __ ld(R1_SP, _abi0(callers_sp), R1_SP); // remove enterSpecial frame

  // Continue at exception handler
  // See OptoRuntime::generate_exception_blob for register arguments
  __ mr(R3_ARG1, ex_oop); // pass exception oop
  __ mr(R4_ARG2, ex_pc);  // pass exception pc
  __ blr();

  // static stub for the call above
  // (i.e. for call_pc of the compiled entry; the interpreter entry emitted its own stub)
  address stub = CompiledDirectCall::emit_to_interp_stub(masm, call_pc);
  guarantee(stub != nullptr, "CodeCache is full at gen_continuation_enter");
}
2034 
// Emits the code for the Continuation.doYield intrinsic.
//
// The generated code pushes a frame, calls Continuation::freeze_entry() as a
// leaf call with (R16_thread, R1_SP), and then either
//   - result == 0: switches R1_SP to the thread's current ContinuationEntry,
//     runs continuation_enter_cleanup and returns from that frame, or
//   - result != 0 (pinned): returns to the caller, or jumps to
//     StubRoutines::forward_exception_entry() if an exception is pending.
//
// Parameters:
//   masm                  - assembler to emit into
//   regs                  - argument locations (not used by this function)
//   oop_maps              - receives the GC map for the frame pushed here
//   frame_complete        - out: offset just after the frame has been pushed
//   framesize_words       - out: frame size in words
//   compiled_entry_offset - out: offset of the entry point
//
// Offsets are relative to the pc at which this function starts emitting.
static void gen_continuation_yield(MacroAssembler* masm,
                                   const VMRegPair* regs,
                                   OopMapSet* oop_maps,
                                   int& frame_complete,
                                   int& framesize_words,
                                   int& compiled_entry_offset) {
  Register tmp = R10_ARG8;

  const int framesize_bytes = (int)align_up((int)frame::native_abi_reg_args_size, frame::alignment_in_bytes);
  framesize_words = framesize_bytes / wordSize;

  address start = __ pc();
  // Nothing has been emitted since 'start' was taken, so this offset is 0.
  compiled_entry_offset = __ pc() - start;

  // Save return pc and push entry frame
  __ mflr(tmp);
  __ std(tmp, _abi0(lr), R1_SP);       // SP->lr = return_pc
  __ push_frame(framesize_bytes , R0); // SP -= frame_size_in_bytes

  DEBUG_ONLY(__ block_comment("Frame Complete"));
  frame_complete = __ pc() - start;
  // Address of the post call nop emitted next; used as last Java pc and as the GC map's pc.
  address last_java_pc = __ pc();

  // This nop must be exactly at the PC we push into the frame info.
  // We use this nop for fast CodeBlob lookup, associate the OopMap
  // with it right away.
  __ post_call_nop();
  OopMap* map = new OopMap(framesize_bytes / VMRegImpl::stack_slot_size, 1);
  oop_maps->add_gc_map(last_java_pc - start, map);

  __ calculate_address_from_global_toc(tmp, last_java_pc); // will be relocated
  __ set_last_Java_frame(R1_SP, tmp);
  __ call_VM_leaf(Continuation::freeze_entry(), R16_thread, R1_SP);
  __ reset_last_Java_frame();

  Label L_pinned;

  // A non-zero result from freeze takes the pinned path.
  __ cmpwi(CR0, R3_RET, 0);
  __ bne(CR0, L_pinned);

  // yield succeeded

  // Pop frames of continuation including this stub's frame
  __ ld_ptr(R1_SP, JavaThread::cont_entry_offset(), R16_thread);
  // The frame pushed by gen_continuation_enter is on top now again
  continuation_enter_cleanup(masm);

  // Pop frame and return
  // L_return is also the branch target of the pinned path when no exception is pending.
  Label L_return;
  __ bind(L_return);
  __ pop_frame();
  __ ld(R0, _abi0(lr), R1_SP); // Return pc
  __ mtlr(R0);
  __ blr();

  // yield failed - continuation is pinned

  __ bind(L_pinned);

  // handle pending exception thrown by freeze
  __ ld(tmp, in_bytes(JavaThread::pending_exception_offset()), R16_thread);
  __ cmpdi(CR0, tmp, 0);
  __ beq(CR0, L_return); // return if no exception is pending
  // Exception pending: pop this frame, restore LR and jump (not call) to the forwarding stub.
  __ pop_frame();
  __ ld(R0, _abi0(lr), R1_SP); // Return pc
  __ mtlr(R0);
  __ load_const_optimized(tmp, StubRoutines::forward_exception_entry(), R0);
  __ mtctr(tmp);
  __ bctr();
}
2105 
// SharedRuntime member that forwards to the file-local (static)
// continuation_enter_cleanup(MacroAssembler*). The leading '::' selects the
// file-scope function rather than this member.
void SharedRuntime::continuation_enter_cleanup(MacroAssembler* masm) {
  ::continuation_enter_cleanup(masm);
}
2109 
2110 // ---------------------------------------------------------------------------
2111 // Generate a native wrapper for a given method. The method takes arguments
2112 // in the Java compiled code convention, marshals them to the native
2113 // convention (handlizes oops, etc), transitions to native, makes the call,
2114 // returns to java state (possibly blocking), unhandlizes any result and
2115 // returns.
2116 //
// Critical native functions are a shorthand for the use of
// GetPrimitiveArrayCritical and disallow the use of any other JNI
// functions.  The wrapper is expected to unpack the arguments before
// passing them to the callee. Critical native functions leave the state _in_Java,
// since they cannot stop for GC.
// Some other parts of JNI setup are skipped, like the tear down of the JNI handle
// block and the check for pending exceptions, because it's impossible for them
// to be thrown.
2125 //
2126 nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
2127                                                 const methodHandle& method,
2128                                                 int compile_id,
2129                                                 BasicType *in_sig_bt,
2130                                                 VMRegPair *in_regs,
2131                                                 BasicType ret_type) {
2132   if (method->is_continuation_native_intrinsic()) {
2133     int exception_offset = -1;
2134     OopMapSet* oop_maps = new OopMapSet();
2135     int frame_complete = -1;
2136     int stack_slots = -1;
2137     int interpreted_entry_offset = -1;
2138     int vep_offset = -1;
2139     if (method->is_continuation_enter_intrinsic()) {
2140       gen_continuation_enter(masm,
2141                              in_regs,
2142                              exception_offset,
2143                              oop_maps,
2144                              frame_complete,
2145                              stack_slots,
2146                              interpreted_entry_offset,
2147                              vep_offset);
2148     } else if (method->is_continuation_yield_intrinsic()) {
2149       gen_continuation_yield(masm,
2150                              in_regs,
2151                              oop_maps,
2152                              frame_complete,
2153                              stack_slots,
2154                              vep_offset);
2155     } else {
2156       guarantee(false, "Unknown Continuation native intrinsic");
2157     }
2158 
2159 #ifdef ASSERT
2160     if (method->is_continuation_enter_intrinsic()) {
2161       assert(interpreted_entry_offset != -1, "Must be set");
2162       assert(exception_offset != -1,         "Must be set");
2163     } else {
2164       assert(interpreted_entry_offset == -1, "Must be unset");
2165       assert(exception_offset == -1,         "Must be unset");
2166     }
2167     assert(frame_complete != -1,    "Must be set");
2168     assert(stack_slots != -1,       "Must be set");
2169     assert(vep_offset != -1,        "Must be set");
2170 #endif
2171 
2172     __ flush();
2173     nmethod* nm = nmethod::new_native_nmethod(method,
2174                                               compile_id,
2175                                               masm->code(),
2176                                               vep_offset,
2177                                               frame_complete,
2178                                               stack_slots,
2179                                               in_ByteSize(-1),
2180                                               in_ByteSize(-1),
2181                                               oop_maps,
2182                                               exception_offset);
2183     if (nm == nullptr) return nm;
2184     if (method->is_continuation_enter_intrinsic()) {
2185       ContinuationEntry::set_enter_code(nm, interpreted_entry_offset);
2186     } else if (method->is_continuation_yield_intrinsic()) {
2187       _cont_doYield_stub = nm;
2188     }
2189     return nm;
2190   }
2191 
2192   if (method->is_method_handle_intrinsic()) {
2193     vmIntrinsics::ID iid = method->intrinsic_id();
2194     intptr_t start = (intptr_t)__ pc();
2195     int vep_offset = ((intptr_t)__ pc()) - start;
2196     gen_special_dispatch(masm,
2197                          method,
2198                          in_sig_bt,
2199                          in_regs);
2200     int frame_complete = ((intptr_t)__ pc()) - start;  // not complete, period
2201     __ flush();
2202     int stack_slots = SharedRuntime::out_preserve_stack_slots();  // no out slots at all, actually
2203     return nmethod::new_native_nmethod(method,
2204                                        compile_id,
2205                                        masm->code(),
2206                                        vep_offset,
2207                                        frame_complete,
2208                                        stack_slots / VMRegImpl::slots_per_word,
2209                                        in_ByteSize(-1),
2210                                        in_ByteSize(-1),
2211                                        (OopMapSet*)nullptr);
2212   }
2213 
2214   address native_func = method->native_function();
2215   assert(native_func != nullptr, "must have function");
2216 
2217   // First, create signature for outgoing C call
2218   // --------------------------------------------------------------------------
2219 
2220   int total_in_args = method->size_of_parameters();
2221   // We have received a description of where all the java args are located
2222   // on entry to the wrapper. We need to convert these args to where
2223   // the jni function will expect them. To figure out where they go
2224   // we convert the java signature to a C signature by inserting
2225   // the hidden arguments as arg[0] and possibly arg[1] (static method)
2226 
2227   // Calculate the total number of C arguments and create arrays for the
2228   // signature and the outgoing registers.
2229   // On ppc64, we have two arrays for the outgoing registers, because
2230   // some floating-point arguments must be passed in registers _and_
2231   // in stack locations.
2232   bool method_is_static = method->is_static();
2233   int  total_c_args     = total_in_args + (method_is_static ? 2 : 1);
2234 
2235   BasicType *out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
2236   VMRegPair *out_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);
2237 
2238   // Create the signature for the C call:
2239   //   1) add the JNIEnv*
2240   //   2) add the class if the method is static
2241   //   3) copy the rest of the incoming signature (shifted by the number of
2242   //      hidden arguments).
2243 
2244   int argc = 0;
2245   out_sig_bt[argc++] = T_ADDRESS;
2246   if (method->is_static()) {
2247     out_sig_bt[argc++] = T_OBJECT;
2248   }
2249 
2250   for (int i = 0; i < total_in_args ; i++ ) {
2251     out_sig_bt[argc++] = in_sig_bt[i];
2252   }
2253 
2254 
2255   // Compute the wrapper's frame size.
2256   // --------------------------------------------------------------------------
2257 
2258   // Now figure out where the args must be stored and how much stack space
2259   // they require.
2260   //
2261   // Compute framesize for the wrapper. We need to handlize all oops in
2262   // incoming registers.
2263   //
2264   // Calculate the total number of stack slots we will need:
2265   //   1) abi requirements
2266   //   2) outgoing arguments
2267   //   3) space for inbound oop handle area
2268   //   4) space for handlizing a klass if static method
2269   //   5) space for a lock if synchronized method
2270   //   6) workspace for saving return values, int <-> float reg moves, etc.
2271   //   7) alignment
2272   //
2273   // Layout of the native wrapper frame:
2274   // (stack grows upwards, memory grows downwards)
2275   //
2276   // NW     [ABI_REG_ARGS]             <-- 1) R1_SP
2277   //        [outgoing arguments]       <-- 2) R1_SP + out_arg_slot_offset
2278   //        [oopHandle area]           <-- 3) R1_SP + oop_handle_offset
2279   //        klass                      <-- 4) R1_SP + klass_offset
2280   //        lock                       <-- 5) R1_SP + lock_offset
2281   //        [workspace]                <-- 6) R1_SP + workspace_offset
2282   //        [alignment] (optional)     <-- 7)
2283   // caller [JIT_TOP_ABI_48]           <-- r_callers_sp
2284   //
2285   // - *_slot_offset Indicates offset from SP in number of stack slots.
2286   // - *_offset      Indicates offset from SP in bytes.
2287 
2288   int stack_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args) + // 1+2)
2289                     SharedRuntime::out_preserve_stack_slots(); // See c_calling_convention.
2290 
2291   // Now the space for the inbound oop handle area.
2292   int total_save_slots = num_java_iarg_registers * VMRegImpl::slots_per_word;
2293 
2294   int oop_handle_slot_offset = stack_slots;
2295   stack_slots += total_save_slots;                                                // 3)
2296 
2297   int klass_slot_offset = 0;
2298   int klass_offset      = -1;
2299   if (method_is_static) {                                                         // 4)
2300     klass_slot_offset  = stack_slots;
2301     klass_offset       = klass_slot_offset * VMRegImpl::stack_slot_size;
2302     stack_slots       += VMRegImpl::slots_per_word;
2303   }
2304 
2305   int lock_slot_offset = 0;
2306   int lock_offset      = -1;
2307   if (method->is_synchronized()) {                                                // 5)
2308     lock_slot_offset   = stack_slots;
2309     lock_offset        = lock_slot_offset * VMRegImpl::stack_slot_size;
2310     stack_slots       += VMRegImpl::slots_per_word;
2311   }
2312 
2313   int workspace_slot_offset = stack_slots;                                        // 6)
2314   stack_slots         += 2;
2315 
2316   // Now compute actual number of stack words we need.
2317   // Rounding to make stack properly aligned.
2318   stack_slots = align_up(stack_slots,                                             // 7)
2319                          frame::alignment_in_bytes / VMRegImpl::stack_slot_size);
2320   int frame_size_in_bytes = stack_slots * VMRegImpl::stack_slot_size;
2321 
2322 
2323   // Now we can start generating code.
2324   // --------------------------------------------------------------------------
2325 
2326   intptr_t start_pc = (intptr_t)__ pc();
2327   intptr_t vep_start_pc;
2328   intptr_t frame_done_pc;
2329 
2330   Label    handle_pending_exception;
2331   Label    last_java_pc;
2332 
2333   Register r_callers_sp = R21;
2334   Register r_temp_1     = R22;
2335   Register r_temp_2     = R23;
2336   Register r_temp_3     = R24;
2337   Register r_temp_4     = R25;
2338   Register r_temp_5     = R26;
2339   Register r_temp_6     = R27;
2340   Register r_last_java_pc = R28;
2341 
2342   Register r_carg1_jnienv        = noreg;
2343   Register r_carg2_classorobject = noreg;
2344   r_carg1_jnienv        = out_regs[0].first()->as_Register();
2345   r_carg2_classorobject = out_regs[1].first()->as_Register();
2346 
2347 
2348   // Generate the Unverified Entry Point (UEP).
2349   // --------------------------------------------------------------------------
2350   assert(start_pc == (intptr_t)__ pc(), "uep must be at start");
2351 
2352   // Check ic: object class == cached class?
2353   if (!method_is_static) {
2354     __ ic_check(4 /* end_alignment */);
2355   }
2356 
2357   // Generate the Verified Entry Point (VEP).
2358   // --------------------------------------------------------------------------
2359   vep_start_pc = (intptr_t)__ pc();
2360 
2361   if (method->needs_clinit_barrier()) {
2362     assert(VM_Version::supports_fast_class_init_checks(), "sanity");
2363     Label L_skip_barrier;
2364     Register klass = r_temp_1;
2365     // Notify OOP recorder (don't need the relocation)
2366     AddressLiteral md = __ constant_metadata_address(method->method_holder());
2367     __ load_const_optimized(klass, md.value(), R0);
2368     __ clinit_barrier(klass, R16_thread, &L_skip_barrier /*L_fast_path*/);
2369 
2370     __ load_const_optimized(klass, SharedRuntime::get_handle_wrong_method_stub(), R0);
2371     __ mtctr(klass);
2372     __ bctr();
2373 
2374     __ bind(L_skip_barrier);
2375   }
2376 
2377   __ save_LR(r_temp_1);
2378   __ generate_stack_overflow_check(frame_size_in_bytes); // Check before creating frame.
2379   __ mr(r_callers_sp, R1_SP);                            // Remember frame pointer.
2380   __ push_frame(frame_size_in_bytes, r_temp_1);          // Push the c2n adapter's frame.
2381 
2382   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
2383   bs->nmethod_entry_barrier(masm, r_temp_1);
2384 
2385   frame_done_pc = (intptr_t)__ pc();
2386 
2387   // Native nmethod wrappers never take possession of the oop arguments.
2388   // So the caller will gc the arguments.
2389   // The only thing we need an oopMap for is if the call is static.
2390   //
2391   // An OopMap for lock (and class if static), and one for the VM call itself.
2392   OopMapSet *oop_maps = new OopMapSet();
2393   OopMap    *oop_map  = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
2394 
2395   // Move arguments from register/stack to register/stack.
2396   // --------------------------------------------------------------------------
2397   //
2398   // We immediately shuffle the arguments so that for any vm call we have
2399   // to make from here on out (sync slow path, jvmti, etc.) we will have
2400   // captured the oops from our caller and have a valid oopMap for them.
2401   //
2402   // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv*
2403   // (derived from JavaThread* which is in R16_thread) and, if static,
2404   // the class mirror instead of a receiver. This pretty much guarantees that
2405   // register layout will not match. We ignore these extra arguments during
2406   // the shuffle. The shuffle is described by the two calling convention
2407   // vectors we have in our possession. We simply walk the java vector to
2408   // get the source locations and the c vector to get the destinations.
2409 
2410   // Record sp-based slot for receiver on stack for non-static methods.
2411   int receiver_offset = -1;
2412 
2413   // We move the arguments backward because the floating point registers
2414   // destination will always be to a register with a greater or equal
2415   // register number or the stack.
2416   //   in  is the index of the incoming Java arguments
2417   //   out is the index of the outgoing C arguments
2418 
2419 #ifdef ASSERT
2420   bool reg_destroyed[Register::number_of_registers];
2421   bool freg_destroyed[FloatRegister::number_of_registers];
2422   for (int r = 0 ; r < Register::number_of_registers ; r++) {
2423     reg_destroyed[r] = false;
2424   }
2425   for (int f = 0 ; f < FloatRegister::number_of_registers ; f++) {
2426     freg_destroyed[f] = false;
2427   }
2428 #endif // ASSERT
2429 
2430   for (int in = total_in_args - 1, out = total_c_args - 1; in >= 0 ; in--, out--) {
2431 
2432 #ifdef ASSERT
2433     if (in_regs[in].first()->is_Register()) {
2434       assert(!reg_destroyed[in_regs[in].first()->as_Register()->encoding()], "ack!");
2435     } else if (in_regs[in].first()->is_FloatRegister()) {
2436       assert(!freg_destroyed[in_regs[in].first()->as_FloatRegister()->encoding()], "ack!");
2437     }
2438     if (out_regs[out].first()->is_Register()) {
2439       reg_destroyed[out_regs[out].first()->as_Register()->encoding()] = true;
2440     } else if (out_regs[out].first()->is_FloatRegister()) {
2441       freg_destroyed[out_regs[out].first()->as_FloatRegister()->encoding()] = true;
2442     }
2443 #endif // ASSERT
2444 
2445     switch (in_sig_bt[in]) {
2446       case T_BOOLEAN:
2447       case T_CHAR:
2448       case T_BYTE:
2449       case T_SHORT:
2450       case T_INT:
2451         // Move int and do sign extension.
2452         int_move(masm, in_regs[in], out_regs[out], r_callers_sp, r_temp_1);
2453         break;
2454       case T_LONG:
2455         long_move(masm, in_regs[in], out_regs[out], r_callers_sp, r_temp_1);
2456         break;
2457       case T_ARRAY:
2458       case T_OBJECT:
2459         object_move(masm, stack_slots,
2460                     oop_map, oop_handle_slot_offset,
2461                     ((in == 0) && (!method_is_static)), &receiver_offset,
2462                     in_regs[in], out_regs[out],
2463                     r_callers_sp, r_temp_1, r_temp_2);
2464         break;
2465       case T_VOID:
2466         break;
2467       case T_FLOAT:
2468         float_move(masm, in_regs[in], out_regs[out], r_callers_sp, r_temp_1);
2469         break;
2470       case T_DOUBLE:
2471         double_move(masm, in_regs[in], out_regs[out], r_callers_sp, r_temp_1);
2472         break;
2473       case T_ADDRESS:
2474         fatal("found type (T_ADDRESS) in java args");
2475         break;
2476       default:
2477         ShouldNotReachHere();
2478         break;
2479     }
2480   }
2481 
2482   // Pre-load a static method's oop into ARG2.
2483   // Used both by locking code and the normal JNI call code.
2484   if (method_is_static) {
2485     __ set_oop_constant(JNIHandles::make_local(method->method_holder()->java_mirror()),
2486                         r_carg2_classorobject);
2487 
2488     // Now handlize the static class mirror in carg2. It's known not-null.
2489     __ std(r_carg2_classorobject, klass_offset, R1_SP);
2490     oop_map->set_oop(VMRegImpl::stack2reg(klass_slot_offset));
2491     __ addi(r_carg2_classorobject, R1_SP, klass_offset);
2492   }
2493 
2494   // Get JNIEnv* which is first argument to native.
2495   __ addi(r_carg1_jnienv, R16_thread, in_bytes(JavaThread::jni_environment_offset()));
2496 
2497   // NOTE:
2498   //
2499   // We have all of the arguments setup at this point.
2500   // We MUST NOT touch any outgoing regs from this point on.
2501   // So if we must call out we must push a new frame.
2502 
2503   // The last java pc will also be used as resume pc if this is the wrapper for wait0.
2504   // For this purpose the precise location matters but not for oopmap lookup.
2505   __ calculate_address_from_global_toc(r_last_java_pc, last_java_pc, true, true, true, true);
2506 
2507   // Make sure that thread is non-volatile; it crosses a bunch of VM calls below.
2508   assert(R16_thread->is_nonvolatile(), "thread must be in non-volatile register");
2509 
2510   // Lock a synchronized method.
2511   // --------------------------------------------------------------------------
2512 
2513   if (method->is_synchronized()) {
2514     Register          r_oop  = r_temp_4;
2515     const Register    r_box  = r_temp_5;
2516     Label             done, locked;
2517 
2518     // Load the oop for the object or class. r_carg2_classorobject contains
2519     // either the handlized oop from the incoming arguments or the handlized
2520     // class mirror (if the method is static).
2521     __ ld(r_oop, 0, r_carg2_classorobject);
2522 
2523     // Get the lock box slot's address.
2524     __ addi(r_box, R1_SP, lock_offset);
2525 
2526     // Try fastpath for locking.
2527     // fast_lock kills r_temp_1, r_temp_2, r_temp_3.
2528     Register r_temp_3_or_noreg = UseObjectMonitorTable ? r_temp_3 : noreg;
2529     __ compiler_fast_lock_object(CR0, r_oop, r_box, r_temp_1, r_temp_2, r_temp_3_or_noreg);
2530     __ beq(CR0, locked);
2531 
2532     // None of the above fast optimizations worked so we have to get into the
2533     // slow case of monitor enter. Inline a special case of call_VM that
2534     // disallows any pending_exception.
2535 
2536     // Save argument registers and leave room for C-compatible ABI_REG_ARGS.
2537     int frame_size = frame::native_abi_reg_args_size + align_up(total_c_args * wordSize, frame::alignment_in_bytes);
2538     __ mr(R11_scratch1, R1_SP);
2539     RegisterSaver::push_frame_and_save_argument_registers(masm, R12_scratch2, frame_size, total_c_args, out_regs);
2540 
2541     // Do the call.
2542     __ set_last_Java_frame(R11_scratch1, r_last_java_pc);
2543     assert(r_last_java_pc->is_nonvolatile(), "r_last_java_pc needs to be preserved accross complete_monitor_locking_C call");
2544     // The following call will not be preempted.
2545     // push_cont_fastpath forces freeze slow path in case we try to preempt where we will pin the
2546     // vthread to the carrier (see FreezeBase::recurse_freeze_native_frame()).
2547     __ push_cont_fastpath();
2548     __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), r_oop, r_box, R16_thread);
2549     __ pop_cont_fastpath();
2550     __ reset_last_Java_frame();
2551 
2552     RegisterSaver::restore_argument_registers_and_pop_frame(masm, frame_size, total_c_args, out_regs);
2553 
2554     __ asm_assert_mem8_is_zero(thread_(pending_exception),
2555        "no pending exception allowed on exit from SharedRuntime::complete_monitor_locking_C");
2556 
2557     __ bind(locked);
2558   }
2559 
2560   __ set_last_Java_frame(R1_SP, r_last_java_pc);
2561 
2562   // Publish thread state
2563   // --------------------------------------------------------------------------
2564 
2565   // Transition from _thread_in_Java to _thread_in_native.
2566   __ li(R0, _thread_in_native);
2567   __ release();
2568   // TODO: PPC port assert(4 == JavaThread::sz_thread_state(), "unexpected field size");
2569   __ stw(R0, thread_(thread_state));
2570 
2571 
2572   // The JNI call
2573   // --------------------------------------------------------------------------
2574   __ call_c(native_func, relocInfo::runtime_call_type);
2575 
2576 
2577   // Now, we are back from the native code.
2578 
2579 
2580   // Unpack the native result.
2581   // --------------------------------------------------------------------------
2582 
2583   // For int-types, we do any needed sign-extension required.
2584   // Care must be taken that the return values (R3_RET and F1_RET)
2585   // will survive any VM calls for blocking or unlocking.
2586   // An OOP result (handle) is done specially in the slow-path code.
2587 
2588   switch (ret_type) {
2589     case T_VOID:    break;        // Nothing to do!
2590     case T_FLOAT:   break;        // Got it where we want it (unless slow-path).
2591     case T_DOUBLE:  break;        // Got it where we want it (unless slow-path).
2592     case T_LONG:    break;        // Got it where we want it (unless slow-path).
2593     case T_OBJECT:  break;        // Really a handle.
2594                                   // Cannot de-handlize until after reclaiming jvm_lock.
2595     case T_ARRAY:   break;
2596 
2597     case T_BOOLEAN: {             // 0 -> false(0); !0 -> true(1)
2598       __ normalize_bool(R3_RET);
2599       break;
2600       }
2601     case T_BYTE: {                // sign extension
2602       __ extsb(R3_RET, R3_RET);
2603       break;
2604       }
2605     case T_CHAR: {                // unsigned result
2606       __ andi(R3_RET, R3_RET, 0xffff);
2607       break;
2608       }
2609     case T_SHORT: {               // sign extension
2610       __ extsh(R3_RET, R3_RET);
2611       break;
2612       }
2613     case T_INT:                   // nothing to do
2614       break;
2615     default:
2616       ShouldNotReachHere();
2617       break;
2618   }
2619 
2620   // Publish thread state
2621   // --------------------------------------------------------------------------
2622 
2623   // Switch thread to "native transition" state before reading the
2624   // synchronization state. This additional state is necessary because reading
2625   // and testing the synchronization state is not atomic w.r.t. GC, as this
2626   // scenario demonstrates:
2627   //   - Java thread A, in _thread_in_native state, loads _not_synchronized
2628   //     and is preempted.
2629   //   - VM thread changes sync state to synchronizing and suspends threads
2630   //     for GC.
2631   //   - Thread A is resumed to finish this native method, but doesn't block
2632   //     here since it didn't see any synchronization in progress, and escapes.
2633 
2634   // Transition from _thread_in_native to _thread_in_native_trans.
2635   __ li(R0, _thread_in_native_trans);
2636   __ release();
2637   // TODO: PPC port assert(4 == JavaThread::sz_thread_state(), "unexpected field size");
2638   __ stw(R0, thread_(thread_state));
2639 
2640 
2641   // Must we block?
2642   // --------------------------------------------------------------------------
2643 
2644   // Block, if necessary, before resuming in _thread_in_Java state.
2645   // In order for GC to work, don't clear the last_Java_sp until after blocking.
2646   {
2647     Label no_block, sync;
2648 
2649     // Force this write out before the read below.
2650     if (!UseSystemMemoryBarrier) {
2651       __ fence();
2652     }
2653 
2654     Register sync_state_addr = r_temp_4;
2655     Register sync_state      = r_temp_5;
2656     Register suspend_flags   = r_temp_6;
2657 
2658     // No synchronization in progress nor yet synchronized
2659     // (cmp-br-isync on one path, release (same as acquire on PPC64) on the other path).
2660     __ safepoint_poll(sync, sync_state, true /* at_return */, false /* in_nmethod */);
2661 
2662     // Not suspended.
2663     // TODO: PPC port assert(4 == Thread::sz_suspend_flags(), "unexpected field size");
2664     __ lwz(suspend_flags, thread_(suspend_flags));
2665     __ cmpwi(CR1, suspend_flags, 0);
2666     __ beq(CR1, no_block);
2667 
2668     // Block. Save any potential method result value before the operation and
2669     // use a leaf call to leave the last_Java_frame setup undisturbed. Doing this
2670     // lets us share the oopMap we used when we went native rather than create
2671     // a distinct one for this pc.
2672     __ bind(sync);
2673     __ isync();
2674 
2675     address entry_point =
2676       CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans);
2677     save_native_result(masm, ret_type, workspace_slot_offset);
2678     __ call_VM_leaf(entry_point, R16_thread);
2679     restore_native_result(masm, ret_type, workspace_slot_offset);
2680 
2681     __ bind(no_block);
2682 
2683     // Publish thread state.
2684     // --------------------------------------------------------------------------
2685 
2686     // Thread state is thread_in_native_trans. Any safepoint blocking has
2687     // already happened so we can now change state to _thread_in_Java.
2688 
2689     // Transition from _thread_in_native_trans to _thread_in_Java.
2690     __ li(R0, _thread_in_Java);
2691     __ lwsync(); // Acquire safepoint and suspend state, release thread state.
2692     // TODO: PPC port assert(4 == JavaThread::sz_thread_state(), "unexpected field size");
2693     __ stw(R0, thread_(thread_state));
2694 
2695     // Check preemption for Object.wait()
2696     if (method->is_object_wait0()) {
2697       Label not_preempted;
2698       __ ld(R0, in_bytes(JavaThread::preempt_alternate_return_offset()), R16_thread);
2699       __ cmpdi(CR0, R0, 0);
2700       __ beq(CR0, not_preempted);
2701       __ mtlr(R0);
2702       __ li(R0, 0);
2703       __ std(R0, in_bytes(JavaThread::preempt_alternate_return_offset()), R16_thread);
2704       __ blr();
2705       __ bind(not_preempted);
2706     }
2707     __ bind(last_java_pc);
2708     // We use the same pc/oopMap repeatedly when we call out above.
2709     intptr_t oopmap_pc = (intptr_t) __ pc();
2710     oop_maps->add_gc_map(oopmap_pc - start_pc, oop_map);
2711   }
2712 
2713   // Reguard any pages if necessary.
2714   // --------------------------------------------------------------------------
2715 
2716   Label no_reguard;
2717   __ lwz(r_temp_1, thread_(stack_guard_state));
2718   __ cmpwi(CR0, r_temp_1, StackOverflow::stack_guard_yellow_reserved_disabled);
2719   __ bne(CR0, no_reguard);
2720 
2721   save_native_result(masm, ret_type, workspace_slot_offset);
2722   __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages));
2723   restore_native_result(masm, ret_type, workspace_slot_offset);
2724 
2725   __ bind(no_reguard);
2726 
2727 
2728   // Unlock
2729   // --------------------------------------------------------------------------
2730 
2731   if (method->is_synchronized()) {
2732     const Register r_oop       = r_temp_4;
2733     const Register r_box       = r_temp_5;
2734     const Register r_exception = r_temp_6;
2735     Label done;
2736 
2737     // Get oop and address of lock object box.
2738     if (method_is_static) {
2739       assert(klass_offset != -1, "");
2740       __ ld(r_oop, klass_offset, R1_SP);
2741     } else {
2742       assert(receiver_offset != -1, "");
2743       __ ld(r_oop, receiver_offset, R1_SP);
2744     }
2745     __ addi(r_box, R1_SP, lock_offset);
2746 
2747     // Try fastpath for unlocking.
2748     __ compiler_fast_unlock_object(CR0, r_oop, r_box, r_temp_1, r_temp_2, r_temp_3);
2749     __ beq(CR0, done);
2750 
2751     // Save and restore any potential method result value around the unlocking operation.
2752     save_native_result(masm, ret_type, workspace_slot_offset);
2753 
2754     // Must save pending exception around the slow-path VM call. Since it's a
2755     // leaf call, the pending exception (if any) can be kept in a register.
2756     __ ld(r_exception, thread_(pending_exception));
2757     assert(r_exception->is_nonvolatile(), "exception register must be non-volatile");
2758     __ li(R0, 0);
2759     __ std(R0, thread_(pending_exception));
2760 
2761     // Slow case of monitor exit.
2762     // Inline a special case of call_VM that disallows any pending_exception.
2763     // Arguments are (oop obj, BasicLock* lock, JavaThread* thread).
2764     __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), r_oop, r_box, R16_thread);
2765 
2766     __ asm_assert_mem8_is_zero(thread_(pending_exception),
2767        "no pending exception allowed on exit from SharedRuntime::complete_monitor_unlocking_C");
2768 
2769     restore_native_result(masm, ret_type, workspace_slot_offset);
2770 
2771     // Check_forward_pending_exception jump to forward_exception if any pending
2772     // exception is set. The forward_exception routine expects to see the
2773     // exception in pending_exception and not in a register. Kind of clumsy,
2774     // since all folks who branch to forward_exception must have tested
2775     // pending_exception first and hence have it in a register already.
2776     __ std(r_exception, thread_(pending_exception));
2777 
2778     __ bind(done);
2779   }
2780 
2781   // Clear "last Java frame" SP and PC.
2782   // --------------------------------------------------------------------------
2783 
2784   // Last java frame won't be set if we're resuming after preemption
2785   bool maybe_preempted = method->is_object_wait0();
2786   __ reset_last_Java_frame(!maybe_preempted /* check_last_java_sp */);
2787 
2788   // Unbox oop result, e.g. JNIHandles::resolve value.
2789   // --------------------------------------------------------------------------
2790 
2791   if (is_reference_type(ret_type)) {
2792     __ resolve_jobject(R3_RET, r_temp_1, r_temp_2, MacroAssembler::PRESERVATION_NONE);
2793   }
2794 
2795   if (CheckJNICalls) {
2796     // clear_pending_jni_exception_check
2797     __ load_const_optimized(R0, 0L);
2798     __ st_ptr(R0, JavaThread::pending_jni_exception_check_fn_offset(), R16_thread);
2799   }
2800 
2801   // Reset handle block.
2802   // --------------------------------------------------------------------------
2803   __ ld(r_temp_1, thread_(active_handles));
2804   // TODO: PPC port assert(4 == JNIHandleBlock::top_size_in_bytes(), "unexpected field size");
2805   __ li(r_temp_2, 0);
2806   __ stw(r_temp_2, in_bytes(JNIHandleBlock::top_offset()), r_temp_1);
2807 
2808   // Prepare for return
2809   // --------------------------------------------------------------------------
2810   __ pop_frame();
2811   __ restore_LR(R11);
2812 
2813 #if INCLUDE_JFR
2814   // We need to do a poll test after unwind in case the sampler
2815   // managed to sample the native frame after returning to Java.
2816   Label L_stub;
2817   int safepoint_offset = __ offset();
2818   if (!UseSIGTRAP) {
2819     __ relocate(relocInfo::poll_return_type);
2820   }
2821   __ safepoint_poll(L_stub, r_temp_2, true /* at_return */, true /* in_nmethod: frame already popped */);
2822 #endif // INCLUDE_JFR
2823 
2824   // Check for pending exceptions.
2825   // --------------------------------------------------------------------------
2826   __ ld(r_temp_2, thread_(pending_exception));
2827   __ cmpdi(CR0, r_temp_2, 0);
2828   __ bne(CR0, handle_pending_exception);
2829 
2830   // Return.
2831   __ blr();
2832 
2833   // Handler for return safepoint (out-of-line).
2834 #if INCLUDE_JFR
2835   if (!UseSIGTRAP) {
2836     __ bind(L_stub);
2837     __ jump_to_polling_page_return_handler_blob(safepoint_offset);
2838   }
2839 #endif // INCLUDE_JFR
2840 
2841   // Handler for pending exceptions (out-of-line).
2842   // --------------------------------------------------------------------------
2843   // Since this is a native call, we know the proper exception handler
2844   // is the empty function. We just pop this frame and then jump to
2845   // forward_exception_entry.
2846   __ bind(handle_pending_exception);
2847   __ b64_patchable((address)StubRoutines::forward_exception_entry(),
2848                        relocInfo::runtime_call_type);
2849 
2850   // Done.
2851   // --------------------------------------------------------------------------
2852 
2853   __ flush();
2854 
2855   nmethod *nm = nmethod::new_native_nmethod(method,
2856                                             compile_id,
2857                                             masm->code(),
2858                                             vep_start_pc-start_pc,
2859                                             frame_done_pc-start_pc,
2860                                             stack_slots / VMRegImpl::slots_per_word,
2861                                             (method_is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
2862                                             in_ByteSize(lock_offset),
2863                                             oop_maps);
2864 
2865   return nm;
2866 }
2867 
2868 // Returns the size adjustment (in number of words) to apply to a c2i adapter
2869 // activation during deoptimization.
     //
     // The adjustment covers the callee's locals that are not parameters, scaled by
     // Interpreter::stackElementWords and rounded up to
     // frame::frame_alignment_in_words.
2870 int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
       // Locals beyond the parameters, first as a count, then in stack words.
       const int extra_locals = callee_locals - callee_parameters;
       const int extra_words  = extra_locals * Interpreter::stackElementWords;
2871   return align_up(extra_words, frame::frame_alignment_in_words);
2872 }
2873 
2874 uint SharedRuntime::in_preserve_stack_slots() {
       // frame::jit_in_preserve_size expressed in units of
       // VMRegImpl::stack_slot_size, i.e. as a number of stack slots.
       const uint preserved_slots = frame::jit_in_preserve_size / VMRegImpl::stack_slot_size;
2875   return preserved_slots;
2876 }
2877 
2878 uint SharedRuntime::out_preserve_stack_slots() {
       // Only builds with COMPILER1 or COMPILER2 defined report a non-zero value:
       // frame::jit_out_preserve_size expressed in VMRegImpl::stack_slot_size units.
2879 #if !defined(COMPILER1) && !defined(COMPILER2)
2880   return 0;
2881 #else
2882   return frame::jit_out_preserve_size / VMRegImpl::stack_slot_size;
2883 #endif
2884 }
2885 
2886 VMReg SharedRuntime::thread_register() {
2887   // On PPC, virtual threads do not save the JavaThread* in their context
       // (e.g. C1 stub frames), so this query is flagged as a call that must not
       // happen.
2888   ShouldNotCallThis();
       // NOTE(review): presumably never reached at runtime; the value below is what
       // the function returns should ShouldNotCallThis() ever return - confirm.
2889   const VMReg no_register = nullptr;
       return no_register;
2890 }
2891 
2892 #if defined(COMPILER1) || defined(COMPILER2)
2893 // Frame generation for deopt and uncommon trap blobs.
     //
     // Emits code that pushes a single skeleton frame:
     //   - the next pc and the next frame size are loaded from the arrays addressed
     //     by pcs_reg and frame_sizes_reg,
     //   - the pc is stored at _abi0(lr) of the frame that is on top before the push,
     //   - a new frame of the loaded size is pushed,
     //   - number_of_frames_reg is decremented and both array cursors are advanced
     //     by wordSize.
     //
     // Register usage (see also the annotations in the parameter list):
     //   frame_sizes_reg, pcs_reg - in/out, each advanced by one word
     //   number_of_frames_reg     - in/out, decremented by 1
     //   frame_size_reg, pc_reg   - overwritten with the loaded frame size and pc
     //   R0                       - passed to push_frame as temp
     // The parameters deopt and unroll_block_reg are not referenced in this body.
2894 static void push_skeleton_frame(MacroAssembler* masm, bool deopt,
2895                                 /* Read */
2896                                 Register unroll_block_reg,
2897                                 /* Update */
2898                                 Register frame_sizes_reg,
2899                                 Register number_of_frames_reg,
2900                                 Register pcs_reg,
2901                                 /* Invalidate */
2902                                 Register frame_size_reg,
2903                                 Register pc_reg) {
2904
2905   __ ld(pc_reg, 0, pcs_reg);                                // pc_reg = *pcs_reg
2906   __ ld(frame_size_reg, 0, frame_sizes_reg);                // frame_size_reg = *frame_sizes_reg
2907   __ std(pc_reg, _abi0(lr), R1_SP);                         // Store the pc at _abi0(lr) of the current top frame (before the push).
2908   __ push_frame(frame_size_reg, R0/*tmp*/);                 // Push a new frame sized by frame_size_reg.
       // Store the new SP at offset _ijava_state_neg(sender_sp) from itself.
       // NOTE(review): presumably this pre-initializes the sender_sp field of the
       // frame that gets pushed on top of this one - confirm.
2909   __ std(R1_SP, _ijava_state_neg(sender_sp), R1_SP);
2910   __ addi(number_of_frames_reg, number_of_frames_reg, -1);  // One frame fewer left to push.
2911   __ addi(frame_sizes_reg, frame_sizes_reg, wordSize);      // Advance to the next frame size.
2912   __ addi(pcs_reg, pcs_reg, wordSize);                      // Advance to the next pc.
2913 }
2914 
2915 // Loop through the UnrollBlock info and create new frames.
     //
     // Emits code that
     //   1) loads the number of frames and the pc / frame-size array pointers from
     //      the UnrollBlock addressed by unroll_block_reg,
     //   2) resizes the current top frame by
     //      -(caller_adjustment) - frame::parent_ijava_frame_abi_size and stores
     //      R12_scratch2 at _abi0(lr) of the resized frame,
     //   3) pushes one skeleton frame per UnrollBlock entry (push_skeleton_frame),
     //   4) stores the pc found after the last consumed pcs entry at _abi0(lr) of
     //      the final top frame.
     //
     // Register usage:
     //   unroll_block_reg - read only
     //   frame_sizes_reg, number_of_frames_reg, pcs_reg, frame_size_reg, pc_reg
     //                    - overwritten
     //   R0, R11_scratch1, CR0 - additionally overwritten
     //   R12_scratch2     - read but never written here; it has to hold the top
     //                      frame's lr on entry (NOTE(review): confirm in callers)
2916 static void push_skeleton_frames(MacroAssembler* masm, bool deopt,
2917                                  /* read */
2918                                  Register unroll_block_reg,
2919                                  /* invalidate */
2920                                  Register frame_sizes_reg,
2921                                  Register number_of_frames_reg,
2922                                  Register pcs_reg,
2923                                  Register frame_size_reg,
2924                                  Register pc_reg) {
2925   Label loop;
2926
2927   // _number_of_frames is of type int (deoptimization.hpp), hence the word load (lwa).
2928   __ lwa(number_of_frames_reg,
2929              in_bytes(Deoptimization::UnrollBlock::number_of_frames_offset()),
2930              unroll_block_reg);
2931   __ ld(pcs_reg,
2932             in_bytes(Deoptimization::UnrollBlock::frame_pcs_offset()),
2933             unroll_block_reg);
2934   __ ld(frame_sizes_reg,
2935             in_bytes(Deoptimization::UnrollBlock::frame_sizes_offset()),
2936             unroll_block_reg);
2937
2938   // stack: (caller_of_deoptee, ...).
2939
2940   // At this point we either have an interpreter frame or a compiled
2941   // frame on top of stack. If it is a compiled frame we push a new c2i
2942   // adapter here.
2943
2944   // Memorize top-frame stack-pointer. frame_size_reg serves as a temp for the
       // old SP until it is stored below; the loop overwrites it afterwards.
2945   __ mr(frame_size_reg/*old_sp*/, R1_SP);
2946
2947   // Resize interpreter top frame OR C2I adapter.
2948
2949   // At this moment, the top frame (which is the caller of the deoptee) is
2950   // an interpreter frame or a newly pushed C2I adapter or an entry frame.
2951   // The top frame has a TOP_IJAVA_FRAME_ABI and the frame contains the
2952   // outgoing arguments.
2953   //
2954   // In order to push the interpreter frame for the deoptee, we need to
2955   // resize the top frame such that we are able to place the deoptee's
2956   // locals in the frame.
2957   // Additionally, we have to turn the top frame's TOP_IJAVA_FRAME_ABI
2958   // into a valid PARENT_IJAVA_FRAME_ABI.
2959
2960   __ lwa(R11_scratch1,
2961              in_bytes(Deoptimization::UnrollBlock::caller_adjustment_offset()),
2962              unroll_block_reg);
2963   __ neg(R11_scratch1, R11_scratch1);  // R11_scratch1 = -caller_adjustment
2964
2965   // R11_scratch1 contains size of locals for frame resizing.
2966   // R12_scratch2 contains top frame's lr (it is not loaded in this function).
2967
2968   // Resize frame by complete frame size prevents TOC from being
2969   // overwritten by locals. A more stack space saving way would be
2970   // to copy the TOC to its location in the new abi.
2971   __ addi(R11_scratch1, R11_scratch1, - frame::parent_ijava_frame_abi_size);
2972
2973   // Now, resize the frame. pc_reg is only a temp here.
2974   __ resize_frame(R11_scratch1, pc_reg/*tmp*/);
2975
2976   // In the case where we have resized a c2i frame above, the optional
2977   // alignment below the locals has size 32 (why?).
2978   __ std(R12_scratch2, _abi0(lr), R1_SP);
2979
2980   // Initialize initial_caller_sp with the SP memorized before the resize.
2981  __ std(frame_size_reg, _ijava_state_neg(sender_sp), R1_SP);
2982
2983 #ifdef ASSERT
2984   // Make sure that there is at least one entry in the array.
2985   __ cmpdi(CR0, number_of_frames_reg, 0);
2986   __ asm_assert_ne("array_size must be > 0");
2987 #endif
2988
2989   // Now push the new interpreter frames.
2990   // The loop is bottom-tested: one frame is always pushed before the count is checked.
2991   __ bind(loop);
2992   // Allocate a new frame, fill in the pc.
2993   push_skeleton_frame(masm, deopt,
2994                       unroll_block_reg,
2995                       frame_sizes_reg,
2996                       number_of_frames_reg,
2997                       pcs_reg,
2998                       frame_size_reg,
2999                       pc_reg);
3000   __ cmpdi(CR0, number_of_frames_reg, 0);  // push_skeleton_frame decremented the count.
3001   __ bne(CR0, loop);
3002
3003   // Get the return address pointing into the template interpreter.
       // pcs_reg was advanced past every pc consumed by the loop, so this reads the
       // entry that follows them.
3004   __ ld(R0, 0, pcs_reg);
3005   // Store it in the top interpreter frame.
3006   __ std(R0, _abi0(lr), R1_SP);
3007   // Initialize frame_manager_lr of interpreter top frame.
       // NOTE(review): no code follows the comment above; it looks stale - confirm.
3008 }
3009 #endif
3010 
3011 void SharedRuntime::generate_deopt_blob() {
3012   // Allocate space for the code
3013   ResourceMark rm;
3014   // Setup code generation tools
3015   const char* name = SharedRuntime::stub_name(StubId::shared_deopt_id);
3016   CodeBuffer buffer(name, 2048, 1024);
3017   InterpreterMacroAssembler* masm = new InterpreterMacroAssembler(&buffer);
3018   Label exec_mode_initialized;
3019   OopMap* map = nullptr;
3020   OopMapSet *oop_maps = new OopMapSet();
3021 
3022   // size of ABI112 plus spill slots for R3_RET and F1_RET.
3023   const int frame_size_in_bytes = frame::native_abi_reg_args_spill_size;
3024   const int frame_size_in_slots = frame_size_in_bytes / sizeof(jint);
3025   int first_frame_size_in_bytes = 0; // frame size of "unpack frame" for call to fetch_unroll_info.
3026 
3027   const Register exec_mode_reg = R21_tmp1;
3028 
3029   const address start = __ pc();
3030   int exception_offset = 0;
3031   int exception_in_tls_offset = 0;
3032   int reexecute_offset = 0;
3033 
3034 #if defined(COMPILER1) || defined(COMPILER2)
3035   // --------------------------------------------------------------------------
3036   // Prolog for non exception case!
3037 
3038   // We have been called from the deopt handler of the deoptee.
3039   //
3040   // deoptee:
3041   //                      ...
3042   //                      call X
3043   //                      ...
3044   //  deopt_handler:      call_deopt_stub
3045   //  cur. return pc  --> ...
3046   //
3047   // The return_pc has been stored in the frame of the deoptee and
3048   // will replace the address of the deopt_handler in the call
3049   // to Deoptimization::fetch_unroll_info below.
3050 
3051   // Push the "unpack frame"
3052   // Save everything in sight.
3053   map = RegisterSaver::push_frame_reg_args_and_save_live_registers(masm,
3054                                                                    &first_frame_size_in_bytes,
3055                                                                    /*generate_oop_map=*/ true,
3056                                                                    RegisterSaver::return_pc_is_lr,
3057                                                                    /*save_vectors*/ SuperwordUseVSX);
3058   assert(map != nullptr, "OopMap must have been created");
3059 
3060   __ li(exec_mode_reg, Deoptimization::Unpack_deopt);
3061   // Save exec mode for unpack_frames.
3062   __ b(exec_mode_initialized);
3063 
3064   // --------------------------------------------------------------------------
3065   // Prolog for exception case
3066 
3067   // An exception is pending.
3068   // We have been called with a return (interpreter) or a jump (exception blob).
3069   //
3070   // - R3_ARG1: exception oop
3071   // - R4_ARG2: exception pc
3072 
3073   exception_offset = __ pc() - start;
3074 
3075   BLOCK_COMMENT("Prolog for exception case");
3076 
3077   // Store exception oop and pc in thread (location known to GC).
3078   // This is needed since the call to "fetch_unroll_info()" may safepoint.
3079   __ std(R3_ARG1, in_bytes(JavaThread::exception_oop_offset()), R16_thread);
3080   __ std(R4_ARG2, in_bytes(JavaThread::exception_pc_offset()),  R16_thread);
3081   __ std(R4_ARG2, _abi0(lr), R1_SP);
3082 
3083   // Vanilla deoptimization with an exception pending in exception_oop.
3084   exception_in_tls_offset = __ pc() - start;
3085 
3086   // Push the "unpack frame".
3087   // Save everything in sight.
3088   RegisterSaver::push_frame_reg_args_and_save_live_registers(masm,
3089                                                              &first_frame_size_in_bytes,
3090                                                              /*generate_oop_map=*/ false,
3091                                                              RegisterSaver::return_pc_is_pre_saved,
3092                                                              /*save_vectors*/ SuperwordUseVSX);
3093 
3094   // Deopt during an exception. Save exec mode for unpack_frames.
3095   __ li(exec_mode_reg, Deoptimization::Unpack_exception);
3096 
3097   // fall through
3098 #ifdef COMPILER1
3099   __ b(exec_mode_initialized);
3100 
3101   // Reexecute entry, similar to c2 uncommon trap
3102   reexecute_offset = __ pc() - start;
3103 
3104   RegisterSaver::push_frame_reg_args_and_save_live_registers(masm,
3105                                                              &first_frame_size_in_bytes,
3106                                                              /*generate_oop_map=*/ false,
3107                                                              RegisterSaver::return_pc_is_pre_saved,
3108                                                              /*save_vectors*/ SuperwordUseVSX);
3109   __ li(exec_mode_reg, Deoptimization::Unpack_reexecute);
3110 #endif
3111 
3112   // --------------------------------------------------------------------------
3113   __ BIND(exec_mode_initialized);
3114 
3115   const Register unroll_block_reg = R22_tmp2;
3116 
3117   // We need to set `last_Java_frame' because `fetch_unroll_info' will
3118   // call `last_Java_frame()'. The value of the pc in the frame is not
3119   // particularly important. It just needs to identify this blob.
3120   __ set_last_Java_frame(R1_SP, noreg);
3121 
3122   // With EscapeAnalysis turned on, this call may safepoint!
3123   __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), R16_thread, exec_mode_reg);
3124   address calls_return_pc = __ last_calls_return_pc();
3125   // Set an oopmap for the call site that describes all our saved registers.
3126   oop_maps->add_gc_map(calls_return_pc - start, map);
3127 
3128   __ reset_last_Java_frame();
3129   // Save the return value.
3130   __ mr(unroll_block_reg, R3_RET);
3131 
3132   // Restore only the result registers that have been saved
3133   // by save_volatile_registers(...).
3134   RegisterSaver::restore_result_registers(masm, first_frame_size_in_bytes, /*save_vectors*/ SuperwordUseVSX);
3135 
3136   // reload the exec mode from the UnrollBlock (it might have changed)
3137   __ lwz(exec_mode_reg, in_bytes(Deoptimization::UnrollBlock::unpack_kind_offset()), unroll_block_reg);
3138   // In excp_deopt_mode, restore and clear exception oop which we
3139   // stored in the thread during exception entry above. The exception
3140   // oop will be the return value of this stub.
3141   Label skip_restore_excp;
3142   __ cmpdi(CR0, exec_mode_reg, Deoptimization::Unpack_exception);
3143   __ bne(CR0, skip_restore_excp);
3144   __ ld(R3_RET, in_bytes(JavaThread::exception_oop_offset()), R16_thread);
3145   __ ld(R4_ARG2, in_bytes(JavaThread::exception_pc_offset()), R16_thread);
3146   __ li(R0, 0);
3147   __ std(R0, in_bytes(JavaThread::exception_pc_offset()),  R16_thread);
3148   __ std(R0, in_bytes(JavaThread::exception_oop_offset()), R16_thread);
3149   __ BIND(skip_restore_excp);
3150 
3151   __ pop_frame();
3152 
3153   // stack: (deoptee, optional i2c, caller of deoptee, ...).
3154 
3155   // pop the deoptee's frame
3156   __ pop_frame();
3157 
3158   // stack: (caller_of_deoptee, ...).
3159 
3160   // Freezing continuation frames requires that the caller is trimmed to unextended sp if compiled.
3161   // If not compiled the loaded value is equal to the current SP (see frame::initial_deoptimization_info())
3162   // and the frame is effectively not resized.
3163   Register caller_sp = R23_tmp3;
3164   __ ld_ptr(caller_sp, Deoptimization::UnrollBlock::initial_info_offset(), unroll_block_reg);
3165   __ resize_frame_absolute(caller_sp, R24_tmp4, R25_tmp5);
3166 
3167   // Loop through the `UnrollBlock' info and create interpreter frames.
3168   push_skeleton_frames(masm, true/*deopt*/,
3169                        unroll_block_reg,
3170                        R23_tmp3,
3171                        R24_tmp4,
3172                        R25_tmp5,
3173                        R26_tmp6,
3174                        R27_tmp7);
3175 
3176   // stack: (skeletal interpreter frame, ..., optional skeletal
3177   // interpreter frame, optional c2i, caller of deoptee, ...).
3178 
3179   // push an `unpack_frame' taking care of float / int return values.
3180   __ push_frame(frame_size_in_bytes, R0/*tmp*/);
3181 
3182   // stack: (unpack frame, skeletal interpreter frame, ..., optional
3183   // skeletal interpreter frame, optional c2i, caller of deoptee,
3184   // ...).
3185 
3186   // Spill live volatile registers since we'll do a call.
3187   __ std( R3_RET, _native_abi_reg_args_spill(spill_ret),  R1_SP);
3188   __ stfd(F1_RET, _native_abi_reg_args_spill(spill_fret), R1_SP);
3189 
3190   // Let the unpacker layout information in the skeletal frames just
3191   // allocated.
3192   __ calculate_address_from_global_toc(R3_RET, calls_return_pc, true, true, true, true);
3193   __ set_last_Java_frame(/*sp*/R1_SP, /*pc*/R3_RET);
3194   // This is a call to a LEAF method, so no oop map is required.
3195   __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames),
3196                   R16_thread/*thread*/, exec_mode_reg/*exec_mode*/);
3197   __ reset_last_Java_frame();
3198 
3199   // Restore the volatiles saved above.
3200   __ ld( R3_RET, _native_abi_reg_args_spill(spill_ret),  R1_SP);
3201   __ lfd(F1_RET, _native_abi_reg_args_spill(spill_fret), R1_SP);
3202 
3203   // Pop the unpack frame.
3204   __ pop_frame();
3205   __ restore_LR(R0);
3206 
3207   // stack: (top interpreter frame, ..., optional interpreter frame,
3208   // optional c2i, caller of deoptee, ...).
3209 
3210   // Initialize R14_state.
3211   __ restore_interpreter_state(R11_scratch1);
3212   __ load_const_optimized(R25_templateTableBase, (address)Interpreter::dispatch_table((TosState)0), R11_scratch1);
3213 
3214   // Return to the interpreter entry point.
3215   __ blr();
3216 #else // !defined(COMPILER1) && !defined(COMPILER2)
3217   __ unimplemented("deopt blob needed only with compiler");
3218 #endif
3219 
3220   // Make sure all code is generated
3221   __ flush();
3222 
3223   _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset,
3224                                            reexecute_offset, first_frame_size_in_bytes / wordSize);
3225   _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
3226 }
3227 
3228 #ifdef COMPILER2
3229 UncommonTrapBlob* OptoRuntime::generate_uncommon_trap_blob() {
3230   // Allocate space for the code.
3231   ResourceMark rm;
3232   // Setup code generation tools.
3233   const char* name = OptoRuntime::stub_name(StubId::c2_uncommon_trap_id);
3234   CodeBuffer buffer(name, 2048, 1024);
3235   if (buffer.blob() == nullptr) {
3236     return nullptr;
3237   }
3238   InterpreterMacroAssembler* masm = new InterpreterMacroAssembler(&buffer);
3239   address start = __ pc();
3240 
3241   Register unroll_block_reg = R21_tmp1;
3242   Register klass_index_reg  = R22_tmp2;
3243   Register unc_trap_reg     = R23_tmp3;
3244   Register r_return_pc      = R27_tmp7;
3245 
3246   OopMapSet* oop_maps = new OopMapSet();
3247   int frame_size_in_bytes = frame::native_abi_reg_args_size;
3248   OopMap* map = new OopMap(frame_size_in_bytes / sizeof(jint), 0);
3249 
3250   // stack: (deoptee, optional i2c, caller_of_deoptee, ...).
3251 
3252   // Push a dummy `unpack_frame' and call
3253   // `Deoptimization::uncommon_trap' to pack the compiled frame into a
3254   // vframe array and return the `UnrollBlock' information.
3255 
3256   // Save LR to compiled frame.
3257   __ save_LR(R11_scratch1);
3258 
3259   // Push an "uncommon_trap" frame.
3260   __ push_frame_reg_args(0, R11_scratch1);
3261 
3262   // stack: (unpack frame, deoptee, optional i2c, caller_of_deoptee, ...).
3263 
3264   // Set the `unpack_frame' as last_Java_frame.
3265   // `Deoptimization::uncommon_trap' expects it and considers its
3266   // sender frame as the deoptee frame.
3267   // Remember the offset of the instruction whose address will be
3268   // moved to R11_scratch1.
3269   address gc_map_pc = __ pc();
3270   __ calculate_address_from_global_toc(r_return_pc, gc_map_pc, true, true, true, true);
3271   __ set_last_Java_frame(/*sp*/R1_SP, r_return_pc);
3272 
3273   __ mr(klass_index_reg, R3);
3274   __ li(R5_ARG3, Deoptimization::Unpack_uncommon_trap);
3275   __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap),
3276                   R16_thread, klass_index_reg, R5_ARG3);
3277 
3278   // Set an oopmap for the call site.
3279   oop_maps->add_gc_map(gc_map_pc - start, map);
3280 
3281   __ reset_last_Java_frame();
3282 
3283   // Pop the `unpack frame'.
3284   __ pop_frame();
3285 
3286   // stack: (deoptee, optional i2c, caller_of_deoptee, ...).
3287 
3288   // Save the return value.
3289   __ mr(unroll_block_reg, R3_RET);
3290 
3291   // Pop the uncommon_trap frame.
3292   __ pop_frame();
3293 
3294   // stack: (caller_of_deoptee, ...).
3295 
3296 #ifdef ASSERT
3297   __ lwz(R22_tmp2, in_bytes(Deoptimization::UnrollBlock::unpack_kind_offset()), unroll_block_reg);
3298   __ cmpdi(CR0, R22_tmp2, (unsigned)Deoptimization::Unpack_uncommon_trap);
3299   __ asm_assert_eq("OptoRuntime::generate_uncommon_trap_blob: expected Unpack_uncommon_trap");
3300 #endif
3301 
3302   // Freezing continuation frames requires that the caller is trimmed to unextended sp if compiled.
3303   // If not compiled the loaded value is equal to the current SP (see frame::initial_deoptimization_info())
3304   // and the frame is effectively not resized.
3305   Register caller_sp = R23_tmp3;
3306   __ ld_ptr(caller_sp, Deoptimization::UnrollBlock::initial_info_offset(), unroll_block_reg);
3307   __ resize_frame_absolute(caller_sp, R24_tmp4, R25_tmp5);
3308 
3309   // Allocate new interpreter frame(s) and possibly a c2i adapter
3310   // frame.
3311   push_skeleton_frames(masm, false/*deopt*/,
3312                        unroll_block_reg,
3313                        R22_tmp2,
3314                        R23_tmp3,
3315                        R24_tmp4,
3316                        R25_tmp5,
3317                        R26_tmp6);
3318 
3319   // stack: (skeletal interpreter frame, ..., optional skeletal
3320   // interpreter frame, optional c2i, caller of deoptee, ...).
3321 
3322   // Push a dummy `unpack_frame' taking care of float return values.
3323   // Call `Deoptimization::unpack_frames' to layout information in the
3324   // interpreter frames just created.
3325 
3326   // Push a simple "unpack frame" here.
3327   __ push_frame_reg_args(0, R11_scratch1);
3328 
3329   // stack: (unpack frame, skeletal interpreter frame, ..., optional
3330   // skeletal interpreter frame, optional c2i, caller of deoptee,
3331   // ...).
3332 
3333   // Set the "unpack_frame" as last_Java_frame.
3334   __ set_last_Java_frame(/*sp*/R1_SP, r_return_pc);
3335 
3336   // Indicate it is the uncommon trap case.
3337   __ li(unc_trap_reg, Deoptimization::Unpack_uncommon_trap);
3338   // Let the unpacker layout information in the skeletal frames just
3339   // allocated.
3340   __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames),
3341                   R16_thread, unc_trap_reg);
3342 
3343   __ reset_last_Java_frame();
3344   // Pop the `unpack frame'.
3345   __ pop_frame();
3346   // Restore LR from top interpreter frame.
3347   __ restore_LR(R11_scratch1);
3348 
3349   // stack: (top interpreter frame, ..., optional interpreter frame,
3350   // optional c2i, caller of deoptee, ...).
3351 
3352   __ restore_interpreter_state(R11_scratch1);
3353   __ load_const_optimized(R25_templateTableBase, (address)Interpreter::dispatch_table((TosState)0), R11_scratch1);
3354 
3355   // Return to the interpreter entry point.
3356   __ blr();
3357 
3358   masm->flush();
3359 
3360   return UncommonTrapBlob::create(&buffer, oop_maps, frame_size_in_bytes/wordSize);
3361 }
3362 #endif // COMPILER2
3363 
3364 // Generate a special Compile2Runtime blob that saves all registers, and setup oopmap.
3365 SafepointBlob* SharedRuntime::generate_handler_blob(StubId id, address call_ptr) {
3366   assert(StubRoutines::forward_exception_entry() != nullptr,
3367          "must be generated before");
3368   assert(is_polling_page_id(id), "expected a polling page stub id");
3369 
3370   ResourceMark rm;
3371   OopMapSet *oop_maps = new OopMapSet();
3372   OopMap* map;
3373 
3374   // Allocate space for the code. Setup code generation tools.
3375   const char* name = SharedRuntime::stub_name(id);
3376   CodeBuffer buffer(name, 2048, 1024);
3377   MacroAssembler* masm = new MacroAssembler(&buffer);
3378 
3379   address start = __ pc();
3380   int frame_size_in_bytes = 0;
3381 
3382   RegisterSaver::ReturnPCLocation return_pc_location;
3383   bool cause_return = (id == StubId::shared_polling_page_return_handler_id);
3384   if (cause_return) {
3385     // Nothing to do here. The frame has already been popped in MachEpilogNode.
3386     // Register LR already contains the return pc.
3387     return_pc_location = RegisterSaver::return_pc_is_pre_saved;
3388   } else {
3389     // Use thread()->saved_exception_pc() as return pc.
3390     return_pc_location = RegisterSaver::return_pc_is_thread_saved_exception_pc;
3391   }
3392 
3393   bool save_vectors = (id == StubId::shared_polling_page_vectors_safepoint_handler_id);
3394 
3395   // Save registers, fpu state, and flags. Set R31 = return pc.
3396   map = RegisterSaver::push_frame_reg_args_and_save_live_registers(masm,
3397                                                                    &frame_size_in_bytes,
3398                                                                    /*generate_oop_map=*/ true,
3399                                                                    return_pc_location, save_vectors);
3400 
3401   // The following is basically a call_VM. However, we need the precise
3402   // address of the call in order to generate an oopmap. Hence, we do all the
3403   // work ourselves.
3404   __ set_last_Java_frame(/*sp=*/R1_SP, /*pc=*/noreg);
3405 
3406   // The return address must always be correct so that the frame constructor
3407   // never sees an invalid pc.
3408 
3409   // Do the call
3410   __ call_VM_leaf(call_ptr, R16_thread);
3411   address calls_return_pc = __ last_calls_return_pc();
3412 
3413   // Set an oopmap for the call site. This oopmap will map all
3414   // oop-registers and debug-info registers as callee-saved. This
3415   // will allow deoptimization at this safepoint to find all possible
3416   // debug-info recordings, as well as let GC find all oops.
3417   oop_maps->add_gc_map(calls_return_pc - start, map);
3418 
3419   Label noException;
3420 
3421   // Clear the last Java frame.
3422   __ reset_last_Java_frame();
3423 
3424   BLOCK_COMMENT("  Check pending exception.");
3425   const Register pending_exception = R0;
3426   __ ld(pending_exception, thread_(pending_exception));
3427   __ cmpdi(CR0, pending_exception, 0);
3428   __ beq(CR0, noException);
3429 
3430   // Exception pending
3431   RegisterSaver::restore_live_registers_and_pop_frame(masm,
3432                                                       frame_size_in_bytes,
3433                                                       /*restore_ctr=*/true, save_vectors);
3434 
3435   BLOCK_COMMENT("  Jump to forward_exception_entry.");
3436   // Jump to forward_exception_entry, with the issuing PC in LR
3437   // so it looks like the original nmethod called forward_exception_entry.
3438   __ b64_patchable(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
3439 
3440   // No exception case.
3441   __ BIND(noException);
3442 
3443   if (!cause_return) {
3444     Label no_adjust;
3445     // If our stashed return pc was modified by the runtime we avoid touching it
3446     __ ld(R0, frame_size_in_bytes + _abi0(lr), R1_SP);
3447     __ cmpd(CR0, R0, R31);
3448     __ bne(CR0, no_adjust);
3449 
3450     // Adjust return pc forward to step over the safepoint poll instruction
3451     __ addi(R31, R31, 4);
3452     __ std(R31, frame_size_in_bytes + _abi0(lr), R1_SP);
3453 
3454     __ bind(no_adjust);
3455   }
3456 
3457   // Normal exit, restore registers and exit.
3458   RegisterSaver::restore_live_registers_and_pop_frame(masm,
3459                                                       frame_size_in_bytes,
3460                                                       /*restore_ctr=*/true, save_vectors);
3461 
3462   __ blr();
3463 
3464   // Make sure all code is generated
3465   masm->flush();
3466 
3467   // Fill-out other meta info
3468   // CodeBlob frame size is in words.
3469   return SafepointBlob::create(&buffer, oop_maps, frame_size_in_bytes / wordSize);
3470 }
3471 
3472 // generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss)
3473 //
3474 // Generate a stub that calls into the vm to find out the proper destination
3475 // of a java call. All the argument registers are live at this point
3476 // but since this is generic code we don't know what they are and the caller
3477 // must do any gc of the args.
3478 //
3479 RuntimeStub* SharedRuntime::generate_resolve_blob(StubId id, address destination) {
3480   assert(is_resolve_id(id), "expected a resolve stub id");
3481 
3482   // allocate space for the code
3483   ResourceMark rm;
3484 
3485   const char* name = SharedRuntime::stub_name(id);
3486   CodeBuffer buffer(name, 1000, 512);
3487   MacroAssembler* masm = new MacroAssembler(&buffer);
3488 
3489   int frame_size_in_bytes;
3490 
3491   OopMapSet *oop_maps = new OopMapSet();
3492   OopMap* map = nullptr;
3493 
3494   address start = __ pc();
3495 
3496   map = RegisterSaver::push_frame_reg_args_and_save_live_registers(masm,
3497                                                                    &frame_size_in_bytes,
3498                                                                    /*generate_oop_map*/ true,
3499                                                                    RegisterSaver::return_pc_is_lr);
3500 
3501   // Use noreg as last_Java_pc, the return pc will be reconstructed
3502   // from the physical frame.
3503   __ set_last_Java_frame(/*sp*/R1_SP, noreg);
3504 
3505   int frame_complete = __ offset();
3506 
3507   // Pass R19_method as 2nd (optional) argument, used by
3508   // counter_overflow_stub.
3509   __ call_VM_leaf(destination, R16_thread, R19_method);
3510   address calls_return_pc = __ last_calls_return_pc();
3511   // Set an oopmap for the call site.
3512   // We need this not only for callee-saved registers, but also for volatile
3513   // registers that the compiler might be keeping live across a safepoint.
3514   // Create the oopmap for the call's return pc.
3515   oop_maps->add_gc_map(calls_return_pc - start, map);
3516 
3517   // R3_RET contains the address we are going to jump to assuming no exception got installed.
3518 
3519   // clear last_Java_sp
3520   __ reset_last_Java_frame();
3521 
3522   // Check for pending exceptions.
3523   BLOCK_COMMENT("Check for pending exceptions.");
3524   Label pending;
3525   __ ld(R11_scratch1, thread_(pending_exception));
3526   __ cmpdi(CR0, R11_scratch1, 0);
3527   __ bne(CR0, pending);
3528 
3529   __ mtctr(R3_RET); // Ctr will not be touched by restore_live_registers_and_pop_frame.
3530 
3531   RegisterSaver::restore_live_registers_and_pop_frame(masm, frame_size_in_bytes, /*restore_ctr*/ false);
3532 
3533   // Get the returned method.
3534   __ get_vm_result_metadata(R19_method);
3535 
3536   __ bctr();
3537 
3538 
3539   // Pending exception after the safepoint.
3540   __ BIND(pending);
3541 
3542   RegisterSaver::restore_live_registers_and_pop_frame(masm, frame_size_in_bytes, /*restore_ctr*/ true);
3543 
3544   // exception pending => remove activation and forward to exception handler
3545 
3546   __ li(R11_scratch1, 0);
3547   __ ld(R3_ARG1, thread_(pending_exception));
3548   __ std(R11_scratch1, in_bytes(JavaThread::vm_result_oop_offset()), R16_thread);
3549   __ b64_patchable(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type);
3550 
3551   // -------------
3552   // Make sure all code is generated.
3553   masm->flush();
3554 
3555   // return the blob
3556   // frame_size_words or bytes??
3557   return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_bytes/wordSize,
3558                                        oop_maps, true);
3559 }
3560 
3561 // Continuation point for throwing of implicit exceptions that are
3562 // not handled in the current activation. Fabricates an exception
3563 // oop and initiates normal exception dispatching in this
3564 // frame. Only callee-saved registers are preserved (through the
3565 // normal register window / RegisterMap handling).  If the compiler
3566 // needs all registers to be preserved between the fault point and
3567 // the exception handler then it must assume responsibility for that
3568 // in AbstractCompiler::continuation_for_implicit_null_exception or
3569 // continuation_for_implicit_division_by_zero_exception. All other
3570 // implicit exceptions (e.g., NullPointerException or
3571 // AbstractMethodError on entry) are either at call sites or
3572 // otherwise assume that stack unwinding will be initiated, so
3573 // caller saved registers were assumed volatile in the compiler.
3574 //
3575 // Note that we generate only this stub into a RuntimeStub, because
3576 // it needs to be properly traversed and ignored during GC, so we
3577 // change the meaning of the "__" macro within this method.
3578 //
3579 // Note: the routine set_pc_not_at_call_for_caller in
3580 // SharedRuntime.cpp requires that this code be generated into a
3581 // RuntimeStub.
3582 RuntimeStub* SharedRuntime::generate_throw_exception(StubId id, address runtime_entry) {
3583   assert(is_throw_id(id), "expected a throw stub id");
3584 
3585   const char* name = SharedRuntime::stub_name(id);
3586 
3587   ResourceMark rm;
3588   const char* timer_msg = "SharedRuntime generate_throw_exception";
3589   TraceTime timer(timer_msg, TRACETIME_LOG(Info, startuptime));
3590 
3591   CodeBuffer code(name, 1024 DEBUG_ONLY(+ 512), 0);
3592   MacroAssembler* masm = new MacroAssembler(&code);
3593 
3594   OopMapSet* oop_maps  = new OopMapSet();
3595   int frame_size_in_bytes = frame::native_abi_reg_args_size;
3596   OopMap* map = new OopMap(frame_size_in_bytes / sizeof(jint), 0);
3597 
3598   address start = __ pc();
3599 
3600   __ save_LR(R11_scratch1);
3601 
3602   // Push a frame.
3603   __ push_frame_reg_args(0, R11_scratch1);
3604 
3605   address frame_complete_pc = __ pc();
3606 
3607   // Note that we always have a runtime stub frame on the top of
3608   // stack by this point. Remember the offset of the instruction
3609   // whose address will be moved to R11_scratch1.
3610   address gc_map_pc = __ get_PC_trash_LR(R11_scratch1);
3611 
3612   __ set_last_Java_frame(/*sp*/R1_SP, /*pc*/R11_scratch1);
3613 
3614   __ mr(R3_ARG1, R16_thread);
3615   __ call_c(runtime_entry);
3616 
3617   // Set an oopmap for the call site.
3618   oop_maps->add_gc_map((int)(gc_map_pc - start), map);
3619 
3620   __ reset_last_Java_frame();
3621 
3622 #ifdef ASSERT
3623   // Make sure that this code is only executed if there is a pending
3624   // exception.
3625   {
3626     Label L;
3627     __ ld(R0,
3628           in_bytes(Thread::pending_exception_offset()),
3629           R16_thread);
3630     __ cmpdi(CR0, R0, 0);
3631     __ bne(CR0, L);
3632     __ stop("SharedRuntime::throw_exception: no pending exception");
3633     __ bind(L);
3634   }
3635 #endif
3636 
3637   // Pop frame.
3638   __ pop_frame();
3639 
3640   __ restore_LR(R11_scratch1);
3641 
3642   __ load_const(R11_scratch1, StubRoutines::forward_exception_entry());
3643   __ mtctr(R11_scratch1);
3644   __ bctr();
3645 
3646   // Create runtime stub with OopMap.
3647   RuntimeStub* stub =
3648     RuntimeStub::new_runtime_stub(name, &code,
3649                                   /*frame_complete=*/ (int)(frame_complete_pc - start),
3650                                   frame_size_in_bytes/wordSize,
3651                                   oop_maps,
3652                                   false);
3653   return stub;
3654 }
3655 
3656 //------------------------------Montgomery multiplication------------------------
3657 //
3658 
3659 // Subtract 0:b from carry:a. Return carry.
3660 static unsigned long
3661 sub(unsigned long a[], unsigned long b[], unsigned long carry, long len) {
3662   long i = 0;
3663   unsigned long tmp, tmp2;
3664   __asm__ __volatile__ (
3665     "subfc  %[tmp], %[tmp], %[tmp]   \n" // pre-set CA
3666     "mtctr  %[len]                   \n"
3667     "0:                              \n"
3668     "ldx    %[tmp], %[i], %[a]       \n"
3669     "ldx    %[tmp2], %[i], %[b]      \n"
3670     "subfe  %[tmp], %[tmp2], %[tmp]  \n" // subtract extended
3671     "stdx   %[tmp], %[i], %[a]       \n"
3672     "addi   %[i], %[i], 8            \n"
3673     "bdnz   0b                       \n"
3674     "addme  %[tmp], %[carry]         \n" // carry + CA - 1
3675     : [i]"+b"(i), [tmp]"=&r"(tmp), [tmp2]"=&r"(tmp2)
3676     : [a]"r"(a), [b]"r"(b), [carry]"r"(carry), [len]"r"(len)
3677     : "ctr", "xer", "memory"
3678   );
3679   return tmp;
3680 }
3681 
3682 // Multiply (unsigned) Long A by Long B, accumulating the double-
3683 // length result into the accumulator formed of T0, T1, and T2.
3684 inline void MACC(unsigned long A, unsigned long B, unsigned long &T0, unsigned long &T1, unsigned long &T2) {
3685   unsigned long hi, lo;
3686   __asm__ __volatile__ (
3687     "mulld  %[lo], %[A], %[B]    \n"
3688     "mulhdu %[hi], %[A], %[B]    \n"
3689     "addc   %[T0], %[T0], %[lo]  \n"
3690     "adde   %[T1], %[T1], %[hi]  \n"
3691     "addze  %[T2], %[T2]         \n"
3692     : [hi]"=&r"(hi), [lo]"=&r"(lo), [T0]"+r"(T0), [T1]"+r"(T1), [T2]"+r"(T2)
3693     : [A]"r"(A), [B]"r"(B)
3694     : "xer"
3695   );
3696 }
3697 
3698 // As above, but add twice the double-length result into the
3699 // accumulator.
3700 inline void MACC2(unsigned long A, unsigned long B, unsigned long &T0, unsigned long &T1, unsigned long &T2) {
3701   unsigned long hi, lo;
3702   __asm__ __volatile__ (
3703     "mulld  %[lo], %[A], %[B]    \n"
3704     "mulhdu %[hi], %[A], %[B]    \n"
3705     "addc   %[T0], %[T0], %[lo]  \n"
3706     "adde   %[T1], %[T1], %[hi]  \n"
3707     "addze  %[T2], %[T2]         \n"
3708     "addc   %[T0], %[T0], %[lo]  \n"
3709     "adde   %[T1], %[T1], %[hi]  \n"
3710     "addze  %[T2], %[T2]         \n"
3711     : [hi]"=&r"(hi), [lo]"=&r"(lo), [T0]"+r"(T0), [T1]"+r"(T1), [T2]"+r"(T2)
3712     : [A]"r"(A), [B]"r"(B)
3713     : "xer"
3714   );
3715 }
3716 
3717 // Fast Montgomery multiplication. The derivation of the algorithm is
3718 // in "A Cryptographic Library for the Motorola DSP56000,
3719 // Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237".
3720 static void
3721 montgomery_multiply(unsigned long a[], unsigned long b[], unsigned long n[],
3722                     unsigned long m[], unsigned long inv, int len) {
3723   unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
3724   int i;
3725 
3726   assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");
3727 
3728   for (i = 0; i < len; i++) {
3729     int j;
3730     for (j = 0; j < i; j++) {
3731       MACC(a[j], b[i-j], t0, t1, t2);
3732       MACC(m[j], n[i-j], t0, t1, t2);
3733     }
3734     MACC(a[i], b[0], t0, t1, t2);
3735     m[i] = t0 * inv;
3736     MACC(m[i], n[0], t0, t1, t2);
3737 
3738     assert(t0 == 0, "broken Montgomery multiply");
3739 
3740     t0 = t1; t1 = t2; t2 = 0;
3741   }
3742 
3743   for (i = len; i < 2*len; i++) {
3744     int j;
3745     for (j = i-len+1; j < len; j++) {
3746       MACC(a[j], b[i-j], t0, t1, t2);
3747       MACC(m[j], n[i-j], t0, t1, t2);
3748     }
3749     m[i-len] = t0;
3750     t0 = t1; t1 = t2; t2 = 0;
3751   }
3752 
3753   while (t0) {
3754     t0 = sub(m, n, t0, len);
3755   }
3756 }
3757 
3758 // Fast Montgomery squaring. This uses asymptotically 25% fewer
3759 // multiplies so it should be up to 25% faster than Montgomery
3760 // multiplication. However, its loop control is more complex and it
3761 // may actually run slower on some machines.
3762 static void
3763 montgomery_square(unsigned long a[], unsigned long n[],
3764                   unsigned long m[], unsigned long inv, int len) {
3765   unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
3766   int i;
3767 
3768   assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");
3769 
3770   for (i = 0; i < len; i++) {
3771     int j;
3772     int end = (i+1)/2;
3773     for (j = 0; j < end; j++) {
3774       MACC2(a[j], a[i-j], t0, t1, t2);
3775       MACC(m[j], n[i-j], t0, t1, t2);
3776     }
3777     if ((i & 1) == 0) {
3778       MACC(a[j], a[j], t0, t1, t2);
3779     }
3780     for (; j < i; j++) {
3781       MACC(m[j], n[i-j], t0, t1, t2);
3782     }
3783     m[i] = t0 * inv;
3784     MACC(m[i], n[0], t0, t1, t2);
3785 
3786     assert(t0 == 0, "broken Montgomery square");
3787 
3788     t0 = t1; t1 = t2; t2 = 0;
3789   }
3790 
3791   for (i = len; i < 2*len; i++) {
3792     int start = i-len+1;
3793     int end = start + (len - start)/2;
3794     int j;
3795     for (j = start; j < end; j++) {
3796       MACC2(a[j], a[i-j], t0, t1, t2);
3797       MACC(m[j], n[i-j], t0, t1, t2);
3798     }
3799     if ((i & 1) == 0) {
3800       MACC(a[j], a[j], t0, t1, t2);
3801     }
3802     for (; j < len; j++) {
3803       MACC(m[j], n[i-j], t0, t1, t2);
3804     }
3805     m[i-len] = t0;
3806     t0 = t1; t1 = t2; t2 = 0;
3807   }
3808 
3809   while (t0) {
3810     t0 = sub(m, n, t0, len);
3811   }
3812 }
3813 
// The threshold at which squaring is advantageous was determined
// experimentally on an i7-3930K (Ivy Bridge) CPU @ 3.5GHz.
// Doesn't seem to be relevant for Power8 so we use the same value.
// The value is compared against the operand length in jints:
// SharedRuntime::montgomery_square uses the dedicated squaring routine
// for lengths >= the threshold and the general multiply routine below it.
#define MONTGOMERY_SQUARING_THRESHOLD 64
3818 
// Copy len longwords from s to d in reverse order: s[0] ends up in
// d[len-1], s[1] in d[len-2], and so on. On little endian builds
// (VM_LITTLE_ENDIAN) the two 32-bit words inside every longword are
// exchanged as well. Nothing is copied when len is not positive.
static void reverse_words(unsigned long *s, unsigned long *d, int len) {
  for (int src_idx = 0, dst_idx = len - 1; src_idx < len; src_idx++, dst_idx--) {
    unsigned long word = s[src_idx];
#ifdef VM_LITTLE_ENDIAN
    // Swap words in a longword on little endian machines.
    word = (word >> 32) | (word << 32);
#endif
    d[dst_idx] = word;
  }
}
3834 
3835 void SharedRuntime::montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints,
3836                                         jint len, jlong inv,
3837                                         jint *m_ints) {
3838   len = len & 0x7fffFFFF; // C2 does not respect int to long conversion for stub calls.
3839   assert(len % 2 == 0, "array length in montgomery_multiply must be even");
3840   int longwords = len/2;
3841 
3842   // Make very sure we don't use so much space that the stack might
3843   // overflow. 512 jints corresponds to an 16384-bit integer and
3844   // will use here a total of 8k bytes of stack space.
3845   int divisor = sizeof(unsigned long) * 4;
3846   guarantee(longwords <= 8192 / divisor, "must be");
3847   int total_allocation = longwords * sizeof (unsigned long) * 4;
3848   unsigned long *scratch = (unsigned long *)alloca(total_allocation);
3849 
3850   // Local scratch arrays
3851   unsigned long
3852     *a = scratch + 0 * longwords,
3853     *b = scratch + 1 * longwords,
3854     *n = scratch + 2 * longwords,
3855     *m = scratch + 3 * longwords;
3856 
3857   reverse_words((unsigned long *)a_ints, a, longwords);
3858   reverse_words((unsigned long *)b_ints, b, longwords);
3859   reverse_words((unsigned long *)n_ints, n, longwords);
3860 
3861   ::montgomery_multiply(a, b, n, m, (unsigned long)inv, longwords);
3862 
3863   reverse_words(m, (unsigned long *)m_ints, longwords);
3864 }
3865 
3866 void SharedRuntime::montgomery_square(jint *a_ints, jint *n_ints,
3867                                       jint len, jlong inv,
3868                                       jint *m_ints) {
3869   len = len & 0x7fffFFFF; // C2 does not respect int to long conversion for stub calls.
3870   assert(len % 2 == 0, "array length in montgomery_square must be even");
3871   int longwords = len/2;
3872 
3873   // Make very sure we don't use so much space that the stack might
3874   // overflow. 512 jints corresponds to an 16384-bit integer and
3875   // will use here a total of 6k bytes of stack space.
3876   int divisor = sizeof(unsigned long) * 3;
3877   guarantee(longwords <= (8192 / divisor), "must be");
3878   int total_allocation = longwords * sizeof (unsigned long) * 3;
3879   unsigned long *scratch = (unsigned long *)alloca(total_allocation);
3880 
3881   // Local scratch arrays
3882   unsigned long
3883     *a = scratch + 0 * longwords,
3884     *n = scratch + 1 * longwords,
3885     *m = scratch + 2 * longwords;
3886 
3887   reverse_words((unsigned long *)a_ints, a, longwords);
3888   reverse_words((unsigned long *)n_ints, n, longwords);
3889 
3890   if (len >= MONTGOMERY_SQUARING_THRESHOLD) {
3891     ::montgomery_square(a, n, m, (unsigned long)inv, longwords);
3892   } else {
3893     ::montgomery_multiply(a, a, n, m, (unsigned long)inv, longwords);
3894   }
3895 
3896   reverse_words(m, (unsigned long *)m_ints, longwords);
3897 }
3898 
// Adapter generation for buffered inline types is not provided here:
// the function only calls Unimplemented() and ignores vk.
// NOTE(review): the nullptr return presumably just keeps the function
// well-formed and is never reached -- confirm Unimplemented() does not
// return.
BufferedInlineTypeBlob* SharedRuntime::generate_buffered_inline_type_adapter(const InlineKlass* vk) {
  Unimplemented();
  return nullptr;
}
3903 
3904 #if INCLUDE_JFR
3905 
// For c2: c_rarg0 is junk, call to runtime to write a checkpoint.
// It returns a jobject handle to the event writer.
// The handle is dereferenced and the return value is the event writer oop.
//
// Returns the RuntimeStub created from the emitted code.
RuntimeStub* SharedRuntime::generate_jfr_write_checkpoint() {
  const char* name = SharedRuntime::stub_name(StubId::shared_jfr_write_checkpoint_id);
  CodeBuffer code(name, 512, 64);
  MacroAssembler* masm = new MacroAssembler(&code);

  // Scratch registers used by the stub code below.
  Register tmp1 = R10_ARG8;
  Register tmp2 = R9_ARG7;

  // Frame size expressed in VMRegImpl stack slots.
  int framesize = frame::native_abi_reg_args_size / VMRegImpl::stack_slot_size;
  address start = __ pc();
  __ mflr(tmp1);
  __ std(tmp1, _abi0(lr), R1_SP);  // save return pc
  __ push_frame_reg_args(0, tmp1);
  // Code offset (relative to start) just after the frame has been pushed.
  int frame_complete = __ pc() - start;
  __ set_last_Java_frame(R1_SP, noreg);
  // Leaf call into the runtime with the current thread (R16_thread) as argument.
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, JfrIntrinsicSupport::write_checkpoint), R16_thread);
  // Return address of the call above; the oop map is registered at this pc.
  address calls_return_pc = __ last_calls_return_pc();
  __ reset_last_Java_frame();
  // The handle is dereferenced through a load barrier.
  __ resolve_global_jobject(R3_RET, tmp1, tmp2, MacroAssembler::PRESERVATION_NONE);
  // Tear down the frame, reload the saved return pc and return.
  __ pop_frame();
  __ ld(tmp1, _abi0(lr), R1_SP);
  __ mtlr(tmp1);
  __ blr();

  // A single oop map for the call site; no entries are added to it here.
  OopMapSet* oop_maps = new OopMapSet();
  OopMap* map = new OopMap(framesize, 0);
  oop_maps->add_gc_map(calls_return_pc - start, map);

  RuntimeStub* stub = // codeBlob framesize is in words (not VMRegImpl::slot_size)
    RuntimeStub::new_runtime_stub(name, &code, frame_complete,
                                  (framesize >> (LogBytesPerWord - LogBytesPerInt)),
                                  oop_maps, false);
  return stub;
}
3944 
3945 // For c2: call to return a leased buffer.
3946 RuntimeStub* SharedRuntime::generate_jfr_return_lease() {
3947   const char* name = SharedRuntime::stub_name(StubId::shared_jfr_return_lease_id);
3948   CodeBuffer code(name, 512, 64);
3949   MacroAssembler* masm = new MacroAssembler(&code);
3950 
3951   Register tmp1 = R10_ARG8;
3952   Register tmp2 = R9_ARG7;
3953 
3954   int framesize = frame::native_abi_reg_args_size / VMRegImpl::stack_slot_size;
3955   address start = __ pc();
3956   __ mflr(tmp1);
3957   __ std(tmp1, _abi0(lr), R1_SP);  // save return pc
3958   __ push_frame_reg_args(0, tmp1);
3959   int frame_complete = __ pc() - start;
3960   __ set_last_Java_frame(R1_SP, noreg);
3961   __ call_VM_leaf(CAST_FROM_FN_PTR(address, JfrIntrinsicSupport::return_lease), R16_thread);
3962   address calls_return_pc = __ last_calls_return_pc();
3963   __ reset_last_Java_frame();
3964   __ pop_frame();
3965   __ ld(tmp1, _abi0(lr), R1_SP);
3966   __ mtlr(tmp1);
3967   __ blr();
3968 
3969   OopMapSet* oop_maps = new OopMapSet();
3970   OopMap* map = new OopMap(framesize, 0);
3971   oop_maps->add_gc_map(calls_return_pc - start, map);
3972 
3973   RuntimeStub* stub = // codeBlob framesize is in words (not VMRegImpl::slot_size)
3974     RuntimeStub::new_runtime_stub(name, &code, frame_complete,
3975                                   (framesize >> (LogBytesPerWord - LogBytesPerInt)),
3976                                   oop_maps, false);
3977   return stub;
3978 }
3979 #endif // INCLUDE_JFR