1 /*
   2  * Copyright (c) 2016, 2026, Oracle and/or its affiliates. All rights reserved.
   3  * Copyright (c) 2016, 2024 SAP SE. All rights reserved.
   4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5  *
   6  * This code is free software; you can redistribute it and/or modify it
   7  * under the terms of the GNU General Public License version 2 only, as
   8  * published by the Free Software Foundation.
   9  *
  10  * This code is distributed in the hope that it will be useful, but WITHOUT
  11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13  * version 2 for more details (a copy is included in the LICENSE file that
  14  * accompanied this code).
  15  *
  16  * You should have received a copy of the GNU General Public License version
  17  * 2 along with this work; if not, write to the Free Software Foundation,
  18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19  *
  20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21  * or visit www.oracle.com if you need additional information or have any
  22  * questions.
  23  *
  24  */
  25 
  26 #include "asm/macroAssembler.inline.hpp"
  27 #include "code/debugInfoRec.hpp"
  28 #include "code/vtableStubs.hpp"
  29 #include "code/compiledIC.hpp"
  30 #include "compiler/oopMap.hpp"
  31 #include "gc/shared/barrierSetAssembler.hpp"
  32 #include "gc/shared/gcLocker.hpp"
  33 #include "interpreter/interpreter.hpp"
  34 #include "interpreter/interp_masm.hpp"
  35 #include "memory/resourceArea.hpp"
  36 #include "nativeInst_s390.hpp"
  37 #include "oops/klass.inline.hpp"
  38 #include "prims/methodHandles.hpp"
  39 #include "registerSaver_s390.hpp"
  40 #include "runtime/jniHandles.hpp"
  41 #include "runtime/safepointMechanism.hpp"
  42 #include "runtime/sharedRuntime.hpp"
  43 #include "runtime/signature.hpp"
  44 #include "runtime/stubRoutines.hpp"
  45 #include "runtime/timerTrace.hpp"
  46 #include "runtime/vframeArray.hpp"
  47 #include "utilities/align.hpp"
  48 #include "utilities/macros.hpp"
  49 #include "vmreg_s390.inline.hpp"
  50 #ifdef COMPILER1
  51 #include "c1/c1_Runtime1.hpp"
  52 #endif
  53 #ifdef COMPILER2
  54 #include "opto/ad.hpp"
  55 #include "opto/runtime.hpp"
  56 #endif
  57 
// '__' is shorthand for emitting through the MacroAssembler pointer named
// 'masm' that must be in scope at the point of use.
#ifdef PRODUCT
#define __ masm->
#else
// Non-product builds: when Verbose is set, every use of '__' first records a
// block comment carrying FILE_AND_LINE, then continues with 'masm'.
#define __ (Verbose ? (masm->block_comment(FILE_AND_LINE),masm):masm)->
#endif

// Emit 'str' as a block comment via '__'.
#define BLOCK_COMMENT(str) __ block_comment(str)
// Expands to bind(label) followed by a block comment "<label>:"; meant to be
// written after '__' (i.e. '__ BIND(label);'). Note that this expands to two
// statements.
#define BIND(label)        bind(label); BLOCK_COMMENT(#label ":")
  66 
// Helpers for building RegisterSaver::LiveRegType table entries. Each one
// expands to a brace initializer made of the register kind, the register's
// encoding() and its as_VMReg().

// Integer register that is saved/restored.
#define RegisterSaver_LiveIntReg(regname) \
  { RegisterSaver::int_reg,   regname->encoding(), regname->as_VMReg() }

// Float register that is saved/restored.
#define RegisterSaver_LiveFloatReg(regname) \
  { RegisterSaver::float_reg, regname->encoding(), regname->as_VMReg() }

// Registers which are not saved/restored, but still they have got a frame slot.
// Used to get same frame size for RegisterSaver_LiveRegs and RegisterSaver_LiveRegsWithoutR2.
#define RegisterSaver_ExcludedIntReg(regname) \
  { RegisterSaver::excluded_reg, regname->encoding(), regname->as_VMReg() }

// Registers which are not saved/restored, but still they have got a frame slot.
// Used to get same frame size for RegisterSaver_LiveRegs and RegisterSaver_LiveRegsWithoutR2.
// (Expands to the same initializer as RegisterSaver_ExcludedIntReg; the separate
// name only documents the kind of register at the use site.)
#define RegisterSaver_ExcludedFloatReg(regname) \
  { RegisterSaver::excluded_reg, regname->encoding(), regname->as_VMReg() }

// Vector register that is saved/restored.
#define RegisterSaver_LiveVReg(regname) \
  { RegisterSaver::v_reg,      regname->encoding(), regname->as_VMReg() }
  85 
// Register table selected for the 'all_registers' register set; it is also
// the table walked by restore_result_registers().
// It holds 15 float entries followed by 12 integer entries; the save/restore
// loops advance the stack offset by reg_size per entry.
static const RegisterSaver::LiveRegType RegisterSaver_LiveRegs[] = {
  // Live registers which get spilled to the stack. Register positions
  // in this array correspond directly to the stack layout.
  //
  // live float registers:
  //
  RegisterSaver_LiveFloatReg(Z_F0 ),
  // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1)
  RegisterSaver_LiveFloatReg(Z_F2 ),
  RegisterSaver_LiveFloatReg(Z_F3 ),
  RegisterSaver_LiveFloatReg(Z_F4 ),
  RegisterSaver_LiveFloatReg(Z_F5 ),
  RegisterSaver_LiveFloatReg(Z_F6 ),
  RegisterSaver_LiveFloatReg(Z_F7 ),
  RegisterSaver_LiveFloatReg(Z_F8 ),
  RegisterSaver_LiveFloatReg(Z_F9 ),
  RegisterSaver_LiveFloatReg(Z_F10),
  RegisterSaver_LiveFloatReg(Z_F11),
  RegisterSaver_LiveFloatReg(Z_F12),
  RegisterSaver_LiveFloatReg(Z_F13),
  RegisterSaver_LiveFloatReg(Z_F14),
  RegisterSaver_LiveFloatReg(Z_F15),
  //
  // live integer registers (kept as one consecutive run, after all floats):
  //
  // RegisterSaver_ExcludedIntReg(Z_R0), // scratch
  // RegisterSaver_ExcludedIntReg(Z_R1), // scratch
  RegisterSaver_LiveIntReg(Z_R2 ),
  RegisterSaver_LiveIntReg(Z_R3 ),
  RegisterSaver_LiveIntReg(Z_R4 ),
  RegisterSaver_LiveIntReg(Z_R5 ),
  RegisterSaver_LiveIntReg(Z_R6 ),
  RegisterSaver_LiveIntReg(Z_R7 ),
  RegisterSaver_LiveIntReg(Z_R8 ),
  RegisterSaver_LiveIntReg(Z_R9 ),
  RegisterSaver_LiveIntReg(Z_R10),
  RegisterSaver_LiveIntReg(Z_R11),
  RegisterSaver_LiveIntReg(Z_R12),
  RegisterSaver_LiveIntReg(Z_R13),
  // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.)
  // RegisterSaver_ExcludedIntReg(Z_R15)  // stack pointer
};
 126 
// Register table selected for the 'all_integer_registers' register set.
// Only the integer registers are saved/restored. The float entries are
// excluded placeholders, so this table has the same number of entries (and
// therefore the same save area size) as RegisterSaver_LiveRegs.
static const RegisterSaver::LiveRegType RegisterSaver_LiveIntRegs[] = {
  // Live registers which get spilled to the stack. Register positions
  // in this array correspond directly to the stack layout.
  //
  // live float registers: All excluded, but still they get a stack slot to get same frame size.
  //
  RegisterSaver_ExcludedFloatReg(Z_F0 ),
  // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1)
  RegisterSaver_ExcludedFloatReg(Z_F2 ),
  RegisterSaver_ExcludedFloatReg(Z_F3 ),
  RegisterSaver_ExcludedFloatReg(Z_F4 ),
  RegisterSaver_ExcludedFloatReg(Z_F5 ),
  RegisterSaver_ExcludedFloatReg(Z_F6 ),
  RegisterSaver_ExcludedFloatReg(Z_F7 ),
  RegisterSaver_ExcludedFloatReg(Z_F8 ),
  RegisterSaver_ExcludedFloatReg(Z_F9 ),
  RegisterSaver_ExcludedFloatReg(Z_F10),
  RegisterSaver_ExcludedFloatReg(Z_F11),
  RegisterSaver_ExcludedFloatReg(Z_F12),
  RegisterSaver_ExcludedFloatReg(Z_F13),
  RegisterSaver_ExcludedFloatReg(Z_F14),
  RegisterSaver_ExcludedFloatReg(Z_F15),
  //
  // live integer registers:
  //
  // RegisterSaver_ExcludedIntReg(Z_R0), // scratch
  // RegisterSaver_ExcludedIntReg(Z_R1), // scratch
  RegisterSaver_LiveIntReg(Z_R2 ),
  RegisterSaver_LiveIntReg(Z_R3 ),
  RegisterSaver_LiveIntReg(Z_R4 ),
  RegisterSaver_LiveIntReg(Z_R5 ),
  RegisterSaver_LiveIntReg(Z_R6 ),
  RegisterSaver_LiveIntReg(Z_R7 ),
  RegisterSaver_LiveIntReg(Z_R8 ),
  RegisterSaver_LiveIntReg(Z_R9 ),
  RegisterSaver_LiveIntReg(Z_R10),
  RegisterSaver_LiveIntReg(Z_R11),
  RegisterSaver_LiveIntReg(Z_R12),
  RegisterSaver_LiveIntReg(Z_R13),
  // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.)
  // RegisterSaver_ExcludedIntReg(Z_R15)  // stack pointer
};
 167 
// Register table selected for the 'all_registers_except_r2' register set.
// Identical to RegisterSaver_LiveRegs except that Z_R2 is an excluded
// placeholder: it keeps its slot (same save area size) but is neither stored
// nor reloaded.
static const RegisterSaver::LiveRegType RegisterSaver_LiveRegsWithoutR2[] = {
  // Live registers which get spilled to the stack. Register positions
  // in this array correspond directly to the stack layout.
  //
  // live float registers:
  //
  RegisterSaver_LiveFloatReg(Z_F0 ),
  // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1)
  RegisterSaver_LiveFloatReg(Z_F2 ),
  RegisterSaver_LiveFloatReg(Z_F3 ),
  RegisterSaver_LiveFloatReg(Z_F4 ),
  RegisterSaver_LiveFloatReg(Z_F5 ),
  RegisterSaver_LiveFloatReg(Z_F6 ),
  RegisterSaver_LiveFloatReg(Z_F7 ),
  RegisterSaver_LiveFloatReg(Z_F8 ),
  RegisterSaver_LiveFloatReg(Z_F9 ),
  RegisterSaver_LiveFloatReg(Z_F10),
  RegisterSaver_LiveFloatReg(Z_F11),
  RegisterSaver_LiveFloatReg(Z_F12),
  RegisterSaver_LiveFloatReg(Z_F13),
  RegisterSaver_LiveFloatReg(Z_F14),
  RegisterSaver_LiveFloatReg(Z_F15),
  //
  // live integer registers (Z_R2 keeps a slot but is skipped):
  //
  // RegisterSaver_ExcludedIntReg(Z_R0), // scratch
  // RegisterSaver_ExcludedIntReg(Z_R1), // scratch
  RegisterSaver_ExcludedIntReg(Z_R2), // Omit saving R2.
  RegisterSaver_LiveIntReg(Z_R3 ),
  RegisterSaver_LiveIntReg(Z_R4 ),
  RegisterSaver_LiveIntReg(Z_R5 ),
  RegisterSaver_LiveIntReg(Z_R6 ),
  RegisterSaver_LiveIntReg(Z_R7 ),
  RegisterSaver_LiveIntReg(Z_R8 ),
  RegisterSaver_LiveIntReg(Z_R9 ),
  RegisterSaver_LiveIntReg(Z_R10),
  RegisterSaver_LiveIntReg(Z_R11),
  RegisterSaver_LiveIntReg(Z_R12),
  RegisterSaver_LiveIntReg(Z_R13),
  // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.)
  // RegisterSaver_ExcludedIntReg(Z_R15)  // stack pointer
};
 208 
// Live argument registers which get spilled to the stack.
// Register table selected for the 'arg_registers' register set: the four
// float argument registers first, then the five integer argument registers.
// As with the other tables, the entry order is the order of the save slots.
static const RegisterSaver::LiveRegType RegisterSaver_LiveArgRegs[] = {
  RegisterSaver_LiveFloatReg(Z_FARG1),
  RegisterSaver_LiveFloatReg(Z_FARG2),
  RegisterSaver_LiveFloatReg(Z_FARG3),
  RegisterSaver_LiveFloatReg(Z_FARG4),
  RegisterSaver_LiveIntReg(Z_ARG1),
  RegisterSaver_LiveIntReg(Z_ARG2),
  RegisterSaver_LiveIntReg(Z_ARG3),
  RegisterSaver_LiveIntReg(Z_ARG4),
  RegisterSaver_LiveIntReg(Z_ARG5)
};
 221 
// Register table selected for the 'all_volatile_registers' register set.
// Registers marked "non-volatile" below are commented out, i.e. they get no
// entry and no slot, which makes this save area smaller than the one of
// RegisterSaver_LiveRegs (7 float + 4 integer entries).
static const RegisterSaver::LiveRegType RegisterSaver_LiveVolatileRegs[] = {
  // Live registers which get spilled to the stack. Register positions
  // in this array correspond directly to the stack layout.
  //
  // live float registers:
  //
  RegisterSaver_LiveFloatReg(Z_F0 ),
  // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1)
  RegisterSaver_LiveFloatReg(Z_F2 ),
  RegisterSaver_LiveFloatReg(Z_F3 ),
  RegisterSaver_LiveFloatReg(Z_F4 ),
  RegisterSaver_LiveFloatReg(Z_F5 ),
  RegisterSaver_LiveFloatReg(Z_F6 ),
  RegisterSaver_LiveFloatReg(Z_F7 ),
  // RegisterSaver_LiveFloatReg(Z_F8 ), // non-volatile
  // RegisterSaver_LiveFloatReg(Z_F9 ), // non-volatile
  // RegisterSaver_LiveFloatReg(Z_F10), // non-volatile
  // RegisterSaver_LiveFloatReg(Z_F11), // non-volatile
  // RegisterSaver_LiveFloatReg(Z_F12), // non-volatile
  // RegisterSaver_LiveFloatReg(Z_F13), // non-volatile
  // RegisterSaver_LiveFloatReg(Z_F14), // non-volatile
  // RegisterSaver_LiveFloatReg(Z_F15), // non-volatile
  //
  // live integer registers:
  //
  // RegisterSaver_ExcludedIntReg(Z_R0), // scratch
  // RegisterSaver_ExcludedIntReg(Z_R1), // scratch
  RegisterSaver_LiveIntReg(Z_R2 ),
  RegisterSaver_LiveIntReg(Z_R3 ),
  RegisterSaver_LiveIntReg(Z_R4 ),
  RegisterSaver_LiveIntReg(Z_R5 ),
  // RegisterSaver_LiveIntReg(Z_R6 ), // non-volatile
  // RegisterSaver_LiveIntReg(Z_R7 ), // non-volatile
  // RegisterSaver_LiveIntReg(Z_R8 ), // non-volatile
  // RegisterSaver_LiveIntReg(Z_R9 ), // non-volatile
  // RegisterSaver_LiveIntReg(Z_R10), // non-volatile
  // RegisterSaver_LiveIntReg(Z_R11), // non-volatile
  // RegisterSaver_LiveIntReg(Z_R12), // non-volatile
  // RegisterSaver_LiveIntReg(Z_R13), // non-volatile
  // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.)
  // RegisterSaver_ExcludedIntReg(Z_R15)  // stack pointer
};
 262 
// Vector registers that are saved/restored in addition to the selected
// register set when the caller passes save_vectors == true. Their slots
// follow the int/float save area, and each entry takes v_reg_size bytes.
static const RegisterSaver::LiveRegType RegisterSaver_LiveVRegs[] = {
  // live vector registers (optional, only these are used by C2):
  RegisterSaver_LiveVReg( Z_V16 ),
  RegisterSaver_LiveVReg( Z_V17 ),
  RegisterSaver_LiveVReg( Z_V18 ),
  RegisterSaver_LiveVReg( Z_V19 ),
  RegisterSaver_LiveVReg( Z_V20 ),
  RegisterSaver_LiveVReg( Z_V21 ),
  RegisterSaver_LiveVReg( Z_V22 ),
  RegisterSaver_LiveVReg( Z_V23 ),
  RegisterSaver_LiveVReg( Z_V24 ),
  RegisterSaver_LiveVReg( Z_V25 ),
  RegisterSaver_LiveVReg( Z_V26 ),
  RegisterSaver_LiveVReg( Z_V27 ),
  RegisterSaver_LiveVReg( Z_V28 ),
  RegisterSaver_LiveVReg( Z_V29 ),
  RegisterSaver_LiveVReg( Z_V30 ),
  RegisterSaver_LiveVReg( Z_V31 )
};
 282 
 283 int RegisterSaver::live_reg_save_size(RegisterSet reg_set) {
 284   int reg_space = -1;
 285   switch (reg_set) {
 286     case all_registers:           reg_space = sizeof(RegisterSaver_LiveRegs); break;
 287     case all_registers_except_r2: reg_space = sizeof(RegisterSaver_LiveRegsWithoutR2); break;
 288     case all_integer_registers:   reg_space = sizeof(RegisterSaver_LiveIntRegs); break;
 289     case all_volatile_registers:  reg_space = sizeof(RegisterSaver_LiveVolatileRegs); break;
 290     case arg_registers:           reg_space = sizeof(RegisterSaver_LiveArgRegs); break;
 291     default: ShouldNotReachHere();
 292   }
 293   return (reg_space / sizeof(RegisterSaver::LiveRegType)) * reg_size;
 294 }
 295 
 296 int RegisterSaver::calculate_vregstosave_num() {
 297   return (sizeof(RegisterSaver_LiveVRegs) / sizeof(RegisterSaver::LiveRegType));
 298 }
 299 
 300 int RegisterSaver::live_reg_frame_size(RegisterSet reg_set, bool save_vectors) {
 301   const int vregstosave_num = save_vectors ? calculate_vregstosave_num() : 0;
 302   return live_reg_save_size(reg_set) + vregstosave_num * v_reg_size + frame::z_abi_160_size;
 303 }
 304 
 305 
// Push a new frame and spill the registers of 'reg_set' (plus the vector
// registers of RegisterSaver_LiveVRegs if 'save_vectors' is set) into it.
//
// masm:         assembler the code is emitted with.
// reg_set:      selects which register table describes the save area.
// return_pc: Specify the register that should be stored as the return pc in the current frame.
//               It is also used as scratch for push_frame and reloaded afterwards.
// save_vectors: additionally spill the vector registers after the int/float area.
//
// Returns a new OopMap that records, for every saved register, the stack
// slot(s) it was stored to.
OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, RegisterSet reg_set, Register return_pc, bool save_vectors) {
  // Record volatile registers as callee-save values in an OopMap so
  // their save locations will be propagated to the caller frame's
  // RegisterMap during StackFrameStream construction (needed for
  // deoptimization; see compiledVFrame::create_stack_value).

  // Calculate frame size.
  const int frame_size_in_bytes  = live_reg_frame_size(reg_set, save_vectors);
  const int frame_size_in_slots  = frame_size_in_bytes / sizeof(jint);
  const int vregstosave_num = save_vectors ? calculate_vregstosave_num() : 0;
  // Everything below this offset is not part of the save area(s); by
  // construction of live_reg_frame_size() this is frame::z_abi_160_size.
  const int register_save_offset = frame_size_in_bytes - (live_reg_save_size(reg_set) + vregstosave_num * v_reg_size);

  // OopMap frame size is in c2 stack slots (sizeof(jint)) not bytes or words.
  OopMap* map = new OopMap(frame_size_in_slots, 0);

  // Select the table (and its entry count) that matches reg_set.
  int regstosave_num = 0;
  const RegisterSaver::LiveRegType* live_regs = nullptr;

  switch (reg_set) {
    case all_registers:
      regstosave_num = sizeof(RegisterSaver_LiveRegs)/sizeof(RegisterSaver::LiveRegType);
      live_regs      = RegisterSaver_LiveRegs;
      break;
    case all_registers_except_r2:
      regstosave_num = sizeof(RegisterSaver_LiveRegsWithoutR2)/sizeof(RegisterSaver::LiveRegType);;
      live_regs      = RegisterSaver_LiveRegsWithoutR2;
      break;
    case all_integer_registers:
      regstosave_num = sizeof(RegisterSaver_LiveIntRegs)/sizeof(RegisterSaver::LiveRegType);
      live_regs      = RegisterSaver_LiveIntRegs;
      break;
    case all_volatile_registers:
      regstosave_num = sizeof(RegisterSaver_LiveVolatileRegs)/sizeof(RegisterSaver::LiveRegType);
      live_regs      = RegisterSaver_LiveVolatileRegs;
      break;
    case arg_registers:
      regstosave_num = sizeof(RegisterSaver_LiveArgRegs)/sizeof(RegisterSaver::LiveRegType);;
      live_regs      = RegisterSaver_LiveArgRegs;
      break;
    default: ShouldNotReachHere();
  }

  // Save return pc in old frame.
  __ save_return_pc(return_pc);

  // Push a new frame (includes stack linkage).
  // Use return_pc as scratch for push_frame. Z_R0_scratch (the default) and Z_R1_scratch are
  // illegally used to pass parameters by RangeCheckStub::emit_code().
  __ push_frame(frame_size_in_bytes, return_pc);
  // We have to restore return_pc right away.
  // Nobody else will. Furthermore, return_pc isn't necessarily the default (Z_R14).
  // Nobody else knows which register we saved.
  __ z_lg(return_pc, _z_common_abi(return_pc) + frame_size_in_bytes, Z_SP);

  // Register save area in new frame starts above z_abi_160 area.
  int offset = register_save_offset;

  // Consecutive integer registers are collected into a run [first, last]
  // whose first slot is at first_offset. A run is stored with one z_stmg,
  // either when a non-consecutive integer register starts a new run or
  // after the loop.
  Register first = noreg;
  Register last  = noreg;
  int      first_offset = -1;
  bool     float_spilled = false; // Only maintained in debug builds, for the assert below.

  for (int i = 0; i < regstosave_num; i++, offset += reg_size) {
    int reg_num  = live_regs[i].reg_num;
    int reg_type = live_regs[i].reg_type;

    switch (reg_type) {
      case RegisterSaver::int_reg: {
        Register reg = as_Register(reg_num);
        if (last != reg->predecessor()) {
          // 'reg' does not extend the current run: flush the run (if any)
          // and start a new one at 'reg'.
          if (first != noreg) {
            __ z_stmg(first, last, first_offset, Z_SP);
          }
          first = reg;
          first_offset = offset;
          DEBUG_ONLY(float_spilled = false);
        }
        last = reg;
        assert(last != Z_R0, "r0 would require special treatment");
        assert(!float_spilled, "for simplicity, do not mix up ints and floats in RegisterSaver_LiveRegs[]");
        break;
      }

      case RegisterSaver::excluded_reg: // Not saved/restored, but with dedicated slot.
        // 'continue' skips the OopMap entries below; the loop increment
        // still advances 'offset' past this register's slot.
        continue; // Continue with next loop iteration.

      case RegisterSaver::float_reg: {
        // Float registers are stored one at a time.
        FloatRegister freg = as_FloatRegister(reg_num);
        __ z_std(freg, offset, Z_SP);
        DEBUG_ONLY(float_spilled = true);
        break;
      }

      default:
        ShouldNotReachHere();
        break;
    }

    // Record both 4-byte halves of the slot; 'offset >> 2' turns the byte
    // offset into a 4-byte stack slot index.
    // Second set_callee_saved is really a waste but we'll keep things as they were for now
    map->set_callee_saved(VMRegImpl::stack2reg(offset >> 2), live_regs[i].vmreg);
    map->set_callee_saved(VMRegImpl::stack2reg((offset + half_reg_size) >> 2), live_regs[i].vmreg->next());
  }
  // Store the integer run that is still pending after the loop.
  assert(first != noreg, "Should spill at least one int reg.");
  __ z_stmg(first, last, first_offset, Z_SP);

  // Vector registers follow the int/float area. Each one takes v_reg_size
  // bytes and is recorded as four consecutive 4-byte slots.
  for (int i = 0; i < vregstosave_num; i++, offset += v_reg_size) {
    int reg_num  = RegisterSaver_LiveVRegs[i].reg_num;

    __ z_vst(as_VectorRegister(reg_num), Address(Z_SP, offset));

    map->set_callee_saved(VMRegImpl::stack2reg(offset>>2),
                   RegisterSaver_LiveVRegs[i].vmreg);
    map->set_callee_saved(VMRegImpl::stack2reg((offset + half_reg_size ) >> 2),
                   RegisterSaver_LiveVRegs[i].vmreg->next());
    map->set_callee_saved(VMRegImpl::stack2reg((offset + (half_reg_size * 2)) >> 2),
                   RegisterSaver_LiveVRegs[i].vmreg->next(2));
    map->set_callee_saved(VMRegImpl::stack2reg((offset + (half_reg_size * 3)) >> 2),
                   RegisterSaver_LiveVRegs[i].vmreg->next(3));
  }

  // The save areas must end exactly at the end of the frame.
  assert(offset == frame_size_in_bytes, "consistency check");

  // And we're done.
  return map;
}
 432 
 433 
 434 // Generate the OopMap (again, regs where saved before).
 435 OopMap* RegisterSaver::generate_oop_map(MacroAssembler* masm, RegisterSet reg_set) {
 436   // Calculate frame size.
 437   const int frame_size_in_bytes  = live_reg_frame_size(reg_set);
 438   const int frame_size_in_slots  = frame_size_in_bytes / sizeof(jint);
 439   const int register_save_offset = frame_size_in_bytes - live_reg_save_size(reg_set);
 440 
 441   // OopMap frame size is in c2 stack slots (sizeof(jint)) not bytes or words.
 442   OopMap* map = new OopMap(frame_size_in_slots, 0);
 443 
 444   int regstosave_num = 0;
 445   const RegisterSaver::LiveRegType* live_regs = nullptr;
 446 
 447   switch (reg_set) {
 448     case all_registers:
 449       regstosave_num = sizeof(RegisterSaver_LiveRegs)/sizeof(RegisterSaver::LiveRegType);
 450       live_regs      = RegisterSaver_LiveRegs;
 451       break;
 452     case all_registers_except_r2:
 453       regstosave_num = sizeof(RegisterSaver_LiveRegsWithoutR2)/sizeof(RegisterSaver::LiveRegType);;
 454       live_regs      = RegisterSaver_LiveRegsWithoutR2;
 455       break;
 456     case all_integer_registers:
 457       regstosave_num = sizeof(RegisterSaver_LiveIntRegs)/sizeof(RegisterSaver::LiveRegType);
 458       live_regs      = RegisterSaver_LiveIntRegs;
 459       break;
 460     case all_volatile_registers:
 461       regstosave_num = sizeof(RegisterSaver_LiveVolatileRegs)/sizeof(RegisterSaver::LiveRegType);
 462       live_regs      = RegisterSaver_LiveVolatileRegs;
 463       break;
 464     case arg_registers:
 465       regstosave_num = sizeof(RegisterSaver_LiveArgRegs)/sizeof(RegisterSaver::LiveRegType);;
 466       live_regs      = RegisterSaver_LiveArgRegs;
 467       break;
 468     default: ShouldNotReachHere();
 469   }
 470 
 471   // Register save area in new frame starts above z_abi_160 area.
 472   int offset = register_save_offset;
 473   for (int i = 0; i < regstosave_num; i++) {
 474     if (live_regs[i].reg_type < RegisterSaver::excluded_reg) {
 475       map->set_callee_saved(VMRegImpl::stack2reg(offset>>2), live_regs[i].vmreg);
 476       map->set_callee_saved(VMRegImpl::stack2reg((offset + half_reg_size)>>2), live_regs[i].vmreg->next());
 477     }
 478     offset += reg_size;
 479   }
 480 #ifdef ASSERT
 481   assert(offset == frame_size_in_bytes, "consistency check");
 482 #endif
 483   return map;
 484 }
 485 
 486 
// Pop the current frame and restore all the registers that we saved.
//
// masm:         assembler the code is emitted with.
// reg_set:      selects which register table describes the save area; it has
//               to describe the same layout that was used for saving.
// save_vectors: also reload the vector registers of RegisterSaver_LiveVRegs
//               from the area that follows the int/float save area.
void RegisterSaver::restore_live_registers(MacroAssembler* masm, RegisterSet reg_set, bool save_vectors) {
  int offset;
  const int vregstosave_num = save_vectors ? calculate_vregstosave_num() : 0;
  // Start of the save area within the frame (same computation as when saving).
  const int register_save_offset = live_reg_frame_size(reg_set, save_vectors) - (live_reg_save_size(reg_set) + vregstosave_num * v_reg_size);

  // Consecutive integer registers are collected into a run [first, last]
  // whose first slot is at first_offset and reloaded with a single z_lmg.
  Register first = noreg;
  Register last = noreg;
  int      first_offset = -1;
  bool     float_spilled = false; // Only maintained in debug builds, for the assert below.

  // Select the table (and its entry count) that matches reg_set.
  int regstosave_num = 0;
  const RegisterSaver::LiveRegType* live_regs = nullptr;

  switch (reg_set) {
    case all_registers:
      regstosave_num = sizeof(RegisterSaver_LiveRegs)/sizeof(RegisterSaver::LiveRegType);;
      live_regs      = RegisterSaver_LiveRegs;
      break;
    case all_registers_except_r2:
      regstosave_num = sizeof(RegisterSaver_LiveRegsWithoutR2)/sizeof(RegisterSaver::LiveRegType);;
      live_regs      = RegisterSaver_LiveRegsWithoutR2;
      break;
    case all_integer_registers:
      regstosave_num = sizeof(RegisterSaver_LiveIntRegs)/sizeof(RegisterSaver::LiveRegType);
      live_regs      = RegisterSaver_LiveIntRegs;
      break;
    case all_volatile_registers:
      regstosave_num = sizeof(RegisterSaver_LiveVolatileRegs)/sizeof(RegisterSaver::LiveRegType);;
      live_regs      = RegisterSaver_LiveVolatileRegs;
      break;
    case arg_registers:
      regstosave_num = sizeof(RegisterSaver_LiveArgRegs)/sizeof(RegisterSaver::LiveRegType);;
      live_regs      = RegisterSaver_LiveArgRegs;
      break;
    default: ShouldNotReachHere();
  }

  // Restore all registers (ints and floats).

  // Register save area in new frame starts above z_abi_160 area.
  offset = register_save_offset;

  for (int i = 0; i < regstosave_num; i++, offset += reg_size) {
    int reg_num  = live_regs[i].reg_num;
    int reg_type = live_regs[i].reg_type;

    switch (reg_type) {
      case RegisterSaver::excluded_reg:
        // Nothing to reload; the loop increment still skips over the slot.
        continue; // Continue with next loop iteration.

      case RegisterSaver::int_reg: {
        Register reg = as_Register(reg_num);
        if (last != reg->predecessor()) {
          // 'reg' does not extend the current run: reload the run (if any)
          // and start a new one at 'reg'.
          if (first != noreg) {
            __ z_lmg(first, last, first_offset, Z_SP);
          }
          first = reg;
          first_offset = offset;
          DEBUG_ONLY(float_spilled = false);
        }
        last = reg;
        assert(last != Z_R0, "r0 would require special treatment");
        assert(!float_spilled, "for simplicity, do not mix up ints and floats in RegisterSaver_LiveRegs[]");
        break;
      }

      case RegisterSaver::float_reg: {
        // Float registers are reloaded one at a time.
        FloatRegister freg = as_FloatRegister(reg_num);
        __ z_ld(freg, offset, Z_SP);
        DEBUG_ONLY(float_spilled = true);
        break;
      }

      default:
        ShouldNotReachHere();
    }
  }
  // Reload the integer run that is still pending after the loop.
  assert(first != noreg, "Should spill at least one int reg.");
  __ z_lmg(first, last, first_offset, Z_SP);

  // Vector registers follow the int/float area, v_reg_size bytes each.
  for (int i = 0; i < vregstosave_num; i++, offset += v_reg_size) {
    int reg_num  = RegisterSaver_LiveVRegs[i].reg_num;

    __ z_vl(as_VectorRegister(reg_num), Address(Z_SP, offset));
  }

  // Pop the frame.
  __ pop_frame();

  // Restore the return pc (counterpart of save_return_pc() in
  // save_live_registers()).
  __ restore_return_pc();
}
 580 
 581 
 582 // Pop the current frame and restore the registers that might be holding a result.
 583 void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
 584   const int regstosave_num       = sizeof(RegisterSaver_LiveRegs) /
 585                                    sizeof(RegisterSaver::LiveRegType);
 586   const int register_save_offset = live_reg_frame_size(all_registers) - live_reg_save_size(all_registers);
 587 
 588   // Restore all result registers (ints and floats).
 589   int offset = register_save_offset;
 590   for (int i = 0; i < regstosave_num; i++, offset += reg_size) {
 591     int reg_num = RegisterSaver_LiveRegs[i].reg_num;
 592     int reg_type = RegisterSaver_LiveRegs[i].reg_type;
 593     switch (reg_type) {
 594       case RegisterSaver::excluded_reg:
 595         continue; // Continue with next loop iteration.
 596       case RegisterSaver::int_reg: {
 597         if (as_Register(reg_num) == Z_RET) { // int result_reg
 598           __ z_lg(as_Register(reg_num), offset, Z_SP);
 599         }
 600         break;
 601       }
 602       case RegisterSaver::float_reg: {
 603         if (as_FloatRegister(reg_num) == Z_FRET) { // float result_reg
 604           __ z_ld(as_FloatRegister(reg_num), offset, Z_SP);
 605         }
 606         break;
 607       }
 608       default:
 609         ShouldNotReachHere();
 610     }
 611   }
 612   assert(offset == live_reg_frame_size(all_registers), "consistency check");
 613 }
 614 
 615 // ---------------------------------------------------------------------------
 616 void SharedRuntime::save_native_result(MacroAssembler * masm,
 617                                        BasicType ret_type,
 618                                        int frame_slots) {
 619   Address memaddr(Z_SP, frame_slots * VMRegImpl::stack_slot_size);
 620 
 621   switch (ret_type) {
 622     case T_BOOLEAN:  // Save shorter types as int. Do we need sign extension at restore??
 623     case T_BYTE:
 624     case T_CHAR:
 625     case T_SHORT:
 626     case T_INT:
 627       __ reg2mem_opt(Z_RET, memaddr, false);
 628       break;
 629     case T_OBJECT:   // Save pointer types as long.
 630     case T_ARRAY:
 631     case T_ADDRESS:
 632     case T_VOID:
 633     case T_LONG:
 634       __ reg2mem_opt(Z_RET, memaddr);
 635       break;
 636     case T_FLOAT:
 637       __ freg2mem_opt(Z_FRET, memaddr, false);
 638       break;
 639     case T_DOUBLE:
 640       __ freg2mem_opt(Z_FRET, memaddr);
 641       break;
 642     default:
 643       ShouldNotReachHere();
 644       break;
 645   }
 646 }
 647 
 648 void SharedRuntime::restore_native_result(MacroAssembler *masm,
 649                                           BasicType       ret_type,
 650                                           int             frame_slots) {
 651   Address memaddr(Z_SP, frame_slots * VMRegImpl::stack_slot_size);
 652 
 653   switch (ret_type) {
 654     case T_BOOLEAN:  // Restore shorter types as int. Do we need sign extension at restore??
 655     case T_BYTE:
 656     case T_CHAR:
 657     case T_SHORT:
 658     case T_INT:
 659       __ mem2reg_opt(Z_RET, memaddr, false);
 660       break;
 661     case T_OBJECT:   // Restore pointer types as long.
 662     case T_ARRAY:
 663     case T_ADDRESS:
 664     case T_VOID:
 665     case T_LONG:
 666       __ mem2reg_opt(Z_RET, memaddr);
 667       break;
 668     case T_FLOAT:
 669       __ mem2freg_opt(Z_FRET, memaddr, false);
 670       break;
 671     case T_DOUBLE:
 672       __ mem2freg_opt(Z_FRET, memaddr);
 673       break;
 674     default:
 675       ShouldNotReachHere();
 676       break;
 677   }
 678 }
 679 
 680 // ---------------------------------------------------------------------------
 681 // Read the array of BasicTypes from a signature, and compute where the
 682 // arguments should go. Values in the VMRegPair regs array refer to 4-byte
 683 // quantities. Values less than VMRegImpl::stack0 are registers, those above
 684 // refer to 4-byte stack slots. All stack slots are based off of the stack pointer
 685 // as framesizes are fixed.
 686 // VMRegImpl::stack0 refers to the first slot 0(sp).
// VMRegImpl::stack0+1 refers to the memory word 4 bytes higher. Registers
// up to Register::number_of_registers are the 64-bit integer registers.
 689 
 690 // Note: the INPUTS in sig_bt are in units of Java argument words, which are
 691 // either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit
 692 // units regardless of build.
 693 
 694 // The Java calling convention is a "shifted" version of the C ABI.
 695 // By skipping the first C ABI register we can call non-static jni methods
 696 // with small numbers of arguments without having to shuffle the arguments
 697 // at all. Since we control the java ABI we ought to at least get some
 698 // advantage out of it.
 699 int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
 700                                            VMRegPair *regs,
 701                                            int total_args_passed) {
 702   // c2c calling conventions for compiled-compiled calls.
 703 
 704   // An int/float occupies 1 slot here.
 705   const int inc_stk_for_intfloat   = 1; // 1 slots for ints and floats.
 706   const int inc_stk_for_longdouble = 2; // 2 slots for longs and doubles.
 707 
 708   const VMReg z_iarg_reg[5] = {
 709     Z_R2->as_VMReg(),
 710     Z_R3->as_VMReg(),
 711     Z_R4->as_VMReg(),
 712     Z_R5->as_VMReg(),
 713     Z_R6->as_VMReg()
 714   };
 715   const VMReg z_farg_reg[4] = {
 716     Z_F0->as_VMReg(),
 717     Z_F2->as_VMReg(),
 718     Z_F4->as_VMReg(),
 719     Z_F6->as_VMReg()
 720   };
 721   const int z_num_iarg_registers = sizeof(z_iarg_reg) / sizeof(z_iarg_reg[0]);
 722   const int z_num_farg_registers = sizeof(z_farg_reg) / sizeof(z_farg_reg[0]);
 723 
 724   assert(Register::number_of_arg_registers == z_num_iarg_registers, "iarg reg count mismatch");
 725   assert(FloatRegister::number_of_arg_registers == z_num_farg_registers, "farg reg count mismatch");
 726 
 727   int i;
 728   int stk = 0;
 729   int ireg = 0;
 730   int freg = 0;
 731 
 732   for (int i = 0; i < total_args_passed; ++i) {
 733     switch (sig_bt[i]) {
 734       case T_BOOLEAN:
 735       case T_CHAR:
 736       case T_BYTE:
 737       case T_SHORT:
 738       case T_INT:
 739         if (ireg < z_num_iarg_registers) {
 740           // Put int/ptr in register.
 741           regs[i].set1(z_iarg_reg[ireg]);
 742           ++ireg;
 743         } else {
 744           // Put int/ptr on stack.
 745           regs[i].set1(VMRegImpl::stack2reg(stk));
 746           stk += inc_stk_for_intfloat;
 747         }
 748         break;
 749       case T_LONG:
 750         assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half");
 751         if (ireg < z_num_iarg_registers) {
 752           // Put long in register.
 753           regs[i].set2(z_iarg_reg[ireg]);
 754           ++ireg;
 755         } else {
 756           // Put long on stack and align to 2 slots.
 757           if (stk & 0x1) { ++stk; }
 758           regs[i].set2(VMRegImpl::stack2reg(stk));
 759           stk += inc_stk_for_longdouble;
 760         }
 761         break;
 762       case T_OBJECT:
 763       case T_ARRAY:
 764       case T_ADDRESS:
 765         if (ireg < z_num_iarg_registers) {
 766           // Put ptr in register.
 767           regs[i].set2(z_iarg_reg[ireg]);
 768           ++ireg;
 769         } else {
 770           // Put ptr on stack and align to 2 slots, because
 771           // "64-bit pointers record oop-ishness on 2 aligned adjacent
 772           // registers." (see OopFlow::build_oop_map).
 773           if (stk & 0x1) { ++stk; }
 774           regs[i].set2(VMRegImpl::stack2reg(stk));
 775           stk += inc_stk_for_longdouble;
 776         }
 777         break;
 778       case T_FLOAT:
 779         if (freg < z_num_farg_registers) {
 780           // Put float in register.
 781           regs[i].set1(z_farg_reg[freg]);
 782           ++freg;
 783         } else {
 784           // Put float on stack.
 785           regs[i].set1(VMRegImpl::stack2reg(stk));
 786           stk += inc_stk_for_intfloat;
 787         }
 788         break;
 789       case T_DOUBLE:
 790         assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half");
 791         if (freg < z_num_farg_registers) {
 792           // Put double in register.
 793           regs[i].set2(z_farg_reg[freg]);
 794           ++freg;
 795         } else {
 796           // Put double on stack and align to 2 slots.
 797           if (stk & 0x1) { ++stk; }
 798           regs[i].set2(VMRegImpl::stack2reg(stk));
 799           stk += inc_stk_for_longdouble;
 800         }
 801         break;
 802       case T_VOID:
 803         assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
 804         // Do not count halves.
 805         regs[i].set_bad();
 806         break;
 807       default:
 808         ShouldNotReachHere();
 809     }
 810   }
 811   return stk;
 812 }
 813 
 814 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
 815                                         VMRegPair *regs,
 816                                         int total_args_passed) {
 817 
 818   // Calling conventions for C runtime calls and calls to JNI native methods.
 819   const VMReg z_iarg_reg[5] = {
 820     Z_R2->as_VMReg(),
 821     Z_R3->as_VMReg(),
 822     Z_R4->as_VMReg(),
 823     Z_R5->as_VMReg(),
 824     Z_R6->as_VMReg()
 825   };
 826   const VMReg z_farg_reg[4] = {
 827     Z_F0->as_VMReg(),
 828     Z_F2->as_VMReg(),
 829     Z_F4->as_VMReg(),
 830     Z_F6->as_VMReg()
 831   };
 832   const int z_num_iarg_registers = sizeof(z_iarg_reg) / sizeof(z_iarg_reg[0]);
 833   const int z_num_farg_registers = sizeof(z_farg_reg) / sizeof(z_farg_reg[0]);
 834 
 835   // Check calling conventions consistency.
 836   assert(Register::number_of_arg_registers == z_num_iarg_registers, "iarg reg count mismatch");
 837   assert(FloatRegister::number_of_arg_registers == z_num_farg_registers, "farg reg count mismatch");
 838 
 839   // Avoid passing C arguments in the wrong stack slots.
 840 
 841   // 'Stk' counts stack slots. Due to alignment, 32 bit values occupy
 842   // 2 such slots, like 64 bit values do.
 843   const int inc_stk_for_intfloat   = 2; // 2 slots for ints and floats.
 844   const int inc_stk_for_longdouble = 2; // 2 slots for longs and doubles.
 845 
 846   int i;
 847   // Leave room for C-compatible ABI
 848   int stk = (frame::z_abi_160_size - frame::z_jit_out_preserve_size) / VMRegImpl::stack_slot_size;
 849   int freg = 0;
 850   int ireg = 0;
 851 
 852   // We put the first 5 arguments into registers and the rest on the
 853   // stack. Float arguments are already in their argument registers
 854   // due to c2c calling conventions (see calling_convention).
 855   for (int i = 0; i < total_args_passed; ++i) {
 856     switch (sig_bt[i]) {
 857       case T_BOOLEAN:
 858       case T_CHAR:
 859       case T_BYTE:
 860       case T_SHORT:
 861       case T_INT:
 862         // Fall through, handle as long.
 863       case T_LONG:
 864       case T_OBJECT:
 865       case T_ARRAY:
 866       case T_ADDRESS:
 867       case T_METADATA:
 868         // Oops are already boxed if required (JNI).
 869         if (ireg < z_num_iarg_registers) {
 870           regs[i].set2(z_iarg_reg[ireg]);
 871           ++ireg;
 872         } else {
 873           regs[i].set2(VMRegImpl::stack2reg(stk));
 874           stk += inc_stk_for_longdouble;
 875         }
 876         break;
 877       case T_FLOAT:
 878         if (freg < z_num_farg_registers) {
 879           regs[i].set1(z_farg_reg[freg]);
 880           ++freg;
 881         } else {
 882           regs[i].set1(VMRegImpl::stack2reg(stk+1));
 883           stk +=  inc_stk_for_intfloat;
 884         }
 885         break;
 886       case T_DOUBLE:
 887         assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half");
 888         if (freg < z_num_farg_registers) {
 889           regs[i].set2(z_farg_reg[freg]);
 890           ++freg;
 891         } else {
 892           // Put double on stack.
 893           regs[i].set2(VMRegImpl::stack2reg(stk));
 894           stk += inc_stk_for_longdouble;
 895         }
 896         break;
 897       case T_VOID:
 898         // Do not count halves.
 899         regs[i].set_bad();
 900         break;
 901       default:
 902         ShouldNotReachHere();
 903     }
 904   }
 905   return align_up(stk, 2);
 906 }
 907 
// Vector calling convention. Not implemented here: the body unconditionally
// calls Unimplemented(); the trailing 'return 0' only satisfies the
// function's return type.
int SharedRuntime::vector_calling_convention(VMRegPair *regs,
                                             uint num_bits,
                                             uint total_args_passed) {
  Unimplemented();
  return 0;
}
 914 
 915 ////////////////////////////////////////////////////////////////////////
 916 //
 917 //  Argument shufflers
 918 //
 919 ////////////////////////////////////////////////////////////////////////
 920 
 921 //----------------------------------------------------------------------
 922 // The java_calling_convention describes stack locations as ideal slots on
 923 // a frame with no abi restrictions. Since we must observe abi restrictions
 924 // (like the placement of the register window) the slots must be biased by
 925 // the following value.
 926 //----------------------------------------------------------------------
 927 static int reg2slot(VMReg r) {
 928   return r->reg2stack() + SharedRuntime::out_preserve_stack_slots();
 929 }
 930 
 931 static int reg2offset(VMReg r) {
 932   return reg2slot(r) * VMRegImpl::stack_slot_size;
 933 }
 934 
 935 static void verify_oop_args(MacroAssembler *masm,
 936                             int total_args_passed,
 937                             const BasicType *sig_bt,
 938                             const VMRegPair *regs) {
 939   if (!VerifyOops) { return; }
 940 
 941   for (int i = 0; i < total_args_passed; i++) {
 942     if (is_reference_type(sig_bt[i])) {
 943       VMReg r = regs[i].first();
 944       assert(r->is_valid(), "bad oop arg");
 945 
 946       if (r->is_stack()) {
 947         __ z_lg(Z_R0_scratch,
 948                 Address(Z_SP, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
 949         __ verify_oop(Z_R0_scratch, FILE_AND_LINE);
 950       } else {
 951         __ verify_oop(r->as_Register(), FILE_AND_LINE);
 952       }
 953     }
 954   }
 955 }
 956 
// Emit the compiled-entry dispatch code for a method handle intrinsic.
//
// Parameters:
//   masm              - assembler receiving the generated code
//   total_args_passed - number of entries in sig_bt / regs
//   special_dispatch  - the intrinsic to dispatch
//   sig_bt, regs      - argument types and their locations
//
// The function works out which register holds the trailing member argument
// (if the intrinsic has one) and which register holds the receiver (if it
// has one), loading the member argument from the stack where necessary, and
// passes both to MethodHandles::generate_method_handle_dispatch().
static void gen_special_dispatch(MacroAssembler *masm,
                                 int total_args_passed,
                                 vmIntrinsics::ID special_dispatch,
                                 const BasicType *sig_bt,
                                 const VMRegPair *regs) {
  verify_oop_args(masm, total_args_passed, sig_bt, regs);

  // Determine which arguments the dispatch code needs in registers.
  bool     has_receiver   = false;
  Register receiver_reg   = noreg;
  int      member_arg_pos = -1;
  Register member_reg     = noreg;
  int      ref_kind       = MethodHandles::signature_polymorphic_intrinsic_ref_kind(special_dispatch);

  if (ref_kind != 0) {
    // Intrinsic with a reference kind: the last argument is the MemberName.
    member_arg_pos = total_args_passed - 1;  // trailing MemberName argument
    member_reg = Z_R9;                       // Known to be free at this point.
    has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind);
  } else if (special_dispatch == vmIntrinsics::_linkToNative) {
    // linkToNative: trailing argument, no receiver.
    member_arg_pos = total_args_passed - 1;  // trailing NativeEntryPoint argument
    member_reg = Z_R9;  // known to be free at this point
  } else {
    // The only remaining supported intrinsic is invokeBasic: receiver only.
    guarantee(special_dispatch == vmIntrinsics::_invokeBasic,
              "special_dispatch=%d", vmIntrinsics::as_int(special_dispatch));
    has_receiver = true;
  }

  if (member_reg != noreg) {
    // Load the member_arg into register, if necessary.
    assert(member_arg_pos >= 0 && member_arg_pos < total_args_passed, "oob");
    assert(sig_bt[member_arg_pos] == T_OBJECT, "dispatch argument must be an object");

    VMReg r = regs[member_arg_pos].first();
    assert(r->is_valid(), "bad member arg");

    if (r->is_stack()) {
      __ z_lg(member_reg, Address(Z_SP, reg2offset(r)));
    } else {
      // No data motion is needed.
      member_reg = r->as_Register();
    }
  }

  if (has_receiver) {
    // Make sure the receiver is loaded into a register.
    assert(total_args_passed > 0, "oob");
    assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object");

    VMReg r = regs[0].first();
    assert(r->is_valid(), "bad receiver arg");

    if (r->is_stack()) {
      // Porting note: This assumes that compiled calling conventions always
      // pass the receiver oop in a register. If this is not true on some
      // platform, pick a temp and load the receiver from stack.
      assert(false, "receiver always in a register");
      receiver_reg = Z_R13;  // Known to be free at this point.
      __ z_lg(receiver_reg, Address(Z_SP, reg2offset(r)));
    } else {
      // No data motion is needed.
      receiver_reg = r->as_Register();
    }
  }

  // Figure out which address we are really jumping to:
  MethodHandles::generate_method_handle_dispatch(masm, special_dispatch,
                                                 receiver_reg, member_reg,
                                                 /*for_compiler_entry:*/ true);
}
1026 
1027 ////////////////////////////////////////////////////////////////////////
1028 //
1029 //  Argument shufflers
1030 //
1031 ////////////////////////////////////////////////////////////////////////
1032 
// Is the given vector size (in bytes) bigger than the size saved by default?
// 8-byte registers are saved by default on z/Architecture.
1035 bool SharedRuntime::is_wide_vector(int size) {
1036   // Note, MaxVectorSize == 8/16 on this platform.
1037   assert(size <= (SuperwordUseVX ? 16 : 8), "%d bytes vectors are not supported", size);
1038   return size > 8;
1039 }
1040 
1041 //----------------------------------------------------------------------
1042 // An oop arg. Must pass a handle not the oop itself
1043 //----------------------------------------------------------------------
// Emit code that passes a handle (address of a stack slot holding the oop,
// or null if the oop is null) for the oop argument located at 'src'.
//
// Parameters:
//   masm               - assembler receiving the generated code
//   map                - oop map; the stack slot holding the oop is recorded here
//   oop_handle_offset  - slot index of the area where register oops are stored
//   framesize_in_slots - scaled to bytes and added to the offset of a
//                        stack-passed source oop
//   src                - current location of the oop (stack or register)
//   dst                - where the handle must end up (stack or register)
//   is_receiver        - if set, the oop's byte offset is stored to
//                        *receiver_offset (register case only; a stack-passed
//                        receiver trips a guarantee)
//   receiver_offset    - in/out; asserted to be -1 on entry when is_receiver
//
// Z_R1 is used as a temporary for the handle when 'dst' is on the stack;
// Z_R0 is overwritten by the null test in the stack-source case.
static void object_move(MacroAssembler *masm,
                        OopMap *map,
                        int oop_handle_offset,
                        int framesize_in_slots,
                        VMRegPair src,
                        VMRegPair dst,
                        bool is_receiver,
                        int *receiver_offset) {
  int frame_offset = framesize_in_slots*VMRegImpl::stack_slot_size;

  assert(!is_receiver || (is_receiver && (*receiver_offset == -1)), "only one receiving object per call, please.");

  // Must pass a handle. First figure out the location we use as a handle.

  if (src.first()->is_stack()) {
    // Oop is already on the stack, put handle on stack or in register
    // If handle will be on the stack, use temp reg to calculate it.
    Register rHandle = dst.first()->is_stack() ? Z_R1 : dst.first()->as_Register();
    Label    skip;
    int      slot_in_older_frame = reg2slot(src.first());

    guarantee(!is_receiver, "expecting receiver in register");
    // Record the slot holding the oop, expressed relative to the new frame.
    map->set_oop(VMRegImpl::stack2reg(slot_in_older_frame + framesize_in_slots));

    // rHandle = address of the slot; Z_R0 = the oop itself (tested for null).
    __ add2reg(rHandle, reg2offset(src.first())+frame_offset, Z_SP);
    __ load_and_test_long(Z_R0, Address(rHandle));
    __ z_brne(skip);
    // Use a null handle if oop is null.
    __ clear_reg(rHandle, true, false);
    __ bind(skip);

    // Copy handle to the right place (register or stack).
    if (dst.first()->is_stack()) {
      __ z_stg(rHandle, reg2offset(dst.first()), Z_SP);
    } // else
      // nothing to do. rHandle uses the correct register
  } else {
    // Oop is passed in an input register. We must flush it to the stack.
    const Register rOop = src.first()->as_Register();
    const Register rHandle = dst.first()->is_stack() ? Z_R1 : dst.first()->as_Register();
    // The slot is selected by the inbound register's distance from Z_ARG1.
    int            oop_slot = (rOop->encoding()-Z_ARG1->encoding()) * VMRegImpl::slots_per_word + oop_handle_offset;
    int            oop_slot_offset = oop_slot*VMRegImpl::stack_slot_size;
    NearLabel skip;

    if (is_receiver) {
      *receiver_offset = oop_slot_offset;
    }
    map->set_oop(VMRegImpl::stack2reg(oop_slot));

    // Flush Oop to stack, calculate handle.
    __ z_stg(rOop, oop_slot_offset, Z_SP);
    __ add2reg(rHandle, oop_slot_offset, Z_SP);

    // If Oop is null, use a null handle.
    __ compare64_and_branch(rOop, (RegisterOrConstant)0L, Assembler::bcondNotEqual, skip);
    __ clear_reg(rHandle, true, false);
    __ bind(skip);

    // Copy handle to the right place (register or stack).
    if (dst.first()->is_stack()) {
      __ z_stg(rHandle, reg2offset(dst.first()), Z_SP);
    } // else
      // nothing to do here, since rHandle = dst.first()->as_Register in this case.
  }
}
1109 
1110 //----------------------------------------------------------------------
1111 // A float arg. May have to do float reg to int reg conversion
1112 //----------------------------------------------------------------------
1113 static void float_move(MacroAssembler *masm,
1114                        VMRegPair src,
1115                        VMRegPair dst,
1116                        int framesize_in_slots,
1117                        int workspace_slot_offset) {
1118   int frame_offset = framesize_in_slots * VMRegImpl::stack_slot_size;
1119   int workspace_offset = workspace_slot_offset * VMRegImpl::stack_slot_size;
1120 
1121   // We do not accept an argument in a VMRegPair to be spread over two slots,
1122   // no matter what physical location (reg or stack) the slots may have.
1123   // We just check for the unaccepted slot to be invalid.
1124   assert(!src.second()->is_valid(), "float in arg spread over two slots");
1125   assert(!dst.second()->is_valid(), "float out arg spread over two slots");
1126 
1127   if (src.first()->is_stack()) {
1128     if (dst.first()->is_stack()) {
1129       // stack -> stack. The easiest of the bunch.
1130       __ z_mvc(Address(Z_SP, reg2offset(dst.first())),
1131                Address(Z_SP, reg2offset(src.first()) + frame_offset), sizeof(float));
1132     } else {
1133       // stack to reg
1134       Address memaddr(Z_SP, reg2offset(src.first()) + frame_offset);
1135       if (dst.first()->is_Register()) {
1136         __ mem2reg_opt(dst.first()->as_Register(), memaddr, false);
1137       } else {
1138         __ mem2freg_opt(dst.first()->as_FloatRegister(), memaddr, false);
1139       }
1140     }
1141   } else if (src.first()->is_Register()) {
1142     if (dst.first()->is_stack()) {
1143       // gpr -> stack
1144       __ reg2mem_opt(src.first()->as_Register(),
1145                      Address(Z_SP, reg2offset(dst.first()), false ));
1146     } else {
1147       if (dst.first()->is_Register()) {
1148         // gpr -> gpr
1149         __ move_reg_if_needed(dst.first()->as_Register(), T_INT,
1150                               src.first()->as_Register(), T_INT);
1151       } else {
1152         if (VM_Version::has_FPSupportEnhancements()) {
1153           // gpr -> fpr. Exploit z10 capability of direct transfer.
1154           __ z_ldgr(dst.first()->as_FloatRegister(), src.first()->as_Register());
1155         } else {
1156           // gpr -> fpr. Use work space on stack to transfer data.
1157           Address   stackaddr(Z_SP, workspace_offset);
1158 
1159           __ reg2mem_opt(src.first()->as_Register(), stackaddr, false);
1160           __ mem2freg_opt(dst.first()->as_FloatRegister(), stackaddr, false);
1161         }
1162       }
1163     }
1164   } else {
1165     if (dst.first()->is_stack()) {
1166       // fpr -> stack
1167       __ freg2mem_opt(src.first()->as_FloatRegister(),
1168                       Address(Z_SP, reg2offset(dst.first())), false);
1169     } else {
1170       if (dst.first()->is_Register()) {
1171         if (VM_Version::has_FPSupportEnhancements()) {
1172           // fpr -> gpr.
1173           __ z_lgdr(dst.first()->as_Register(), src.first()->as_FloatRegister());
1174         } else {
1175           // fpr -> gpr. Use work space on stack to transfer data.
1176           Address   stackaddr(Z_SP, workspace_offset);
1177 
1178           __ freg2mem_opt(src.first()->as_FloatRegister(), stackaddr, false);
1179           __ mem2reg_opt(dst.first()->as_Register(), stackaddr, false);
1180         }
1181       } else {
1182         // fpr -> fpr
1183         __ move_freg_if_needed(dst.first()->as_FloatRegister(), T_FLOAT,
1184                                src.first()->as_FloatRegister(), T_FLOAT);
1185       }
1186     }
1187   }
1188 }
1189 
1190 //----------------------------------------------------------------------
1191 // A double arg. May have to do double reg to long reg conversion
1192 //----------------------------------------------------------------------
1193 static void double_move(MacroAssembler *masm,
1194                         VMRegPair src,
1195                         VMRegPair dst,
1196                         int framesize_in_slots,
1197                         int workspace_slot_offset) {
1198   int frame_offset = framesize_in_slots*VMRegImpl::stack_slot_size;
1199   int workspace_offset = workspace_slot_offset*VMRegImpl::stack_slot_size;
1200 
1201   // Since src is always a java calling convention we know that the
1202   // src pair is always either all registers or all stack (and aligned?)
1203 
1204   if (src.first()->is_stack()) {
1205     if (dst.first()->is_stack()) {
1206       // stack -> stack. The easiest of the bunch.
1207       __ z_mvc(Address(Z_SP, reg2offset(dst.first())),
1208                Address(Z_SP, reg2offset(src.first()) + frame_offset), sizeof(double));
1209     } else {
1210       // stack to reg
1211       Address stackaddr(Z_SP, reg2offset(src.first()) + frame_offset);
1212 
1213       if (dst.first()->is_Register()) {
1214         __ mem2reg_opt(dst.first()->as_Register(), stackaddr);
1215       } else {
1216         __ mem2freg_opt(dst.first()->as_FloatRegister(), stackaddr);
1217       }
1218     }
1219   } else if (src.first()->is_Register()) {
1220     if (dst.first()->is_stack()) {
1221       // gpr -> stack
1222       __ reg2mem_opt(src.first()->as_Register(),
1223                      Address(Z_SP, reg2offset(dst.first())));
1224     } else {
1225       if (dst.first()->is_Register()) {
1226         // gpr -> gpr
1227         __ move_reg_if_needed(dst.first()->as_Register(), T_LONG,
1228                               src.first()->as_Register(), T_LONG);
1229       } else {
1230         if (VM_Version::has_FPSupportEnhancements()) {
1231           // gpr -> fpr. Exploit z10 capability of direct transfer.
1232           __ z_ldgr(dst.first()->as_FloatRegister(), src.first()->as_Register());
1233         } else {
1234           // gpr -> fpr. Use work space on stack to transfer data.
1235           Address stackaddr(Z_SP, workspace_offset);
1236           __ reg2mem_opt(src.first()->as_Register(), stackaddr);
1237           __ mem2freg_opt(dst.first()->as_FloatRegister(), stackaddr);
1238         }
1239       }
1240     }
1241   } else {
1242     if (dst.first()->is_stack()) {
1243       // fpr -> stack
1244       __ freg2mem_opt(src.first()->as_FloatRegister(),
1245                       Address(Z_SP, reg2offset(dst.first())));
1246     } else {
1247       if (dst.first()->is_Register()) {
1248         if (VM_Version::has_FPSupportEnhancements()) {
1249           // fpr -> gpr. Exploit z10 capability of direct transfer.
1250           __ z_lgdr(dst.first()->as_Register(), src.first()->as_FloatRegister());
1251         } else {
1252           // fpr -> gpr. Use work space on stack to transfer data.
1253           Address stackaddr(Z_SP, workspace_offset);
1254 
1255           __ freg2mem_opt(src.first()->as_FloatRegister(), stackaddr);
1256           __ mem2reg_opt(dst.first()->as_Register(), stackaddr);
1257         }
1258       } else {
1259         // fpr -> fpr
1260         // In theory these overlap but the ordering is such that this is likely a nop.
1261         __ move_freg_if_needed(dst.first()->as_FloatRegister(), T_DOUBLE,
1262                                src.first()->as_FloatRegister(), T_DOUBLE);
1263       }
1264     }
1265   }
1266 }
1267 
1268 //----------------------------------------------------------------------
1269 // A long arg.
1270 //----------------------------------------------------------------------
1271 static void long_move(MacroAssembler *masm,
1272                       VMRegPair src,
1273                       VMRegPair dst,
1274                       int framesize_in_slots) {
1275   int frame_offset = framesize_in_slots*VMRegImpl::stack_slot_size;
1276 
1277   if (src.first()->is_stack()) {
1278     if (dst.first()->is_stack()) {
1279       // stack -> stack. The easiest of the bunch.
1280       __ z_mvc(Address(Z_SP, reg2offset(dst.first())),
1281                Address(Z_SP, reg2offset(src.first()) + frame_offset), sizeof(long));
1282     } else {
1283       // stack to reg
1284       assert(dst.first()->is_Register(), "long dst value must be in GPR");
1285       __ mem2reg_opt(dst.first()->as_Register(),
1286                       Address(Z_SP, reg2offset(src.first()) + frame_offset));
1287     }
1288   } else {
1289     // reg to reg
1290     assert(src.first()->is_Register(), "long src value must be in GPR");
1291     if (dst.first()->is_stack()) {
1292       // reg -> stack
1293       __ reg2mem_opt(src.first()->as_Register(),
1294                      Address(Z_SP, reg2offset(dst.first())));
1295     } else {
1296       // reg -> reg
1297       assert(dst.first()->is_Register(), "long dst value must be in GPR");
1298       __ move_reg_if_needed(dst.first()->as_Register(),
1299                             T_LONG, src.first()->as_Register(), T_LONG);
1300     }
1301   }
1302 }
1303 
1304 
1305 //----------------------------------------------------------------------
// An int-like arg.
1307 //----------------------------------------------------------------------
1308 // On z/Architecture we will store integer like items to the stack as 64 bit
1309 // items, according to the z/Architecture ABI, even though Java would only store
1310 // 32 bits for a parameter.
1311 // We do sign extension for all base types. That is ok since the only
1312 // unsigned base type is T_CHAR, and T_CHAR uses only 16 bits of an int.
1313 // Sign extension 32->64 bit will thus not affect the value.
1314 //----------------------------------------------------------------------
1315 static void move32_64(MacroAssembler *masm,
1316                       VMRegPair src,
1317                       VMRegPair dst,
1318                       int framesize_in_slots) {
1319   int frame_offset = framesize_in_slots * VMRegImpl::stack_slot_size;
1320 
1321   if (src.first()->is_stack()) {
1322     Address memaddr(Z_SP, reg2offset(src.first()) + frame_offset);
1323     if (dst.first()->is_stack()) {
1324       // stack -> stack. MVC not possible due to sign extension.
1325       Address firstaddr(Z_SP, reg2offset(dst.first()));
1326       __ mem2reg_signed_opt(Z_R0_scratch, memaddr);
1327       __ reg2mem_opt(Z_R0_scratch, firstaddr);
1328     } else {
1329       // stack -> reg, sign extended
1330       __ mem2reg_signed_opt(dst.first()->as_Register(), memaddr);
1331     }
1332   } else {
1333     if (dst.first()->is_stack()) {
1334       // reg -> stack, sign extended
1335       Address firstaddr(Z_SP, reg2offset(dst.first()));
1336       __ z_lgfr(src.first()->as_Register(), src.first()->as_Register());
1337       __ reg2mem_opt(src.first()->as_Register(), firstaddr);
1338     } else {
1339       // reg -> reg, sign extended
1340       __ z_lgfr(dst.first()->as_Register(), src.first()->as_Register());
1341     }
1342   }
1343 }
1344 
1345 //----------------------------------------------------------------------
1346 // Wrap a JNI call.
1347 //----------------------------------------------------------------------
1348 #undef USE_RESIZE_FRAME
1349 nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
1350                                                 const methodHandle& method,
1351                                                 int compile_id,
1352                                                 BasicType *in_sig_bt,
1353                                                 VMRegPair *in_regs,
1354                                                 BasicType ret_type) {
1355   int total_in_args = method->size_of_parameters();
1356   if (method->is_method_handle_intrinsic()) {
1357     vmIntrinsics::ID iid = method->intrinsic_id();
1358     intptr_t start = (intptr_t) __ pc();
1359     int vep_offset = ((intptr_t) __ pc()) - start;
1360 
1361     gen_special_dispatch(masm, total_in_args,
1362                          method->intrinsic_id(), in_sig_bt, in_regs);
1363 
1364     int frame_complete = ((intptr_t)__ pc()) - start; // Not complete, period.
1365 
1366     __ flush();
1367 
1368     int stack_slots = SharedRuntime::out_preserve_stack_slots();  // No out slots at all, actually.
1369 
1370     return nmethod::new_native_nmethod(method,
1371                                        compile_id,
1372                                        masm->code(),
1373                                        vep_offset,
1374                                        frame_complete,
1375                                        stack_slots / VMRegImpl::slots_per_word,
1376                                        in_ByteSize(-1),
1377                                        in_ByteSize(-1),
1378                                        (OopMapSet *) nullptr);
1379   }
1380 
1381 
1382   ///////////////////////////////////////////////////////////////////////
1383   //
1384   //  Precalculations before generating any code
1385   //
1386   ///////////////////////////////////////////////////////////////////////
1387 
1388   address native_func = method->native_function();
1389   assert(native_func != nullptr, "must have function");
1390 
1391   //---------------------------------------------------------------------
1392   // We have received a description of where all the java args are located
1393   // on entry to the wrapper. We need to convert these args to where
1394   // the jni function will expect them. To figure out where they go
1395   // we convert the java signature to a C signature by inserting
1396   // the hidden arguments as arg[0] and possibly arg[1] (static method).
1397   //
1398   // The first hidden argument arg[0] is a pointer to the JNI environment.
1399   // It is generated for every call.
1400   // The second argument arg[1] to the JNI call, which is hidden for static
1401   // methods, is the boxed lock object. For static calls, the lock object
1402   // is the static method itself. The oop is constructed here. for instance
1403   // calls, the lock is performed on the object itself, the pointer of
1404   // which is passed as the first visible argument.
1405   //---------------------------------------------------------------------
1406 
1407   // Additionally, on z/Architecture we must convert integers
1408   // to longs in the C signature. We do this in advance in order to have
1409   // no trouble with indexes into the bt-arrays.
1410   // So convert the signature and registers now, and adjust the total number
1411   // of in-arguments accordingly.
1412   bool method_is_static = method->is_static();
1413   int  total_c_args     = total_in_args + (method_is_static ? 2 : 1);
1414 
1415   BasicType *out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
1416   VMRegPair *out_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);
1417 
1418   // Create the signature for the C call:
1419   //   1) add the JNIEnv*
1420   //   2) add the class if the method is static
1421   //   3) copy the rest of the incoming signature (shifted by the number of
1422   //      hidden arguments)
1423 
1424   int argc = 0;
1425   out_sig_bt[argc++] = T_ADDRESS;
1426   if (method->is_static()) {
1427     out_sig_bt[argc++] = T_OBJECT;
1428   }
1429 
1430   for (int i = 0; i < total_in_args; i++) {
1431     out_sig_bt[argc++] = in_sig_bt[i];
1432   }
1433 
1434   ///////////////////////////////////////////////////////////////////////
1435   // Now figure out where the args must be stored and how much stack space
1436   // they require (neglecting out_preserve_stack_slots but providing space
1437   // for storing the first five register arguments).
1438   // It's weird, see int_stk_helper.
1439   ///////////////////////////////////////////////////////////////////////
1440 
1441   //---------------------------------------------------------------------
1442   // Compute framesize for the wrapper.
1443   //
1444   // - We need to handlize all oops passed in registers.
1445   // - We must create space for them here that is disjoint from the save area.
1446   // - We always just allocate 5 words for storing down these object.
1447   //   This allows us to simply record the base and use the Ireg number to
1448   //   decide which slot to use.
1449   // - Note that the reg number used to index the stack slot is the inbound
1450   //   number, not the outbound number.
1451   // - We must shuffle args to match the native convention,
1452   //   and to include var-args space.
1453   //---------------------------------------------------------------------
1454 
1455   //---------------------------------------------------------------------
1456   // Calculate the total number of stack slots we will need:
1457   // - 1) abi requirements
1458   // - 2) outgoing args
1459   // - 3) space for inbound oop handle area
1460   // - 4) space for handlizing a klass if static method
1461   // - 5) space for a lock if synchronized method
1462   // - 6) workspace (save rtn value, int<->float reg moves, ...)
1463   // - 7) filler slots for alignment
1464   //---------------------------------------------------------------------
1465   // Here is how the space we have allocated will look like.
1466   // Since we use resize_frame, we do not create a new stack frame,
1467   // but just extend the one we got with our own data area.
1468   //
1469   // If an offset or pointer name points to a separator line, it is
1470   // assumed that addressing with offset 0 selects storage starting
1471   // at the first byte above the separator line.
1472   //
1473   //
1474   //     ...                   ...
1475   //      | caller's frame      |
1476   // FP-> |---------------------|
1477   //      | filler slots, if any|
1478   //     7| #slots == mult of 2 |
1479   //      |---------------------|
1480   //      | work space          |
1481   //     6| 2 slots = 8 bytes   |
1482   //      |---------------------|
1483   //     5| lock box (if sync)  |
1484   //      |---------------------| <- lock_slot_offset
1485   //     4| klass (if static)   |
1486   //      |---------------------| <- klass_slot_offset
1487   //     3| oopHandle area      |
1488   //      |                     |
1489   //      |                     |
1490   //      |---------------------| <- oop_handle_offset
1491   //     2| outbound memory     |
1492   //     ...                   ...
1493   //      | based arguments     |
1494   //      |---------------------|
1495   //      | vararg              |
1496   //     ...                   ...
1497   //      | area                |
1498   //      |---------------------| <- out_arg_slot_offset
1499   //     1| out_preserved_slots |
1500   //     ...                   ...
1501   //      | (z_abi spec)        |
1502   // SP-> |---------------------| <- FP_slot_offset (back chain)
1503   //     ...                   ...
1504   //
1505   //---------------------------------------------------------------------
1506 
1507   // *_slot_offset indicates offset from SP in #stack slots
1508   // *_offset      indicates offset from SP in #bytes
1509 
1510   int stack_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args) + // 1+2
1511                     SharedRuntime::out_preserve_stack_slots(); // see c_calling_convention
1512 
1513   // Now the space for the inbound oop handle area.
1514   int total_save_slots = Register::number_of_arg_registers * VMRegImpl::slots_per_word;
1515 
1516   int oop_handle_slot_offset = stack_slots;
1517   stack_slots += total_save_slots;                                        // 3)
1518 
1519   int klass_slot_offset = 0;
1520   int klass_offset      = -1;
1521   if (method_is_static) {                                                 // 4)
1522     klass_slot_offset  = stack_slots;
1523     klass_offset       = klass_slot_offset * VMRegImpl::stack_slot_size;
1524     stack_slots       += VMRegImpl::slots_per_word;
1525   }
1526 
1527   int lock_slot_offset = 0;
1528   int lock_offset      = -1;
1529   if (method->is_synchronized()) {                                        // 5)
1530     lock_slot_offset   = stack_slots;
1531     lock_offset        = lock_slot_offset * VMRegImpl::stack_slot_size;
1532     stack_slots       += VMRegImpl::slots_per_word;
1533   }
1534 
1535   int workspace_slot_offset= stack_slots;                                 // 6)
1536   stack_slots         += 2;
1537 
1538   // Now compute actual number of stack words we need.
1539   // Round to align stack properly.
1540   stack_slots = align_up(stack_slots,                                     // 7)
1541                          frame::alignment_in_bytes / VMRegImpl::stack_slot_size);
1542   int frame_size_in_bytes = stack_slots * VMRegImpl::stack_slot_size;
1543 
1544 
1545   ///////////////////////////////////////////////////////////////////////
1546   // Now we can start generating code
1547   ///////////////////////////////////////////////////////////////////////
1548 
1549   unsigned int wrapper_CodeStart  = __ offset();
1550   unsigned int wrapper_UEPStart;
1551   unsigned int wrapper_VEPStart;
1552   unsigned int wrapper_FrameDone;
1553   unsigned int wrapper_CRegsSet;
1554   Label     handle_pending_exception;
1555 
1556   //---------------------------------------------------------------------
1557   // Unverified entry point (UEP)
1558   //---------------------------------------------------------------------
1559 
1560   // check ic: object class <-> cached class
1561   if (!method_is_static) {
1562     wrapper_UEPStart = __ ic_check(CodeEntryAlignment /* end_alignment */);
1563   }
1564 
1565   //---------------------------------------------------------------------
1566   // Verified entry point (VEP)
1567   //---------------------------------------------------------------------
1568   wrapper_VEPStart = __ offset();
1569 
1570   if (method->needs_clinit_barrier()) {
1571     assert(VM_Version::supports_fast_class_init_checks(), "sanity");
1572     Label L_skip_barrier;
1573     Register klass = Z_R1_scratch;
1574     // Notify OOP recorder (don't need the relocation)
1575     AddressLiteral md = __ constant_metadata_address(method->method_holder());
1576     __ load_const_optimized(klass, md.value());
1577     __ clinit_barrier(klass, Z_thread, &L_skip_barrier /*L_fast_path*/);
1578 
1579     __ load_const_optimized(klass, SharedRuntime::get_handle_wrong_method_stub());
1580     __ z_br(klass);
1581 
1582     __ bind(L_skip_barrier);
1583   }
1584 
1585   __ save_return_pc();
1586   __ generate_stack_overflow_check(frame_size_in_bytes);  // Check before creating frame.
1587 #ifndef USE_RESIZE_FRAME
1588   __ push_frame(frame_size_in_bytes);                     // Create a new frame for the wrapper.
1589 #else
1590   __ resize_frame(-frame_size_in_bytes, Z_R0_scratch);    // No new frame for the wrapper.
1591                                                           // Just resize the existing one.
1592 #endif
1593 
1594   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
1595   bs->nmethod_entry_barrier(masm);
1596 
1597   wrapper_FrameDone = __ offset();
1598 
1599   // Native nmethod wrappers never take possession of the oop arguments.
1600   // So the caller will gc the arguments.
1601   // The only thing we need an oopMap for is if the call is static.
1602   //
1603   // An OopMap for lock (and class if static), and one for the VM call itself
1604   OopMapSet  *oop_maps        = new OopMapSet();
1605   OopMap     *map             = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
1606 
1607   //////////////////////////////////////////////////////////////////////
1608   //
1609   // The Grand Shuffle
1610   //
1611   //////////////////////////////////////////////////////////////////////
1612   //
1613   // We immediately shuffle the arguments so that for any vm call we have
1614   // to make from here on out (sync slow path, jvmti, etc.) we will have
1615   // captured the oops from our caller and have a valid oopMap for them.
1616   //
1617   //--------------------------------------------------------------------
1618   // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv*
1619   // (derived from JavaThread* which is in Z_thread) and, if static,
1620   // the class mirror instead of a receiver. This pretty much guarantees that
1621   // register layout will not match. We ignore these extra arguments during
1622   // the shuffle. The shuffle is described by the two calling convention
1623   // vectors we have in our possession. We simply walk the java vector to
1624   // get the source locations and the c vector to get the destinations.
1625   //
1626   // This is a trick. We double the stack slots so we can claim
1627   // the oops in the caller's frame. Since we are sure to have
1628   // more args than the caller, doubling is enough to make
1629   // sure we can capture all the incoming oop args from the caller.
1630   //--------------------------------------------------------------------
1631 
1632   // Record sp-based slot for receiver on stack for non-static methods.
1633   int receiver_offset = -1;
1634 
1635   //--------------------------------------------------------------------
1636   // We move the arguments backwards because the floating point registers
1637   // destination will always be to a register with a greater or equal
1638   // register number or the stack.
1639   //   jix is the index of the incoming Java arguments.
1640   //   cix is the index of the outgoing C arguments.
1641   //--------------------------------------------------------------------
1642 
1643 #ifdef ASSERT
1644   bool reg_destroyed[Register::number_of_registers];
1645   bool freg_destroyed[FloatRegister::number_of_registers];
1646   for (int r = 0; r < Register::number_of_registers; r++) {
1647     reg_destroyed[r] = false;
1648   }
1649   for (int f = 0; f < FloatRegister::number_of_registers; f++) {
1650     freg_destroyed[f] = false;
1651   }
1652 #endif // ASSERT
1653 
1654   for (int jix = total_in_args - 1, cix = total_c_args - 1; jix >= 0; jix--, cix--) {
1655 #ifdef ASSERT
1656     if (in_regs[jix].first()->is_Register()) {
1657       assert(!reg_destroyed[in_regs[jix].first()->as_Register()->encoding()], "ack!");
1658     } else {
1659       if (in_regs[jix].first()->is_FloatRegister()) {
1660         assert(!freg_destroyed[in_regs[jix].first()->as_FloatRegister()->encoding()], "ack!");
1661       }
1662     }
1663     if (out_regs[cix].first()->is_Register()) {
1664       reg_destroyed[out_regs[cix].first()->as_Register()->encoding()] = true;
1665     } else {
1666       if (out_regs[cix].first()->is_FloatRegister()) {
1667         freg_destroyed[out_regs[cix].first()->as_FloatRegister()->encoding()] = true;
1668       }
1669     }
1670 #endif // ASSERT
1671 
1672     switch (in_sig_bt[jix]) {
1673       // Due to casting, small integers should only occur in pairs with type T_LONG.
1674       case T_BOOLEAN:
1675       case T_CHAR:
1676       case T_BYTE:
1677       case T_SHORT:
1678       case T_INT:
1679         // Move int and do sign extension.
1680         move32_64(masm, in_regs[jix], out_regs[cix], stack_slots);
1681         break;
1682 
1683       case T_LONG :
1684         long_move(masm, in_regs[jix], out_regs[cix], stack_slots);
1685         break;
1686 
1687       case T_ARRAY:
1688       case T_OBJECT:
1689         object_move(masm, map, oop_handle_slot_offset, stack_slots, in_regs[jix], out_regs[cix],
1690                     ((jix == 0) && (!method_is_static)),
1691                     &receiver_offset);
1692         break;
1693       case T_VOID:
1694         break;
1695 
1696       case T_FLOAT:
1697         float_move(masm, in_regs[jix], out_regs[cix], stack_slots, workspace_slot_offset);
1698         break;
1699 
1700       case T_DOUBLE:
1701         assert(jix+1 <  total_in_args && in_sig_bt[jix+1]  == T_VOID && out_sig_bt[cix+1] == T_VOID, "bad arg list");
1702         double_move(masm, in_regs[jix], out_regs[cix], stack_slots, workspace_slot_offset);
1703         break;
1704 
1705       case T_ADDRESS:
1706         assert(false, "found T_ADDRESS in java args");
1707         break;
1708 
1709       default:
1710         ShouldNotReachHere();
1711     }
1712   }
1713 
1714   //--------------------------------------------------------------------
1715   // Pre-load a static method's oop into ARG2.
1716   // Used both by locking code and the normal JNI call code.
1717   //--------------------------------------------------------------------
1718   if (method_is_static) {
1719     __ set_oop_constant(JNIHandles::make_local(method->method_holder()->java_mirror()), Z_ARG2);
1720 
1721     // Now handlize the static class mirror in ARG2. It's known not-null.
1722     __ z_stg(Z_ARG2, klass_offset, Z_SP);
1723     map->set_oop(VMRegImpl::stack2reg(klass_slot_offset));
1724     __ add2reg(Z_ARG2, klass_offset, Z_SP);
1725   }
1726 
1727   // Get JNIEnv* which is first argument to native.
1728   __ add2reg(Z_ARG1, in_bytes(JavaThread::jni_environment_offset()), Z_thread);
1729 
1730   //////////////////////////////////////////////////////////////////////
1731   // We have all of the arguments setup at this point.
1732   // We MUST NOT touch any outgoing regs from this point on.
1733   // So if we must call out we must push a new frame.
1734   //////////////////////////////////////////////////////////////////////
1735 
1736 
1737   // Calc the current pc into Z_R10 and into wrapper_CRegsSet.
1738   // Both values represent the same position.
1739   __ get_PC(Z_R10);                // PC into register
1740   wrapper_CRegsSet = __ offset();  // and into variable.
1741 
1742   // Z_R10 now has the pc loaded that we will use when we finally call to native.
1743 
1744   // We use the same pc/oopMap repeatedly when we call out.
1745   oop_maps->add_gc_map((int)(wrapper_CRegsSet-wrapper_CodeStart), map);
1746 
1747   // Lock a synchronized method.
1748 
1749   if (method->is_synchronized()) {
1750 
1751     // ATTENTION: args and Z_R10 must be preserved.
1752     Register r_oop  = Z_R11;
1753     Register r_box  = Z_R12;
1754     Register r_tmp1 = Z_R13;
1755     Register r_tmp2 = Z_R7;
1756     Label done;
1757 
1758     // Load the oop for the object or class. Z_ARG2 contains
1759     // either the handlized oop from the incoming arguments or the handlized
1760     // class mirror (if the method is static).
1761     __ z_lg(r_oop, 0, Z_ARG2);
1762 
1763     lock_offset = (lock_slot_offset * VMRegImpl::stack_slot_size);
1764     // Get the lock box slot's address.
1765     __ add2reg(r_box, lock_offset, Z_SP);
1766 
1767     // Try fastpath for locking.
1768     // Fast_lock kills r_tmp1, r_tmp2.
1769     __ compiler_fast_lock_object(r_oop, r_box, r_tmp1, r_tmp2);
1770     __ z_bre(done);
1771 
1772     //-------------------------------------------------------------------------
1773     // None of the above fast optimizations worked so we have to get into the
1774     // slow case of monitor enter. Inline a special case of call_VM that
1775     // disallows any pending_exception.
1776     //-------------------------------------------------------------------------
1777 
1778     Register oldSP = Z_R11;
1779 
1780     __ z_lgr(oldSP, Z_SP);
1781 
1782     RegisterSaver::save_live_registers(masm, RegisterSaver::arg_registers);
1783 
1784     // Prepare arguments for call.
1785     __ z_lg(Z_ARG1, 0, Z_ARG2); // Unboxed class mirror or unboxed object.
1786     __ add2reg(Z_ARG2, lock_offset, oldSP);
1787     __ z_lgr(Z_ARG3, Z_thread);
1788 
1789     __ set_last_Java_frame(oldSP, Z_R10 /* gc map pc */);
1790 
1791     // Do the call.
1792     __ load_const_optimized(Z_R1_scratch, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C));
1793     __ call(Z_R1_scratch);
1794 
1795     __ reset_last_Java_frame();
1796 
1797     RegisterSaver::restore_live_registers(masm, RegisterSaver::arg_registers);
1798 #ifdef ASSERT
1799     { Label L;
1800       __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset()));
1801       __ z_bre(L);
1802       __ stop("no pending exception allowed on exit from IR::monitorenter");
1803       __ bind(L);
1804     }
1805 #endif
1806     __ bind(done);
1807   } // lock for synchronized methods
1808 
1809 
1810   //////////////////////////////////////////////////////////////////////
1811   // Finally just about ready to make the JNI call.
1812   //////////////////////////////////////////////////////////////////////
1813 
1814   // Use that pc we placed in Z_R10 a while back as the current frame anchor.
1815   __ set_last_Java_frame(Z_SP, Z_R10);
1816 
1817   // Transition from _thread_in_Java to _thread_in_native.
1818   __ set_thread_state(_thread_in_native);
1819 
1820   //////////////////////////////////////////////////////////////////////
1821   // This is the JNI call.
1822   //////////////////////////////////////////////////////////////////////
1823 
1824   __ call_c(native_func);
1825 
1826 
1827   //////////////////////////////////////////////////////////////////////
1828   // We have survived the call once we reach here.
1829   //////////////////////////////////////////////////////////////////////
1830 
1831 
1832   //--------------------------------------------------------------------
1833   // Unpack native results.
1834   //--------------------------------------------------------------------
1835   // For int-types, we do any needed sign-extension required.
1836   // Care must be taken that the return value (in Z_ARG1 = Z_RET = Z_R2
1837   // or in Z_FARG0 = Z_FRET = Z_F0) will survive any VM calls for
1838   // blocking or unlocking.
1839   // An OOP result (handle) is done specially in the slow-path code.
1840   //--------------------------------------------------------------------
1841   switch (ret_type) {
1842     case T_VOID:    break;         // Nothing to do!
1843     case T_FLOAT:   break;         // Got it where we want it (unless slow-path)
1844     case T_DOUBLE:  break;         // Got it where we want it (unless slow-path)
1845     case T_LONG:    break;         // Got it where we want it (unless slow-path)
1846     case T_OBJECT:  break;         // Really a handle.
1847                                    // Cannot de-handlize until after reclaiming jvm_lock.
1848     case T_ARRAY:   break;
1849 
1850     case T_BOOLEAN:                // 0 -> false(0); !0 -> true(1)
1851       __ z_lngfr(Z_RET, Z_RET);    // Force sign bit on except for zero.
1852       __ z_srlg(Z_RET, Z_RET, 63); // Shift sign bit into least significant pos.
1853       break;
1854     case T_BYTE:    __ z_lgbr(Z_RET, Z_RET);  break; // sign extension
1855     case T_CHAR:    __ z_llghr(Z_RET, Z_RET); break; // unsigned result
1856     case T_SHORT:   __ z_lghr(Z_RET, Z_RET);  break; // sign extension
1857     case T_INT:     __ z_lgfr(Z_RET, Z_RET);  break; // sign-extend for beauty.
1858 
1859     default:
1860       ShouldNotReachHere();
1861       break;
1862   }
1863 
1864   // Switch thread to "native transition" state before reading the synchronization state.
1865   // This additional state is necessary because reading and testing the synchronization
1866   // state is not atomic w.r.t. GC, as this scenario demonstrates:
1867   //   - Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
1868   //   - VM thread changes sync state to synchronizing and suspends threads for GC.
1869   //   - Thread A is resumed to finish this native method, but doesn't block here since it
1870   //     didn't see any synchronization in progress, and escapes.
1871 
1872   // Transition from _thread_in_native to _thread_in_native_trans.
1873   __ set_thread_state(_thread_in_native_trans);
1874 
1875   // Safepoint synchronization
1876   //--------------------------------------------------------------------
1877   // Must we block?
1878   //--------------------------------------------------------------------
1879   // Block, if necessary, before resuming in _thread_in_Java state.
1880   // In order for GC to work, don't clear the last_Java_sp until after blocking.
1881   //--------------------------------------------------------------------
1882   {
1883     Label no_block, sync;
1884 
1885     save_native_result(masm, ret_type, workspace_slot_offset); // Make Z_R2 available as work reg.
1886 
1887     // Force this write out before the read below.
1888     if (!UseSystemMemoryBarrier) {
1889       __ z_fence();
1890     }
1891 
1892     __ safepoint_poll(sync, Z_R1);
1893 
1894     __ load_and_test_int(Z_R0, Address(Z_thread, JavaThread::suspend_flags_offset()));
1895     __ z_bre(no_block);
1896 
1897     // Block. Save any potential method result value before the operation and
1898     // use a leaf call to leave the last_Java_frame setup undisturbed. Doing this
1899     // lets us share the oopMap we used when we went native rather than create
1900     // a distinct one for this pc.
1901     //
1902     __ bind(sync);
1903     __ z_acquire();
1904 
1905     address entry_point = CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans);
1906 
1907     __ call_VM_leaf(entry_point, Z_thread);
1908 
1909     __ bind(no_block);
1910     restore_native_result(masm, ret_type, workspace_slot_offset);
1911   }
1912 
1913   //--------------------------------------------------------------------
1914   // Thread state is thread_in_native_trans. Any safepoint blocking has
1915   // already happened so we can now change state to _thread_in_Java.
1916   //--------------------------------------------------------------------
1917   // Transition from _thread_in_native_trans to _thread_in_Java.
1918   __ set_thread_state(_thread_in_Java);
1919 
1920   //--------------------------------------------------------------------
1921   // Reguard any pages if necessary.
1922   // Protect native result from being destroyed.
1923   //--------------------------------------------------------------------
1924 
1925   Label no_reguard;
1926 
1927   __ z_cli(Address(Z_thread, JavaThread::stack_guard_state_offset() + in_ByteSize(sizeof(StackOverflow::StackGuardState) - 1)),
1928            StackOverflow::stack_guard_yellow_reserved_disabled);
1929 
1930   __ z_bre(no_reguard);
1931 
1932   save_native_result(masm, ret_type, workspace_slot_offset);
1933   __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), Z_method);
1934   restore_native_result(masm, ret_type, workspace_slot_offset);
1935 
1936   __ bind(no_reguard);
1937 
1938 
1939   // Synchronized methods (slow path only)
1940   // No pending exceptions for now.
1941   //--------------------------------------------------------------------
1942   // Handle possibly pending exception (will unlock if necessary).
1943   // Native result is, if any is live, in Z_FRET or Z_RET.
1944   //--------------------------------------------------------------------
1945   // Unlock
1946   //--------------------------------------------------------------------
1947   if (method->is_synchronized()) {
1948     const Register r_oop        = Z_R11;
1949     const Register r_box        = Z_R12;
1950     const Register r_tmp1       = Z_R13;
1951     const Register r_tmp2       = Z_R7;
1952     Label done;
1953 
1954     // Get unboxed oop of class mirror or object ...
1955     int   offset = method_is_static ? klass_offset : receiver_offset;
1956 
1957     assert(offset != -1, "");
1958     __ z_lg(r_oop, offset, Z_SP);
1959 
1960     // ... and address of lock object box.
1961     __ add2reg(r_box, lock_offset, Z_SP);
1962 
1963     // Try fastpath for unlocking.
1964     // Fast_unlock kills r_tmp1, r_tmp2.
1965     __ compiler_fast_unlock_object(r_oop, r_box, r_tmp1, r_tmp2);
1966     __ z_bre(done);
1967 
1968     // Slow path for unlocking.
1969     // Save and restore any potential method result value around the unlocking operation.
1970     const Register R_exc = Z_R11;
1971 
1972     save_native_result(masm, ret_type, workspace_slot_offset);
1973 
1974     // Must save pending exception around the slow-path VM call. Since it's a
1975     // leaf call, the pending exception (if any) can be kept in a register.
1976     __ z_lg(R_exc, Address(Z_thread, Thread::pending_exception_offset()));
1977     assert(R_exc->is_nonvolatile(), "exception register must be non-volatile");
1978 
1979     // Must clear pending-exception before re-entering the VM. Since this is
1980     // a leaf call, pending-exception-oop can be safely kept in a register.
1981     __ clear_mem(Address(Z_thread, Thread::pending_exception_offset()), sizeof(intptr_t));
1982 
1983     // Inline a special case of call_VM that disallows any pending_exception.
1984 
1985     // Get locked oop from the handle we passed to jni.
1986     __ z_lg(Z_ARG1, offset, Z_SP);
1987     __ add2reg(Z_ARG2, lock_offset, Z_SP);
1988     __ z_lgr(Z_ARG3, Z_thread);
1989 
1990     __ load_const_optimized(Z_R1_scratch, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C));
1991 
1992     __ call(Z_R1_scratch);
1993 
1994 #ifdef ASSERT
1995     {
1996       Label L;
1997       __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset()));
1998       __ z_bre(L);
1999       __ stop("no pending exception allowed on exit from IR::monitorexit");
2000       __ bind(L);
2001     }
2002 #endif
2003 
2004     // Check_forward_pending_exception jumps to forward_exception if any pending
2005     // exception is set. The forward_exception routine expects to see the
2006     // exception in pending_exception and not in a register. Kind of clumsy,
2007     // since all folks who branch to forward_exception must have tested
2008     // pending_exception first and hence have it in a register already.
2009     __ z_stg(R_exc, Address(Z_thread, Thread::pending_exception_offset()));
2010     restore_native_result(masm, ret_type, workspace_slot_offset);
2011     __ z_bru(done);
2012     __ z_illtrap(0x66);
2013 
2014     __ bind(done);
2015   }
2016 
2017 
2018   //--------------------------------------------------------------------
2019   // Clear "last Java frame" SP and PC.
2020   //--------------------------------------------------------------------
2021 
2022   __ reset_last_Java_frame();
2023 
2024   // Unpack oop result, e.g. JNIHandles::resolve result.
2025   if (is_reference_type(ret_type)) {
2026     __ resolve_jobject(Z_RET, /* tmp1 */ Z_R13, /* tmp2 */ Z_R7);
2027   }
2028 
2029   if (CheckJNICalls) {
2030     // clear_pending_jni_exception_check
2031     __ clear_mem(Address(Z_thread, JavaThread::pending_jni_exception_check_fn_offset()), sizeof(oop));
2032   }
2033 
2034   // Reset handle block.
2035   __ z_lg(Z_R1_scratch, Address(Z_thread, JavaThread::active_handles_offset()));
2036   __ clear_mem(Address(Z_R1_scratch, JNIHandleBlock::top_offset()), 4);
2037 
2038   // Check for pending exceptions.
2039   __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset()));
2040   __ z_brne(handle_pending_exception);
2041 
2042 
2043   //////////////////////////////////////////////////////////////////////
2044   // Return
2045   //////////////////////////////////////////////////////////////////////
2046 
2047 
2048 #ifndef USE_RESIZE_FRAME
2049   __ pop_frame();                     // Pop wrapper frame.
2050 #else
2051   __ resize_frame(frame_size_in_bytes, Z_R0_scratch);  // Revert stack extension.
2052 #endif
2053   __ restore_return_pc();             // This is the way back to the caller.
2054   __ z_br(Z_R14);
2055 
2056 
2057   //////////////////////////////////////////////////////////////////////
2058   // Out-of-line calls to the runtime.
2059   //////////////////////////////////////////////////////////////////////
2060 
2061 
2062   //---------------------------------------------------------------------
2063   // Handler for pending exceptions (out-of-line).
2064   //---------------------------------------------------------------------
2065   // Since this is a native call, we know the proper exception handler
2066   // is the empty function. We just pop this frame and then jump to
2067   // forward_exception_entry. Z_R14 will contain the native caller's
2068   // return PC.
2069   __ bind(handle_pending_exception);
2070   __ pop_frame();
2071   __ load_const_optimized(Z_R1_scratch, StubRoutines::forward_exception_entry());
2072   __ restore_return_pc();
2073   __ z_br(Z_R1_scratch);
2074 
2075   __ flush();
2076   //////////////////////////////////////////////////////////////////////
2077   // end of code generation
2078   //////////////////////////////////////////////////////////////////////
2079 
2080 
2081   nmethod *nm = nmethod::new_native_nmethod(method,
2082                                             compile_id,
2083                                             masm->code(),
2084                                             (int)(wrapper_VEPStart-wrapper_CodeStart),
2085                                             (int)(wrapper_FrameDone-wrapper_CodeStart),
2086                                             stack_slots / VMRegImpl::slots_per_word,
2087                                             (method_is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
2088                                             in_ByteSize(lock_offset),
2089                                             oop_maps);
2090 
2091   return nm;
2092 }
2093 
2094 static address gen_c2i_adapter(MacroAssembler  *masm,
2095                                int comp_args_on_stack,
2096                                const GrowableArray<SigEntry>* sig,
2097                                const VMRegPair *regs,
2098                                Label &skip_fixup) {
2099   // Before we get into the guts of the C2I adapter, see if we should be here
2100   // at all. We've come from compiled code and are attempting to jump to the
2101   // interpreter, which means the caller made a static call to get here
2102   // (vcalls always get a compiled target if there is one). Check for a
2103   // compiled target. If there is one, we need to patch the caller's call.
2104 
2105   // These two defs MUST MATCH code in gen_i2c2i_adapter!
2106   const Register ientry = Z_R11;
2107   const Register code   = Z_R11;
2108 
2109   address c2i_entrypoint;
2110   Label   patch_callsite;
2111 
2112   // Regular (verified) c2i entry point.
2113   c2i_entrypoint = __ pc();
2114 
2115   // Call patching needed?
2116   __ load_and_test_long(Z_R0_scratch, method_(code));
2117   __ z_lg(ientry, method_(interpreter_entry));  // Preload interpreter entry (also if patching).
2118   __ z_brne(patch_callsite);                    // Patch required if code isn't null (compiled target exists).
2119 
2120   __ bind(skip_fixup);  // Return point from patch_callsite.
2121 
2122   // Since all args are passed on the stack, total_args_passed*wordSize is the
2123   // space we need. We need ABI scratch area but we use the caller's since
2124   // it has already been allocated.
2125   int       total_args_passed = sig->length();
2126   const int abi_scratch = frame::z_top_ijava_frame_abi_size;
2127   int       extraspace  = align_up(total_args_passed, 2)*wordSize + abi_scratch;
2128   Register  sender_SP   = Z_R10;
2129   Register  value       = Z_R12;
2130 
2131   // Remember the senderSP so we can pop the interpreter arguments off of the stack.
2132   // In addition, template interpreter expects initial_caller_sp in Z_R10.
2133   __ z_lgr(sender_SP, Z_SP);
2134 
2135   // This should always fit in 14 bit immediate.
2136   __ resize_frame(-extraspace, Z_R0_scratch);
2137 
2138   // We use the caller's ABI scratch area (out_preserved_stack_slots) for the initial
2139   // args. This essentially moves the callers ABI scratch area from the top to the
2140   // bottom of the arg area.
2141 
2142   int st_off =  extraspace - wordSize;
2143 
2144   // Now write the args into the outgoing interpreter space.
2145   for (int i = 0; i < total_args_passed; i++) {
2146     BasicType bt = sig->at(i)._bt;
2147 
2148     VMReg r_1 = regs[i].first();
2149     VMReg r_2 = regs[i].second();
2150     if (!r_1->is_valid()) {
2151       assert(!r_2->is_valid(), "");
2152       continue;
2153     }
2154     if (r_1->is_stack()) {
2155       // The calling convention produces OptoRegs that ignore the preserve area (abi scratch).
2156       // We must account for it here.
2157       int ld_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
2158 
2159       if (!r_2->is_valid()) {
2160         __ z_mvc(Address(Z_SP, st_off), Address(sender_SP, ld_off), sizeof(void*));
2161       } else {
2162         // longs are given 2 64-bit slots in the interpreter,
2163         // but the data is passed in only 1 slot.
2164         if (bt == T_LONG || bt == T_DOUBLE) {
2165 #ifdef ASSERT
2166           __ clear_mem(Address(Z_SP, st_off), sizeof(void *));
2167 #endif
2168           st_off -= wordSize;
2169         }
2170         __ z_mvc(Address(Z_SP, st_off), Address(sender_SP, ld_off), sizeof(void*));
2171       }
2172     } else {
2173       if (r_1->is_Register()) {
2174         if (!r_2->is_valid()) {
2175           __ z_st(r_1->as_Register(), st_off, Z_SP);
2176         } else {
2177           // longs are given 2 64-bit slots in the interpreter, but the
2178           // data is passed in only 1 slot.
2179           if (bt == T_LONG || bt == T_DOUBLE) {
2180 #ifdef ASSERT
2181             __ clear_mem(Address(Z_SP, st_off), sizeof(void *));
2182 #endif
2183             st_off -= wordSize;
2184           }
2185           __ z_stg(r_1->as_Register(), st_off, Z_SP);
2186         }
2187       } else {
2188         assert(r_1->is_FloatRegister(), "");
2189         if (!r_2->is_valid()) {
2190           __ z_ste(r_1->as_FloatRegister(), st_off, Z_SP);
2191         } else {
2192           // In 64bit, doubles are given 2 64-bit slots in the interpreter, but the
2193           // data is passed in only 1 slot.
2194           // One of these should get known junk...
2195 #ifdef ASSERT
2196           __ z_lzdr(Z_F1);
2197           __ z_std(Z_F1, st_off, Z_SP);
2198 #endif
2199           st_off-=wordSize;
2200           __ z_std(r_1->as_FloatRegister(), st_off, Z_SP);
2201         }
2202       }
2203     }
2204     st_off -= wordSize;
2205   }
2206 
2207 
2208   // Jump to the interpreter just as if interpreter was doing it.
2209   __ add2reg(Z_esp, st_off, Z_SP);
2210 
2211   // Frame_manager expects initial_caller_sp (= SP without resize by c2i) in Z_R10.
2212   __ z_br(ientry);
2213 
2214 
2215   // Prevent illegal entry to out-of-line code.
2216   __ z_illtrap(0x22);
2217 
2218   // Generate out-of-line runtime call to patch caller,
2219   // then continue as interpreted.
2220 
2221   // IF you lose the race you go interpreted.
2222   // We don't see any possible endless c2i -> i2c -> c2i ...
2223   // transitions no matter how rare.
2224   __ bind(patch_callsite);
2225 
2226   RegisterSaver::save_live_registers(masm, RegisterSaver::arg_registers);
2227   __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), Z_method, Z_R14);
2228   RegisterSaver::restore_live_registers(masm, RegisterSaver::arg_registers);
2229   __ z_bru(skip_fixup);
2230 
2231   // end of out-of-line code
2232 
2233   return c2i_entrypoint;
2234 }
2235 
// Emit the i2c (interpreted -> compiled) adapter.
//
// The generated code copies every argument from the interpreter's expression
// stack (addressed via Z_esp) to the location `regs` assigns to it - a float
// register, a general purpose register or an outgoing stack slot addressed
// via Z_SP. It then stores Z_method into thread->callee_target and branches
// to the address loaded from Method::from_compiled_offset().
//
// Parameters:
//   masm               - assembler receiving the generated code.
//   comp_args_on_stack - size of the stack-passed compiled arguments in
//                        VMRegImpl (4 byte) stack slots; if zero the frame
//                        is not resized.
//   sig                - signature entries. T_VOID entries (the second half
//                        of a T_LONG/T_DOUBLE) are skipped.
//   regs               - target location of each signature entry, indexed
//                        like `sig`.
//
// On entry, the following registers are set
//
//    Z_thread  r8  - JavaThread*
//    Z_method  r9  - callee's method (method to be invoked)
//    Z_esp     r7  - operand (or expression) stack pointer of caller. one slot above last arg.
//    Z_SP      r15 - SP prepared by call stub such that caller's outgoing args are near top
//
void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
                                    int comp_args_on_stack,
                                    const GrowableArray<SigEntry>* sig,
                                    const VMRegPair *regs) {
  const Register value = Z_R12; // NOTE(review): not referenced anywhere in this function.
  const Register ld_ptr= Z_esp; // Base register for all loads from the interpreter stack.
  int total_args_passed = sig->length();

  // Offset (relative to ld_ptr) of the interpreter slot of the next argument.
  // It starts at the first argument and is decremented by one word for every
  // interpreter slot consumed.
  int ld_offset = total_args_passed * wordSize;

  // Cut-out for having no stack args.
  if (comp_args_on_stack) {
    // Sig words on the stack are greater than VMRegImpl::stack0. Those in
    // registers are below. By subtracting stack0, we either get a negative
    // number (all values in registers) or the maximum stack slot accessed.
    // Convert VMRegImpl (4 byte) stack slots to words.
    int comp_words_on_stack = align_up(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord;
    // Round up to minimum stack alignment, in wordSize
    comp_words_on_stack = align_up(comp_words_on_stack, 2);

    // Grow the frame (negative offset) to make room for the stack-passed args.
    __ resize_frame(-comp_words_on_stack*wordSize, Z_R0_scratch);
  }

  // Now generate the shuffle code. Pick up all register args and move the
  // remaining (stack-passed) args with memory-to-memory moves (z_mvc).
  for (int i = 0; i < total_args_passed; i++) {
    BasicType bt = sig->at(i)._bt;
    if (bt == T_VOID) {
      // Second half of a long/double: carries no data of its own.
      assert(i > 0 && (sig->at(i - 1)._bt == T_LONG || sig->at(i - 1)._bt == T_DOUBLE), "missing half");
      continue;
    }

    // Pick up 0, 1 or 2 words from ld_ptr.
    assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
           "scrambled load targets?");
    VMReg r_1 = regs[i].first();
    VMReg r_2 = regs[i].second();
    if (!r_1->is_valid()) {
      // No target location assigned: nothing to move (ld_offset is left unchanged).
      assert(!r_2->is_valid(), "");
      continue;
    }
    if (r_1->is_FloatRegister()) {
      // Float register target. This branch maintains ld_offset itself.
      if (!r_2->is_valid()) {
        // Single-slot value: consumes one interpreter slot.
        __ z_le(r_1->as_FloatRegister(), ld_offset, ld_ptr);
        ld_offset-=wordSize;
      } else {
        // Skip the unused interpreter slot.
        __ z_ld(r_1->as_FloatRegister(), ld_offset - wordSize, ld_ptr);
        ld_offset -= 2 * wordSize;
      }
    } else {
      if (r_1->is_stack()) {
        // Must do a memory to memory move.
        // The target offset has to include the out-preserve area.
        int st_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;

        if (!r_2->is_valid()) {
          __ z_mvc(Address(Z_SP, st_off), Address(ld_ptr, ld_offset), sizeof(void*));
        } else {
          // In 64bit, longs are given 2 64-bit slots in the interpreter, but the
          // data is passed in only 1 slot.
          if (bt == T_LONG || bt == T_DOUBLE) {
            ld_offset -= wordSize;
          }
          __ z_mvc(Address(Z_SP, st_off), Address(ld_ptr, ld_offset), sizeof(void*));
        }
      } else {
        // General purpose register target.
        if (!r_2->is_valid()) {
          // Not sure we need to do this but it shouldn't hurt.
          if (is_reference_type(bt) || bt == T_ADDRESS) {
            __ z_lg(r_1->as_Register(), ld_offset, ld_ptr);
          } else {
            __ z_l(r_1->as_Register(), ld_offset, ld_ptr);
          }
        } else {
          // In 64bit, longs are given 2 64-bit slots in the interpreter, but the
          // data is passed in only 1 slot.
          if (bt == T_LONG || bt == T_DOUBLE) {
            ld_offset -= wordSize;
          }
          __ z_lg(r_1->as_Register(), ld_offset, ld_ptr);
        }
      }
      // Common decrement for stack and general purpose register targets.
      ld_offset -= wordSize;
    }
  }

  // Jump to the compiled code just as if compiled code was doing it.
  // load target address from method:
  __ z_lg(Z_R1_scratch, Address(Z_method, Method::from_compiled_offset()));

  // Store method into thread->callee_target.
  // 6243940: We might end up in handle_wrong_method if
  // the callee is deoptimized as we race thru here. If that
  // happens we don't want to take a safepoint because the
  // caller frame will look interpreted and arguments are now
  // "compiled" so it is much better to make this transition
  // invisible to the stack walking code. Unfortunately, if
  // we try and find the callee by normal means a safepoint
  // is possible. So we stash the desired callee in the thread
  // and the vm will find it there should this case occur.
  __ z_stg(Z_method, thread_(callee_target));

  __ z_br(Z_R1_scratch);
}
2347 
// Emit the complete set of adapter entry points for one signature and record
// their addresses in `entry_address`:
//   AdapterBlob::I2C                  - start of the i2c adapter,
//   AdapterBlob::C2I_Unverified       - c2i entry that performs the inline
//                                       cache check first,
//   AdapterBlob::C2I                  - c2i entry including the class
//                                       initialization barrier,
//   AdapterBlob::C2I_No_Clinit_Check  - c2i entry behind that barrier.
//
// Only masm, comp_args_on_stack, sig, regs and entry_address are used by this
// implementation; sig_cc, regs_cc, sig_cc_ro, regs_cc_ro, new_adapter and
// allocate_code_blob are not referenced in the body.
void SharedRuntime::generate_i2c2i_adapters(MacroAssembler* masm,
                                            int comp_args_on_stack,
                                            const GrowableArray<SigEntry>* sig,
                                            const VMRegPair* regs,
                                            const GrowableArray<SigEntry>* sig_cc,
                                            const VMRegPair* regs_cc,
                                            const GrowableArray<SigEntry>* sig_cc_ro,
                                            const VMRegPair* regs_cc_ro,
                                            address entry_address[AdapterBlob::ENTRY_COUNT],
                                            AdapterBlob*& new_adapter,
                                            bool allocate_code_blob) {
  __ align(CodeEntryAlignment);
  entry_address[AdapterBlob::I2C] = __ pc();
  gen_i2c_adapter(masm, comp_args_on_stack, sig, regs);

  // Bound inside gen_c2i_adapter; passed through so the c2i code can be
  // entered behind its call site fixup.
  Label skip_fixup;
  {
    Label ic_miss;

    // Out-of-line call to ic_miss handler.
    // It is emitted ahead of the unverified entry point, i.e. outside of the
    // fall-through path.
    __ call_ic_miss_handler(ic_miss, 0x11, 0, Z_R1_scratch);

    // Unverified Entry Point UEP
    __ align(CodeEntryAlignment);
    entry_address[AdapterBlob::C2I_Unverified] = __ pc();

    __ ic_check(2);
    __ z_lg(Z_method, Address(Z_inline_cache, CompiledICData::speculated_method_offset()));
    // This def MUST MATCH code in gen_c2i_adapter!
    const Register code = Z_R11;

    // NOTE(review): method_(code) presumably addresses the Method's code
    // field rather than using the `code` register declared above - confirm.
    __ load_and_test_long(Z_R0, method_(code));
    __ z_brne(ic_miss);  // Cache miss: call runtime to handle this.

    // Fallthru to VEP. Duplicate LTG, but saved taken branch.
  }

  entry_address[AdapterBlob::C2I] = __ pc();

  // Class initialization barrier for static methods
  entry_address[AdapterBlob::C2I_No_Clinit_Check] = nullptr;
  assert(VM_Version::supports_fast_class_init_checks(), "sanity");
  Label L_skip_barrier;

  // Bypass the barrier for non-static methods
  __ testbit_ushort(Address(Z_method, Method::access_flags_offset()), JVM_ACC_STATIC_BIT);
  __ z_bfalse(L_skip_barrier); // non-static

  Register klass = Z_R11;
  __ load_method_holder(klass, Z_method);
  __ clinit_barrier(klass, Z_thread, &L_skip_barrier /*L_fast_path*/);

  // Slow path of the barrier: continue in the handle_wrong_method stub.
  // `klass` is reused as branch target register here.
  __ load_const_optimized(klass, SharedRuntime::get_handle_wrong_method_stub());
  __ z_br(klass);

  __ bind(L_skip_barrier);
  entry_address[AdapterBlob::C2I_No_Clinit_Check] = __ pc();

  gen_c2i_adapter(masm, comp_args_on_stack, sig, regs, skip_fixup);
  return;
}
2409 
2410 // This function returns the adjust size (in number of words) to a c2i adapter
2411 // activation for use during deoptimization.
2412 //
2413 // Actually only compiled frames need to be adjusted, but it
2414 // doesn't harm to adjust entry and interpreter frames, too.
2415 //
2416 int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
2417   assert(callee_locals >= callee_parameters,
2418           "test and remove; got more parms than locals");
2419   // Handle the abi adjustment here instead of doing it in push_skeleton_frames.
2420   return (callee_locals - callee_parameters) * Interpreter::stackElementWords +
2421          frame::z_parent_ijava_frame_abi_size / BytesPerWord;
2422 }
2423 
2424 uint SharedRuntime::in_preserve_stack_slots() {
2425   return frame::jit_in_preserve_size_in_4_byte_units;
2426 }
2427 
2428 uint SharedRuntime::out_preserve_stack_slots() {
2429   return frame::z_jit_out_preserve_size/VMRegImpl::stack_slot_size;
2430 }
2431 
// Not implemented on this platform: calls Unimplemented(). The return
// statement only exists to satisfy the function's signature.
VMReg SharedRuntime::thread_register() {
  Unimplemented();
  return nullptr;
}
2436 
2437 //
2438 // Frame generation for deopt and uncommon trap blobs.
2439 //
// Push a single skeletal frame.
//
// Loads the pc from the entry pcs_reg points to and the frame size from the
// entry frame_sizes_reg points to, stores the pc into the return_pc slot of
// the frame currently at Z_SP and then pushes a new frame of the loaded size.
//
// Parameters:
//   masm            - assembler receiving the generated code.
//   frame_sizes_reg - points to the size of the frame to push (unchanged).
//   pcs_reg         - points to the pc to store (unchanged).
//   frame_size_reg  - scratch; receives the frame size, reused as temp under ASSERT.
//   pc_reg          - scratch; receives the pc, afterwards reused as `fp`.
static void push_skeleton_frame(MacroAssembler* masm,
                          /* Unchanged */
                          Register frame_sizes_reg,
                          Register pcs_reg,
                          /* Invalidate */
                          Register frame_size_reg,
                          Register pc_reg) {
  BLOCK_COMMENT("  push_skeleton_frame {");
   __ z_lg(pc_reg, 0, pcs_reg);
   __ z_lg(frame_size_reg, 0, frame_sizes_reg);
   __ z_stg(pc_reg, _z_abi(return_pc), Z_SP);
   // The pc has been stored, so pc_reg is free to be reused.
   // NOTE(review): push_frame presumably leaves the SP of the frame that was
   // on top before the push in `fp` - confirm against MacroAssembler::push_frame.
   Register fp = pc_reg;
   __ push_frame(frame_size_reg, fp);
#ifdef ASSERT
   // The magic is required for successful walking skeletal frames.
   __ load_const_optimized(frame_size_reg/*tmp*/, frame::z_istate_magic_number);
   __ z_stg(frame_size_reg, _z_ijava_state_neg(magic), fp);
   // Fill other slots that are supposedly not necessary with eye catchers.
   __ load_const_optimized(frame_size_reg/*use as tmp*/, 0xdeadbad1);
   __ z_stg(frame_size_reg, _z_ijava_state_neg(top_frame_sp), fp);
   // The sender_sp of the bottom frame is set before pushing it.
   // The sender_sp of non bottom frames is their caller's top_frame_sp, which
   // is unknown here. Luckily it is not needed before filling the frame in
   // layout_activation(), we assert this by setting an eye catcher (see
   // comments on sender_sp in frame_s390.hpp).
   // Note: this store is relative to the new Z_SP, not to fp.
   __ z_stg(frame_size_reg, _z_ijava_state_neg(sender_sp), Z_SP);
#endif // ASSERT
  BLOCK_COMMENT("  } push_skeleton_frame");
}
2469 
// Loop through the UnrollBlock info and create new frames.
//
// Reads number_of_frames, frame_pcs, frame_sizes and caller_adjustment from
// the UnrollBlock, extends the frame currently on top of the stack by
// caller_adjustment, pushes one skeletal frame per entry and finally stores
// the return pc of the top frame.
//
// Parameters:
//   masm                 - assembler receiving the generated code.
//   deopt                - not referenced in the body.
//   unroll_block_reg     - address of the UnrollBlock (read only).
//   frame_sizes_reg, number_of_frames_reg, pcs_reg, tmp1, tmp2
//                        - registers clobbered by the generated code.
// Z_R0_scratch and Z_R1_scratch are used as well.
static void push_skeleton_frames(MacroAssembler* masm, bool deopt,
                            /* read */
                            Register unroll_block_reg,
                            /* invalidate */
                            Register frame_sizes_reg,
                            Register number_of_frames_reg,
                            Register pcs_reg,
                            Register tmp1,
                            Register tmp2) {
  BLOCK_COMMENT("push_skeleton_frames {");
  // _number_of_frames is of type int (deoptimization.hpp).
  __ z_lgf(number_of_frames_reg,
           Address(unroll_block_reg, Deoptimization::UnrollBlock::number_of_frames_offset()));
  __ z_lg(pcs_reg,
          Address(unroll_block_reg, Deoptimization::UnrollBlock::frame_pcs_offset()));
  __ z_lg(frame_sizes_reg,
          Address(unroll_block_reg, Deoptimization::UnrollBlock::frame_sizes_offset()));

  // stack: (caller_of_deoptee, ...).

  // If caller_of_deoptee is a compiled frame, then we extend it to make
  // room for the callee's locals and the frame::z_parent_ijava_frame_abi.
  // See also Deoptimization::last_frame_adjust() above.
  // Note: entry and interpreted frames are adjusted, too. But this doesn't harm.

  __ z_lgf(Z_R1_scratch,
           Address(unroll_block_reg, Deoptimization::UnrollBlock::caller_adjustment_offset()));
  __ z_lgr(tmp1, Z_SP);  // Save the sender sp before extending the frame.
  __ resize_frame_sub(Z_R1_scratch, tmp2/*tmp*/);
  // The oldest skeletal frame requires a valid sender_sp to make it walkable
  // (it is required to find the original pc of caller_of_deoptee if it is marked
  // for deoptimization - see nmethod::orig_pc_addr()).
  __ z_stg(tmp1, _z_ijava_state_neg(sender_sp), Z_SP);

  // Now push the new interpreter frames.
  Label loop, loop_entry;

  // Make sure that there is at least one entry in the array.
  // The test instruction that sets the condition code for the assertion is
  // only emitted in debug builds.
  DEBUG_ONLY(__ z_ltgr(number_of_frames_reg, number_of_frames_reg));
  __ asm_assert(Assembler::bcondNotZero, "array_size must be > 0", 0x205);

  // The first iteration must not advance the array pointers.
  __ z_bru(loop_entry);

  __ bind(loop);

  // Advance to the next frame size / pc entry.
  __ add2reg(frame_sizes_reg, wordSize);
  __ add2reg(pcs_reg, wordSize);

  __ bind(loop_entry);

  // Allocate a new frame, fill in the pc.
  // tmp1 and tmp2 are clobbered by each call.
  push_skeleton_frame(masm, frame_sizes_reg, pcs_reg, tmp1, tmp2);

  __ z_aghi(number_of_frames_reg, -1);  // Emit AGHI, because it sets the condition code
  __ z_brne(loop);

  // Set the top frame's return pc.
  // It is read from the entry following the last pc used in the loop, which
  // implies that the pcs array holds one entry more than number_of_frames
  // (NOTE(review): confirm against the code filling UnrollBlock::frame_pcs).
  __ add2reg(pcs_reg, wordSize);
  __ z_lg(Z_R0_scratch, 0, pcs_reg);
  __ z_stg(Z_R0_scratch, _z_abi(return_pc), Z_SP);
  BLOCK_COMMENT("} push_skeleton_frames");
}
2533 
2534 //------------------------------generate_deopt_blob----------------------------
// Generate the deoptimization blob and store it in _deopt_blob.
//
// The generated code has several entry points, recorded as offsets relative
// to the start of the code:
//   - the normal entry at the very beginning (exec mode Unpack_deopt),
//   - reexecute_offset: reexecute entry (exec mode Unpack_reexecute); it is
//     only generated if COMPILER1 is defined, otherwise the odd offset 1 is used,
//   - exception_offset: entry with exception oop and pc in Z_EXC_OOP/Z_EXC_PC,
//   - exception_in_tls_offset: entry with exception oop and pc already stored
//     in the JavaThread (exec mode Unpack_exception).
// All entries save the live registers, set exec_mode_reg and join at
// exec_mode_initialized. From there the code calls
// Deoptimization::fetch_unroll_info, pops the "unpack" frame and the deoptee
// frame, pushes the skeletal interpreter frames described by the returned
// UnrollBlock, calls Deoptimization::unpack_frames and finally branches to
// the address restored into Z_R14.
void SharedRuntime::generate_deopt_blob() {
  // Allocate space for the code.
  ResourceMark rm;
  // Setup code generation tools.
  const char* name = SharedRuntime::stub_name(StubId::shared_deopt_id);
  CodeBuffer buffer(name, 2048, 1024);
  InterpreterMacroAssembler* masm = new InterpreterMacroAssembler(&buffer);
  Label exec_mode_initialized;
  OopMap* map = nullptr;
  OopMapSet *oop_maps = new OopMapSet();

  // All entry point offsets below are relative to start_off.
  unsigned int start_off = __ offset();
  Label cont; // NOTE(review): not referenced in this function.

  // --------------------------------------------------------------------------
  // Normal entry (non-exception case)
  //
  // We have been called from the deopt handler of the deoptee.
  // Z_R14 points to the entry point of the deopt handler.
  // The return_pc has been stored in the frame of the deoptee and
  // will replace the address of the deopt_handler in the call
  // to Deoptimization::fetch_unroll_info below.

  const Register   exec_mode_reg = Z_tmp_1;

  // stack: (deoptee, caller of deoptee, ...)

  // pushes an "unpack" frame
  // R14 contains the return address pointing into the deoptimized
  // nmethod that was valid just before the nmethod was deoptimized.
  // save R14 into the deoptee frame.  the `fetch_unroll_info'
  // procedure called below will read it from there.
  map = RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers);

  // note the entry point.
  __ load_const_optimized(exec_mode_reg, Deoptimization::Unpack_deopt);
  __ z_bru(exec_mode_initialized);

#ifndef COMPILER1
  int reexecute_offset = 1; // odd offset will produce odd pc, which triggers a hardware trap
#else
  // --------------------------------------------------------------------------
  // Reexecute entry
  // - Z_R14 = Deopt Handler in nmethod

  int reexecute_offset = __ offset() - start_off;

  // No need to update map as each call to save_live_registers will produce identical oopmap
  (void) RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers);

  __ load_const_optimized(exec_mode_reg, Deoptimization::Unpack_reexecute);
  __ z_bru(exec_mode_initialized);
#endif


  // --------------------------------------------------------------------------
  // Exception entry. We reached here via a branch. Registers on entry:
  // - Z_EXC_OOP (Z_ARG1) = exception oop
  // - Z_EXC_PC  (Z_ARG2) = the exception pc.

  int exception_offset = __ offset() - start_off;

  // all registers are dead at this entry point, except for Z_EXC_OOP, and
  // Z_EXC_PC which contain the exception oop and exception pc
  // respectively.  Set them in TLS and fall thru to the
  // unpack_with_exception_in_tls entry point.

  // Store exception oop and pc in thread (location known to GC).
  // Need this since the call to "fetch_unroll_info()" may safepoint.
  __ z_stg(Z_EXC_OOP, Address(Z_thread, JavaThread::exception_oop_offset()));
  __ z_stg(Z_EXC_PC,  Address(Z_thread, JavaThread::exception_pc_offset()));

  // fall through

  int exception_in_tls_offset = __ offset() - start_off;

  // new implementation because exception oop is now passed in JavaThread

  // Prolog for exception case
  // All registers must be preserved because they might be used by LinearScan
  // Exception oop and throwing PC are passed in JavaThread

  // load throwing pc from JavaThread and use it as the return address of the current frame.
  __ z_lg(Z_R1_scratch, Address(Z_thread, JavaThread::exception_pc_offset()));

  // Save everything in sight.
  (void) RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers, Z_R1_scratch);

  // Now it is safe to overwrite any register

  // Clear the exception pc field in JavaThread
  __ clear_mem(Address(Z_thread, JavaThread::exception_pc_offset()), 8);

  // Deopt during an exception.  Save exec mode for unpack_frames.
  __ load_const_optimized(exec_mode_reg, Deoptimization::Unpack_exception);


#ifdef ASSERT
  // verify that there is really an exception oop in JavaThread
  __ z_lg(Z_ARG1, Address(Z_thread, JavaThread::exception_oop_offset()));
  __ MacroAssembler::verify_oop(Z_ARG1, FILE_AND_LINE);

  // verify that there is no pending exception
  __ asm_assert_mem8_is_zero(in_bytes(Thread::pending_exception_offset()), Z_thread,
                             "must not have pending exception here", __LINE__);
#endif

  // --------------------------------------------------------------------------
  // At this point, the live registers are saved and
  // the exec_mode_reg has been set up correctly.
  __ bind(exec_mode_initialized);

  // stack: ("unpack" frame, deoptee, caller_of_deoptee, ...).

  const Register unroll_block_reg  = Z_tmp_2;

  // we need to set `last_Java_frame' because `fetch_unroll_info' will
  // call `last_Java_frame()'.  however we can't block and no gc will
  // occur so we don't need an oopmap. the value of the pc in the
  // frame is not particularly important.  it just needs to identify the blob.

  // Don't set last_Java_pc anymore here (is implicitly null then).
  // the correct PC is retrieved in pd_last_frame() in that case.
  __ set_last_Java_frame(/*sp*/Z_SP, noreg);
  // With EscapeAnalysis turned on, this call may safepoint
  // although it is marked as "leaf call"!
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), Z_thread, exec_mode_reg);
  // Set an oopmap for the call site this describes all our saved volatile registers
  // The offset is also used further down to compute the pc for the second
  // set_last_Java_frame.
  int oop_map_offs = __ offset();
  oop_maps->add_gc_map(oop_map_offs, map);

  __ reset_last_Java_frame();
  // save the return value.
  __ z_lgr(unroll_block_reg, Z_RET);
  // restore the return registers that have been saved
  // (among other registers) by save_live_registers(...).
  RegisterSaver::restore_result_registers(masm);

  // reload the exec mode from the UnrollBlock (it might have changed)
  __ z_llgf(exec_mode_reg, Address(unroll_block_reg, Deoptimization::UnrollBlock::unpack_kind_offset()));

  // In excp_deopt_mode, restore and clear exception oop which we
  // stored in the thread during exception entry above. The exception
  // oop will be the return value of this stub.
  NearLabel skip_restore_excp;
  __ compare64_and_branch(exec_mode_reg, Deoptimization::Unpack_exception, Assembler::bcondNotEqual, skip_restore_excp);
  __ z_lg(Z_RET, thread_(exception_oop));
  __ clear_mem(thread_(exception_oop), 8);
  __ bind(skip_restore_excp);

  // remove the "unpack" frame
  __ pop_frame();

  // stack: (deoptee, caller of deoptee, ...).

  // pop the deoptee's frame
  __ pop_frame();

  // stack: (caller_of_deoptee, ...).

  // loop through the `UnrollBlock' info and create interpreter frames.
  push_skeleton_frames(masm, true/*deopt*/,
                  unroll_block_reg,
                  Z_tmp_3,
                  Z_tmp_4,
                  Z_ARG5,
                  Z_ARG4,
                  Z_ARG3);

  // stack: (skeletal interpreter frame, ..., optional skeletal
  // interpreter frame, caller of deoptee, ...).

  // push an "unpack" frame taking care of float / int return values.
  __ push_frame(RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers));

  // stack: (unpack frame, skeletal interpreter frame, ..., optional
  // skeletal interpreter frame, caller of deoptee, ...).

  // spill live volatile registers since we'll do a call.
  __ z_stg(Z_RET, offset_of(frame::z_abi_160_spill, spill[0]), Z_SP);
  __ z_std(Z_FRET, offset_of(frame::z_abi_160_spill, spill[1]), Z_SP);

  // let the unpacker layout information in the skeletal frames just allocated.
  // The pc passed as last_Java_pc is computed relative to the current
  // position such that it refers to code offset oop_map_offs.
  __ get_PC(Z_RET, oop_map_offs - __ offset());
  __ set_last_Java_frame(/*sp*/Z_SP, /*pc*/Z_RET);
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames),
                  Z_thread/*thread*/, exec_mode_reg/*exec_mode*/);

  __ reset_last_Java_frame();

  // restore the volatiles saved above.
  __ z_lg(Z_RET, offset_of(frame::z_abi_160_spill, spill[0]), Z_SP);
  __ z_ld(Z_FRET, offset_of(frame::z_abi_160_spill, spill[1]), Z_SP);

  // pop the "unpack" frame.
  __ pop_frame();
  __ restore_return_pc();

  // stack: (top interpreter frame, ..., optional interpreter frame,
  // caller of deoptee, ...).

  __ z_lg(Z_fp, _z_abi(callers_sp), Z_SP); // restore frame pointer
  __ restore_bcp();
  __ restore_locals();
  __ restore_esp();

  // return to the interpreter entry point.
  __ z_br(Z_R14);

  // Make sure all code is generated
  masm->flush();

  // NOTE(review): the literal 0 is presumably the offset of the normal
  // (unpack) entry, which is located at the start of the code - confirm
  // against DeoptimizationBlob::create. The frame size is passed in words.
  _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers)/wordSize);
  _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
}
2750 
2751 
2752 #ifdef COMPILER2
2753 //------------------------------generate_uncommon_trap_blob--------------------
2754 UncommonTrapBlob* OptoRuntime::generate_uncommon_trap_blob() {
2755   // Allocate space for the code
2756   ResourceMark rm;
2757   // Setup code generation tools
2758   const char* name = OptoRuntime::stub_name(StubId::c2_uncommon_trap_id);
2759   CodeBuffer buffer(name, 2048, 1024);
2760   if (buffer.blob() == nullptr) {
2761     return nullptr;
2762   }
2763   InterpreterMacroAssembler* masm = new InterpreterMacroAssembler(&buffer);
2764 
2765   Register unroll_block_reg = Z_tmp_1;
2766   Register klass_index_reg  = Z_ARG2;
2767   Register unc_trap_reg     = Z_ARG2;
2768 
2769   // stack: (deoptee, caller_of_deoptee, ...).
2770 
2771   // push a dummy "unpack" frame and call
2772   // `Deoptimization::uncommon_trap' to pack the compiled frame into a
2773   // vframe array and return the `UnrollBlock' information.
2774 
2775   // save R14 to compiled frame.
2776   __ save_return_pc();
2777   // push the "unpack_frame".
2778   __ push_frame_abi160(0);
2779 
2780   // stack: (unpack frame, deoptee, caller_of_deoptee, ...).
2781 
2782   // set the "unpack" frame as last_Java_frame.
2783   // `Deoptimization::uncommon_trap' expects it and considers its
2784   // sender frame as the deoptee frame.
2785   __ get_PC(Z_R1_scratch);
2786   __ set_last_Java_frame(/*sp*/Z_SP, /*pc*/Z_R1_scratch);
2787 
2788   __ z_lgr(klass_index_reg, Z_ARG1);  // passed implicitly as ARG2
2789   __ z_lghi(Z_ARG3, Deoptimization::Unpack_uncommon_trap);  // passed implicitly as ARG3
2790   BLOCK_COMMENT("call Deoptimization::uncommon_trap()");
2791   __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap), Z_thread);
2792 
2793   __ reset_last_Java_frame();
2794 
2795   // pop the "unpack" frame
2796   __ pop_frame();
2797 
2798   // stack: (deoptee, caller_of_deoptee, ...).
2799 
2800   // save the return value.
2801   __ z_lgr(unroll_block_reg, Z_RET);
2802 
2803   // pop the deoptee frame.
2804   __ pop_frame();
2805 
2806   // stack: (caller_of_deoptee, ...).
2807 
2808 #ifdef ASSERT
2809   assert(Immediate::is_uimm8(Deoptimization::Unpack_LIMIT), "Code not fit for larger immediates");
2810   assert(Immediate::is_uimm8(Deoptimization::Unpack_uncommon_trap), "Code not fit for larger immediates");
2811   const int unpack_kind_byte_offset = in_bytes(Deoptimization::UnrollBlock::unpack_kind_offset())
2812 #ifndef VM_LITTLE_ENDIAN
2813   + 3
2814 #endif
2815   ;
2816   if (Displacement::is_shortDisp(unpack_kind_byte_offset)) {
2817     __ z_cli(unpack_kind_byte_offset, unroll_block_reg, Deoptimization::Unpack_uncommon_trap);
2818   } else {
2819     __ z_cliy(unpack_kind_byte_offset, unroll_block_reg, Deoptimization::Unpack_uncommon_trap);
2820   }
2821   __ asm_assert(Assembler::bcondEqual, "OptoRuntime::generate_deopt_blob: expected Unpack_uncommon_trap", 0);
2822 #endif
2823 
2824   __ zap_from_to(Z_SP, Z_SP, Z_R0_scratch, Z_R1, 500, -1);
2825 
2826   // allocate new interpreter frame(s) and possibly resize the caller's frame
2827   // (no more adapters !)
2828   push_skeleton_frames(masm, false/*deopt*/,
2829                   unroll_block_reg,
2830                   Z_tmp_2,
2831                   Z_tmp_3,
2832                   Z_tmp_4,
2833                   Z_ARG5,
2834                   Z_ARG4);
2835 
2836   // stack: (skeletal interpreter frame, ..., optional skeletal
2837   // interpreter frame, (resized) caller of deoptee, ...).
2838 
2839   // push a dummy "unpack" frame taking care of float return values.
2840   // call `Deoptimization::unpack_frames' to layout information in the
2841   // interpreter frames just created
2842 
2843   // push the "unpack" frame
2844    const unsigned int framesize_in_bytes = __ push_frame_abi160(0);
2845 
2846   // stack: (unpack frame, skeletal interpreter frame, ..., optional
2847   // skeletal interpreter frame, (resized) caller of deoptee, ...).
2848 
2849   // set the "unpack" frame as last_Java_frame
2850   __ get_PC(Z_R1_scratch);
2851   __ set_last_Java_frame(/*sp*/Z_SP, /*pc*/Z_R1_scratch);
2852 
2853   // indicate it is the uncommon trap case
2854   BLOCK_COMMENT("call Deoptimization::Unpack_uncommon_trap()");
2855   __ load_const_optimized(unc_trap_reg, Deoptimization::Unpack_uncommon_trap);
2856   // let the unpacker layout information in the skeletal frames just allocated.
2857   __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), Z_thread);
2858 
2859   __ reset_last_Java_frame();
2860   // pop the "unpack" frame
2861   __ pop_frame();
2862   // restore LR from top interpreter frame
2863   __ restore_return_pc();
2864 
2865   // stack: (top interpreter frame, ..., optional interpreter frame,
2866   // (resized) caller of deoptee, ...).
2867 
2868   __ z_lg(Z_fp, _z_abi(callers_sp), Z_SP); // restore frame pointer
2869   __ restore_bcp();
2870   __ restore_locals();
2871   __ restore_esp();
2872 
2873   // return to the interpreter entry point
2874   __ z_br(Z_R14);
2875 
2876   masm->flush();
2877   return UncommonTrapBlob::create(&buffer, nullptr, framesize_in_bytes/wordSize);
2878 }
2879 #endif // COMPILER2
2880 
2881 
2882 //------------------------------generate_handler_blob------
2883 //
2884 // Generate a special Compile2Runtime blob that saves all registers,
2885 // and setup oopmap.
2886 SafepointBlob* SharedRuntime::generate_handler_blob(StubId id, address call_ptr) {
2887   assert(StubRoutines::forward_exception_entry() != nullptr,
2888          "must be generated before");
2889   assert(is_polling_page_id(id), "expected a polling page stub id");
2890 
2891   ResourceMark rm;
2892   OopMapSet *oop_maps = new OopMapSet();
2893   OopMap* map;
2894 
2895   // Allocate space for the code. Setup code generation tools.
2896   const char* name = SharedRuntime::stub_name(id);
2897   CodeBuffer buffer(name, 2048, 1024);
2898   MacroAssembler* masm = new MacroAssembler(&buffer);
2899 
2900   unsigned int start_off = __ offset();
2901   address call_pc = nullptr;
2902   int frame_size_in_bytes;
2903 
2904   bool cause_return = (id == StubId::shared_polling_page_return_handler_id);
2905   // Make room for return address (or push it again)
2906   if (!cause_return) {
2907     __ z_lg(Z_R14, Address(Z_thread, JavaThread::saved_exception_pc_offset()));
2908   }
2909 
2910   bool save_vectors = (id == StubId::shared_polling_page_vectors_safepoint_handler_id);
2911   // Save registers, fpu state, and flags
2912   map = RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers, Z_R14, save_vectors);
2913 
2914   if (!cause_return) {
2915     // Keep a copy of the return pc to detect if it gets modified.
2916     __ z_lgr(Z_R6, Z_R14);
2917   }
2918 
2919   // The following is basically a call_VM. However, we need the precise
2920   // address of the call in order to generate an oopmap. Hence, we do all the
2921   // work ourselves.
2922   __ set_last_Java_frame(Z_SP, noreg);
2923 
2924   // call into the runtime to handle the safepoint poll
2925   __ call_VM_leaf(call_ptr, Z_thread);
2926 
2927 
2928   // Set an oopmap for the call site. This oopmap will map all
2929   // oop-registers and debug-info registers as callee-saved. This
2930   // will allow deoptimization at this safepoint to find all possible
2931   // debug-info recordings, as well as let GC find all oops.
2932 
2933   oop_maps->add_gc_map((int)(__ offset()-start_off), map);
2934 
2935   Label noException;
2936 
2937   __ reset_last_Java_frame();
2938 
2939   __ load_and_test_long(Z_R1, thread_(pending_exception));
2940   __ z_bre(noException);
2941 
2942   // Pending exception case, used (sporadically) by
2943   // api/java_lang/Thread.State/index#ThreadState et al.
2944   RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers, save_vectors);
2945 
2946   // Jump to forward_exception_entry, with the issuing PC in Z_R14
2947   // so it looks like the original nmethod called forward_exception_entry.
2948   __ load_const_optimized(Z_R1_scratch, StubRoutines::forward_exception_entry());
2949   __ z_br(Z_R1_scratch);
2950 
2951   // No exception case
2952   __ bind(noException);
2953 
2954   if (!cause_return) {
2955     Label no_adjust;
2956      // If our stashed return pc was modified by the runtime we avoid touching it
2957     const int offset_of_return_pc = _z_common_abi(return_pc) + RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers, save_vectors);
2958     __ z_cg(Z_R6, offset_of_return_pc, Z_SP);
2959     __ z_brne(no_adjust);
2960 
2961     // Adjust return pc forward to step over the safepoint poll instruction
2962     __ instr_size(Z_R1_scratch, Z_R6);
2963     __ z_agr(Z_R6, Z_R1_scratch);
2964     __ z_stg(Z_R6, offset_of_return_pc, Z_SP);
2965 
2966     __ bind(no_adjust);
2967   }
2968 
2969   // Normal exit, restore registers and exit.
2970   RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers, save_vectors);
2971 
2972   __ z_br(Z_R14);
2973 
2974   // Make sure all code is generated
2975   masm->flush();
2976 
2977   // Fill-out other meta info
2978   return SafepointBlob::create(&buffer, oop_maps, RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers, save_vectors)/wordSize);
2979 }
2980 
2981 
2982 //
// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss)
//
// Generate a stub that calls into vm to find out the proper destination
// of a Java call. All the argument registers are live at this point
// but since this is generic code we don't know what they are and the caller
// must do any gc of the args.
//
// id          - resolve stub id (asserted below); also selects the stub name.
// destination - runtime entry, called with the current thread and Z_method.
//
// Returns the RuntimeStub created from the generated code and its oop map set.
//
RuntimeStub* SharedRuntime::generate_resolve_blob(StubId id, address destination) {
  assert (StubRoutines::forward_exception_entry() != nullptr, "must be generated before");
  assert(is_resolve_id(id), "expected a resolve stub id");

  // allocate space for the code
  ResourceMark rm;

  const char* name = SharedRuntime::stub_name(id);
  CodeBuffer buffer(name, 1000, 512);
  MacroAssembler* masm                = new MacroAssembler(&buffer);

  OopMapSet *oop_maps = new OopMapSet();
  OopMap* map = nullptr;

  // Oop map offsets are recorded relative to the start of the stub.
  unsigned int start_off = __ offset();

  map = RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers);

  // We must save a PC from within the stub as return PC
  // C code doesn't store the LR where we expect the PC,
  // so we would run into trouble upon stack walking.
  __ get_PC(Z_R1_scratch);

  // Offset right after get_PC; used both as the frame-complete offset of the
  // stub and as the position the oop map is registered at (see below).
  unsigned int frame_complete = __ offset();

  __ set_last_Java_frame(/*sp*/Z_SP, Z_R1_scratch);

  __ call_VM_leaf(destination, Z_thread, Z_method);


  // Set an oopmap for the call site.
  // We need this not only for callee-saved registers, but also for volatile
  // registers that the compiler might be keeping live across a safepoint.
  // The map is keyed by frame_complete rather than by the offset after the
  // call - presumably because that is the pc get_PC stored as last Java pc.

  oop_maps->add_gc_map((int)(frame_complete-start_off), map);

  // clear last_Java_sp
  __ reset_last_Java_frame();

  // check for pending exceptions
  Label pending;
  __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset()));
  __ z_brne(pending);

  __ z_lgr(Z_R1_scratch, Z_R2); // r1 is neither saved nor restored, r2 contains the continuation.
  RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers);

  // get the returned method
  __ get_vm_result_metadata(Z_method);

  // We are back to the original state on entry and ready to go.
  __ z_br(Z_R1_scratch);

  // Pending exception after the safepoint

  __ bind(pending);

  RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers);

  // exception pending => remove activation and forward to exception handler

  // Z_R0 still holds the value loaded by load_and_test_long above
  // (assumes restore_live_registers leaves Z_R0 alone - confirm in RegisterSaver).
  __ z_lgr(Z_R2, Z_R0); // pending_exception
  // Zero the thread's vm_result_oop field before forwarding.
  __ clear_mem(Address(Z_thread, JavaThread::vm_result_oop_offset()), sizeof(jlong));
  __ load_const_optimized(Z_R1_scratch, StubRoutines::forward_exception_entry());
  __ z_br(Z_R1_scratch);

  // -------------
  // make sure all code is generated
  masm->flush();

  // return the blob
  // The frame size argument is in words: live_reg_frame_size() is divided by wordSize.
  return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers)/wordSize,
                                       oop_maps, true);

}
3066 
3067 // Continuation point for throwing of implicit exceptions that are
3068 // not handled in the current activation. Fabricates an exception
3069 // oop and initiates normal exception dispatching in this
3070 // frame. Only callee-saved registers are preserved (through the
3071 // normal RegisterMap handling). If the compiler
3072 // needs all registers to be preserved between the fault point and
3073 // the exception handler then it must assume responsibility for that
3074 // in AbstractCompiler::continuation_for_implicit_null_exception or
3075 // continuation_for_implicit_division_by_zero_exception. All other
3076 // implicit exceptions (e.g., NullPointerException or
3077 // AbstractMethodError on entry) are either at call sites or
3078 // otherwise assume that stack unwinding will be initiated, so
3079 // caller saved registers were assumed volatile in the compiler.
3080 
3081 // Note that we generate only this stub into a RuntimeStub, because
3082 // it needs to be properly traversed and ignored during GC, so we
3083 // change the meaning of the "__" macro within this method.
3084 
3085 // Note: the routine set_pc_not_at_call_for_caller in
3086 // SharedRuntime.cpp requires that this code be generated into a
3087 // RuntimeStub.
3088 
// id            - throw stub id (asserted below); also selects the stub name.
// runtime_entry - runtime routine called with the current thread as its only
//                 argument; it is expected to leave a pending exception
//                 behind (checked in debug builds).
//
// Returns the RuntimeStub created from the generated code (no oop maps).
RuntimeStub* SharedRuntime::generate_throw_exception(StubId id, address runtime_entry) {
  assert(is_throw_id(id), "expected a throw stub id");

  const char* name = SharedRuntime::stub_name(id);

  // Code buffer sizing: 256 bytes of instructions, no relocation space.
  int insts_size = 256;
  int locs_size  = 0;

  ResourceMark rm;
  const char* timer_msg = "SharedRuntime generate_throw_exception";
  TraceTime timer(timer_msg, TRACETIME_LOG(Info, startuptime));

  CodeBuffer      code(name, insts_size, locs_size);
  MacroAssembler* masm = new MacroAssembler(&code);
  int framesize_in_bytes;
  address start = __ pc();

  // Build the stub frame: save the return pc, then push an ABI frame.
  __ save_return_pc();
  framesize_in_bytes = __ push_frame_abi160(0);

  // Everything from here on runs with the stub frame fully set up.
  address frame_complete_pc = __ pc();

  // Note that we always have a runtime stub frame on the top of stack at this point.
  __ get_PC(Z_R1);
  __ set_last_Java_frame(/*sp*/Z_SP, /*pc*/Z_R1);

  // Do the call.
  BLOCK_COMMENT("call runtime_entry");
  __ call_VM_leaf(runtime_entry, Z_thread);

  __ reset_last_Java_frame();

#ifdef ASSERT
  // Make sure that this code is only executed if there is a pending exception.
  { Label L;
    __ z_lg(Z_R0,
            in_bytes(Thread::pending_exception_offset()),
            Z_thread);
    __ z_ltgr(Z_R0, Z_R0);
    __ z_brne(L);
    __ stop("SharedRuntime::throw_exception: no pending exception");
    __ bind(L);
  }
#endif

  // Tear the stub frame down again before forwarding the exception.
  __ pop_frame();
  __ restore_return_pc();

  __ load_const_optimized(Z_R1, StubRoutines::forward_exception_entry());
  __ z_br(Z_R1);

  // frame_complete is passed as an offset from the stub start, the frame
  // size in words.
  RuntimeStub* stub =
    RuntimeStub::new_runtime_stub(name, &code,
                                  frame_complete_pc - start,
                                  framesize_in_bytes/wordSize,
                                  nullptr /*oop_maps*/, false);

  return stub;
}
3148 
3149 //------------------------------Montgomery multiplication------------------------
3150 //
3151 
// Subtract 0:b from carry:a. Return carry.
//
// a     - minuend, len 64-bit words; overwritten in place with a - b.
// b     - subtrahend, len 64-bit words.
// carry - the word logically sitting above the len words of a.
// len   - number of words. The loop body runs before its exit test, so at
//         least one word is always processed; callers must pass len >= 1.
//
// Words are processed from index 0 upwards, with the borrow handed from one
// word to the next through the condition code. The return value is
// carry + c, where c is what the final SLBGR leaves behind ("carry - 1" as
// noted below; presumably 0 without and all ones with an outgoing borrow).
static unsigned long
sub(unsigned long a[], unsigned long b[], unsigned long carry, long len) {
  // i is the running byte offset; c first carries the byte offset of the
  // last word (the loop limit) into the asm and is then reused as scratch.
  unsigned long i, c = 8 * (unsigned long)(len - 1);
  __asm__ __volatile__ (
    "SLGR   %[i], %[i]         \n" // initialize to 0 and pre-set carry
    "LGHI   0, 8               \n" // index increment (for BRXLG)
    "LGR    1, %[c]            \n" // index limit (for BRXLG)
    "0:                        \n"
    "LG     %[c], 0(%[i],%[a]) \n"
    "SLBG   %[c], 0(%[i],%[b]) \n" // subtract with borrow
    "STG    %[c], 0(%[i],%[a]) \n"
    "BRXLG  %[i], 0, 0b        \n" // while ((i+=8)<=limit);
    "SLBGR  %[c], %[c]         \n" // save carry - 1
    : [i]"=&a"(i), [c]"+r"(c)
    : [a]"a"(a), [b]"a"(b)
    : "cc", "memory", "r0", "r1" // r0/r1 hold increment and limit for BRXLG
 );
  return carry + c;
}
3172 
// Multiply (unsigned) Long A by Long B, accumulating the double-
// length result into the accumulator formed of T0, T1, and T2.
//
// A, A_ind   - first factor is the 64-bit word A[A_ind].
// B, B_ind   - second factor is the 64-bit word B[B_ind].
// T0, T1, T2 - accumulator words, updated in place. The low product word is
//              added to T0, the high product word plus carry to T1, and the
//              remaining carry to T2.
inline void MACC(unsigned long A[], long A_ind,
                 unsigned long B[], long B_ind,
                 unsigned long &T0, unsigned long &T1, unsigned long &T2) {
  // Byte offsets of the two selected words.
  long A_si = 8 * A_ind,
       B_si = 8 * B_ind;
  __asm__ __volatile__ (
    "LG     1, 0(%[A_si],%[A]) \n"
    "MLG    0, 0(%[B_si],%[B]) \n" // r0r1 = A * B
    "ALGR   %[T0], 1           \n"
    "LGHI   1, 0               \n" // r1 = 0
    "ALCGR  %[T1], 0           \n"
    "ALCGR  %[T2], 1           \n"
    : [T0]"+r"(T0), [T1]"+r"(T1), [T2]"+r"(T2)
    : [A]"r"(A), [A_si]"r"(A_si), [B]"r"(B), [B_si]"r"(B_si)
    : "cc", "r0", "r1" // r0/r1 are used explicitly as the product register pair
 );
}
3192 
// As above, but add twice the double-length result into the
// accumulator.
//
// A, A_ind   - first factor is the 64-bit word A[A_ind].
// B, B_ind   - second factor is the 64-bit word B[B_ind].
// T0, T1, T2 - accumulator words, updated in place. The product is computed
//              once and then added to the accumulator two times in a row,
//              each time with carry propagation through T1 into T2.
inline void MACC2(unsigned long A[], long A_ind,
                  unsigned long B[], long B_ind,
                  unsigned long &T0, unsigned long &T1, unsigned long &T2) {
  // Register operand holding 0, so only the carry gets added to T2.
  const unsigned long zero = 0;
  // Byte offsets of the two selected words.
  long A_si = 8 * A_ind,
       B_si = 8 * B_ind;
  __asm__ __volatile__ (
    "LG     1, 0(%[A_si],%[A]) \n"
    "MLG    0, 0(%[B_si],%[B]) \n" // r0r1 = A * B
    "ALGR   %[T0], 1           \n" // first addition of the product
    "ALCGR  %[T1], 0           \n"
    "ALCGR  %[T2], %[zero]     \n"
    "ALGR   %[T0], 1           \n" // second addition of the product
    "ALCGR  %[T1], 0           \n"
    "ALCGR  %[T2], %[zero]     \n"
    : [T0]"+r"(T0), [T1]"+r"(T1), [T2]"+r"(T2)
    : [A]"r"(A), [A_si]"r"(A_si), [B]"r"(B), [B_si]"r"(B_si), [zero]"r"(zero)
    : "cc", "r0", "r1" // r0/r1 are used explicitly as the product register pair
 );
}
3215 
3216 // Fast Montgomery multiplication. The derivation of the algorithm is
3217 // in "A Cryptographic Library for the Motorola DSP56000,
3218 // Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237".
3219 static void
3220 montgomery_multiply(unsigned long a[], unsigned long b[], unsigned long n[],
3221                     unsigned long m[], unsigned long inv, int len) {
3222   unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
3223   int i;
3224 
3225   assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");
3226 
3227   for (i = 0; i < len; i++) {
3228     int j;
3229     for (j = 0; j < i; j++) {
3230       MACC(a, j, b, i-j, t0, t1, t2);
3231       MACC(m, j, n, i-j, t0, t1, t2);
3232     }
3233     MACC(a, i, b, 0, t0, t1, t2);
3234     m[i] = t0 * inv;
3235     MACC(m, i, n, 0, t0, t1, t2);
3236 
3237     assert(t0 == 0, "broken Montgomery multiply");
3238 
3239     t0 = t1; t1 = t2; t2 = 0;
3240   }
3241 
3242   for (i = len; i < 2 * len; i++) {
3243     int j;
3244     for (j = i - len + 1; j < len; j++) {
3245       MACC(a, j, b, i-j, t0, t1, t2);
3246       MACC(m, j, n, i-j, t0, t1, t2);
3247     }
3248     m[i-len] = t0;
3249     t0 = t1; t1 = t2; t2 = 0;
3250   }
3251 
3252   while (t0) {
3253     t0 = sub(m, n, t0, len);
3254   }
3255 }
3256 
3257 // Fast Montgomery squaring. This uses asymptotically 25% fewer
3258 // multiplies so it should be up to 25% faster than Montgomery
3259 // multiplication. However, its loop control is more complex and it
3260 // may actually run slower on some machines.
3261 static void
3262 montgomery_square(unsigned long a[], unsigned long n[],
3263                   unsigned long m[], unsigned long inv, int len) {
3264   unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
3265   int i;
3266 
3267   assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");
3268 
3269   for (i = 0; i < len; i++) {
3270     int j;
3271     int end = (i+1)/2;
3272     for (j = 0; j < end; j++) {
3273       MACC2(a, j, a, i-j, t0, t1, t2);
3274       MACC(m, j, n, i-j, t0, t1, t2);
3275     }
3276     if ((i & 1) == 0) {
3277       MACC(a, j, a, j, t0, t1, t2);
3278     }
3279     for (; j < i; j++) {
3280       MACC(m, j, n, i-j, t0, t1, t2);
3281     }
3282     m[i] = t0 * inv;
3283     MACC(m, i, n, 0, t0, t1, t2);
3284 
3285     assert(t0 == 0, "broken Montgomery square");
3286 
3287     t0 = t1; t1 = t2; t2 = 0;
3288   }
3289 
3290   for (i = len; i < 2*len; i++) {
3291     int start = i-len+1;
3292     int end = start + (len - start)/2;
3293     int j;
3294     for (j = start; j < end; j++) {
3295       MACC2(a, j, a, i-j, t0, t1, t2);
3296       MACC(m, j, n, i-j, t0, t1, t2);
3297     }
3298     if ((i & 1) == 0) {
3299       MACC(a, j, a, j, t0, t1, t2);
3300     }
3301     for (; j < len; j++) {
3302       MACC(m, j, n, i-j, t0, t1, t2);
3303     }
3304     m[i-len] = t0;
3305     t0 = t1; t1 = t2; t2 = 0;
3306   }
3307 
3308   while (t0) {
3309     t0 = sub(m, n, t0, len);
3310   }
3311 }
3312 
// The threshold at which squaring is advantageous was determined
// experimentally on an i7-3930K (Ivy Bridge) CPU @ 3.5GHz.
// Value seems to be ok for other platforms, too.
// The unit is jints: SharedRuntime::montgomery_square compares it against
// its len argument before halving that into longwords.
#define MONTGOMERY_SQUARING_THRESHOLD 64
3317 
// Copy len longwords from s to d in reverse order: s[0] ends up in
// d[len-1], s[len-1] in d[0]. Nothing is copied for len <= 0.
//
// On little endian builds the two words inside each longword would have to
// be swapped as well; that variant is not implemented here.
static void reverse_words(unsigned long *s, unsigned long *d, int len) {
  for (int remaining = len; remaining > 0; remaining--) {
    const unsigned long s_val = *s++;
#ifdef VM_LITTLE_ENDIAN
    // Swapping the words in a longword is missing for little endian machines.
    Unimplemented();
#endif
    // Fill the destination from its last slot towards its first.
    d[remaining - 1] = s_val;
  }
}
3333 
3334 void SharedRuntime::montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints,
3335                                         jint len, jlong inv,
3336                                         jint *m_ints) {
3337   len = len & 0x7fffFFFF; // C2 does not respect int to long conversion for stub calls.
3338   assert(len % 2 == 0, "array length in montgomery_multiply must be even");
3339   int longwords = len/2;
3340 
3341   // Make very sure we don't use so much space that the stack might
3342   // overflow. 512 jints corresponds to an 16384-bit integer and
3343   // will use here a total of 8k bytes of stack space.
3344   int divisor = sizeof(unsigned long) * 4;
3345   guarantee(longwords <= 8192 / divisor, "must be");
3346   int total_allocation = longwords * sizeof (unsigned long) * 4;
3347   unsigned long *scratch = (unsigned long *)alloca(total_allocation);
3348 
3349   // Local scratch arrays
3350   unsigned long
3351     *a = scratch + 0 * longwords,
3352     *b = scratch + 1 * longwords,
3353     *n = scratch + 2 * longwords,
3354     *m = scratch + 3 * longwords;
3355 
3356   reverse_words((unsigned long *)a_ints, a, longwords);
3357   reverse_words((unsigned long *)b_ints, b, longwords);
3358   reverse_words((unsigned long *)n_ints, n, longwords);
3359 
3360   ::montgomery_multiply(a, b, n, m, (unsigned long)inv, longwords);
3361 
3362   reverse_words(m, (unsigned long *)m_ints, longwords);
3363 }
3364 
3365 void SharedRuntime::montgomery_square(jint *a_ints, jint *n_ints,
3366                                       jint len, jlong inv,
3367                                       jint *m_ints) {
3368   len = len & 0x7fffFFFF; // C2 does not respect int to long conversion for stub calls.
3369   assert(len % 2 == 0, "array length in montgomery_square must be even");
3370   int longwords = len/2;
3371 
3372   // Make very sure we don't use so much space that the stack might
3373   // overflow. 512 jints corresponds to an 16384-bit integer and
3374   // will use here a total of 6k bytes of stack space.
3375   int divisor = sizeof(unsigned long) * 3;
3376   guarantee(longwords <= (8192 / divisor), "must be");
3377   int total_allocation = longwords * sizeof (unsigned long) * 3;
3378   unsigned long *scratch = (unsigned long *)alloca(total_allocation);
3379 
3380   // Local scratch arrays
3381   unsigned long
3382     *a = scratch + 0 * longwords,
3383     *n = scratch + 1 * longwords,
3384     *m = scratch + 2 * longwords;
3385 
3386   reverse_words((unsigned long *)a_ints, a, longwords);
3387   reverse_words((unsigned long *)n_ints, n, longwords);
3388 
3389   if (len >= MONTGOMERY_SQUARING_THRESHOLD) {
3390     ::montgomery_square(a, n, m, (unsigned long)inv, longwords);
3391   } else {
3392     ::montgomery_multiply(a, a, n, m, (unsigned long)inv, longwords);
3393   }
3394 
3395   reverse_words(m, (unsigned long *)m_ints, longwords);
3396 }
3397 
// Spin-wait hook with C linkage. This implementation does no pausing at
// all and always reports 0.
extern "C" int SpinPause() {
  return 0;
}
3402 
3403 #if INCLUDE_JFR
3404 RuntimeStub* SharedRuntime::generate_jfr_write_checkpoint() {
3405   if (!Continuations::enabled()) return nullptr;
3406   Unimplemented();
3407   return nullptr;
3408 }
3409 
3410 RuntimeStub* SharedRuntime::generate_jfr_return_lease() {
3411   if (!Continuations::enabled()) return nullptr;
3412   Unimplemented();
3413   return nullptr;
3414 }
3415 
3416 #endif // INCLUDE_JFR
3417 
// Upper bounds used by the Java return convention, taken over from the
// int and float register parameter counts defined in Argument.
const uint SharedRuntime::java_return_convention_max_int = Argument::n_int_register_parameters_j;
const uint SharedRuntime::java_return_convention_max_float = Argument::n_float_register_parameters_j;
3420 
// Not implemented in this file: every call ends up in Unimplemented().
// None of the parameters is looked at; the trailing return only satisfies
// the signature.
int SharedRuntime::java_return_convention(const BasicType *sig_bt, VMRegPair *regs, int total_args_passed) {
  Unimplemented();
  return 0;
}
3425 
// Not implemented in this file: every call ends up in Unimplemented().
// vk is not looked at; the trailing return only satisfies the signature.
BufferedInlineTypeBlob* SharedRuntime::generate_buffered_inline_type_adapter(const InlineKlass* vk) {
  Unimplemented();
  return nullptr;
}