/*
 * Copyright (c) 2016, 2023, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2016, 2023 SAP SE. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "code/debugInfoRec.hpp"
#include "code/icBuffer.hpp"
#include "code/vtableStubs.hpp"
#include "compiler/oopMap.hpp"
#include "gc/shared/barrierSetAssembler.hpp"
#include "gc/shared/gcLocker.hpp"
#include "interpreter/interpreter.hpp"
#include "interpreter/interp_masm.hpp"
#include "memory/resourceArea.hpp"
#include "nativeInst_s390.hpp"
#include "oops/compiledICHolder.hpp"
#include "oops/klass.inline.hpp"
#include "prims/methodHandles.hpp"
#include "registerSaver_s390.hpp"
#include "runtime/jniHandles.hpp"
#include "runtime/safepointMechanism.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/signature.hpp"
#include "runtime/stubRoutines.hpp"
#include "runtime/vframeArray.hpp"
#include "utilities/align.hpp"
#include "utilities/macros.hpp"
#include "vmreg_s390.inline.hpp"
#ifdef COMPILER1
#include "c1/c1_Runtime1.hpp"
#endif
#ifdef COMPILER2
#include "opto/ad.hpp"
#include "opto/runtime.hpp"
#endif

#ifdef PRODUCT
#define __ masm->
#else
#define __ (Verbose ? (masm->block_comment(FILE_AND_LINE),masm):masm)->
#endif

#define BLOCK_COMMENT(str) __ block_comment(str)
#define BIND(label)        bind(label); BLOCK_COMMENT(#label ":")

#define RegisterSaver_LiveIntReg(regname) \
  { RegisterSaver::int_reg,   regname->encoding(), regname->as_VMReg() }

#define RegisterSaver_LiveFloatReg(regname) \
  { RegisterSaver::float_reg, regname->encoding(), regname->as_VMReg() }

// Registers which are not saved/restored, but still have a frame slot.
// Used to get the same frame size for RegisterSaver_LiveRegs and RegisterSaver_LiveRegsWithoutR2.
#define RegisterSaver_ExcludedIntReg(regname) \
  { RegisterSaver::excluded_reg, regname->encoding(), regname->as_VMReg() }

// Registers which are not saved/restored, but still have a frame slot.
// Used to get the same frame size for RegisterSaver_LiveRegs and RegisterSaver_LiveRegsWithoutR2.
#define RegisterSaver_ExcludedFloatReg(regname) \
  { RegisterSaver::excluded_reg, regname->encoding(), regname->as_VMReg() }

static const RegisterSaver::LiveRegType RegisterSaver_LiveRegs[] = {
  // Live registers which get spilled to the stack. Register positions
  // in this array correspond directly to the stack layout.
  //
  // live float registers:
  //
  RegisterSaver_LiveFloatReg(Z_F0 ),
  // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1)
  RegisterSaver_LiveFloatReg(Z_F2 ),
  RegisterSaver_LiveFloatReg(Z_F3 ),
  RegisterSaver_LiveFloatReg(Z_F4 ),
  RegisterSaver_LiveFloatReg(Z_F5 ),
  RegisterSaver_LiveFloatReg(Z_F6 ),
  RegisterSaver_LiveFloatReg(Z_F7 ),
  RegisterSaver_LiveFloatReg(Z_F8 ),
  RegisterSaver_LiveFloatReg(Z_F9 ),
  RegisterSaver_LiveFloatReg(Z_F10),
  RegisterSaver_LiveFloatReg(Z_F11),
  RegisterSaver_LiveFloatReg(Z_F12),
  RegisterSaver_LiveFloatReg(Z_F13),
  RegisterSaver_LiveFloatReg(Z_F14),
  RegisterSaver_LiveFloatReg(Z_F15),
  //
  // RegisterSaver_ExcludedIntReg(Z_R0), // scratch
  // RegisterSaver_ExcludedIntReg(Z_R1), // scratch
  RegisterSaver_LiveIntReg(Z_R2 ),
  RegisterSaver_LiveIntReg(Z_R3 ),
  RegisterSaver_LiveIntReg(Z_R4 ),
  RegisterSaver_LiveIntReg(Z_R5 ),
  RegisterSaver_LiveIntReg(Z_R6 ),
  RegisterSaver_LiveIntReg(Z_R7 ),
  RegisterSaver_LiveIntReg(Z_R8 ),
  RegisterSaver_LiveIntReg(Z_R9 ),
  RegisterSaver_LiveIntReg(Z_R10),
  RegisterSaver_LiveIntReg(Z_R11),
  RegisterSaver_LiveIntReg(Z_R12),
  RegisterSaver_LiveIntReg(Z_R13),
  // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.)
  // RegisterSaver_ExcludedIntReg(Z_R15)  // stack pointer
};

static const RegisterSaver::LiveRegType RegisterSaver_LiveIntRegs[] = {
  // Live registers which get spilled to the stack. Register positions
  // in this array correspond directly to the stack layout.
  //
  // live float registers: all excluded, but they still get a stack slot to keep the frame size the same.
  //
  RegisterSaver_ExcludedFloatReg(Z_F0 ),
  // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1)
  RegisterSaver_ExcludedFloatReg(Z_F2 ),
  RegisterSaver_ExcludedFloatReg(Z_F3 ),
  RegisterSaver_ExcludedFloatReg(Z_F4 ),
  RegisterSaver_ExcludedFloatReg(Z_F5 ),
  RegisterSaver_ExcludedFloatReg(Z_F6 ),
  RegisterSaver_ExcludedFloatReg(Z_F7 ),
  RegisterSaver_ExcludedFloatReg(Z_F8 ),
  RegisterSaver_ExcludedFloatReg(Z_F9 ),
  RegisterSaver_ExcludedFloatReg(Z_F10),
  RegisterSaver_ExcludedFloatReg(Z_F11),
  RegisterSaver_ExcludedFloatReg(Z_F12),
  RegisterSaver_ExcludedFloatReg(Z_F13),
  RegisterSaver_ExcludedFloatReg(Z_F14),
  RegisterSaver_ExcludedFloatReg(Z_F15),
  //
  // RegisterSaver_ExcludedIntReg(Z_R0), // scratch
  // RegisterSaver_ExcludedIntReg(Z_R1), // scratch
  RegisterSaver_LiveIntReg(Z_R2 ),
  RegisterSaver_LiveIntReg(Z_R3 ),
  RegisterSaver_LiveIntReg(Z_R4 ),
  RegisterSaver_LiveIntReg(Z_R5 ),
  RegisterSaver_LiveIntReg(Z_R6 ),
  RegisterSaver_LiveIntReg(Z_R7 ),
  RegisterSaver_LiveIntReg(Z_R8 ),
  RegisterSaver_LiveIntReg(Z_R9 ),
  RegisterSaver_LiveIntReg(Z_R10),
  RegisterSaver_LiveIntReg(Z_R11),
  RegisterSaver_LiveIntReg(Z_R12),
  RegisterSaver_LiveIntReg(Z_R13),
  // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.)
  // RegisterSaver_ExcludedIntReg(Z_R15)  // stack pointer
};

static const RegisterSaver::LiveRegType RegisterSaver_LiveRegsWithoutR2[] = {
  // Live registers which get spilled to the stack. Register positions
  // in this array correspond directly to the stack layout.
  //
  // live float registers:
  //
  RegisterSaver_LiveFloatReg(Z_F0 ),
  // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1)
  RegisterSaver_LiveFloatReg(Z_F2 ),
  RegisterSaver_LiveFloatReg(Z_F3 ),
  RegisterSaver_LiveFloatReg(Z_F4 ),
  RegisterSaver_LiveFloatReg(Z_F5 ),
  RegisterSaver_LiveFloatReg(Z_F6 ),
  RegisterSaver_LiveFloatReg(Z_F7 ),
  RegisterSaver_LiveFloatReg(Z_F8 ),
  RegisterSaver_LiveFloatReg(Z_F9 ),
  RegisterSaver_LiveFloatReg(Z_F10),
  RegisterSaver_LiveFloatReg(Z_F11),
  RegisterSaver_LiveFloatReg(Z_F12),
  RegisterSaver_LiveFloatReg(Z_F13),
  RegisterSaver_LiveFloatReg(Z_F14),
  RegisterSaver_LiveFloatReg(Z_F15),
  //
  // RegisterSaver_ExcludedIntReg(Z_R0), // scratch
  // RegisterSaver_ExcludedIntReg(Z_R1), // scratch
  RegisterSaver_ExcludedIntReg(Z_R2), // Omit saving R2.
  RegisterSaver_LiveIntReg(Z_R3 ),
  RegisterSaver_LiveIntReg(Z_R4 ),
  RegisterSaver_LiveIntReg(Z_R5 ),
  RegisterSaver_LiveIntReg(Z_R6 ),
  RegisterSaver_LiveIntReg(Z_R7 ),
  RegisterSaver_LiveIntReg(Z_R8 ),
  RegisterSaver_LiveIntReg(Z_R9 ),
  RegisterSaver_LiveIntReg(Z_R10),
  RegisterSaver_LiveIntReg(Z_R11),
  RegisterSaver_LiveIntReg(Z_R12),
  RegisterSaver_LiveIntReg(Z_R13),
  // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.)
  // RegisterSaver_ExcludedIntReg(Z_R15)  // stack pointer
};

// Live argument registers which get spilled to the stack.
static const RegisterSaver::LiveRegType RegisterSaver_LiveArgRegs[] = {
  RegisterSaver_LiveFloatReg(Z_FARG1),
  RegisterSaver_LiveFloatReg(Z_FARG2),
  RegisterSaver_LiveFloatReg(Z_FARG3),
  RegisterSaver_LiveFloatReg(Z_FARG4),
  RegisterSaver_LiveIntReg(Z_ARG1),
  RegisterSaver_LiveIntReg(Z_ARG2),
  RegisterSaver_LiveIntReg(Z_ARG3),
  RegisterSaver_LiveIntReg(Z_ARG4),
  RegisterSaver_LiveIntReg(Z_ARG5)
};

static const RegisterSaver::LiveRegType RegisterSaver_LiveVolatileRegs[] = {
  // Live registers which get spilled to the stack. Register positions
  // in this array correspond directly to the stack layout.
  //
  // live float registers:
  //
  RegisterSaver_LiveFloatReg(Z_F0 ),
  // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1)
  RegisterSaver_LiveFloatReg(Z_F2 ),
  RegisterSaver_LiveFloatReg(Z_F3 ),
  RegisterSaver_LiveFloatReg(Z_F4 ),
  RegisterSaver_LiveFloatReg(Z_F5 ),
  RegisterSaver_LiveFloatReg(Z_F6 ),
  RegisterSaver_LiveFloatReg(Z_F7 ),
  // RegisterSaver_LiveFloatReg(Z_F8 ), // non-volatile
  // RegisterSaver_LiveFloatReg(Z_F9 ), // non-volatile
  // RegisterSaver_LiveFloatReg(Z_F10), // non-volatile
  // RegisterSaver_LiveFloatReg(Z_F11), // non-volatile
  // RegisterSaver_LiveFloatReg(Z_F12), // non-volatile
  // RegisterSaver_LiveFloatReg(Z_F13), // non-volatile
  // RegisterSaver_LiveFloatReg(Z_F14), // non-volatile
  // RegisterSaver_LiveFloatReg(Z_F15), // non-volatile
  //
  // RegisterSaver_ExcludedIntReg(Z_R0), // scratch
  // RegisterSaver_ExcludedIntReg(Z_R1), // scratch
  RegisterSaver_LiveIntReg(Z_R2 ),
  RegisterSaver_LiveIntReg(Z_R3 ),
  RegisterSaver_LiveIntReg(Z_R4 ),
  RegisterSaver_LiveIntReg(Z_R5 ),
  // RegisterSaver_LiveIntReg(Z_R6 ), // non-volatile
  // RegisterSaver_LiveIntReg(Z_R7 ), // non-volatile
  // RegisterSaver_LiveIntReg(Z_R8 ), // non-volatile
  // RegisterSaver_LiveIntReg(Z_R9 ), // non-volatile
  // RegisterSaver_LiveIntReg(Z_R10), // non-volatile
  // RegisterSaver_LiveIntReg(Z_R11), // non-volatile
  // RegisterSaver_LiveIntReg(Z_R12), // non-volatile
  // RegisterSaver_LiveIntReg(Z_R13), // non-volatile
  // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.)
  // RegisterSaver_ExcludedIntReg(Z_R15)  // stack pointer
};

int RegisterSaver::live_reg_save_size(RegisterSet reg_set) {
  int reg_space = -1;
  switch (reg_set) {
    case all_registers:           reg_space = sizeof(RegisterSaver_LiveRegs); break;
    case all_registers_except_r2: reg_space = sizeof(RegisterSaver_LiveRegsWithoutR2); break;
    case all_integer_registers:   reg_space = sizeof(RegisterSaver_LiveIntRegs); break;
    case all_volatile_registers:  reg_space = sizeof(RegisterSaver_LiveVolatileRegs); break;
    case arg_registers:           reg_space = sizeof(RegisterSaver_LiveArgRegs); break;
    default: ShouldNotReachHere();
  }
  return (reg_space / sizeof(RegisterSaver::LiveRegType)) * reg_size;
}


int RegisterSaver::live_reg_frame_size(RegisterSet reg_set) {
  return live_reg_save_size(reg_set) + frame::z_abi_160_size;
}

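// Worked example (illustrative; assumes reg_size == 8 bytes and
// frame::z_abi_160_size == 160, as the names suggest): RegisterSaver_LiveRegs
// has 15 float entries (Z_F0, Z_F2..Z_F15) and 12 int entries (Z_R2..Z_R13),
// i.e. 27 entries in total. Hence
//   live_reg_save_size(all_registers)  == 27 * 8   == 216 bytes,
//   live_reg_frame_size(all_registers) == 216 + 160 == 376 bytes.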

// return_pc: Specify the register that should be stored as the return pc in the current frame.
OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, RegisterSet reg_set, Register return_pc) {
  // Record volatile registers as callee-save values in an OopMap so
  // their save locations will be propagated to the caller frame's
  // RegisterMap during StackFrameStream construction (needed for
  // deoptimization; see compiledVFrame::create_stack_value).

  // Calculate frame size.
  const int frame_size_in_bytes  = live_reg_frame_size(reg_set);
  const int frame_size_in_slots  = frame_size_in_bytes / sizeof(jint);
  const int register_save_offset = frame_size_in_bytes - live_reg_save_size(reg_set);

  // OopMap frame size is in c2 stack slots (sizeof(jint)) not bytes or words.
  OopMap* map = new OopMap(frame_size_in_slots, 0);
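  // Continuing the illustrative numbers from above: a 376-byte all_registers
  // frame yields frame_size_in_slots == 94 and register_save_offset == 160.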

  int regstosave_num = 0;
  const RegisterSaver::LiveRegType* live_regs = nullptr;

  switch (reg_set) {
    case all_registers:
      regstosave_num = sizeof(RegisterSaver_LiveRegs)/sizeof(RegisterSaver::LiveRegType);
      live_regs      = RegisterSaver_LiveRegs;
      break;
    case all_registers_except_r2:
      regstosave_num = sizeof(RegisterSaver_LiveRegsWithoutR2)/sizeof(RegisterSaver::LiveRegType);
      live_regs      = RegisterSaver_LiveRegsWithoutR2;
      break;
    case all_integer_registers:
      regstosave_num = sizeof(RegisterSaver_LiveIntRegs)/sizeof(RegisterSaver::LiveRegType);
      live_regs      = RegisterSaver_LiveIntRegs;
      break;
    case all_volatile_registers:
      regstosave_num = sizeof(RegisterSaver_LiveVolatileRegs)/sizeof(RegisterSaver::LiveRegType);
      live_regs      = RegisterSaver_LiveVolatileRegs;
      break;
    case arg_registers:
      regstosave_num = sizeof(RegisterSaver_LiveArgRegs)/sizeof(RegisterSaver::LiveRegType);
      live_regs      = RegisterSaver_LiveArgRegs;
      break;
    default: ShouldNotReachHere();
  }

  // Save return pc in old frame.
  __ save_return_pc(return_pc);

  // Push a new frame (includes stack linkage).
  // Use return_pc as scratch for push_frame. Z_R0_scratch (the default) and Z_R1_scratch are
  // illegally used to pass parameters by RangeCheckStub::emit_code().
  __ push_frame(frame_size_in_bytes, return_pc);
  // We have to restore return_pc right away.
  // Nobody else will. Furthermore, return_pc isn't necessarily the default (Z_R14).
  // Nobody else knows which register we saved.
  __ z_lg(return_pc, _z_common_abi(return_pc) + frame_size_in_bytes, Z_SP);

  // Register save area in new frame starts above z_abi_160 area.
  int offset = register_save_offset;

  Register first = noreg;
  Register last  = noreg;
  int      first_offset = -1;
  bool     float_spilled = false;

  for (int i = 0; i < regstosave_num; i++, offset += reg_size) {
    int reg_num  = live_regs[i].reg_num;
    int reg_type = live_regs[i].reg_type;

    switch (reg_type) {
      case RegisterSaver::int_reg: {
        Register reg = as_Register(reg_num);
        if (last != reg->predecessor()) {
          if (first != noreg) {
            __ z_stmg(first, last, first_offset, Z_SP);
          }
          first = reg;
          first_offset = offset;
          DEBUG_ONLY(float_spilled = false);
        }
        last = reg;
        assert(last != Z_R0, "r0 would require special treatment");
        assert(!float_spilled, "for simplicity, do not mix up ints and floats in RegisterSaver_LiveRegs[]");
        break;
      }

      case RegisterSaver::excluded_reg: // Not saved/restored, but with dedicated slot.
        continue; // Continue with next loop iteration.

      case RegisterSaver::float_reg: {
        FloatRegister freg = as_FloatRegister(reg_num);
        __ z_std(freg, offset, Z_SP);
        DEBUG_ONLY(float_spilled = true);
        break;
      }

      default:
        ShouldNotReachHere();
        break;
    }

    // The second set_callee_saved is really a waste, but we'll keep things as they were for now.
    map->set_callee_saved(VMRegImpl::stack2reg(offset >> 2), live_regs[i].vmreg);
    map->set_callee_saved(VMRegImpl::stack2reg((offset + half_reg_size) >> 2), live_regs[i].vmreg->next());
  }
  assert(first != noreg, "Should spill at least one int reg.");
  __ z_stmg(first, last, first_offset, Z_SP);

  // And we're done.
  return map;
}


// Generate the OopMap (again, regs were saved before).
OopMap* RegisterSaver::generate_oop_map(MacroAssembler* masm, RegisterSet reg_set) {
  // Calculate frame size.
  const int frame_size_in_bytes  = live_reg_frame_size(reg_set);
  const int frame_size_in_slots  = frame_size_in_bytes / sizeof(jint);
  const int register_save_offset = frame_size_in_bytes - live_reg_save_size(reg_set);

  // OopMap frame size is in c2 stack slots (sizeof(jint)) not bytes or words.
  OopMap* map = new OopMap(frame_size_in_slots, 0);

  int regstosave_num = 0;
  const RegisterSaver::LiveRegType* live_regs = nullptr;

  switch (reg_set) {
    case all_registers:
      regstosave_num = sizeof(RegisterSaver_LiveRegs)/sizeof(RegisterSaver::LiveRegType);
      live_regs      = RegisterSaver_LiveRegs;
      break;
    case all_registers_except_r2:
      regstosave_num = sizeof(RegisterSaver_LiveRegsWithoutR2)/sizeof(RegisterSaver::LiveRegType);
      live_regs      = RegisterSaver_LiveRegsWithoutR2;
      break;
    case all_integer_registers:
      regstosave_num = sizeof(RegisterSaver_LiveIntRegs)/sizeof(RegisterSaver::LiveRegType);
      live_regs      = RegisterSaver_LiveIntRegs;
      break;
    case all_volatile_registers:
      regstosave_num = sizeof(RegisterSaver_LiveVolatileRegs)/sizeof(RegisterSaver::LiveRegType);
      live_regs      = RegisterSaver_LiveVolatileRegs;
      break;
    case arg_registers:
      regstosave_num = sizeof(RegisterSaver_LiveArgRegs)/sizeof(RegisterSaver::LiveRegType);
      live_regs      = RegisterSaver_LiveArgRegs;
      break;
    default: ShouldNotReachHere();
  }

  // Register save area in new frame starts above z_abi_160 area.
  int offset = register_save_offset;
  for (int i = 0; i < regstosave_num; i++) {
    if (live_regs[i].reg_type < RegisterSaver::excluded_reg) {
      map->set_callee_saved(VMRegImpl::stack2reg(offset>>2), live_regs[i].vmreg);
      map->set_callee_saved(VMRegImpl::stack2reg((offset + half_reg_size)>>2), live_regs[i].vmreg->next());
    }
    offset += reg_size;
  }
  return map;
}


// Pop the current frame and restore all the registers that we saved.
void RegisterSaver::restore_live_registers(MacroAssembler* masm, RegisterSet reg_set) {
  int offset;
  const int register_save_offset = live_reg_frame_size(reg_set) - live_reg_save_size(reg_set);

  Register first = noreg;
  Register last = noreg;
  int      first_offset = -1;
  bool     float_spilled = false;

  int regstosave_num = 0;
  const RegisterSaver::LiveRegType* live_regs = nullptr;

  switch (reg_set) {
    case all_registers:
      regstosave_num = sizeof(RegisterSaver_LiveRegs)/sizeof(RegisterSaver::LiveRegType);
      live_regs      = RegisterSaver_LiveRegs;
      break;
    case all_registers_except_r2:
      regstosave_num = sizeof(RegisterSaver_LiveRegsWithoutR2)/sizeof(RegisterSaver::LiveRegType);
      live_regs      = RegisterSaver_LiveRegsWithoutR2;
      break;
    case all_integer_registers:
      regstosave_num = sizeof(RegisterSaver_LiveIntRegs)/sizeof(RegisterSaver::LiveRegType);
      live_regs      = RegisterSaver_LiveIntRegs;
      break;
    case all_volatile_registers:
      regstosave_num = sizeof(RegisterSaver_LiveVolatileRegs)/sizeof(RegisterSaver::LiveRegType);
      live_regs      = RegisterSaver_LiveVolatileRegs;
      break;
    case arg_registers:
      regstosave_num = sizeof(RegisterSaver_LiveArgRegs)/sizeof(RegisterSaver::LiveRegType);
      live_regs      = RegisterSaver_LiveArgRegs;
      break;
    default: ShouldNotReachHere();
  }

  // Restore all registers (ints and floats).

  // Register save area in new frame starts above z_abi_160 area.
  offset = register_save_offset;

  for (int i = 0; i < regstosave_num; i++, offset += reg_size) {
    int reg_num  = live_regs[i].reg_num;
    int reg_type = live_regs[i].reg_type;

    switch (reg_type) {
      case RegisterSaver::excluded_reg:
        continue; // Continue with next loop iteration.

      case RegisterSaver::int_reg: {
        Register reg = as_Register(reg_num);
        if (last != reg->predecessor()) {
          if (first != noreg) {
            __ z_lmg(first, last, first_offset, Z_SP);
          }
          first = reg;
          first_offset = offset;
          DEBUG_ONLY(float_spilled = false);
        }
        last = reg;
        assert(last != Z_R0, "r0 would require special treatment");
        assert(!float_spilled, "for simplicity, do not mix up ints and floats in RegisterSaver_LiveRegs[]");
        break;
      }

      case RegisterSaver::float_reg: {
        FloatRegister freg = as_FloatRegister(reg_num);
        __ z_ld(freg, offset, Z_SP);
        DEBUG_ONLY(float_spilled = true);
        break;
      }

      default:
        ShouldNotReachHere();
    }
  }
  assert(first != noreg, "Should restore at least one int reg.");
  __ z_lmg(first, last, first_offset, Z_SP);

  // Pop the frame.
  __ pop_frame();

  // Restore the return pc.
  __ restore_return_pc();
}


// Pop the current frame and restore the registers that might be holding a result.
void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
  int offset;
  const int regstosave_num       = sizeof(RegisterSaver_LiveRegs) /
                                   sizeof(RegisterSaver::LiveRegType);
  const int register_save_offset = live_reg_frame_size(all_registers) - live_reg_save_size(all_registers);

  // Restore all result registers (ints and floats).
  offset = register_save_offset;
  for (int i = 0; i < regstosave_num; i++, offset += reg_size) {
    int reg_num = RegisterSaver_LiveRegs[i].reg_num;
    int reg_type = RegisterSaver_LiveRegs[i].reg_type;
    switch (reg_type) {
      case RegisterSaver::excluded_reg:
        continue; // Continue with next loop iteration.
      case RegisterSaver::int_reg: {
        if (as_Register(reg_num) == Z_RET) { // int result_reg
          __ z_lg(as_Register(reg_num), offset, Z_SP);
        }
        break;
      }
      case RegisterSaver::float_reg: {
        if (as_FloatRegister(reg_num) == Z_FRET) { // float result_reg
          __ z_ld(as_FloatRegister(reg_num), offset, Z_SP);
        }
        break;
      }
      default:
        ShouldNotReachHere();
    }
  }
}

// ---------------------------------------------------------------------------
void SharedRuntime::save_native_result(MacroAssembler * masm,
                                       BasicType ret_type,
                                       int frame_slots) {
  Address memaddr(Z_SP, frame_slots * VMRegImpl::stack_slot_size);

  switch (ret_type) {
    case T_BOOLEAN:  // Save shorter types as int. Do we need sign extension at restore??
    case T_BYTE:
    case T_CHAR:
    case T_SHORT:
    case T_INT:
      __ reg2mem_opt(Z_RET, memaddr, false);
      break;
    case T_OBJECT:   // Save pointer types as long.
    case T_ARRAY:
    case T_ADDRESS:
    case T_VOID:
    case T_LONG:
      __ reg2mem_opt(Z_RET, memaddr);
      break;
    case T_FLOAT:
      __ freg2mem_opt(Z_FRET, memaddr, false);
      break;
    case T_DOUBLE:
      __ freg2mem_opt(Z_FRET, memaddr);
      break;
    default:
      ShouldNotReachHere();
      break;
  }
}

void SharedRuntime::restore_native_result(MacroAssembler *masm,
                                          BasicType       ret_type,
                                          int             frame_slots) {
  Address memaddr(Z_SP, frame_slots * VMRegImpl::stack_slot_size);

  switch (ret_type) {
    case T_BOOLEAN:  // Restore shorter types as int. Do we need sign extension at restore??
    case T_BYTE:
    case T_CHAR:
    case T_SHORT:
    case T_INT:
      __ mem2reg_opt(Z_RET, memaddr, false);
      break;
    case T_OBJECT:   // Restore pointer types as long.
    case T_ARRAY:
    case T_ADDRESS:
    case T_VOID:
    case T_LONG:
      __ mem2reg_opt(Z_RET, memaddr);
      break;
    case T_FLOAT:
      __ mem2freg_opt(Z_FRET, memaddr, false);
      break;
    case T_DOUBLE:
      __ mem2freg_opt(Z_FRET, memaddr);
      break;
    default:
      ShouldNotReachHere();
      break;
  }
}

// ---------------------------------------------------------------------------
// Read the array of BasicTypes from a signature, and compute where the
// arguments should go. Values in the VMRegPair regs array refer to 4-byte
// quantities. Values less than VMRegImpl::stack0 are registers, those above
// refer to 4-byte stack slots. All stack slots are based off of the stack pointer
// as framesizes are fixed.
// VMRegImpl::stack0 refers to the first slot 0(sp).
// VMRegImpl::stack0+1 refers to the memory word 4-bytes higher. Registers
// up to Register::number_of_registers are the 64-bit integer registers.

// Note: the INPUTS in sig_bt are in units of Java argument words, which are
// either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit
// units regardless of build.

// The Java calling convention is a "shifted" version of the C ABI.
// By skipping the first C ABI register we can call non-static jni methods
// with small numbers of arguments without having to shuffle the arguments
// at all. Since we control the java ABI we ought to at least get some
// advantage out of it.
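// Illustrative example (not part of the original comment): for the Java
// signature (int, long, Object, float, double, int), where sig_bt also
// contains a T_VOID half after the long and after the double, the loop
// below assigns int -> Z_R2, long -> Z_R3, Object -> Z_R4, float -> Z_F0,
// double -> Z_F2, int -> Z_R5, and returns stk == 0 (nothing is spilled).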
int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
                                           VMRegPair *regs,
                                           int total_args_passed) {
  // c2c calling conventions for compiled-compiled calls.

  // An int/float occupies 1 slot here.
  const int inc_stk_for_intfloat   = 1; // 1 slot for ints and floats.
  const int inc_stk_for_longdouble = 2; // 2 slots for longs and doubles.

  const VMReg z_iarg_reg[5] = {
    Z_R2->as_VMReg(),
    Z_R3->as_VMReg(),
    Z_R4->as_VMReg(),
    Z_R5->as_VMReg(),
    Z_R6->as_VMReg()
  };
  const VMReg z_farg_reg[4] = {
    Z_F0->as_VMReg(),
    Z_F2->as_VMReg(),
    Z_F4->as_VMReg(),
    Z_F6->as_VMReg()
  };
  const int z_num_iarg_registers = sizeof(z_iarg_reg) / sizeof(z_iarg_reg[0]);
  const int z_num_farg_registers = sizeof(z_farg_reg) / sizeof(z_farg_reg[0]);

  assert(Register::number_of_arg_registers == z_num_iarg_registers, "iarg reg count mismatch");
  assert(FloatRegister::number_of_arg_registers == z_num_farg_registers, "farg reg count mismatch");

  int stk = 0;
  int ireg = 0;
  int freg = 0;

  for (int i = 0; i < total_args_passed; ++i) {
    switch (sig_bt[i]) {
      case T_BOOLEAN:
      case T_CHAR:
      case T_BYTE:
      case T_SHORT:
      case T_INT:
        if (ireg < z_num_iarg_registers) {
          // Put int/ptr in register.
          regs[i].set1(z_iarg_reg[ireg]);
          ++ireg;
        } else {
          // Put int/ptr on stack.
          regs[i].set1(VMRegImpl::stack2reg(stk));
          stk += inc_stk_for_intfloat;
        }
        break;
      case T_LONG:
        assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half");
        if (ireg < z_num_iarg_registers) {
          // Put long in register.
          regs[i].set2(z_iarg_reg[ireg]);
          ++ireg;
        } else {
          // Put long on stack and align to 2 slots.
          if (stk & 0x1) { ++stk; }
          regs[i].set2(VMRegImpl::stack2reg(stk));
          stk += inc_stk_for_longdouble;
        }
        break;
      case T_OBJECT:
      case T_ARRAY:
      case T_ADDRESS:
        if (ireg < z_num_iarg_registers) {
          // Put ptr in register.
          regs[i].set2(z_iarg_reg[ireg]);
          ++ireg;
        } else {
          // Put ptr on stack and align to 2 slots, because
          // "64-bit pointers record oop-ishness on 2 aligned adjacent
          // registers." (see OopFlow::build_oop_map).
          if (stk & 0x1) { ++stk; }
          regs[i].set2(VMRegImpl::stack2reg(stk));
          stk += inc_stk_for_longdouble;
        }
        break;
      case T_FLOAT:
        if (freg < z_num_farg_registers) {
          // Put float in register.
          regs[i].set1(z_farg_reg[freg]);
          ++freg;
        } else {
          // Put float on stack.
          regs[i].set1(VMRegImpl::stack2reg(stk));
          stk += inc_stk_for_intfloat;
        }
        break;
      case T_DOUBLE:
        assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half");
        if (freg < z_num_farg_registers) {
          // Put double in register.
          regs[i].set2(z_farg_reg[freg]);
          ++freg;
        } else {
          // Put double on stack and align to 2 slots.
          if (stk & 0x1) { ++stk; }
          regs[i].set2(VMRegImpl::stack2reg(stk));
          stk += inc_stk_for_longdouble;
        }
        break;
      case T_VOID:
        assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
        // Do not count halves.
        regs[i].set_bad();
        break;
      default:
        ShouldNotReachHere();
    }
  }
  return stk;
}

int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
                                        VMRegPair *regs,
                                        VMRegPair *regs2,
                                        int total_args_passed) {
  assert(regs2 == nullptr, "second VMRegPair array not used on this platform");

  // Calling conventions for C runtime calls and calls to JNI native methods.
  const VMReg z_iarg_reg[5] = {
    Z_R2->as_VMReg(),
    Z_R3->as_VMReg(),
    Z_R4->as_VMReg(),
    Z_R5->as_VMReg(),
    Z_R6->as_VMReg()
  };
  const VMReg z_farg_reg[4] = {
    Z_F0->as_VMReg(),
    Z_F2->as_VMReg(),
    Z_F4->as_VMReg(),
    Z_F6->as_VMReg()
  };
  const int z_num_iarg_registers = sizeof(z_iarg_reg) / sizeof(z_iarg_reg[0]);
  const int z_num_farg_registers = sizeof(z_farg_reg) / sizeof(z_farg_reg[0]);

  // Check calling conventions consistency.
  assert(Register::number_of_arg_registers == z_num_iarg_registers, "iarg reg count mismatch");
  assert(FloatRegister::number_of_arg_registers == z_num_farg_registers, "farg reg count mismatch");

  // Avoid passing C arguments in the wrong stack slots.

  // 'Stk' counts stack slots. Due to alignment, 32 bit values occupy
  // 2 such slots, like 64 bit values do.
  const int inc_stk_for_intfloat   = 2; // 2 slots for ints and floats.
  const int inc_stk_for_longdouble = 2; // 2 slots for longs and doubles.

  // Leave room for C-compatible ABI
  int stk = (frame::z_abi_160_size - frame::z_jit_out_preserve_size) / VMRegImpl::stack_slot_size;
  int freg = 0;
  int ireg = 0;

  // We put the first 5 arguments into registers and the rest on the
  // stack. Float arguments are already in their argument registers
  // due to c2c calling conventions (see calling_convention).
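  // Illustrative example (an assumption for clarity, not from the source):
  // once five int/oop arguments occupy Z_R2..Z_R6, a sixth T_INT argument
  // goes to the stack and consumes 2 slots (set2(stack2reg(stk)), stk += 2),
  // while an overflowing T_FLOAT lands in the low word of its slot pair via
  // set1(stack2reg(stk + 1)).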
  for (int i = 0; i < total_args_passed; ++i) {
    switch (sig_bt[i]) {
      case T_BOOLEAN:
      case T_CHAR:
      case T_BYTE:
      case T_SHORT:
      case T_INT:
        // Fall through, handle as long.
      case T_LONG:
      case T_OBJECT:
      case T_ARRAY:
      case T_ADDRESS:
      case T_METADATA:
        // Oops are already boxed if required (JNI).
        if (ireg < z_num_iarg_registers) {
          regs[i].set2(z_iarg_reg[ireg]);
          ++ireg;
        } else {
          regs[i].set2(VMRegImpl::stack2reg(stk));
          stk += inc_stk_for_longdouble;
        }
        break;
      case T_FLOAT:
        if (freg < z_num_farg_registers) {
          regs[i].set1(z_farg_reg[freg]);
          ++freg;
        } else {
          regs[i].set1(VMRegImpl::stack2reg(stk+1));
          stk += inc_stk_for_intfloat;
        }
        break;
      case T_DOUBLE:
        assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half");
        if (freg < z_num_farg_registers) {
          regs[i].set2(z_farg_reg[freg]);
          ++freg;
        } else {
          // Put double on stack.
          regs[i].set2(VMRegImpl::stack2reg(stk));
          stk += inc_stk_for_longdouble;
        }
        break;
      case T_VOID:
        // Do not count halves.
        regs[i].set_bad();
        break;
      default:
        ShouldNotReachHere();
    }
  }
  return align_up(stk, 2);
}

int SharedRuntime::vector_calling_convention(VMRegPair *regs,
                                             uint num_bits,
                                             uint total_args_passed) {
  Unimplemented();
  return 0;
}

////////////////////////////////////////////////////////////////////////
//
//  Argument shufflers
//
////////////////////////////////////////////////////////////////////////

//----------------------------------------------------------------------
// The java_calling_convention describes stack locations as ideal slots on
// a frame with no abi restrictions. Since we must observe abi restrictions
// (like the placement of the register window) the slots must be biased by
// the following value.
//----------------------------------------------------------------------
static int reg2slot(VMReg r) {
  return r->reg2stack() + SharedRuntime::out_preserve_stack_slots();
}

static int reg2offset(VMReg r) {
  return reg2slot(r) * VMRegImpl::stack_slot_size;
}
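
// Illustrative arithmetic (the register and slot values are made up):
// with r->reg2stack() == 3 and out_preserve_stack_slots() == 2,
// reg2slot(r) == 5; with VMRegImpl::stack_slot_size == 4,
// reg2offset(r) == 20 bytes off the stack pointer.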

static void verify_oop_args(MacroAssembler *masm,
                            int total_args_passed,
                            const BasicType *sig_bt,
                            const VMRegPair *regs) {
  if (!VerifyOops) { return; }

  for (int i = 0; i < total_args_passed; i++) {
    if (is_reference_type(sig_bt[i])) {
      VMReg r = regs[i].first();
      assert(r->is_valid(), "bad oop arg");

      if (r->is_stack()) {
        __ z_lg(Z_R0_scratch,
                Address(Z_SP, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
        __ verify_oop(Z_R0_scratch, FILE_AND_LINE);
      } else {
        __ verify_oop(r->as_Register(), FILE_AND_LINE);
      }
    }
  }
}

static void gen_special_dispatch(MacroAssembler *masm,
                                 int total_args_passed,
                                 vmIntrinsics::ID special_dispatch,
                                 const BasicType *sig_bt,
                                 const VMRegPair *regs) {
  verify_oop_args(masm, total_args_passed, sig_bt, regs);

  // Now write the args into the outgoing interpreter space.
  bool     has_receiver   = false;
  Register receiver_reg   = noreg;
  int      member_arg_pos = -1;
  Register member_reg     = noreg;
  int      ref_kind       = MethodHandles::signature_polymorphic_intrinsic_ref_kind(special_dispatch);

  if (ref_kind != 0) {
    member_arg_pos = total_args_passed - 1;  // trailing MemberName argument
    member_reg = Z_R9;                       // Known to be free at this point.
    has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind);
  } else if (special_dispatch == vmIntrinsics::_linkToNative) {
    member_arg_pos = total_args_passed - 1;  // trailing NativeEntryPoint argument
    member_reg = Z_R9;  // known to be free at this point
  } else {
    guarantee(special_dispatch == vmIntrinsics::_invokeBasic,
              "special_dispatch=%d", vmIntrinsics::as_int(special_dispatch));
    has_receiver = true;
  }

  if (member_reg != noreg) {
    // Load the member_arg into register, if necessary.
    assert(member_arg_pos >= 0 && member_arg_pos < total_args_passed, "oob");
    assert(sig_bt[member_arg_pos] == T_OBJECT, "dispatch argument must be an object");

    VMReg r = regs[member_arg_pos].first();
    assert(r->is_valid(), "bad member arg");

    if (r->is_stack()) {
      __ z_lg(member_reg, Address(Z_SP, reg2offset(r)));
    } else {
      // No data motion is needed.
      member_reg = r->as_Register();
    }
  }

  if (has_receiver) {
    // Make sure the receiver is loaded into a register.
    assert(total_args_passed > 0, "oob");
    assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object");

    VMReg r = regs[0].first();
    assert(r->is_valid(), "bad receiver arg");

    if (r->is_stack()) {
      // Porting note: This assumes that compiled calling conventions always
      // pass the receiver oop in a register. If this is not true on some
      // platform, pick a temp and load the receiver from stack.
      assert(false, "receiver always in a register");
      receiver_reg = Z_R13;  // Known to be free at this point.
      __ z_lg(receiver_reg, Address(Z_SP, reg2offset(r)));
    } else {
      // No data motion is needed.
      receiver_reg = r->as_Register();
    }
  }

  // Figure out which address we are really jumping to:
  MethodHandles::generate_method_handle_dispatch(masm, special_dispatch,
                                                 receiver_reg, member_reg,
                                                 /*for_compiler_entry:*/ true);
}

// Is the given vector size (in bytes) bigger than the size saved by default?
// 8-byte registers are saved by default on z/Architecture.
bool SharedRuntime::is_wide_vector(int size) {
  // Note, MaxVectorSize == 8 on this platform.
  assert(size <= 8, "%d bytes vectors are not supported", size);
  return size > 8;
}

//----------------------------------------------------------------------
// An oop arg. Must pass a handle not the oop itself
//----------------------------------------------------------------------
static void object_move(MacroAssembler *masm,
                        OopMap *map,
                        int oop_handle_offset,
                        int framesize_in_slots,
                        VMRegPair src,
                        VMRegPair dst,
                        bool is_receiver,
                        int *receiver_offset) {
  int frame_offset = framesize_in_slots*VMRegImpl::stack_slot_size;

  assert(!is_receiver || (is_receiver && (*receiver_offset == -1)), "only one receiving object per call, please.");

  // Must pass a handle. First figure out the location we use as a handle.

  if (src.first()->is_stack()) {
    // Oop is already on the stack, put handle on stack or in register
    // If handle will be on the stack, use temp reg to calculate it.
    Register rHandle = dst.first()->is_stack() ? Z_R1 : dst.first()->as_Register();
    Label    skip;
    int      slot_in_older_frame = reg2slot(src.first());

    guarantee(!is_receiver, "expecting receiver in register");
    map->set_oop(VMRegImpl::stack2reg(slot_in_older_frame + framesize_in_slots));

    __ add2reg(rHandle, reg2offset(src.first())+frame_offset, Z_SP);
    __ load_and_test_long(Z_R0, Address(rHandle));
    __ z_brne(skip);
    // Use a null handle if oop is null.
    __ clear_reg(rHandle, true, false);
    __ bind(skip);

    // Copy handle to the right place (register or stack).
    if (dst.first()->is_stack()) {
      __ z_stg(rHandle, reg2offset(dst.first()), Z_SP);
    } // else
      // nothing to do. rHandle uses the correct register
  } else {
    // Oop is passed in an input register. We must flush it to the stack.
    const Register rOop = src.first()->as_Register();
    const Register rHandle = dst.first()->is_stack() ? Z_R1 : dst.first()->as_Register();
    int            oop_slot = (rOop->encoding()-Z_ARG1->encoding()) * VMRegImpl::slots_per_word + oop_handle_offset;
    int            oop_slot_offset = oop_slot*VMRegImpl::stack_slot_size;
    NearLabel skip;

    if (is_receiver) {
      *receiver_offset = oop_slot_offset;
    }
    map->set_oop(VMRegImpl::stack2reg(oop_slot));

    // Flush Oop to stack, calculate handle.
    __ z_stg(rOop, oop_slot_offset, Z_SP);
    __ add2reg(rHandle, oop_slot_offset, Z_SP);

    // If Oop is null, use a null handle.
    __ compare64_and_branch(rOop, (RegisterOrConstant)0L, Assembler::bcondNotEqual, skip);
    __ clear_reg(rHandle, true, false);
    __ bind(skip);

    // Copy handle to the right place (register or stack).
    if (dst.first()->is_stack()) {
      __ z_stg(rHandle, reg2offset(dst.first()), Z_SP);
    } // else
      // nothing to do here, since rHandle = dst.first()->as_Register in this case.
  }
}

//----------------------------------------------------------------------
// A float arg. May have to do float reg to int reg conversion
//----------------------------------------------------------------------
static void float_move(MacroAssembler *masm,
                       VMRegPair src,
                       VMRegPair dst,
                       int framesize_in_slots,
                       int workspace_slot_offset) {
  int frame_offset = framesize_in_slots * VMRegImpl::stack_slot_size;
  int workspace_offset = workspace_slot_offset * VMRegImpl::stack_slot_size;

  // We do not accept an argument in a VMRegPair to be spread over two slots,
  // no matter what physical location (reg or stack) the slots may have.
  // We just check for the unaccepted slot to be invalid.
  assert(!src.second()->is_valid(), "float in arg spread over two slots");
  assert(!dst.second()->is_valid(), "float out arg spread over two slots");

  if (src.first()->is_stack()) {
    if (dst.first()->is_stack()) {
      // stack -> stack. The easiest of the bunch.
      __ z_mvc(Address(Z_SP, reg2offset(dst.first())),
               Address(Z_SP, reg2offset(src.first()) + frame_offset), sizeof(float));
    } else {
      // stack to reg
      Address memaddr(Z_SP, reg2offset(src.first()) + frame_offset);
      if (dst.first()->is_Register()) {
        __ mem2reg_opt(dst.first()->as_Register(), memaddr, false);
      } else {
        __ mem2freg_opt(dst.first()->as_FloatRegister(), memaddr, false);
      }
    }
  } else if (src.first()->is_Register()) {
    if (dst.first()->is_stack()) {
      // gpr -> stack
      __ reg2mem_opt(src.first()->as_Register(),
                     Address(Z_SP, reg2offset(dst.first())), false);
    } else {
      if (dst.first()->is_Register()) {
        // gpr -> gpr
        __ move_reg_if_needed(dst.first()->as_Register(), T_INT,
                              src.first()->as_Register(), T_INT);
      } else {
        if (VM_Version::has_FPSupportEnhancements()) {
          // gpr -> fpr. Exploit z10 capability of direct transfer.
          __ z_ldgr(dst.first()->as_FloatRegister(), src.first()->as_Register());
        } else {
          // gpr -> fpr. Use work space on stack to transfer data.
          Address   stackaddr(Z_SP, workspace_offset);

          __ reg2mem_opt(src.first()->as_Register(), stackaddr, false);
          __ mem2freg_opt(dst.first()->as_FloatRegister(), stackaddr, false);
        }
      }
    }
  } else {
    if (dst.first()->is_stack()) {
      // fpr -> stack
      __ freg2mem_opt(src.first()->as_FloatRegister(),
                      Address(Z_SP, reg2offset(dst.first())), false);
    } else {
      if (dst.first()->is_Register()) {
        if (VM_Version::has_FPSupportEnhancements()) {
          // fpr -> gpr.
          __ z_lgdr(dst.first()->as_Register(), src.first()->as_FloatRegister());
        } else {
          // fpr -> gpr. Use work space on stack to transfer data.
          Address   stackaddr(Z_SP, workspace_offset);

          __ freg2mem_opt(src.first()->as_FloatRegister(), stackaddr, false);
          __ mem2reg_opt(dst.first()->as_Register(), stackaddr, false);
        }
      } else {
        // fpr -> fpr
        __ move_freg_if_needed(dst.first()->as_FloatRegister(), T_FLOAT,
                               src.first()->as_FloatRegister(), T_FLOAT);
      }
    }
  }
}

//----------------------------------------------------------------------
// A double arg. May have to do double reg to long reg conversion
//----------------------------------------------------------------------
static void double_move(MacroAssembler *masm,
                        VMRegPair src,
                        VMRegPair dst,
                        int framesize_in_slots,
                        int workspace_slot_offset) {
  int frame_offset = framesize_in_slots*VMRegImpl::stack_slot_size;
  int workspace_offset = workspace_slot_offset*VMRegImpl::stack_slot_size;

  // Since src is always a java calling convention we know that the
  // src pair is always either all registers or all stack (and aligned?)

  if (src.first()->is_stack()) {
    if (dst.first()->is_stack()) {
      // stack -> stack. The easiest of the bunch.
      __ z_mvc(Address(Z_SP, reg2offset(dst.first())),
               Address(Z_SP, reg2offset(src.first()) + frame_offset), sizeof(double));
    } else {
      // stack to reg
      Address stackaddr(Z_SP, reg2offset(src.first()) + frame_offset);

      if (dst.first()->is_Register()) {
        __ mem2reg_opt(dst.first()->as_Register(), stackaddr);
      } else {
        __ mem2freg_opt(dst.first()->as_FloatRegister(), stackaddr);
      }
    }
  } else if (src.first()->is_Register()) {
    if (dst.first()->is_stack()) {
      // gpr -> stack
      __ reg2mem_opt(src.first()->as_Register(),
                     Address(Z_SP, reg2offset(dst.first())));
    } else {
      if (dst.first()->is_Register()) {
        // gpr -> gpr
        __ move_reg_if_needed(dst.first()->as_Register(), T_LONG,
                              src.first()->as_Register(), T_LONG);
      } else {
        if (VM_Version::has_FPSupportEnhancements()) {
          // gpr -> fpr. Exploit z10 capability of direct transfer.
          __ z_ldgr(dst.first()->as_FloatRegister(), src.first()->as_Register());
        } else {
          // gpr -> fpr. Use work space on stack to transfer data.
          Address stackaddr(Z_SP, workspace_offset);
          __ reg2mem_opt(src.first()->as_Register(), stackaddr);
          __ mem2freg_opt(dst.first()->as_FloatRegister(), stackaddr);
        }
      }
    }
  } else {
    if (dst.first()->is_stack()) {
      // fpr -> stack
      __ freg2mem_opt(src.first()->as_FloatRegister(),
                      Address(Z_SP, reg2offset(dst.first())));
    } else {
      if (dst.first()->is_Register()) {
        if (VM_Version::has_FPSupportEnhancements()) {
          // fpr -> gpr. Exploit z10 capability of direct transfer.
          __ z_lgdr(dst.first()->as_Register(), src.first()->as_FloatRegister());
        } else {
          // fpr -> gpr. Use work space on stack to transfer data.
          Address stackaddr(Z_SP, workspace_offset);

          __ freg2mem_opt(src.first()->as_FloatRegister(), stackaddr);
          __ mem2reg_opt(dst.first()->as_Register(), stackaddr);
        }
      } else {
        // fpr -> fpr
        // In theory these overlap but the ordering is such that this is likely a nop.
        __ move_freg_if_needed(dst.first()->as_FloatRegister(), T_DOUBLE,
                               src.first()->as_FloatRegister(), T_DOUBLE);
      }
    }
  }
}

//----------------------------------------------------------------------
// A long arg.
//----------------------------------------------------------------------
static void long_move(MacroAssembler *masm,
                      VMRegPair src,
                      VMRegPair dst,
                      int framesize_in_slots) {
  int frame_offset = framesize_in_slots*VMRegImpl::stack_slot_size;

  if (src.first()->is_stack()) {
    if (dst.first()->is_stack()) {
      // stack -> stack. The easiest of the bunch.
      __ z_mvc(Address(Z_SP, reg2offset(dst.first())),
               Address(Z_SP, reg2offset(src.first()) + frame_offset), sizeof(long));
    } else {
      // stack to reg
      assert(dst.first()->is_Register(), "long dst value must be in GPR");
      __ mem2reg_opt(dst.first()->as_Register(),
                      Address(Z_SP, reg2offset(src.first()) + frame_offset));
    }
  } else {
    // reg to reg
    assert(src.first()->is_Register(), "long src value must be in GPR");
    if (dst.first()->is_stack()) {
      // reg -> stack
      __ reg2mem_opt(src.first()->as_Register(),
                     Address(Z_SP, reg2offset(dst.first())));
    } else {
      // reg -> reg
      assert(dst.first()->is_Register(), "long dst value must be in GPR");
      __ move_reg_if_needed(dst.first()->as_Register(),
                            T_LONG, src.first()->as_Register(), T_LONG);
    }
  }
}


//----------------------------------------------------------------------
// An int-like arg.
//----------------------------------------------------------------------
// On z/Architecture we will store integer like items to the stack as 64 bit
// items, according to the z/Architecture ABI, even though Java would only store
// 32 bits for a parameter.
// We do sign extension for all base types. That is ok since the only
// unsigned base type is T_CHAR, and T_CHAR uses only 16 bits of an int.
// Sign extension 32->64 bit will thus not affect the value.
//----------------------------------------------------------------------
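// Example: the unsigned T_CHAR value 0xFFFF is held as the non-negative
// int 0x0000FFFF. Bit 31 is 0, so 32->64 bit sign extension (LGFR)
// produces 0x000000000000FFFF, leaving the value unchanged.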
1264 static void move32_64(MacroAssembler *masm,
1265                       VMRegPair src,
1266                       VMRegPair dst,
1267                       int framesize_in_slots) {
1268   int frame_offset = framesize_in_slots * VMRegImpl::stack_slot_size;
1269 
1270   if (src.first()->is_stack()) {
1271     Address memaddr(Z_SP, reg2offset(src.first()) + frame_offset);
1272     if (dst.first()->is_stack()) {
1273       // stack -> stack. MVC not possible due to sign extension.
1274       Address firstaddr(Z_SP, reg2offset(dst.first()));
1275       __ mem2reg_signed_opt(Z_R0_scratch, memaddr);
1276       __ reg2mem_opt(Z_R0_scratch, firstaddr);
1277     } else {
1278       // stack -> reg, sign extended
1279       __ mem2reg_signed_opt(dst.first()->as_Register(), memaddr);
1280     }
1281   } else {
1282     if (dst.first()->is_stack()) {
1283       // reg -> stack, sign extended
1284       Address firstaddr(Z_SP, reg2offset(dst.first()));
1285       __ z_lgfr(src.first()->as_Register(), src.first()->as_Register());
1286       __ reg2mem_opt(src.first()->as_Register(), firstaddr);
1287     } else {
1288       // reg -> reg, sign extended
1289       __ z_lgfr(dst.first()->as_Register(), src.first()->as_Register());
1290     }
1291   }
1292 }
1293 
1294 //----------------------------------------------------------------------
1295 // Wrap a JNI call.
1296 //----------------------------------------------------------------------
1297 #undef USE_RESIZE_FRAME
1298 nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
1299                                                 const methodHandle& method,
1300                                                 int compile_id,
1301                                                 BasicType *in_sig_bt,
1302                                                 VMRegPair *in_regs,
1303                                                 BasicType ret_type) {
1304   int total_in_args = method->size_of_parameters();
1305   if (method->is_method_handle_intrinsic()) {
1306     vmIntrinsics::ID iid = method->intrinsic_id();
1307     intptr_t start = (intptr_t) __ pc();
1308     int vep_offset = ((intptr_t) __ pc()) - start;
1309 
1310     gen_special_dispatch(masm, total_in_args,
1311                          method->intrinsic_id(), in_sig_bt, in_regs);
1312 
1313     int frame_complete = ((intptr_t)__ pc()) - start; // Not complete, period.
1314 
1315     __ flush();
1316 
1317     int stack_slots = SharedRuntime::out_preserve_stack_slots();  // No out slots at all, actually.
1318 
1319     return nmethod::new_native_nmethod(method,
1320                                        compile_id,
1321                                        masm->code(),
1322                                        vep_offset,
1323                                        frame_complete,
1324                                        stack_slots / VMRegImpl::slots_per_word,
1325                                        in_ByteSize(-1),
1326                                        in_ByteSize(-1),
1327                                        (OopMapSet *) nullptr);
1328   }
1329 
1330 
1331   ///////////////////////////////////////////////////////////////////////
1332   //
1333   //  Precalculations before generating any code
1334   //
1335   ///////////////////////////////////////////////////////////////////////
1336 
1337   address native_func = method->native_function();
1338   assert(native_func != nullptr, "must have function");
1339 
  //---------------------------------------------------------------------
  // We have received a description of where all the java args are located
  // on entry to the wrapper. We need to convert these args to where
  // the jni function will expect them. To figure out where they go
  // we convert the java signature to a C signature by inserting
  // the hidden arguments as arg[0] and possibly arg[1] (static method).
  //
  // The first hidden argument arg[0] is a pointer to the JNI environment.
  // It is generated for every call.
  // The second argument arg[1] to the JNI call, which is hidden for static
  // methods, is the boxed lock object. For static calls, the lock object
  // is the class mirror of the method's holder; the oop is constructed
  // here. For instance calls, the lock is performed on the receiver object
  // itself, whose pointer is passed as the first visible argument.
  //---------------------------------------------------------------------
1355 
  // Additionally, on z/Architecture all integers must be widened to longs
  // for the C calling convention. The signature copy below stays 1:1; the
  // actual widening is performed by move32_64() during the argument shuffle.
  // So build the C signature and register vectors now, and adjust the total
  // number of in-arguments for the hidden arguments accordingly.
1361   bool method_is_static = method->is_static();
1362   int  total_c_args     = total_in_args + (method_is_static ? 2 : 1);
1363 
1364   BasicType *out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
1365   VMRegPair *out_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);
1366   BasicType* in_elem_bt = nullptr;
1367 
1368   // Create the signature for the C call:
1369   //   1) add the JNIEnv*
1370   //   2) add the class if the method is static
1371   //   3) copy the rest of the incoming signature (shifted by the number of
1372   //      hidden arguments)
1373 
1374   int argc = 0;
1375   out_sig_bt[argc++] = T_ADDRESS;
1376   if (method->is_static()) {
1377     out_sig_bt[argc++] = T_OBJECT;
1378   }
1379 
1380   for (int i = 0; i < total_in_args; i++) {
1381     out_sig_bt[argc++] = in_sig_bt[i];
1382   }
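  // Example: for "static native void m(int i, Object o)", in_sig_bt is
  // { T_INT, T_OBJECT } and the C signature built above is
  // { T_ADDRESS /*JNIEnv* */, T_OBJECT /*class mirror*/, T_INT, T_OBJECT },
  // i.e. total_c_args == total_in_args + 2.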
1383 
1384   ///////////////////////////////////////////////////////////////////////
1385   // Now figure out where the args must be stored and how much stack space
1386   // they require (neglecting out_preserve_stack_slots but providing space
1387   // for storing the first five register arguments).
1388   // It's weird, see int_stk_helper.
1389   ///////////////////////////////////////////////////////////////////////
1390 
1391   //---------------------------------------------------------------------
1392   // Compute framesize for the wrapper.
1393   //
1394   // - We need to handlize all oops passed in registers.
1395   // - We must create space for them here that is disjoint from the save area.
  // - We always just allocate 5 words for storing down these objects.
1397   //   This allows us to simply record the base and use the Ireg number to
1398   //   decide which slot to use.
1399   // - Note that the reg number used to index the stack slot is the inbound
1400   //   number, not the outbound number.
1401   // - We must shuffle args to match the native convention,
1402   //   and to include var-args space.
1403   //---------------------------------------------------------------------
1404 
1405   //---------------------------------------------------------------------
1406   // Calculate the total number of stack slots we will need:
1407   // - 1) abi requirements
1408   // - 2) outgoing args
1409   // - 3) space for inbound oop handle area
1410   // - 4) space for handlizing a klass if static method
1411   // - 5) space for a lock if synchronized method
1412   // - 6) workspace (save rtn value, int<->float reg moves, ...)
1413   // - 7) filler slots for alignment
1414   //---------------------------------------------------------------------
  // Here is what the space we have allocated will look like.
  // With USE_RESIZE_FRAME defined we would not create a new stack frame,
  // but just extend the one we got with our own data area. By default
  // (see the #undef above) a new frame is pushed instead; the layout of
  // the data area is the same either way.
1418   //
1419   // If an offset or pointer name points to a separator line, it is
1420   // assumed that addressing with offset 0 selects storage starting
1421   // at the first byte above the separator line.
1422   //
1423   //
1424   //     ...                   ...
1425   //      | caller's frame      |
1426   // FP-> |---------------------|
1427   //      | filler slots, if any|
1428   //     7| #slots == mult of 2 |
1429   //      |---------------------|
1430   //      | work space          |
1431   //     6| 2 slots = 8 bytes   |
1432   //      |---------------------|
1433   //     5| lock box (if sync)  |
1434   //      |---------------------| <- lock_slot_offset
1435   //     4| klass (if static)   |
1436   //      |---------------------| <- klass_slot_offset
1437   //     3| oopHandle area      |
1438   //      |                     |
1439   //      |                     |
1440   //      |---------------------| <- oop_handle_offset
1441   //     2| outbound memory     |
1442   //     ...                   ...
1443   //      | based arguments     |
1444   //      |---------------------|
1445   //      | vararg              |
1446   //     ...                   ...
1447   //      | area                |
1448   //      |---------------------| <- out_arg_slot_offset
1449   //     1| out_preserved_slots |
1450   //     ...                   ...
1451   //      | (z_abi spec)        |
1452   // SP-> |---------------------| <- FP_slot_offset (back chain)
1453   //     ...                   ...
1454   //
1455   //---------------------------------------------------------------------
1456 
1457   // *_slot_offset indicates offset from SP in #stack slots
1458   // *_offset      indicates offset from SP in #bytes
1459 
1460   int stack_slots = c_calling_convention(out_sig_bt, out_regs, /*regs2=*/nullptr, total_c_args) + // 1+2
1461                     SharedRuntime::out_preserve_stack_slots(); // see c_calling_convention
1462 
1463   // Now the space for the inbound oop handle area.
1464   int total_save_slots = Register::number_of_arg_registers * VMRegImpl::slots_per_word;
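  // (E.g., with 5 integer arg registers (Z_ARG1..Z_ARG5) and
  // VMRegImpl::slots_per_word == 2, this reserves 10 slots == 40 bytes.)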
1465 
1466   int oop_handle_slot_offset = stack_slots;
1467   stack_slots += total_save_slots;                                        // 3)
1468 
1469   int klass_slot_offset = 0;
1470   int klass_offset      = -1;
1471   if (method_is_static) {                                                 // 4)
1472     klass_slot_offset  = stack_slots;
1473     klass_offset       = klass_slot_offset * VMRegImpl::stack_slot_size;
1474     stack_slots       += VMRegImpl::slots_per_word;
1475   }
1476 
1477   int lock_slot_offset = 0;
1478   int lock_offset      = -1;
1479   if (method->is_synchronized()) {                                        // 5)
1480     lock_slot_offset   = stack_slots;
1481     lock_offset        = lock_slot_offset * VMRegImpl::stack_slot_size;
1482     stack_slots       += VMRegImpl::slots_per_word;
1483   }
1484 
  int workspace_slot_offset = stack_slots;                                // 6)
  stack_slots              += 2;
1487 
1488   // Now compute actual number of stack words we need.
1489   // Round to align stack properly.
1490   stack_slots = align_up(stack_slots,                                     // 7)
1491                          frame::alignment_in_bytes / VMRegImpl::stack_slot_size);
1492   int frame_size_in_bytes = stack_slots * VMRegImpl::stack_slot_size;
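  // Worked example (assuming frame::alignment_in_bytes == 8, so alignment is
  // done in units of 8/4 == 2 slots): stack_slots == 23 rounds up to 24,
  // giving frame_size_in_bytes == 24 * 4 == 96.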
1493 
1494 
1495   ///////////////////////////////////////////////////////////////////////
1496   // Now we can start generating code
1497   ///////////////////////////////////////////////////////////////////////
1498 
1499   unsigned int wrapper_CodeStart  = __ offset();
1500   unsigned int wrapper_UEPStart;
1501   unsigned int wrapper_VEPStart;
1502   unsigned int wrapper_FrameDone;
1503   unsigned int wrapper_CRegsSet;
1504   Label     handle_pending_exception;
1505   Label     ic_miss;
1506 
1507   //---------------------------------------------------------------------
1508   // Unverified entry point (UEP)
1509   //---------------------------------------------------------------------
1510   wrapper_UEPStart = __ offset();
1511 
1512   // check ic: object class <-> cached class
1513   if (!method_is_static) __ nmethod_UEP(ic_miss);
1514   // Fill with nops (alignment of verified entry point).
1515   __ align(CodeEntryAlignment);
1516 
1517   //---------------------------------------------------------------------
1518   // Verified entry point (VEP)
1519   //---------------------------------------------------------------------
1520   wrapper_VEPStart = __ offset();
1521 
1522   if (VM_Version::supports_fast_class_init_checks() && method->needs_clinit_barrier()) {
1523     Label L_skip_barrier;
1524     Register klass = Z_R1_scratch;
1525     // Notify OOP recorder (don't need the relocation)
1526     AddressLiteral md = __ constant_metadata_address(method->method_holder());
1527     __ load_const_optimized(klass, md.value());
1528     __ clinit_barrier(klass, Z_thread, &L_skip_barrier /*L_fast_path*/);
1529 
1530     __ load_const_optimized(klass, SharedRuntime::get_handle_wrong_method_stub());
1531     __ z_br(klass);
1532 
1533     __ bind(L_skip_barrier);
1534   }
1535 
1536   __ save_return_pc();
1537   __ generate_stack_overflow_check(frame_size_in_bytes);  // Check before creating frame.
1538 #ifndef USE_RESIZE_FRAME
1539   __ push_frame(frame_size_in_bytes);                     // Create a new frame for the wrapper.
1540 #else
1541   __ resize_frame(-frame_size_in_bytes, Z_R0_scratch);    // No new frame for the wrapper.
1542                                                           // Just resize the existing one.
1543 #endif
1544 
1545   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
1546   bs->nmethod_entry_barrier(masm);
1547 
1548   wrapper_FrameDone = __ offset();
1549 
  // Native nmethod wrappers never take possession of the oop arguments.
  // So the caller will GC the arguments.
1552   // The only thing we need an oopMap for is if the call is static.
1553   //
1554   // An OopMap for lock (and class if static), and one for the VM call itself
1555   OopMapSet  *oop_maps        = new OopMapSet();
1556   OopMap     *map             = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
1557 
1558   //////////////////////////////////////////////////////////////////////
1559   //
1560   // The Grand Shuffle
1561   //
1562   //////////////////////////////////////////////////////////////////////
1563   //
1564   // We immediately shuffle the arguments so that for any vm call we have
1565   // to make from here on out (sync slow path, jvmti, etc.) we will have
1566   // captured the oops from our caller and have a valid oopMap for them.
1567   //
1568   //--------------------------------------------------------------------
1569   // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv*
1570   // (derived from JavaThread* which is in Z_thread) and, if static,
1571   // the class mirror instead of a receiver. This pretty much guarantees that
1572   // register layout will not match. We ignore these extra arguments during
1573   // the shuffle. The shuffle is described by the two calling convention
1574   // vectors we have in our possession. We simply walk the java vector to
1575   // get the source locations and the c vector to get the destinations.
1576   //
  // This is a trick. We double the stack slots so we can claim
  // the oops in the caller's frame. Since we are sure to have
  // more args than the caller, doubling is enough to make
  // sure we can capture all the incoming oop args from the caller.
1581   //--------------------------------------------------------------------
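  // E.g., an incoming oop arg still sitting in the caller's frame lives at a
  // slot index >= stack_slots relative to our SP; the doubled size passed to
  // "new OopMap(stack_slots * 2, ...)" above keeps such slots describable.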
1582 
1583   // Record sp-based slot for receiver on stack for non-static methods.
1584   int receiver_offset = -1;
1585 
1586   //--------------------------------------------------------------------
  // We move the arguments backwards because the floating point
  // destination will always be a register with a greater or equal
  // register number, or a stack slot.
1590   //   jix is the index of the incoming Java arguments.
1591   //   cix is the index of the outgoing C arguments.
1592   //--------------------------------------------------------------------
1593 
1594 #ifdef ASSERT
1595   bool reg_destroyed[Register::number_of_registers];
1596   bool freg_destroyed[FloatRegister::number_of_registers];
1597   for (int r = 0; r < Register::number_of_registers; r++) {
1598     reg_destroyed[r] = false;
1599   }
1600   for (int f = 0; f < FloatRegister::number_of_registers; f++) {
1601     freg_destroyed[f] = false;
1602   }
1603 #endif // ASSERT
1604 
1605   for (int jix = total_in_args - 1, cix = total_c_args - 1; jix >= 0; jix--, cix--) {
1606 #ifdef ASSERT
1607     if (in_regs[jix].first()->is_Register()) {
1608       assert(!reg_destroyed[in_regs[jix].first()->as_Register()->encoding()], "ack!");
1609     } else {
1610       if (in_regs[jix].first()->is_FloatRegister()) {
1611         assert(!freg_destroyed[in_regs[jix].first()->as_FloatRegister()->encoding()], "ack!");
1612       }
1613     }
1614     if (out_regs[cix].first()->is_Register()) {
1615       reg_destroyed[out_regs[cix].first()->as_Register()->encoding()] = true;
1616     } else {
1617       if (out_regs[cix].first()->is_FloatRegister()) {
1618         freg_destroyed[out_regs[cix].first()->as_FloatRegister()->encoding()] = true;
1619       }
1620     }
1621 #endif // ASSERT
1622 
1623     switch (in_sig_bt[jix]) {
1624       // Due to casting, small integers should only occur in pairs with type T_LONG.
1625       case T_BOOLEAN:
1626       case T_CHAR:
1627       case T_BYTE:
1628       case T_SHORT:
1629       case T_INT:
1630         // Move int and do sign extension.
1631         move32_64(masm, in_regs[jix], out_regs[cix], stack_slots);
1632         break;
1633 
1634       case T_LONG :
1635         long_move(masm, in_regs[jix], out_regs[cix], stack_slots);
1636         break;
1637 
1638       case T_ARRAY:
1639       case T_OBJECT:
1640         object_move(masm, map, oop_handle_slot_offset, stack_slots, in_regs[jix], out_regs[cix],
1641                     ((jix == 0) && (!method_is_static)),
1642                     &receiver_offset);
1643         break;
1644       case T_VOID:
1645         break;
1646 
1647       case T_FLOAT:
1648         float_move(masm, in_regs[jix], out_regs[cix], stack_slots, workspace_slot_offset);
1649         break;
1650 
1651       case T_DOUBLE:
1652         assert(jix+1 <  total_in_args && in_sig_bt[jix+1]  == T_VOID && out_sig_bt[cix+1] == T_VOID, "bad arg list");
1653         double_move(masm, in_regs[jix], out_regs[cix], stack_slots, workspace_slot_offset);
1654         break;
1655 
1656       case T_ADDRESS:
1657         assert(false, "found T_ADDRESS in java args");
1658         break;
1659 
1660       default:
1661         ShouldNotReachHere();
1662     }
1663   }
1664 
1665   //--------------------------------------------------------------------
1666   // Pre-load a static method's oop into ARG2.
1667   // Used both by locking code and the normal JNI call code.
1668   //--------------------------------------------------------------------
1669   if (method_is_static) {
1670     __ set_oop_constant(JNIHandles::make_local(method->method_holder()->java_mirror()), Z_ARG2);
1671 
1672     // Now handlize the static class mirror in ARG2. It's known not-null.
1673     __ z_stg(Z_ARG2, klass_offset, Z_SP);
1674     map->set_oop(VMRegImpl::stack2reg(klass_slot_offset));
1675     __ add2reg(Z_ARG2, klass_offset, Z_SP);
1676   }
1677 
1678   // Get JNIEnv* which is first argument to native.
1679   __ add2reg(Z_ARG1, in_bytes(JavaThread::jni_environment_offset()), Z_thread);
1680 
1681   //////////////////////////////////////////////////////////////////////
1682   // We have all of the arguments setup at this point.
1683   // We MUST NOT touch any outgoing regs from this point on.
1684   // So if we must call out we must push a new frame.
1685   //////////////////////////////////////////////////////////////////////
1686 
1687 
1688   // Calc the current pc into Z_R10 and into wrapper_CRegsSet.
1689   // Both values represent the same position.
1690   __ get_PC(Z_R10);                // PC into register
  wrapper_CRegsSet = __ offset();  // and into the variable.
1692 
1693   // Z_R10 now has the pc loaded that we will use when we finally call to native.
1694 
1695   // We use the same pc/oopMap repeatedly when we call out.
1696   oop_maps->add_gc_map((int)(wrapper_CRegsSet-wrapper_CodeStart), map);
1697 
1698   // Lock a synchronized method.
1699 
1700   if (method->is_synchronized()) {
1701 
1702     // ATTENTION: args and Z_R10 must be preserved.
1703     Register r_oop  = Z_R11;
1704     Register r_box  = Z_R12;
1705     Register r_tmp1 = Z_R13;
1706     Register r_tmp2 = Z_R7;
1707     Label done;
1708 
    // Load the oop for the object or class. Z_ARG2 contains either
    // the handlized oop from the incoming arguments or the handlized
    // class mirror (if the method is static).
1712     __ z_lg(r_oop, 0, Z_ARG2);
1713 
1714     lock_offset = (lock_slot_offset * VMRegImpl::stack_slot_size);
1715     // Get the lock box slot's address.
1716     __ add2reg(r_box, lock_offset, Z_SP);
1717 
    // Try fastpath for locking.
    // Fast_lock kills r_tmp1, r_tmp2.
    // In case of DiagnoseSyncOnValueBasedClasses, the content of Z_R1_scratch
    // will be destroyed, so avoid using Z_R1 as a temp here.
1722     __ compiler_fast_lock_object(r_oop, r_box, r_tmp1, r_tmp2);
1723     __ z_bre(done);
1724 
1725     //-------------------------------------------------------------------------
1726     // None of the above fast optimizations worked so we have to get into the
1727     // slow case of monitor enter. Inline a special case of call_VM that
1728     // disallows any pending_exception.
1729     //-------------------------------------------------------------------------
1730 
1731     Register oldSP = Z_R11;
1732 
1733     __ z_lgr(oldSP, Z_SP);
1734 
1735     RegisterSaver::save_live_registers(masm, RegisterSaver::arg_registers);
1736 
1737     // Prepare arguments for call.
    __ z_lg(Z_ARG1, 0, Z_ARG2); // Unboxed class mirror or unboxed object.
1739     __ add2reg(Z_ARG2, lock_offset, oldSP);
1740     __ z_lgr(Z_ARG3, Z_thread);
1741 
1742     __ set_last_Java_frame(oldSP, Z_R10 /* gc map pc */);
1743 
1744     // Do the call.
1745     __ load_const_optimized(Z_R1_scratch, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C));
1746     __ call(Z_R1_scratch);
1747 
1748     __ reset_last_Java_frame();
1749 
1750     RegisterSaver::restore_live_registers(masm, RegisterSaver::arg_registers);
1751 #ifdef ASSERT
1752     { Label L;
1753       __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset()));
1754       __ z_bre(L);
1755       __ stop("no pending exception allowed on exit from IR::monitorenter");
1756       __ bind(L);
1757     }
1758 #endif
1759     __ bind(done);
1760   } // lock for synchronized methods
1761 
1762 
1763   //////////////////////////////////////////////////////////////////////
1764   // Finally just about ready to make the JNI call.
1765   //////////////////////////////////////////////////////////////////////
1766 
1767   // Use that pc we placed in Z_R10 a while back as the current frame anchor.
1768   __ set_last_Java_frame(Z_SP, Z_R10);
1769 
1770   // Transition from _thread_in_Java to _thread_in_native.
1771   __ set_thread_state(_thread_in_native);
1772 
1773   //////////////////////////////////////////////////////////////////////
1774   // This is the JNI call.
1775   //////////////////////////////////////////////////////////////////////
1776 
1777   __ call_c(native_func);
1778 
1779 
1780   //////////////////////////////////////////////////////////////////////
1781   // We have survived the call once we reach here.
1782   //////////////////////////////////////////////////////////////////////
1783 
1784 
1785   //--------------------------------------------------------------------
1786   // Unpack native results.
1787   //--------------------------------------------------------------------
  // For int-types, we do any needed sign extension.
1789   // Care must be taken that the return value (in Z_ARG1 = Z_RET = Z_R2
1790   // or in Z_FARG0 = Z_FRET = Z_F0) will survive any VM calls for
1791   // blocking or unlocking.
1792   // An OOP result (handle) is done specially in the slow-path code.
1793   //--------------------------------------------------------------------
1794   switch (ret_type) {
1795     case T_VOID:    break;         // Nothing to do!
1796     case T_FLOAT:   break;         // Got it where we want it (unless slow-path)
1797     case T_DOUBLE:  break;         // Got it where we want it (unless slow-path)
1798     case T_LONG:    break;         // Got it where we want it (unless slow-path)
1799     case T_OBJECT:  break;         // Really a handle.
1800                                    // Cannot de-handlize until after reclaiming jvm_lock.
1801     case T_ARRAY:   break;
1802 
1803     case T_BOOLEAN:                // 0 -> false(0); !0 -> true(1)
1804       __ z_lngfr(Z_RET, Z_RET);    // Force sign bit on except for zero.
1805       __ z_srlg(Z_RET, Z_RET, 63); // Shift sign bit into least significant pos.
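      // Example: Z_RET == 5 -> LNGFR yields -5 (sign bit 1) -> SRLG 63 yields 1.
      //          Z_RET == 0 -> LNGFR yields  0 (sign bit 0) -> SRLG 63 yields 0.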
1806       break;
1807     case T_BYTE:    __ z_lgbr(Z_RET, Z_RET);  break; // sign extension
1808     case T_CHAR:    __ z_llghr(Z_RET, Z_RET); break; // unsigned result
1809     case T_SHORT:   __ z_lghr(Z_RET, Z_RET);  break; // sign extension
1810     case T_INT:     __ z_lgfr(Z_RET, Z_RET);  break; // sign-extend for beauty.
1811 
1812     default:
1813       ShouldNotReachHere();
1814       break;
1815   }
1816 
1817   Label after_transition;
1818 
1819   // Switch thread to "native transition" state before reading the synchronization state.
1820   // This additional state is necessary because reading and testing the synchronization
1821   // state is not atomic w.r.t. GC, as this scenario demonstrates:
1822   //   - Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
1823   //   - VM thread changes sync state to synchronizing and suspends threads for GC.
1824   //   - Thread A is resumed to finish this native method, but doesn't block here since it
1825   //     didn't see any synchronization in progress, and escapes.
1826 
1827   // Transition from _thread_in_native to _thread_in_native_trans.
1828   __ set_thread_state(_thread_in_native_trans);
1829 
1830   // Safepoint synchronization
1831   //--------------------------------------------------------------------
1832   // Must we block?
1833   //--------------------------------------------------------------------
1834   // Block, if necessary, before resuming in _thread_in_Java state.
1835   // In order for GC to work, don't clear the last_Java_sp until after blocking.
1836   //--------------------------------------------------------------------
1837   {
1838     Label no_block, sync;
1839 
1840     save_native_result(masm, ret_type, workspace_slot_offset); // Make Z_R2 available as work reg.
1841 
1842     // Force this write out before the read below.
1843     if (!UseSystemMemoryBarrier) {
1844       __ z_fence();
1845     }
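    // The fence provides the StoreLoad ordering between the thread-state
    // store above and the safepoint-poll read below. When
    // UseSystemMemoryBarrier is enabled, the VM side issues a system-wide
    // barrier during safepoint synchronization instead, so the local fence
    // can be omitted here (see utilities/systemMemoryBarrier.hpp).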
1846 
1847     __ safepoint_poll(sync, Z_R1);
1848 
1849     __ load_and_test_int(Z_R0, Address(Z_thread, JavaThread::suspend_flags_offset()));
1850     __ z_bre(no_block);
1851 
1852     // Block. Save any potential method result value before the operation and
1853     // use a leaf call to leave the last_Java_frame setup undisturbed. Doing this
1854     // lets us share the oopMap we used when we went native rather than create
1855     // a distinct one for this pc.
1856     //
1857     __ bind(sync);
1858     __ z_acquire();
1859 
1860     address entry_point = CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans);
1861 
1862     __ call_VM_leaf(entry_point, Z_thread);
1863 
1864     __ bind(no_block);
1865     restore_native_result(masm, ret_type, workspace_slot_offset);
1866   }
1867 
1868   //--------------------------------------------------------------------
1869   // Thread state is thread_in_native_trans. Any safepoint blocking has
1870   // already happened so we can now change state to _thread_in_Java.
1871   //--------------------------------------------------------------------
1872   // Transition from _thread_in_native_trans to _thread_in_Java.
1873   __ set_thread_state(_thread_in_Java);
1874   __ bind(after_transition);
1875 
1876   //--------------------------------------------------------------------
1877   // Reguard any pages if necessary.
1878   // Protect native result from being destroyed.
1879   //--------------------------------------------------------------------
1880 
1881   Label no_reguard;
1882 
1883   __ z_cli(Address(Z_thread, JavaThread::stack_guard_state_offset() + in_ByteSize(sizeof(StackOverflow::StackGuardState) - 1)),
1884            StackOverflow::stack_guard_yellow_reserved_disabled);
1885 
1886   __ z_bre(no_reguard);
1887 
1888   save_native_result(masm, ret_type, workspace_slot_offset);
1889   __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), Z_method);
1890   restore_native_result(masm, ret_type, workspace_slot_offset);
1891 
1892   __ bind(no_reguard);
1893 
1894 
1895   // Synchronized methods (slow path only)
1896   // No pending exceptions for now.
1897   //--------------------------------------------------------------------
1898   // Handle possibly pending exception (will unlock if necessary).
1899   // Native result is, if any is live, in Z_FRES or Z_RES.
1900   //--------------------------------------------------------------------
1901   // Unlock
1902   //--------------------------------------------------------------------
1903   if (method->is_synchronized()) {
1904     const Register r_oop        = Z_R11;
1905     const Register r_box        = Z_R12;
1906     const Register r_tmp1       = Z_R13;
1907     const Register r_tmp2       = Z_R7;
1908     Label done;
1909 
1910     // Get unboxed oop of class mirror or object ...
1911     int   offset = method_is_static ? klass_offset : receiver_offset;
1912 
1913     assert(offset != -1, "");
1914     __ z_lg(r_oop, offset, Z_SP);
1915 
1916     // ... and address of lock object box.
1917     __ add2reg(r_box, lock_offset, Z_SP);
1918 
1919     // Try fastpath for unlocking.
1920     // Fast_unlock kills r_tmp1, r_tmp2.
1921     __ compiler_fast_unlock_object(r_oop, r_box, r_tmp1, r_tmp2);
1922     __ z_bre(done);
1923 
1924     // Slow path for unlocking.
1925     // Save and restore any potential method result value around the unlocking operation.
1926     const Register R_exc = Z_R11;
1927 
1928     save_native_result(masm, ret_type, workspace_slot_offset);
1929 
1930     // Must save pending exception around the slow-path VM call. Since it's a
1931     // leaf call, the pending exception (if any) can be kept in a register.
1932     __ z_lg(R_exc, Address(Z_thread, Thread::pending_exception_offset()));
1933     assert(R_exc->is_nonvolatile(), "exception register must be non-volatile");
1934 
    // Must clear the pending exception before re-entering the VM. It is
    // restored from R_exc after the call (see below).
1937     __ clear_mem(Address(Z_thread, Thread::pending_exception_offset()), sizeof(intptr_t));
1938 
1939     // Inline a special case of call_VM that disallows any pending_exception.
1940 
1941     // Get locked oop from the handle we passed to jni.
1942     __ z_lg(Z_ARG1, offset, Z_SP);
1943     __ add2reg(Z_ARG2, lock_offset, Z_SP);
1944     __ z_lgr(Z_ARG3, Z_thread);
1945 
1946     __ load_const_optimized(Z_R1_scratch, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C));
1947 
1948     __ call(Z_R1_scratch);
1949 
1950 #ifdef ASSERT
1951     {
1952       Label L;
1953       __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset()));
1954       __ z_bre(L);
1955       __ stop("no pending exception allowed on exit from IR::monitorexit");
1956       __ bind(L);
1957     }
1958 #endif
1959 
    // check_forward_pending_exception jumps to forward_exception if any pending
    // exception is set. The forward_exception routine expects to see the
    // exception in pending_exception and not in a register. Kind of clumsy,
    // since all folks who branch to forward_exception must have tested
    // pending_exception first and hence have it in a register already.
1965     __ z_stg(R_exc, Address(Z_thread, Thread::pending_exception_offset()));
1966     restore_native_result(masm, ret_type, workspace_slot_offset);
1967     __ z_bru(done);
1968     __ z_illtrap(0x66);
1969 
1970     __ bind(done);
1971   }
1972 
1973 
1974   //--------------------------------------------------------------------
1975   // Clear "last Java frame" SP and PC.
1976   //--------------------------------------------------------------------
1977 
1978   __ reset_last_Java_frame();
1979 
1980   // Unpack oop result, e.g. JNIHandles::resolve result.
1981   if (is_reference_type(ret_type)) {
1982     __ resolve_jobject(Z_RET, /* tmp1 */ Z_R13, /* tmp2 */ Z_R7);
1983   }
1984 
1985   if (CheckJNICalls) {
1986     // clear_pending_jni_exception_check
1987     __ clear_mem(Address(Z_thread, JavaThread::pending_jni_exception_check_fn_offset()), sizeof(oop));
1988   }
1989 
1990   // Reset handle block.
1991   __ z_lg(Z_R1_scratch, Address(Z_thread, JavaThread::active_handles_offset()));
1992   __ clear_mem(Address(Z_R1_scratch, JNIHandleBlock::top_offset()), 4);
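  // The 4-byte store clears JNIHandleBlock::_top (assuming _top is the
  // 32-bit "next unused slot" index, as declared in jniHandles.hpp), which
  // logically frees all local handles created during the native call.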
1993 
1994   // Check for pending exceptions.
1995   __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset()));
1996   __ z_brne(handle_pending_exception);
1997 
1998 
1999   //////////////////////////////////////////////////////////////////////
2000   // Return
2001   //////////////////////////////////////////////////////////////////////
2002 
2003 
2004 #ifndef USE_RESIZE_FRAME
2005   __ pop_frame();                     // Pop wrapper frame.
2006 #else
2007   __ resize_frame(frame_size_in_bytes, Z_R0_scratch);  // Revert stack extension.
2008 #endif
2009   __ restore_return_pc();             // This is the way back to the caller.
2010   __ z_br(Z_R14);
2011 
2012 
2013   //////////////////////////////////////////////////////////////////////
2014   // Out-of-line calls to the runtime.
2015   //////////////////////////////////////////////////////////////////////
2016 
2017 
2018   //---------------------------------------------------------------------
2019   // Handler for pending exceptions (out-of-line).
2020   //---------------------------------------------------------------------
2021   // Since this is a native call, we know the proper exception handler
2022   // is the empty function. We just pop this frame and then jump to
2023   // forward_exception_entry. Z_R14 will contain the native caller's
2024   // return PC.
2025   __ bind(handle_pending_exception);
2026   __ pop_frame();
2027   __ load_const_optimized(Z_R1_scratch, StubRoutines::forward_exception_entry());
2028   __ restore_return_pc();
2029   __ z_br(Z_R1_scratch);
2030 
2031   //---------------------------------------------------------------------
2032   // Handler for a cache miss (out-of-line)
2033   //---------------------------------------------------------------------
2034   __ call_ic_miss_handler(ic_miss, 0x77, 0, Z_R1_scratch);
2035   __ flush();
2036 
2037 
2038   //////////////////////////////////////////////////////////////////////
2039   // end of code generation
2040   //////////////////////////////////////////////////////////////////////
2041 
2042 
2043   nmethod *nm = nmethod::new_native_nmethod(method,
2044                                             compile_id,
2045                                             masm->code(),
2046                                             (int)(wrapper_VEPStart-wrapper_CodeStart),
2047                                             (int)(wrapper_FrameDone-wrapper_CodeStart),
2048                                             stack_slots / VMRegImpl::slots_per_word,
2049                                             (method_is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
2050                                             in_ByteSize(lock_offset),
2051                                             oop_maps);
2052 
2053   return nm;
2054 }
2055 
2056 static address gen_c2i_adapter(MacroAssembler  *masm,
2057                                int total_args_passed,
2058                                int comp_args_on_stack,
2059                                const BasicType *sig_bt,
2060                                const VMRegPair *regs,
2061                                Label &skip_fixup) {
2062   // Before we get into the guts of the C2I adapter, see if we should be here
2063   // at all. We've come from compiled code and are attempting to jump to the
2064   // interpreter, which means the caller made a static call to get here
2065   // (vcalls always get a compiled target if there is one). Check for a
2066   // compiled target. If there is one, we need to patch the caller's call.
2067 
2068   // These two defs MUST MATCH code in gen_i2c2i_adapter!
2069   const Register ientry = Z_R11;
2070   const Register code   = Z_R11;
2071 
2072   address c2i_entrypoint;
2073   Label   patch_callsite;
2074 
2075   // Regular (verified) c2i entry point.
2076   c2i_entrypoint = __ pc();
2077 
2078   // Call patching needed?
2079   __ load_and_test_long(Z_R0_scratch, method_(code));
2080   __ z_lg(ientry, method_(interpreter_entry));  // Preload interpreter entry (also if patching).
2081   __ z_brne(patch_callsite);                    // Patch required if code isn't null (compiled target exists).
2082 
2083   __ bind(skip_fixup);  // Return point from patch_callsite.
2084 
2085   // Since all args are passed on the stack, total_args_passed*wordSize is the
2086   // space we need. We need ABI scratch area but we use the caller's since
2087   // it has already been allocated.
2088 
2089   const int abi_scratch = frame::z_top_ijava_frame_abi_size;
2090   int       extraspace  = align_up(total_args_passed, 2)*wordSize + abi_scratch;
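  // Example: total_args_passed == 3 -> align_up(3, 2) == 4 -> 4 * 8 == 32
  // bytes of arg space (wordSize == 8), plus the ABI scratch area on top.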
2091   Register  sender_SP   = Z_R10;
2092   Register  value       = Z_R12;
2093 
  // Remember the senderSP so we can pop the interpreter arguments off the stack.
  // In addition, the frame manager expects initial_caller_sp in Z_R10.
2096   __ z_lgr(sender_SP, Z_SP);
2097 
  // This should always fit in a 14-bit immediate.
2099   __ resize_frame(-extraspace, Z_R0_scratch);
2100 
  // We use the caller's ABI scratch area (out_preserved_stack_slots) for the initial
  // args. This essentially moves the caller's ABI scratch area from the top to the
  // bottom of the arg area.
2104 
2105   int st_off =  extraspace - wordSize;
2106 
2107   // Now write the args into the outgoing interpreter space.
2108   for (int i = 0; i < total_args_passed; i++) {
2109     VMReg r_1 = regs[i].first();
2110     VMReg r_2 = regs[i].second();
2111     if (!r_1->is_valid()) {
2112       assert(!r_2->is_valid(), "");
2113       continue;
2114     }
2115     if (r_1->is_stack()) {
2116       // The calling convention produces OptoRegs that ignore the preserve area (abi scratch).
2117       // We must account for it here.
2118       int ld_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
2119 
2120       if (!r_2->is_valid()) {
2121         __ z_mvc(Address(Z_SP, st_off), Address(sender_SP, ld_off), sizeof(void*));
2122       } else {
2123         // longs are given 2 64-bit slots in the interpreter,
2124         // but the data is passed in only 1 slot.
2125         if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
2126 #ifdef ASSERT
2127           __ clear_mem(Address(Z_SP, st_off), sizeof(void *));
2128 #endif
2129           st_off -= wordSize;
2130         }
2131         __ z_mvc(Address(Z_SP, st_off), Address(sender_SP, ld_off), sizeof(void*));
2132       }
2133     } else {
2134       if (r_1->is_Register()) {
2135         if (!r_2->is_valid()) {
2136           __ z_st(r_1->as_Register(), st_off, Z_SP);
2137         } else {
2138           // longs are given 2 64-bit slots in the interpreter, but the
2139           // data is passed in only 1 slot.
2140           if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
2141 #ifdef ASSERT
2142             __ clear_mem(Address(Z_SP, st_off), sizeof(void *));
2143 #endif
2144             st_off -= wordSize;
2145           }
2146           __ z_stg(r_1->as_Register(), st_off, Z_SP);
2147         }
2148       } else {
2149         assert(r_1->is_FloatRegister(), "");
2150         if (!r_2->is_valid()) {
2151           __ z_ste(r_1->as_FloatRegister(), st_off, Z_SP);
2152         } else {
2153           // In 64bit, doubles are given 2 64-bit slots in the interpreter, but the
2154           // data is passed in only 1 slot.
2155           // One of these should get known junk...
2156 #ifdef ASSERT
2157           __ z_lzdr(Z_F1);
2158           __ z_std(Z_F1, st_off, Z_SP);
2159 #endif
2160           st_off-=wordSize;
2161           __ z_std(r_1->as_FloatRegister(), st_off, Z_SP);
2162         }
2163       }
2164     }
2165     st_off -= wordSize;
2166   }
2167 
2168 
2169   // Jump to the interpreter just as if interpreter was doing it.
2170   __ add2reg(Z_esp, st_off, Z_SP);
2171 
2172   // Frame_manager expects initial_caller_sp (= SP without resize by c2i) in Z_R10.
2173   __ z_br(ientry);
2174 
2175 
2176   // Prevent illegal entry to out-of-line code.
2177   __ z_illtrap(0x22);
2178 
2179   // Generate out-of-line runtime call to patch caller,
2180   // then continue as interpreted.
2181 
  // If you lose the race you go interpreted.
  // We don't see any possible endless c2i -> i2c -> c2i ...
  // transitions, no matter how rare.
2185   __ bind(patch_callsite);
2186 
2187   RegisterSaver::save_live_registers(masm, RegisterSaver::arg_registers);
2188   __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), Z_method, Z_R14);
2189   RegisterSaver::restore_live_registers(masm, RegisterSaver::arg_registers);
2190   __ z_bru(skip_fixup);
2191 
2192   // end of out-of-line code
2193 
2194   return c2i_entrypoint;
2195 }
2196 
2197 // On entry, the following registers are set
2198 //
2199 //    Z_thread  r8  - JavaThread*
2200 //    Z_method  r9  - callee's method (method to be invoked)
//    Z_esp     r7  - operand (or expression) stack pointer of caller; one slot above last arg.
2202 //    Z_SP      r15 - SP prepared by call stub such that caller's outgoing args are near top
2203 //
2204 void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
2205                                     int total_args_passed,
2206                                     int comp_args_on_stack,
2207                                     const BasicType *sig_bt,
2208                                     const VMRegPair *regs) {
2209   const Register value = Z_R12;
2210   const Register ld_ptr= Z_esp;
2211 
2212   int ld_offset = total_args_passed * wordSize;
2213 
2214   // Cut-out for having no stack args.
2215   if (comp_args_on_stack) {
2216     // Sig words on the stack are greater than VMRegImpl::stack0. Those in
2217     // registers are below. By subtracting stack0, we either get a negative
2218     // number (all values in registers) or the maximum stack slot accessed.
2219     // Convert VMRegImpl (4 byte) stack slots to words.
2220     int comp_words_on_stack = align_up(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord;
    // Round up to minimum stack alignment, in wordSize.
2222     comp_words_on_stack = align_up(comp_words_on_stack, 2);
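    // Example: comp_args_on_stack == 5 slots -> 20 bytes -> 24 bytes after
    // rounding to wordSize -> 3 words -> 4 words after 2-word alignment.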
2223 
2224     __ resize_frame(-comp_words_on_stack*wordSize, Z_R0_scratch);
2225   }
2226 
2227   // Now generate the shuffle code. Pick up all register args and move the
2228   // rest through register value=Z_R12.
2229   for (int i = 0; i < total_args_passed; i++) {
2230     if (sig_bt[i] == T_VOID) {
2231       assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
2232       continue;
2233     }
2234 
2235     // Pick up 0, 1 or 2 words from ld_ptr.
2236     assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
2237            "scrambled load targets?");
2238     VMReg r_1 = regs[i].first();
2239     VMReg r_2 = regs[i].second();
2240     if (!r_1->is_valid()) {
2241       assert(!r_2->is_valid(), "");
2242       continue;
2243     }
2244     if (r_1->is_FloatRegister()) {
2245       if (!r_2->is_valid()) {
2246         __ z_le(r_1->as_FloatRegister(), ld_offset, ld_ptr);
2247         ld_offset-=wordSize;
2248       } else {
2249         // Skip the unused interpreter slot.
2250         __ z_ld(r_1->as_FloatRegister(), ld_offset - wordSize, ld_ptr);
2251         ld_offset -= 2 * wordSize;
2252       }
2253     } else {
2254       if (r_1->is_stack()) {
2255         // Must do a memory to memory move.
2256         int st_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
2257 
2258         if (!r_2->is_valid()) {
2259           __ z_mvc(Address(Z_SP, st_off), Address(ld_ptr, ld_offset), sizeof(void*));
2260         } else {
2261           // In 64bit, longs are given 2 64-bit slots in the interpreter, but the
2262           // data is passed in only 1 slot.
2263           if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
2264             ld_offset -= wordSize;
2265           }
2266           __ z_mvc(Address(Z_SP, st_off), Address(ld_ptr, ld_offset), sizeof(void*));
2267         }
2268       } else {
2269         if (!r_2->is_valid()) {
2270           // Not sure we need to do this but it shouldn't hurt.
2271           if (is_reference_type(sig_bt[i]) || sig_bt[i] == T_ADDRESS) {
2272             __ z_lg(r_1->as_Register(), ld_offset, ld_ptr);
2273           } else {
2274             __ z_l(r_1->as_Register(), ld_offset, ld_ptr);
2275           }
2276         } else {
2277           // In 64bit, longs are given 2 64-bit slots in the interpreter, but the
2278           // data is passed in only 1 slot.
2279           if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
2280             ld_offset -= wordSize;
2281           }
2282           __ z_lg(r_1->as_Register(), ld_offset, ld_ptr);
2283         }
2284       }
2285       ld_offset -= wordSize;
2286     }
2287   }
2288 
2289   // Jump to the compiled code just as if compiled code was doing it.
2290   // load target address from method:
2291   __ z_lg(Z_R1_scratch, Address(Z_method, Method::from_compiled_offset()));
2292 
2293   // Store method into thread->callee_target.
2294   // 6243940: We might end up in handle_wrong_method if
2295   // the callee is deoptimized as we race thru here. If that
2296   // happens we don't want to take a safepoint because the
2297   // caller frame will look interpreted and arguments are now
2298   // "compiled" so it is much better to make this transition
2299   // invisible to the stack walking code. Unfortunately, if
2300   // we try and find the callee by normal means a safepoint
2301   // is possible. So we stash the desired callee in the thread
2302   // and the vm will find it there should this case occur.
2303   __ z_stg(Z_method, thread_(callee_target));
2304 
2305   __ z_br(Z_R1_scratch);
2306 }
2307 
2308 AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
2309                                                             int total_args_passed,
2310                                                             int comp_args_on_stack,
2311                                                             const BasicType *sig_bt,
2312                                                             const VMRegPair *regs,
2313                                                             AdapterFingerPrint* fingerprint) {
2314   __ align(CodeEntryAlignment);
2315   address i2c_entry = __ pc();
2316   gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);
2317 
2318   address c2i_unverified_entry;
2319 
2320   Label skip_fixup;
2321   {
2322     Label ic_miss;
2323     const int klass_offset           = oopDesc::klass_offset_in_bytes();
2324     const int holder_klass_offset    = in_bytes(CompiledICHolder::holder_klass_offset());
2325     const int holder_metadata_offset = in_bytes(CompiledICHolder::holder_metadata_offset());
2326 
2327     // Out-of-line call to ic_miss handler.
2328     __ call_ic_miss_handler(ic_miss, 0x11, 0, Z_R1_scratch);
2329 
2330     // Unverified Entry Point UEP
2331     __ align(CodeEntryAlignment);
2332     c2i_unverified_entry = __ pc();
2333 
2334     // Check the pointers.
2335     if (!ImplicitNullChecks || MacroAssembler::needs_explicit_null_check(klass_offset)) {
2336       __ z_ltgr(Z_ARG1, Z_ARG1);
2337       __ z_bre(ic_miss);
2338     }
2339     __ verify_oop(Z_ARG1, FILE_AND_LINE);
2340 
2341     // Check ic: object class <-> cached class
2342     // Compress cached class for comparison. That's more efficient.
2343     if (UseCompressedClassPointers) {
2344       __ z_lg(Z_R11, holder_klass_offset, Z_method);             // Z_R11 is overwritten a few instructions down anyway.
2345       __ compare_klass_ptr(Z_R11, klass_offset, Z_ARG1, false); // Cached class can't be zero.
2346     } else {
2347       __ z_clc(klass_offset, sizeof(void *)-1, Z_ARG1, holder_klass_offset, Z_method);
2348     }
2349     __ z_brne(ic_miss);  // Cache miss: call runtime to handle this.
2350 
2351     // This def MUST MATCH code in gen_c2i_adapter!
2352     const Register code = Z_R11;
2353 
2354     __ z_lg(Z_method, holder_metadata_offset, Z_method);
2355     __ load_and_test_long(Z_R0, method_(code));
2356     __ z_brne(ic_miss);  // Cache miss: call runtime to handle this.
2357 
    // Fall through to VEP. Duplicates the LTG, but saves a taken branch.
2359   }
2360 
2361   address c2i_entry = __ pc();
2362 
2363   // Class initialization barrier for static methods
2364   address c2i_no_clinit_check_entry = nullptr;
2365   if (VM_Version::supports_fast_class_init_checks()) {
2366     Label L_skip_barrier;
2367 
2368     { // Bypass the barrier for non-static methods
2369       __ testbit(Address(Z_method, Method::access_flags_offset()), JVM_ACC_STATIC_BIT);
2370       __ z_bfalse(L_skip_barrier); // non-static
2371     }
2372 
2373     Register klass = Z_R11;
2374     __ load_method_holder(klass, Z_method);
2375     __ clinit_barrier(klass, Z_thread, &L_skip_barrier /*L_fast_path*/);
2376 
2377     __ load_const_optimized(klass, SharedRuntime::get_handle_wrong_method_stub());
2378     __ z_br(klass);
2379 
2380     __ bind(L_skip_barrier);
2381     c2i_no_clinit_check_entry = __ pc();
2382   }
2383 
2384   gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);
2385 
2386   return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry, c2i_no_clinit_check_entry);
2387 }
2388 
// This function returns the adjustment size (in number of words) to a c2i
// adapter activation for use during deoptimization.
//
// Actually only compiled frames need to be adjusted, but it
// does no harm to adjust entry and interpreter frames, too.
2394 //
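// Example: callee_parameters == 2 and callee_locals == 5 yield
// 3 * Interpreter::stackElementWords extra words for the non-parameter
// locals, plus frame::z_parent_ijava_frame_abi_size / BytesPerWord words.
//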
2395 int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
2396   assert(callee_locals >= callee_parameters,
2397           "test and remove; got more parms than locals");
2398   // Handle the abi adjustment here instead of doing it in push_skeleton_frames.
2399   return (callee_locals - callee_parameters) * Interpreter::stackElementWords +
2400          frame::z_parent_ijava_frame_abi_size / BytesPerWord;
2401 }
2402 
2403 uint SharedRuntime::in_preserve_stack_slots() {
2404   return frame::jit_in_preserve_size_in_4_byte_units;
2405 }
2406 
2407 uint SharedRuntime::out_preserve_stack_slots() {
2408   return frame::z_jit_out_preserve_size/VMRegImpl::stack_slot_size;
2409 }
2410 
2411 //
2412 // Frame generation for deopt and uncommon trap blobs.
2413 //
2414 static void push_skeleton_frame(MacroAssembler* masm,
2415                           /* Unchanged */
2416                           Register frame_sizes_reg,
2417                           Register pcs_reg,
2418                           /* Invalidate */
2419                           Register frame_size_reg,
2420                           Register pc_reg) {
2421   BLOCK_COMMENT("  push_skeleton_frame {");
2422    __ z_lg(pc_reg, 0, pcs_reg);
2423    __ z_lg(frame_size_reg, 0, frame_sizes_reg);
2424    __ z_stg(pc_reg, _z_abi(return_pc), Z_SP);
2425    Register fp = pc_reg;
2426    __ push_frame(frame_size_reg, fp);
2427 #ifdef ASSERT
   // The magic is required for successfully walking skeletal frames.
2429    __ load_const_optimized(frame_size_reg/*tmp*/, frame::z_istate_magic_number);
2430    __ z_stg(frame_size_reg, _z_ijava_state_neg(magic), fp);
2431    // Fill other slots that are supposedly not necessary with eye catchers.
2432    __ load_const_optimized(frame_size_reg/*use as tmp*/, 0xdeadbad1);
2433    __ z_stg(frame_size_reg, _z_ijava_state_neg(top_frame_sp), fp);
   // The sender_sp of the bottom frame is set before pushing it.
   // The sender_sp of non-bottom frames is their caller's top_frame_sp, which
   // is unknown here. Luckily it is not needed before the frame is filled in
   // by layout_activation(); we assert this by setting an eye catcher (see
   // comments on sender_sp in frame_s390.hpp).
2439    __ z_stg(frame_size_reg, _z_ijava_state_neg(sender_sp), Z_SP);
2440 #endif // ASSERT
2441   BLOCK_COMMENT("  } push_skeleton_frame");
2442 }
2443 
2444 // Loop through the UnrollBlock info and create new frames.
2445 static void push_skeleton_frames(MacroAssembler* masm, bool deopt,
2446                             /* read */
2447                             Register unroll_block_reg,
2448                             /* invalidate */
2449                             Register frame_sizes_reg,
2450                             Register number_of_frames_reg,
2451                             Register pcs_reg,
2452                             Register tmp1,
2453                             Register tmp2) {
2454   BLOCK_COMMENT("push_skeleton_frames {");
2455   // _number_of_frames is of type int (deoptimization.hpp).
2456   __ z_lgf(number_of_frames_reg,
2457            Address(unroll_block_reg, Deoptimization::UnrollBlock::number_of_frames_offset()));
2458   __ z_lg(pcs_reg,
2459           Address(unroll_block_reg, Deoptimization::UnrollBlock::frame_pcs_offset()));
2460   __ z_lg(frame_sizes_reg,
2461           Address(unroll_block_reg, Deoptimization::UnrollBlock::frame_sizes_offset()));
2462 
2463   // stack: (caller_of_deoptee, ...).
2464 
2465   // If caller_of_deoptee is a compiled frame, then we extend it to make
2466   // room for the callee's locals and the frame::z_parent_ijava_frame_abi.
2467   // See also Deoptimization::last_frame_adjust() above.
2468   // Note: entry and interpreted frames are adjusted, too. But this doesn't harm.
2469 
2470   __ z_lgf(Z_R1_scratch,
2471            Address(unroll_block_reg, Deoptimization::UnrollBlock::caller_adjustment_offset()));
2472   __ z_lgr(tmp1, Z_SP);  // Save the sender sp before extending the frame.
2473   __ resize_frame_sub(Z_R1_scratch, tmp2/*tmp*/);
2474   // The oldest skeletal frame requires a valid sender_sp to make it walkable
2475   // (it is required to find the original pc of caller_of_deoptee if it is marked
2476   // for deoptimization - see nmethod::orig_pc_addr()).
2477   __ z_stg(tmp1, _z_ijava_state_neg(sender_sp), Z_SP);
2478 
2479   // Now push the new interpreter frames.
2480   Label loop, loop_entry;
2481 
2482   // Make sure that there is at least one entry in the array.
2483   DEBUG_ONLY(__ z_ltgr(number_of_frames_reg, number_of_frames_reg));
2484   __ asm_assert(Assembler::bcondNotZero, "array_size must be > 0", 0x205);
2485 
2486   __ z_bru(loop_entry);
2487 
2488   __ bind(loop);
2489 
2490   __ add2reg(frame_sizes_reg, wordSize);
2491   __ add2reg(pcs_reg, wordSize);
2492 
2493   __ bind(loop_entry);
2494 
2495   // Allocate a new frame, fill in the pc.
2496   push_skeleton_frame(masm, frame_sizes_reg, pcs_reg, tmp1, tmp2);
2497 
2498   __ z_aghi(number_of_frames_reg, -1);  // Emit AGHI, because it sets the condition code
2499   __ z_brne(loop);
2500 
2501   // Set the top frame's return pc.
2502   __ add2reg(pcs_reg, wordSize);
2503   __ z_lg(Z_R0_scratch, 0, pcs_reg);
2504   __ z_stg(Z_R0_scratch, _z_abi(return_pc), Z_SP);
2505   BLOCK_COMMENT("} push_skeleton_frames");
2506 }
2507 
2508 //------------------------------generate_deopt_blob----------------------------
2509 void SharedRuntime::generate_deopt_blob() {
2510   // Allocate space for the code.
2511   ResourceMark rm;
2512   // Setup code generation tools.
2513   CodeBuffer buffer("deopt_blob", 2048, 1024);
2514   InterpreterMacroAssembler* masm = new InterpreterMacroAssembler(&buffer);
2515   Label exec_mode_initialized;
2516   OopMap* map = nullptr;
2517   OopMapSet *oop_maps = new OopMapSet();
2518 
2519   unsigned int start_off = __ offset();
2520   Label cont;
2521 
2522   // --------------------------------------------------------------------------
2523   // Normal entry (non-exception case)
2524   //
2525   // We have been called from the deopt handler of the deoptee.
2526   // Z_R14 points behind the call in the deopt handler. We adjust
2527   // it such that it points to the start of the deopt handler.
2528   // The return_pc has been stored in the frame of the deoptee and
2529   // will replace the address of the deopt_handler in the call
2530   // to Deoptimization::fetch_unroll_info below.
2531   // The (int) cast is necessary, because -((unsigned int)14)
2532   // is an unsigned int.
2533   __ add2reg(Z_R14, -(int)NativeCall::max_instruction_size());
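  // (Illustration: -(unsigned int)14 is 4294967282 (0xFFFFFFF2) under C++
  //  unsigned arithmetic, whereas -(int)14 is the intended -14.)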
2534 
2535   const Register   exec_mode_reg = Z_tmp_1;
2536 
2537   // stack: (deoptee, caller of deoptee, ...)
2538 
  // Push an "unpack" frame.
  // R14 contains the return address pointing into the deoptimized
  // nmethod that was valid just before the nmethod was deoptimized.
  // Save R14 into the deoptee frame. The `fetch_unroll_info'
  // procedure called below will read it from there.
2544   map = RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers);
2545 
  // Note the entry point.
2547   __ load_const_optimized(exec_mode_reg, Deoptimization::Unpack_deopt);
2548   __ z_bru(exec_mode_initialized);
2549 
2550 #ifndef COMPILER1
  int reexecute_offset = 1; // odd offset will produce odd pc, which triggers a hardware trap
2552 #else
2553   // --------------------------------------------------------------------------
2554   // Reexecute entry
2555   // - Z_R14 = Deopt Handler in nmethod
2556 
2557   int reexecute_offset = __ offset() - start_off;
2558 
2559   // No need to update map as each call to save_live_registers will produce identical oopmap
2560   (void) RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers);
2561 
2562   __ load_const_optimized(exec_mode_reg, Deoptimization::Unpack_reexecute);
2563   __ z_bru(exec_mode_initialized);
2564 #endif
2565 
2566 
2567   // --------------------------------------------------------------------------
2568   // Exception entry. We reached here via a branch. Registers on entry:
2569   // - Z_EXC_OOP (Z_ARG1) = exception oop
2570   // - Z_EXC_PC  (Z_ARG2) = the exception pc.
2571 
2572   int exception_offset = __ offset() - start_off;
2573 
  // All registers are dead at this entry point, except for Z_EXC_OOP and
  // Z_EXC_PC, which contain the exception oop and exception pc,
  // respectively. Set them in TLS and fall through to the
  // unpack_with_exception_in_tls entry point.
2578 
2579   // Store exception oop and pc in thread (location known to GC).
2580   // Need this since the call to "fetch_unroll_info()" may safepoint.
2581   __ z_stg(Z_EXC_OOP, Address(Z_thread, JavaThread::exception_oop_offset()));
2582   __ z_stg(Z_EXC_PC,  Address(Z_thread, JavaThread::exception_pc_offset()));
2583 
2584   // fall through
2585 
2586   int exception_in_tls_offset = __ offset() - start_off;
2587 
  // New implementation because the exception oop is now passed in JavaThread.

  // Prolog for the exception case.
  // All registers must be preserved because they might be used by LinearScan.
  // Exception oop and throwing PC are passed in JavaThread.
2593 
  // Load the throwing pc from JavaThread and use it as the return address of the current frame.
2595   __ z_lg(Z_R1_scratch, Address(Z_thread, JavaThread::exception_pc_offset()));
2596 
2597   // Save everything in sight.
2598   (void) RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers, Z_R1_scratch);
2599 
2600   // Now it is safe to overwrite any register
2601 
2602   // Clear the exception pc field in JavaThread
2603   __ clear_mem(Address(Z_thread, JavaThread::exception_pc_offset()), 8);
2604 
2605   // Deopt during an exception.  Save exec mode for unpack_frames.
2606   __ load_const_optimized(exec_mode_reg, Deoptimization::Unpack_exception);
2607 
2608 
2609 #ifdef ASSERT
2610   // verify that there is really an exception oop in JavaThread
2611   __ z_lg(Z_ARG1, Address(Z_thread, JavaThread::exception_oop_offset()));
2612   __ MacroAssembler::verify_oop(Z_ARG1, FILE_AND_LINE);
2613 
2614   // verify that there is no pending exception
2615   __ asm_assert_mem8_is_zero(in_bytes(Thread::pending_exception_offset()), Z_thread,
2616                              "must not have pending exception here", __LINE__);
2617 #endif
2618 
2619   // --------------------------------------------------------------------------
2620   // At this point, the live registers are saved and
2621   // the exec_mode_reg has been set up correctly.
2622   __ bind(exec_mode_initialized);
2623 
2624   // stack: ("unpack" frame, deoptee, caller_of_deoptee, ...).
2625 
2626   const Register unroll_block_reg  = Z_tmp_2;
2627 
  // We need to set `last_Java_frame' because `fetch_unroll_info' will
  // call `last_Java_frame()'. However, we can't block, and no GC will
  // occur, so we don't need an oopmap. The value of the pc in the
  // frame is not particularly important; it just needs to identify the blob.

  // Don't set last_Java_pc here anymore (it is implicitly null then);
  // the correct PC is retrieved by pd_last_frame() in that case.
2635   __ set_last_Java_frame(/*sp*/Z_SP, noreg);
  // With EscapeAnalysis turned on, this call may safepoint
  // even though it is marked as a "leaf call"!
2638   __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), Z_thread, exec_mode_reg);
  // Set an oopmap for the call site. It describes all our saved volatile registers.
2640   int oop_map_offs = __ offset();
2641   oop_maps->add_gc_map(oop_map_offs, map);
2642 
2643   __ reset_last_Java_frame();
2644   // save the return value.
2645   __ z_lgr(unroll_block_reg, Z_RET);
2646   // restore the return registers that have been saved
2647   // (among other registers) by save_live_registers(...).
2648   RegisterSaver::restore_result_registers(masm);
2649 
2650   // reload the exec mode from the UnrollBlock (it might have changed)
2651   __ z_llgf(exec_mode_reg, Address(unroll_block_reg, Deoptimization::UnrollBlock::unpack_kind_offset()));
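  // (z_llgf: unpack_kind is a 32-bit int field, so it is loaded zero-extended.)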
2652 
2653   // In excp_deopt_mode, restore and clear exception oop which we
2654   // stored in the thread during exception entry above. The exception
2655   // oop will be the return value of this stub.
2656   NearLabel skip_restore_excp;
2657   __ compare64_and_branch(exec_mode_reg, Deoptimization::Unpack_exception, Assembler::bcondNotEqual, skip_restore_excp);
2658   __ z_lg(Z_RET, thread_(exception_oop));
2659   __ clear_mem(thread_(exception_oop), 8);
2660   __ bind(skip_restore_excp);
2661 
2662   // remove the "unpack" frame
2663   __ pop_frame();
2664 
2665   // stack: (deoptee, caller of deoptee, ...).
2666 
2667   // pop the deoptee's frame
2668   __ pop_frame();
2669 
2670   // stack: (caller_of_deoptee, ...).
2671 
2672   // loop through the `UnrollBlock' info and create interpreter frames.
2673   push_skeleton_frames(masm, true/*deopt*/,
2674                   unroll_block_reg,
2675                   Z_tmp_3,
2676                   Z_tmp_4,
2677                   Z_ARG5,
2678                   Z_ARG4,
2679                   Z_ARG3);
2680 
2681   // stack: (skeletal interpreter frame, ..., optional skeletal
2682   // interpreter frame, caller of deoptee, ...).
2683 
2684   // push an "unpack" frame taking care of float / int return values.
2685   __ push_frame(RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers));
2686 
2687   // stack: (unpack frame, skeletal interpreter frame, ..., optional
2688   // skeletal interpreter frame, caller of deoptee, ...).
2689 
2690   // spill live volatile registers since we'll do a call.
2691   __ z_stg(Z_RET, offset_of(frame::z_abi_160_spill, spill[0]), Z_SP);
2692   __ z_std(Z_FRET, offset_of(frame::z_abi_160_spill, spill[1]), Z_SP);
2693 
  // Let the unpacker lay out information in the skeletal frames just allocated.
2695   __ get_PC(Z_RET, oop_map_offs - __ offset());
2696   __ set_last_Java_frame(/*sp*/Z_SP, /*pc*/Z_RET);
2697   __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames),
2698                   Z_thread/*thread*/, exec_mode_reg/*exec_mode*/);
2699 
2700   __ reset_last_Java_frame();
2701 
2702   // restore the volatiles saved above.
2703   __ z_lg(Z_RET, offset_of(frame::z_abi_160_spill, spill[0]), Z_SP);
2704   __ z_ld(Z_FRET, offset_of(frame::z_abi_160_spill, spill[1]), Z_SP);
2705 
2706   // pop the "unpack" frame.
2707   __ pop_frame();
2708   __ restore_return_pc();
2709 
2710   // stack: (top interpreter frame, ..., optional interpreter frame,
2711   // caller of deoptee, ...).
2712 
2713   __ z_lg(Z_fp, _z_abi(callers_sp), Z_SP); // restore frame pointer
2714   __ restore_bcp();
2715   __ restore_locals();
2716   __ restore_esp();
2717 
2718   // return to the interpreter entry point.
2719   __ z_br(Z_R14);
2720 
2721   // Make sure all code is generated
2722   masm->flush();
2723 
2724   _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers)/wordSize);
2725   _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
2726 }
2727 
2728 
2729 #ifdef COMPILER2
2730 //------------------------------generate_uncommon_trap_blob--------------------
2731 void SharedRuntime::generate_uncommon_trap_blob() {
2732   // Allocate space for the code
2733   ResourceMark rm;
2734   // Setup code generation tools
2735   CodeBuffer buffer("uncommon_trap_blob", 2048, 1024);
2736   InterpreterMacroAssembler* masm = new InterpreterMacroAssembler(&buffer);
2737 
2738   Register unroll_block_reg = Z_tmp_1;
2739   Register klass_index_reg  = Z_ARG2;
2740   Register unc_trap_reg     = Z_ARG2;
2741 
2742   // stack: (deoptee, caller_of_deoptee, ...).
2743 
2744   // push a dummy "unpack" frame and call
2745   // `Deoptimization::uncommon_trap' to pack the compiled frame into a
2746   // vframe array and return the `UnrollBlock' information.
2747 
2748   // save R14 to compiled frame.
2749   __ save_return_pc();
2750   // push the "unpack_frame".
2751   __ push_frame_abi160(0);
2752 
2753   // stack: (unpack frame, deoptee, caller_of_deoptee, ...).
2754 
2755   // set the "unpack" frame as last_Java_frame.
2756   // `Deoptimization::uncommon_trap' expects it and considers its
2757   // sender frame as the deoptee frame.
2758   __ get_PC(Z_R1_scratch);
2759   __ set_last_Java_frame(/*sp*/Z_SP, /*pc*/Z_R1_scratch);
2760 
2761   __ z_lgr(klass_index_reg, Z_ARG1);  // passed implicitly as ARG2
2762   __ z_lghi(Z_ARG3, Deoptimization::Unpack_uncommon_trap);  // passed implicitly as ARG3
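  // Note: only Z_thread is passed explicitly to call_VM_leaf below; the other
  // C arguments already sit in their ABI registers (Z_ARG2 holds the klass
  // index, Z_ARG3 the unpack kind) thanks to the two loads above.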
2763   BLOCK_COMMENT("call Deoptimization::uncommon_trap()");
2764   __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap), Z_thread);
2765 
2766   __ reset_last_Java_frame();
2767 
2768   // pop the "unpack" frame
2769   __ pop_frame();
2770 
2771   // stack: (deoptee, caller_of_deoptee, ...).
2772 
2773   // save the return value.
2774   __ z_lgr(unroll_block_reg, Z_RET);
2775 
2776   // pop the deoptee frame.
2777   __ pop_frame();
2778 
2779   // stack: (caller_of_deoptee, ...).
2780 
2781 #ifdef ASSERT
2782   assert(Immediate::is_uimm8(Deoptimization::Unpack_LIMIT), "Code not fit for larger immediates");
2783   assert(Immediate::is_uimm8(Deoptimization::Unpack_uncommon_trap), "Code not fit for larger immediates");
2784   const int unpack_kind_byte_offset = in_bytes(Deoptimization::UnrollBlock::unpack_kind_offset())
2785 #ifndef VM_LITTLE_ENDIAN
2786   + 3
2787 #endif
2788   ;
2789   if (Displacement::is_shortDisp(unpack_kind_byte_offset)) {
2790     __ z_cli(unpack_kind_byte_offset, unroll_block_reg, Deoptimization::Unpack_uncommon_trap);
2791   } else {
2792     __ z_cliy(unpack_kind_byte_offset, unroll_block_reg, Deoptimization::Unpack_uncommon_trap);
2793   }
  __ asm_assert(Assembler::bcondEqual, "SharedRuntime::generate_uncommon_trap_blob: expected Unpack_uncommon_trap", 0);
2795 #endif
2796 
2797   __ zap_from_to(Z_SP, Z_SP, Z_R0_scratch, Z_R1, 500, -1);
2798 
2799   // allocate new interpreter frame(s) and possibly resize the caller's frame
  // (no more adapters!)
2801   push_skeleton_frames(masm, false/*deopt*/,
2802                   unroll_block_reg,
2803                   Z_tmp_2,
2804                   Z_tmp_3,
2805                   Z_tmp_4,
2806                   Z_ARG5,
2807                   Z_ARG4);
2808 
2809   // stack: (skeletal interpreter frame, ..., optional skeletal
2810   // interpreter frame, (resized) caller of deoptee, ...).
2811 
  // Push a dummy "unpack" frame taking care of float return values and
  // call `Deoptimization::unpack_frames' to lay out information in the
  // interpreter frames just created.
2815 
2816   // push the "unpack" frame
  const unsigned int framesize_in_bytes = __ push_frame_abi160(0);
2818 
2819   // stack: (unpack frame, skeletal interpreter frame, ..., optional
2820   // skeletal interpreter frame, (resized) caller of deoptee, ...).
2821 
2822   // set the "unpack" frame as last_Java_frame
2823   __ get_PC(Z_R1_scratch);
2824   __ set_last_Java_frame(/*sp*/Z_SP, /*pc*/Z_R1_scratch);
2825 
  // Indicate it is the uncommon trap case.
  BLOCK_COMMENT("call Deoptimization::unpack_frames()");
  __ load_const_optimized(unc_trap_reg, Deoptimization::Unpack_uncommon_trap);
  // Let the unpacker lay out information in the skeletal frames just allocated.
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), Z_thread);
2831 
2832   __ reset_last_Java_frame();
2833   // pop the "unpack" frame
2834   __ pop_frame();
  // Restore the return pc from the top interpreter frame.
2836   __ restore_return_pc();
2837 
2838   // stack: (top interpreter frame, ..., optional interpreter frame,
2839   // (resized) caller of deoptee, ...).
2840 
2841   __ z_lg(Z_fp, _z_abi(callers_sp), Z_SP); // restore frame pointer
2842   __ restore_bcp();
2843   __ restore_locals();
2844   __ restore_esp();
2845 
2846   // return to the interpreter entry point
2847   __ z_br(Z_R14);
2848 
2849   masm->flush();
2850   _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, nullptr, framesize_in_bytes/wordSize);
2851 }
2852 #endif // COMPILER2
2853 
2854 
2855 //------------------------------generate_handler_blob------
2856 //
// Generate a special Compile2Runtime blob that saves all registers
// and sets up an oopmap.
2859 SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) {
2860   assert(StubRoutines::forward_exception_entry() != nullptr,
2861          "must be generated before");
2862 
2863   ResourceMark rm;
2864   OopMapSet *oop_maps = new OopMapSet();
2865   OopMap* map;
2866 
2867   // Allocate space for the code. Setup code generation tools.
2868   CodeBuffer buffer("handler_blob", 2048, 1024);
2869   MacroAssembler* masm = new MacroAssembler(&buffer);
2870 
2871   unsigned int start_off = __ offset();
2872   address call_pc = nullptr;
2873   int frame_size_in_bytes;
2874 
2875   bool cause_return = (poll_type == POLL_AT_RETURN);
  // If the poll was not taken at a return, the signal handler stashed the pc
  // of the poll instruction; use it as the return address.
2877   if (!cause_return) {
2878     __ z_lg(Z_R14, Address(Z_thread, JavaThread::saved_exception_pc_offset()));
2879   }
2880 
2881   // Save registers, fpu state, and flags
2882   map = RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers);
2883 
2884   if (!cause_return) {
2885     // Keep a copy of the return pc to detect if it gets modified.
2886     __ z_lgr(Z_R6, Z_R14);
2887   }
2888 
2889   // The following is basically a call_VM. However, we need the precise
2890   // address of the call in order to generate an oopmap. Hence, we do all the
2891   // work ourselves.
2892   __ set_last_Java_frame(Z_SP, noreg);
2893 
2894   // call into the runtime to handle the safepoint poll
2895   __ call_VM_leaf(call_ptr, Z_thread);
2896 
2897 
2898   // Set an oopmap for the call site. This oopmap will map all
2899   // oop-registers and debug-info registers as callee-saved. This
2900   // will allow deoptimization at this safepoint to find all possible
2901   // debug-info recordings, as well as let GC find all oops.
2902 
2903   oop_maps->add_gc_map((int)(__ offset()-start_off), map);
2904 
2905   Label noException;
2906 
2907   __ reset_last_Java_frame();
2908 
2909   __ load_and_test_long(Z_R1, thread_(pending_exception));
2910   __ z_bre(noException);
2911 
2912   // Pending exception case, used (sporadically) by
2913   // api/java_lang/Thread.State/index#ThreadState et al.
2914   RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers);
2915 
2916   // Jump to forward_exception_entry, with the issuing PC in Z_R14
2917   // so it looks like the original nmethod called forward_exception_entry.
2918   __ load_const_optimized(Z_R1_scratch, StubRoutines::forward_exception_entry());
2919   __ z_br(Z_R1_scratch);
2920 
2921   // No exception case
2922   __ bind(noException);
2923 
2924   if (!cause_return) {
2925     Label no_adjust;
    // If our stashed return pc was modified by the runtime, avoid touching it.
2927     const int offset_of_return_pc = _z_common_abi(return_pc) + RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers);
2928     __ z_cg(Z_R6, offset_of_return_pc, Z_SP);
2929     __ z_brne(no_adjust);
2930 
2931     // Adjust return pc forward to step over the safepoint poll instruction
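    // (instr_size() yields the byte length of the variable-length poll
    // instruction at the stashed pc, so the adjusted pc resumes right after it.)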
2932     __ instr_size(Z_R1_scratch, Z_R6);
2933     __ z_agr(Z_R6, Z_R1_scratch);
2934     __ z_stg(Z_R6, offset_of_return_pc, Z_SP);
2935 
2936     __ bind(no_adjust);
2937   }
2938 
2939   // Normal exit, restore registers and exit.
2940   RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers);
2941 
2942   __ z_br(Z_R14);
2943 
2944   // Make sure all code is generated
2945   masm->flush();
2946 
2947   // Fill-out other meta info
2948   return SafepointBlob::create(&buffer, oop_maps, RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers)/wordSize);
2949 }
2950 
2951 
2952 //
// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss)
2954 //
// Generate a stub that calls into the VM to find out the proper destination
// of a Java call. All the argument registers are live at this point,
// but since this is generic code we don't know what they are; the caller
// must do any GC of the args.
2959 //
2960 RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) {
2961   assert (StubRoutines::forward_exception_entry() != nullptr, "must be generated before");
2962 
2963   // allocate space for the code
2964   ResourceMark rm;
2965 
2966   CodeBuffer buffer(name, 1000, 512);
  MacroAssembler* masm = new MacroAssembler(&buffer);
2968 
2969   OopMapSet *oop_maps = new OopMapSet();
2970   OopMap* map = nullptr;
2971 
2972   unsigned int start_off = __ offset();
2973 
2974   map = RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers);
2975 
  // We must save a PC from within the stub as the return PC.
2977   // C code doesn't store the LR where we expect the PC,
2978   // so we would run into trouble upon stack walking.
2979   __ get_PC(Z_R1_scratch);
2980 
2981   unsigned int frame_complete = __ offset();
2982 
2983   __ set_last_Java_frame(/*sp*/Z_SP, Z_R1_scratch);
2984 
2985   __ call_VM_leaf(destination, Z_thread, Z_method);
2986 
2987 
2988   // Set an oopmap for the call site.
2989   // We need this not only for callee-saved registers, but also for volatile
2990   // registers that the compiler might be keeping live across a safepoint.
2991 
2992   oop_maps->add_gc_map((int)(frame_complete-start_off), map);
2993 
2994   // clear last_Java_sp
2995   __ reset_last_Java_frame();
2996 
2997   // check for pending exceptions
2998   Label pending;
2999   __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset()));
3000   __ z_brne(pending);
3001 
3002   __ z_lgr(Z_R1_scratch, Z_R2); // r1 is neither saved nor restored, r2 contains the continuation.
3003   RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers);
3004 
3005   // get the returned method
3006   __ get_vm_result_2(Z_method);
3007 
3008   // We are back to the original state on entry and ready to go.
3009   __ z_br(Z_R1_scratch);
3010 
3011   // Pending exception after the safepoint
3012 
3013   __ bind(pending);
3014 
3015   RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers);
3016 
3017   // exception pending => remove activation and forward to exception handler
3018 
3019   __ z_lgr(Z_R2, Z_R0); // pending_exception
3020   __ clear_mem(Address(Z_thread, JavaThread::vm_result_offset()), sizeof(jlong));
3021   __ load_const_optimized(Z_R1_scratch, StubRoutines::forward_exception_entry());
3022   __ z_br(Z_R1_scratch);
3023 
3024   // -------------
3025   // make sure all code is generated
3026   masm->flush();
3027 
3028   // return the blob
  // The frame size argument is in words; live_reg_frame_size() returns bytes, hence the division by wordSize.
3030   return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers)/wordSize,
3031                                        oop_maps, true);
3032 
3033 }
3034 
3035 //------------------------------Montgomery multiplication------------------------
3036 //
3037 
3038 // Subtract 0:b from carry:a. Return carry.
3039 static unsigned long
3040 sub(unsigned long a[], unsigned long b[], unsigned long carry, long len) {
3041   unsigned long i, c = 8 * (unsigned long)(len - 1);
3042   __asm__ __volatile__ (
3043     "SLGR   %[i], %[i]         \n" // initialize to 0 and pre-set carry
3044     "LGHI   0, 8               \n" // index increment (for BRXLG)
3045     "LGR    1, %[c]            \n" // index limit (for BRXLG)
3046     "0:                        \n"
3047     "LG     %[c], 0(%[i],%[a]) \n"
3048     "SLBG   %[c], 0(%[i],%[b]) \n" // subtract with borrow
3049     "STG    %[c], 0(%[i],%[a]) \n"
3050     "BRXLG  %[i], 0, 0b        \n" // while ((i+=8)<limit);
3051     "SLBGR  %[c], %[c]         \n" // save carry - 1
3052     : [i]"=&a"(i), [c]"+r"(c)
3053     : [a]"a"(a), [b]"a"(b)
3054     : "cc", "memory", "r0", "r1"
  );
3056   return carry + c;
3057 }
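// For reference, a portable sketch of what sub() computes (not used by the
// build; assumes a compiler with unsigned __int128 support):
//
//   unsigned long borrow = 0;
//   for (long k = 0; k < len; k++) {
//     unsigned __int128 d = (unsigned __int128)a[k] - b[k] - borrow;
//     a[k] = (unsigned long)d;
//     borrow = (unsigned long)(d >> 64) & 1;  // 1 if the subtraction borrowed
//   }
//   return carry - borrow;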
3058 
3059 // Multiply (unsigned) Long A by Long B, accumulating the double-
3060 // length result into the accumulator formed of T0, T1, and T2.
3061 inline void MACC(unsigned long A[], long A_ind,
3062                  unsigned long B[], long B_ind,
3063                  unsigned long &T0, unsigned long &T1, unsigned long &T2) {
3064   long A_si = 8 * A_ind,
3065        B_si = 8 * B_ind;
3066   __asm__ __volatile__ (
3067     "LG     1, 0(%[A_si],%[A]) \n"
3068     "MLG    0, 0(%[B_si],%[B]) \n" // r0r1 = A * B
3069     "ALGR   %[T0], 1           \n"
3070     "LGHI   1, 0               \n" // r1 = 0
3071     "ALCGR  %[T1], 0           \n"
3072     "ALCGR  %[T2], 1           \n"
3073     : [T0]"+r"(T0), [T1]"+r"(T1), [T2]"+r"(T2)
3074     : [A]"r"(A), [A_si]"r"(A_si), [B]"r"(B), [B_si]"r"(B_si)
3075     : "cc", "r0", "r1"
  );
3077 }
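// For reference, a portable sketch of the accumulation (T2:T1:T0) +=
// A[A_ind] * B[B_ind] (not used by the build; assumes unsigned __int128):
//
//   unsigned __int128 p = (unsigned __int128)A[A_ind] * B[B_ind];
//   unsigned __int128 s = (unsigned __int128)T0 + (unsigned long)p;  // ALGR
//   T0 = (unsigned long)s;
//   s = (unsigned __int128)T1 + (unsigned long)(p >> 64)
//       + (unsigned long)(s >> 64);                                  // ALCGR
//   T1 = (unsigned long)s;
//   T2 += (unsigned long)(s >> 64);                                  // ALCGR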
3078 
3079 // As above, but add twice the double-length result into the
3080 // accumulator.
3081 inline void MACC2(unsigned long A[], long A_ind,
3082                   unsigned long B[], long B_ind,
3083                   unsigned long &T0, unsigned long &T1, unsigned long &T2) {
3084   const unsigned long zero = 0;
3085   long A_si = 8 * A_ind,
3086        B_si = 8 * B_ind;
3087   __asm__ __volatile__ (
3088     "LG     1, 0(%[A_si],%[A]) \n"
3089     "MLG    0, 0(%[B_si],%[B]) \n" // r0r1 = A * B
3090     "ALGR   %[T0], 1           \n"
3091     "ALCGR  %[T1], 0           \n"
3092     "ALCGR  %[T2], %[zero]     \n"
3093     "ALGR   %[T0], 1           \n"
3094     "ALCGR  %[T1], 0           \n"
3095     "ALCGR  %[T2], %[zero]     \n"
3096     : [T0]"+r"(T0), [T1]"+r"(T1), [T2]"+r"(T2)
3097     : [A]"r"(A), [A_si]"r"(A_si), [B]"r"(B), [B_si]"r"(B_si), [zero]"r"(zero)
3098     : "cc", "r0", "r1"
  );
3100 }
3101 
3102 // Fast Montgomery multiplication. The derivation of the algorithm is
// in "A Cryptographic Library for the Motorola DSP56000"
// (Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237).
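// Inputs a, b, n are len-longword little-endian integers with
// inv == -n[0]^-1 mod 2^64; the routine computes m = a * b * R^-1 mod n,
// where R = 2^(64*len). In each column i of the schoolbook product,
// m[i] = t0 * inv is chosen so that the low longword of t0 + m[i]*n[0]
// vanishes, letting the running sum be divided by 2^64.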
3105 static void
3106 montgomery_multiply(unsigned long a[], unsigned long b[], unsigned long n[],
3107                     unsigned long m[], unsigned long inv, int len) {
3108   unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
3109   int i;
3110 
3111   assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");
3112 
3113   for (i = 0; i < len; i++) {
3114     int j;
3115     for (j = 0; j < i; j++) {
3116       MACC(a, j, b, i-j, t0, t1, t2);
3117       MACC(m, j, n, i-j, t0, t1, t2);
3118     }
3119     MACC(a, i, b, 0, t0, t1, t2);
3120     m[i] = t0 * inv;
3121     MACC(m, i, n, 0, t0, t1, t2);
3122 
3123     assert(t0 == 0, "broken Montgomery multiply");
3124 
3125     t0 = t1; t1 = t2; t2 = 0;
3126   }
3127 
3128   for (i = len; i < 2 * len; i++) {
3129     int j;
3130     for (j = i - len + 1; j < len; j++) {
3131       MACC(a, j, b, i-j, t0, t1, t2);
3132       MACC(m, j, n, i-j, t0, t1, t2);
3133     }
3134     m[i-len] = t0;
3135     t0 = t1; t1 = t2; t2 = 0;
3136   }
3137 
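  // Final correction: for inputs already reduced mod n, the result here is
  // < 2n including the carry t0, so at most one subtraction of n should be
  // needed; the loop form merely re-checks the carry.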
3138   while (t0) {
3139     t0 = sub(m, n, t0, len);
3140   }
3141 }
3142 
3143 // Fast Montgomery squaring. This uses asymptotically 25% fewer
3144 // multiplies so it should be up to 25% faster than Montgomery
3145 // multiplication. However, its loop control is more complex and it
3146 // may actually run slower on some machines.
3147 static void
3148 montgomery_square(unsigned long a[], unsigned long n[],
3149                   unsigned long m[], unsigned long inv, int len) {
3150   unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
3151   int i;
3152 
3153   assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");
3154 
3155   for (i = 0; i < len; i++) {
3156     int j;
3157     int end = (i+1)/2;
3158     for (j = 0; j < end; j++) {
3159       MACC2(a, j, a, i-j, t0, t1, t2);
3160       MACC(m, j, n, i-j, t0, t1, t2);
3161     }
3162     if ((i & 1) == 0) {
3163       MACC(a, j, a, j, t0, t1, t2);
3164     }
3165     for (; j < i; j++) {
3166       MACC(m, j, n, i-j, t0, t1, t2);
3167     }
3168     m[i] = t0 * inv;
3169     MACC(m, i, n, 0, t0, t1, t2);
3170 
3171     assert(t0 == 0, "broken Montgomery square");
3172 
3173     t0 = t1; t1 = t2; t2 = 0;
3174   }
3175 
3176   for (i = len; i < 2*len; i++) {
3177     int start = i-len+1;
3178     int end = start + (len - start)/2;
3179     int j;
3180     for (j = start; j < end; j++) {
3181       MACC2(a, j, a, i-j, t0, t1, t2);
3182       MACC(m, j, n, i-j, t0, t1, t2);
3183     }
3184     if ((i & 1) == 0) {
3185       MACC(a, j, a, j, t0, t1, t2);
3186     }
3187     for (; j < len; j++) {
3188       MACC(m, j, n, i-j, t0, t1, t2);
3189     }
3190     m[i-len] = t0;
3191     t0 = t1; t1 = t2; t2 = 0;
3192   }
3193 
3194   while (t0) {
3195     t0 = sub(m, n, t0, len);
3196   }
3197 }
3198 
3199 // The threshold at which squaring is advantageous was determined
// experimentally on an i7-3930K (Sandy Bridge-E) CPU @ 3.5GHz.
// The value seems to be OK for other platforms, too.
3202 #define MONTGOMERY_SQUARING_THRESHOLD 64
3203 
// Copy len longwords from s to d, reversing the order of the longwords
// (the destination array is reversed). On a little-endian machine the two
// 32-bit words within each longword would also need swapping, which is
// not implemented here.
3206 static void reverse_words(unsigned long *s, unsigned long *d, int len) {
3207   d += len;
  while (len-- > 0) {
3209     d--;
3210     unsigned long s_val = *s;
3211     // Swap words in a longword on little endian machines.
3212 #ifdef VM_LITTLE_ENDIAN
    Unimplemented();
3214 #endif
3215     *d = s_val;
3216     s++;
3217   }
3218 }
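// Example (len == 2): s = { s0, s1 } yields d = { s1, s0 }. On this
// big-endian platform the two 32-bit halves of each longword are already
// in the required order, hence the little-endian case is unimplemented.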
3219 
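// The two entry points below back the BigInteger.montgomeryMultiply and
// montgomerySquare intrinsics. The jint arrays arrive in big-endian word
// order and are converted to little-endian longword order (and back)
// via reverse_words().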
3220 void SharedRuntime::montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints,
3221                                         jint len, jlong inv,
3222                                         jint *m_ints) {
3223   len = len & 0x7fffFFFF; // C2 does not respect int to long conversion for stub calls.
3224   assert(len % 2 == 0, "array length in montgomery_multiply must be even");
3225   int longwords = len/2;
3226 
3227   // Make very sure we don't use so much space that the stack might
  // overflow. 512 jints correspond to a 16384-bit integer and
  // will use a total of 8k bytes of stack space here.
3230   int divisor = sizeof(unsigned long) * 4;
3231   guarantee(longwords <= 8192 / divisor, "must be");
3232   int total_allocation = longwords * sizeof (unsigned long) * 4;
3233   unsigned long *scratch = (unsigned long *)alloca(total_allocation);
3234 
3235   // Local scratch arrays
3236   unsigned long
3237     *a = scratch + 0 * longwords,
3238     *b = scratch + 1 * longwords,
3239     *n = scratch + 2 * longwords,
3240     *m = scratch + 3 * longwords;
3241 
3242   reverse_words((unsigned long *)a_ints, a, longwords);
3243   reverse_words((unsigned long *)b_ints, b, longwords);
3244   reverse_words((unsigned long *)n_ints, n, longwords);
3245 
3246   ::montgomery_multiply(a, b, n, m, (unsigned long)inv, longwords);
3247 
3248   reverse_words(m, (unsigned long *)m_ints, longwords);
3249 }
3250 
3251 void SharedRuntime::montgomery_square(jint *a_ints, jint *n_ints,
3252                                       jint len, jlong inv,
3253                                       jint *m_ints) {
3254   len = len & 0x7fffFFFF; // C2 does not respect int to long conversion for stub calls.
3255   assert(len % 2 == 0, "array length in montgomery_square must be even");
3256   int longwords = len/2;
3257 
3258   // Make very sure we don't use so much space that the stack might
  // overflow. 512 jints correspond to a 16384-bit integer and
  // will use a total of 6k bytes of stack space here.
3261   int divisor = sizeof(unsigned long) * 3;
3262   guarantee(longwords <= (8192 / divisor), "must be");
3263   int total_allocation = longwords * sizeof (unsigned long) * 3;
3264   unsigned long *scratch = (unsigned long *)alloca(total_allocation);
3265 
3266   // Local scratch arrays
3267   unsigned long
3268     *a = scratch + 0 * longwords,
3269     *n = scratch + 1 * longwords,
3270     *m = scratch + 2 * longwords;
3271 
3272   reverse_words((unsigned long *)a_ints, a, longwords);
3273   reverse_words((unsigned long *)n_ints, n, longwords);
3274 
3275   if (len >= MONTGOMERY_SQUARING_THRESHOLD) {
3276     ::montgomery_square(a, n, m, (unsigned long)inv, longwords);
3277   } else {
3278     ::montgomery_multiply(a, a, n, m, (unsigned long)inv, longwords);
3279   }
3280 
3281   reverse_words(m, (unsigned long *)m_ints, longwords);
3282 }
3283 
3284 extern "C"
3285 int SpinPause() {
3286   return 0;
3287 }