/*
 * Copyright (c) 2016, 2021, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2016, 2019 SAP SE. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "code/debugInfoRec.hpp"
#include "code/icBuffer.hpp"
#include "code/vtableStubs.hpp"
#include "compiler/oopMap.hpp"
#include "gc/shared/gcLocker.hpp"
#include "interpreter/interpreter.hpp"
#include "interpreter/interp_masm.hpp"
#include "memory/resourceArea.hpp"
#include "nativeInst_s390.hpp"
#include "oops/compiledICHolder.hpp"
#include "oops/klass.inline.hpp"
#include "prims/methodHandles.hpp"
#include "registerSaver_s390.hpp"
#include "runtime/jniHandles.hpp"
#include "runtime/safepointMechanism.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/signature.hpp"
#include "runtime/stubRoutines.hpp"
#include "runtime/vframeArray.hpp"
#include "utilities/align.hpp"
#include "vmreg_s390.inline.hpp"
#ifdef COMPILER1
#include "c1/c1_Runtime1.hpp"
#endif
#ifdef COMPILER2
#include "opto/ad.hpp"
#include "opto/runtime.hpp"
#endif

#ifdef PRODUCT
#define __ masm->
#else
#define __ (Verbose ? (masm->block_comment(FILE_AND_LINE),masm):masm)->
#endif

#define BLOCK_COMMENT(str) __ block_comment(str)
#define BIND(label)        bind(label); BLOCK_COMMENT(#label ":")
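// Note: with -XX:+Verbose in non-PRODUCT builds, every use of '__' also emits
// an assembler block comment naming this file and line, which makes the
// disassembly of the generated stubs much easier to navigate.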

#define RegisterSaver_LiveIntReg(regname) \
  { RegisterSaver::int_reg,   regname->encoding(), regname->as_VMReg() }

#define RegisterSaver_LiveFloatReg(regname) \
  { RegisterSaver::float_reg, regname->encoding(), regname->as_VMReg() }

// Registers which are not saved/restored, but which still get a frame slot.
// Used to get the same frame size for RegisterSaver_LiveRegs and RegisterSaver_LiveRegsWithoutR2.
#define RegisterSaver_ExcludedIntReg(regname) \
  { RegisterSaver::excluded_reg, regname->encoding(), regname->as_VMReg() }

// Registers which are not saved/restored, but which still get a frame slot.
// Used to get the same frame size for RegisterSaver_LiveRegs and RegisterSaver_LiveRegsWithoutR2.
#define RegisterSaver_ExcludedFloatReg(regname) \
  { RegisterSaver::excluded_reg, regname->encoding(), regname->as_VMReg() }
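
// For illustration: RegisterSaver_LiveIntReg(Z_R2) expands to
//   { RegisterSaver::int_reg, Z_R2->encoding(), Z_R2->as_VMReg() }
// i.e. one initializer for an element of the LiveRegType arrays below.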

static const RegisterSaver::LiveRegType RegisterSaver_LiveRegs[] = {
  // Live registers which get spilled to the stack. Register positions
  // in this array correspond directly to the stack layout.
  //
  // live float registers:
  //
  RegisterSaver_LiveFloatReg(Z_F0 ),
  // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1)
  RegisterSaver_LiveFloatReg(Z_F2 ),
  RegisterSaver_LiveFloatReg(Z_F3 ),
  RegisterSaver_LiveFloatReg(Z_F4 ),
  RegisterSaver_LiveFloatReg(Z_F5 ),
  RegisterSaver_LiveFloatReg(Z_F6 ),
  RegisterSaver_LiveFloatReg(Z_F7 ),
  RegisterSaver_LiveFloatReg(Z_F8 ),
  RegisterSaver_LiveFloatReg(Z_F9 ),
  RegisterSaver_LiveFloatReg(Z_F10),
  RegisterSaver_LiveFloatReg(Z_F11),
  RegisterSaver_LiveFloatReg(Z_F12),
  RegisterSaver_LiveFloatReg(Z_F13),
  RegisterSaver_LiveFloatReg(Z_F14),
  RegisterSaver_LiveFloatReg(Z_F15),
  //
  // RegisterSaver_ExcludedIntReg(Z_R0), // scratch
  // RegisterSaver_ExcludedIntReg(Z_R1), // scratch
  RegisterSaver_LiveIntReg(Z_R2 ),
  RegisterSaver_LiveIntReg(Z_R3 ),
  RegisterSaver_LiveIntReg(Z_R4 ),
  RegisterSaver_LiveIntReg(Z_R5 ),
  RegisterSaver_LiveIntReg(Z_R6 ),
  RegisterSaver_LiveIntReg(Z_R7 ),
  RegisterSaver_LiveIntReg(Z_R8 ),
  RegisterSaver_LiveIntReg(Z_R9 ),
  RegisterSaver_LiveIntReg(Z_R10),
  RegisterSaver_LiveIntReg(Z_R11),
  RegisterSaver_LiveIntReg(Z_R12),
  RegisterSaver_LiveIntReg(Z_R13),
  // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.)
  // RegisterSaver_ExcludedIntReg(Z_R15)  // stack pointer
};

static const RegisterSaver::LiveRegType RegisterSaver_LiveIntRegs[] = {
  // Live registers which get spilled to the stack. Register positions
  // in this array correspond directly to the stack layout.
  //
  // live float registers: All excluded, but they still get a stack slot so the frame size stays the same.
  //
  RegisterSaver_ExcludedFloatReg(Z_F0 ),
  // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1)
  RegisterSaver_ExcludedFloatReg(Z_F2 ),
  RegisterSaver_ExcludedFloatReg(Z_F3 ),
  RegisterSaver_ExcludedFloatReg(Z_F4 ),
  RegisterSaver_ExcludedFloatReg(Z_F5 ),
  RegisterSaver_ExcludedFloatReg(Z_F6 ),
  RegisterSaver_ExcludedFloatReg(Z_F7 ),
  RegisterSaver_ExcludedFloatReg(Z_F8 ),
  RegisterSaver_ExcludedFloatReg(Z_F9 ),
  RegisterSaver_ExcludedFloatReg(Z_F10),
  RegisterSaver_ExcludedFloatReg(Z_F11),
  RegisterSaver_ExcludedFloatReg(Z_F12),
  RegisterSaver_ExcludedFloatReg(Z_F13),
  RegisterSaver_ExcludedFloatReg(Z_F14),
  RegisterSaver_ExcludedFloatReg(Z_F15),
  //
  // RegisterSaver_ExcludedIntReg(Z_R0), // scratch
  // RegisterSaver_ExcludedIntReg(Z_R1), // scratch
  RegisterSaver_LiveIntReg(Z_R2 ),
  RegisterSaver_LiveIntReg(Z_R3 ),
  RegisterSaver_LiveIntReg(Z_R4 ),
  RegisterSaver_LiveIntReg(Z_R5 ),
  RegisterSaver_LiveIntReg(Z_R6 ),
  RegisterSaver_LiveIntReg(Z_R7 ),
  RegisterSaver_LiveIntReg(Z_R8 ),
  RegisterSaver_LiveIntReg(Z_R9 ),
  RegisterSaver_LiveIntReg(Z_R10),
  RegisterSaver_LiveIntReg(Z_R11),
  RegisterSaver_LiveIntReg(Z_R12),
  RegisterSaver_LiveIntReg(Z_R13),
  // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.)
  // RegisterSaver_ExcludedIntReg(Z_R15)  // stack pointer
};

static const RegisterSaver::LiveRegType RegisterSaver_LiveRegsWithoutR2[] = {
  // Live registers which get spilled to the stack. Register positions
  // in this array correspond directly to the stack layout.
  //
  // live float registers:
  //
  RegisterSaver_LiveFloatReg(Z_F0 ),
  // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1)
  RegisterSaver_LiveFloatReg(Z_F2 ),
  RegisterSaver_LiveFloatReg(Z_F3 ),
  RegisterSaver_LiveFloatReg(Z_F4 ),
  RegisterSaver_LiveFloatReg(Z_F5 ),
  RegisterSaver_LiveFloatReg(Z_F6 ),
  RegisterSaver_LiveFloatReg(Z_F7 ),
  RegisterSaver_LiveFloatReg(Z_F8 ),
  RegisterSaver_LiveFloatReg(Z_F9 ),
  RegisterSaver_LiveFloatReg(Z_F10),
  RegisterSaver_LiveFloatReg(Z_F11),
  RegisterSaver_LiveFloatReg(Z_F12),
  RegisterSaver_LiveFloatReg(Z_F13),
  RegisterSaver_LiveFloatReg(Z_F14),
  RegisterSaver_LiveFloatReg(Z_F15),
  //
  // RegisterSaver_ExcludedIntReg(Z_R0), // scratch
  // RegisterSaver_ExcludedIntReg(Z_R1), // scratch
  RegisterSaver_ExcludedIntReg(Z_R2), // Omit saving R2.
  RegisterSaver_LiveIntReg(Z_R3 ),
  RegisterSaver_LiveIntReg(Z_R4 ),
  RegisterSaver_LiveIntReg(Z_R5 ),
  RegisterSaver_LiveIntReg(Z_R6 ),
  RegisterSaver_LiveIntReg(Z_R7 ),
  RegisterSaver_LiveIntReg(Z_R8 ),
  RegisterSaver_LiveIntReg(Z_R9 ),
  RegisterSaver_LiveIntReg(Z_R10),
  RegisterSaver_LiveIntReg(Z_R11),
  RegisterSaver_LiveIntReg(Z_R12),
  RegisterSaver_LiveIntReg(Z_R13),
  // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.)
  // RegisterSaver_ExcludedIntReg(Z_R15)  // stack pointer
};

// Live argument registers which get spilled to the stack.
static const RegisterSaver::LiveRegType RegisterSaver_LiveArgRegs[] = {
  RegisterSaver_LiveFloatReg(Z_FARG1),
  RegisterSaver_LiveFloatReg(Z_FARG2),
  RegisterSaver_LiveFloatReg(Z_FARG3),
  RegisterSaver_LiveFloatReg(Z_FARG4),
  RegisterSaver_LiveIntReg(Z_ARG1),
  RegisterSaver_LiveIntReg(Z_ARG2),
  RegisterSaver_LiveIntReg(Z_ARG3),
  RegisterSaver_LiveIntReg(Z_ARG4),
  RegisterSaver_LiveIntReg(Z_ARG5)
};

static const RegisterSaver::LiveRegType RegisterSaver_LiveVolatileRegs[] = {
  // Live registers which get spilled to the stack. Register positions
  // in this array correspond directly to the stack layout.
  //
  // live float registers:
  //
  RegisterSaver_LiveFloatReg(Z_F0 ),
  // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1)
  RegisterSaver_LiveFloatReg(Z_F2 ),
  RegisterSaver_LiveFloatReg(Z_F3 ),
  RegisterSaver_LiveFloatReg(Z_F4 ),
  RegisterSaver_LiveFloatReg(Z_F5 ),
  RegisterSaver_LiveFloatReg(Z_F6 ),
  RegisterSaver_LiveFloatReg(Z_F7 ),
  // RegisterSaver_LiveFloatReg(Z_F8 ), // non-volatile
  // RegisterSaver_LiveFloatReg(Z_F9 ), // non-volatile
  // RegisterSaver_LiveFloatReg(Z_F10), // non-volatile
  // RegisterSaver_LiveFloatReg(Z_F11), // non-volatile
  // RegisterSaver_LiveFloatReg(Z_F12), // non-volatile
  // RegisterSaver_LiveFloatReg(Z_F13), // non-volatile
  // RegisterSaver_LiveFloatReg(Z_F14), // non-volatile
  // RegisterSaver_LiveFloatReg(Z_F15), // non-volatile
  //
  // RegisterSaver_ExcludedIntReg(Z_R0), // scratch
  // RegisterSaver_ExcludedIntReg(Z_R1), // scratch
  RegisterSaver_LiveIntReg(Z_R2 ),
  RegisterSaver_LiveIntReg(Z_R3 ),
  RegisterSaver_LiveIntReg(Z_R4 ),
  RegisterSaver_LiveIntReg(Z_R5 ),
  // RegisterSaver_LiveIntReg(Z_R6 ), // non-volatile
  // RegisterSaver_LiveIntReg(Z_R7 ), // non-volatile
  // RegisterSaver_LiveIntReg(Z_R8 ), // non-volatile
  // RegisterSaver_LiveIntReg(Z_R9 ), // non-volatile
  // RegisterSaver_LiveIntReg(Z_R10), // non-volatile
  // RegisterSaver_LiveIntReg(Z_R11), // non-volatile
  // RegisterSaver_LiveIntReg(Z_R12), // non-volatile
  // RegisterSaver_LiveIntReg(Z_R13), // non-volatile
  // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.)
  // RegisterSaver_ExcludedIntReg(Z_R15)  // stack pointer
};

int RegisterSaver::live_reg_save_size(RegisterSet reg_set) {
  int reg_space = -1;
  switch (reg_set) {
    case all_registers:           reg_space = sizeof(RegisterSaver_LiveRegs); break;
    case all_registers_except_r2: reg_space = sizeof(RegisterSaver_LiveRegsWithoutR2); break;
    case all_integer_registers:   reg_space = sizeof(RegisterSaver_LiveIntRegs); break;
    case all_volatile_registers:  reg_space = sizeof(RegisterSaver_LiveVolatileRegs); break;
    case arg_registers:           reg_space = sizeof(RegisterSaver_LiveArgRegs); break;
    default: ShouldNotReachHere();
  }
  return (reg_space / sizeof(RegisterSaver::LiveRegType)) * reg_size;
}
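
// Worked example (reg_size is 8 bytes on this 64-bit platform):
// RegisterSaver_LiveRegs above has 15 float + 12 int entries = 27 entries,
// so live_reg_save_size(all_registers) = 27 * 8 = 216 bytes.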


int RegisterSaver::live_reg_frame_size(RegisterSet reg_set) {
  return live_reg_save_size(reg_set) + frame::z_abi_160_size;
}
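
// E.g. live_reg_frame_size(all_registers) = 216 + frame::z_abi_160_size bytes;
// the 160-byte z/Architecture ABI area always sits at the bottom of the frame,
// below the register save area.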


// return_pc: Specify the register that should be stored as the return pc in the current frame.
OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, RegisterSet reg_set, Register return_pc) {
  // Record volatile registers as callee-save values in an OopMap so
  // their save locations will be propagated to the caller frame's
  // RegisterMap during StackFrameStream construction (needed for
  // deoptimization; see compiledVFrame::create_stack_value).

  // Calculate frame size.
  const int frame_size_in_bytes  = live_reg_frame_size(reg_set);
  const int frame_size_in_slots  = frame_size_in_bytes / sizeof(jint);
  const int register_save_offset = frame_size_in_bytes - live_reg_save_size(reg_set);

  // OopMap frame size is in c2 stack slots (sizeof(jint)), not bytes or words.
  OopMap* map = new OopMap(frame_size_in_slots, 0);

  int regstosave_num = 0;
  const RegisterSaver::LiveRegType* live_regs = NULL;

  switch (reg_set) {
    case all_registers:
      regstosave_num = sizeof(RegisterSaver_LiveRegs)/sizeof(RegisterSaver::LiveRegType);
      live_regs      = RegisterSaver_LiveRegs;
      break;
    case all_registers_except_r2:
      regstosave_num = sizeof(RegisterSaver_LiveRegsWithoutR2)/sizeof(RegisterSaver::LiveRegType);
      live_regs      = RegisterSaver_LiveRegsWithoutR2;
      break;
    case all_integer_registers:
      regstosave_num = sizeof(RegisterSaver_LiveIntRegs)/sizeof(RegisterSaver::LiveRegType);
      live_regs      = RegisterSaver_LiveIntRegs;
      break;
    case all_volatile_registers:
      regstosave_num = sizeof(RegisterSaver_LiveVolatileRegs)/sizeof(RegisterSaver::LiveRegType);
      live_regs      = RegisterSaver_LiveVolatileRegs;
      break;
    case arg_registers:
      regstosave_num = sizeof(RegisterSaver_LiveArgRegs)/sizeof(RegisterSaver::LiveRegType);
      live_regs      = RegisterSaver_LiveArgRegs;
      break;
    default: ShouldNotReachHere();
  }

  // Save return pc in old frame.
  __ save_return_pc(return_pc);

  // Push a new frame (includes stack linkage).
  // Use return_pc as scratch for push_frame. Z_R0_scratch (the default) and Z_R1_scratch are
  // illegally used to pass parameters by RangeCheckStub::emit_code().
  __ push_frame(frame_size_in_bytes, return_pc);
  // We have to restore return_pc right away.
  // Nobody else will. Furthermore, return_pc isn't necessarily the default (Z_R14).
  // Nobody else knows which register we saved.
  __ z_lg(return_pc, _z_abi16(return_pc) + frame_size_in_bytes, Z_SP);

  // Register save area in new frame starts above z_abi_160 area.
  int offset = register_save_offset;

  Register first = noreg;
  Register last  = noreg;
  int      first_offset = -1;
  bool     float_spilled = false;

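  // The loop below batches maximal runs of consecutive integer registers and
  // spills each run with a single STMG (store multiple); with the register
  // tables above, Z_R2..Z_R13 end up in one z_stmg. Floats are stored one by
  // one via z_std.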
  for (int i = 0; i < regstosave_num; i++, offset += reg_size) {
    int reg_num  = live_regs[i].reg_num;
    int reg_type = live_regs[i].reg_type;

    switch (reg_type) {
      case RegisterSaver::int_reg: {
        Register reg = as_Register(reg_num);
        if (last != reg->predecessor()) {
          if (first != noreg) {
            __ z_stmg(first, last, first_offset, Z_SP);
          }
          first = reg;
          first_offset = offset;
          DEBUG_ONLY(float_spilled = false);
        }
        last = reg;
        assert(last != Z_R0, "r0 would require special treatment");
        assert(!float_spilled, "for simplicity, do not mix up ints and floats in RegisterSaver_LiveRegs[]");
        break;
      }

      case RegisterSaver::excluded_reg: // Not saved/restored, but with dedicated slot.
        continue; // Continue with next loop iteration.

      case RegisterSaver::float_reg: {
        FloatRegister freg = as_FloatRegister(reg_num);
        __ z_std(freg, offset, Z_SP);
        DEBUG_ONLY(float_spilled = true);
        break;
      }

      default:
        ShouldNotReachHere();
        break;
    }

    // The second set_callee_saved is really a waste, but we'll keep things as they were for now.
    map->set_callee_saved(VMRegImpl::stack2reg(offset >> 2), live_regs[i].vmreg);
    map->set_callee_saved(VMRegImpl::stack2reg((offset + half_reg_size) >> 2), live_regs[i].vmreg->next());
  }
  assert(first != noreg, "Should spill at least one int reg.");
  __ z_stmg(first, last, first_offset, Z_SP);

  // And we're done.
  return map;
}


// Generate the OopMap (again; the registers were saved before).
OopMap* RegisterSaver::generate_oop_map(MacroAssembler* masm, RegisterSet reg_set) {
  // Calculate frame size.
  const int frame_size_in_bytes  = live_reg_frame_size(reg_set);
  const int frame_size_in_slots  = frame_size_in_bytes / sizeof(jint);
  const int register_save_offset = frame_size_in_bytes - live_reg_save_size(reg_set);

  // OopMap frame size is in c2 stack slots (sizeof(jint)), not bytes or words.
  OopMap* map = new OopMap(frame_size_in_slots, 0);

  int regstosave_num = 0;
  const RegisterSaver::LiveRegType* live_regs = NULL;

  switch (reg_set) {
    case all_registers:
      regstosave_num = sizeof(RegisterSaver_LiveRegs)/sizeof(RegisterSaver::LiveRegType);
      live_regs      = RegisterSaver_LiveRegs;
      break;
    case all_registers_except_r2:
      regstosave_num = sizeof(RegisterSaver_LiveRegsWithoutR2)/sizeof(RegisterSaver::LiveRegType);
      live_regs      = RegisterSaver_LiveRegsWithoutR2;
      break;
    case all_integer_registers:
      regstosave_num = sizeof(RegisterSaver_LiveIntRegs)/sizeof(RegisterSaver::LiveRegType);
      live_regs      = RegisterSaver_LiveIntRegs;
      break;
    case all_volatile_registers:
      regstosave_num = sizeof(RegisterSaver_LiveVolatileRegs)/sizeof(RegisterSaver::LiveRegType);
      live_regs      = RegisterSaver_LiveVolatileRegs;
      break;
    case arg_registers:
      regstosave_num = sizeof(RegisterSaver_LiveArgRegs)/sizeof(RegisterSaver::LiveRegType);
      live_regs      = RegisterSaver_LiveArgRegs;
      break;
    default: ShouldNotReachHere();
  }

  // Register save area in new frame starts above z_abi_160 area.
  int offset = register_save_offset;
  for (int i = 0; i < regstosave_num; i++) {
    if (live_regs[i].reg_type < RegisterSaver::excluded_reg) {
      map->set_callee_saved(VMRegImpl::stack2reg(offset>>2), live_regs[i].vmreg);
      map->set_callee_saved(VMRegImpl::stack2reg((offset + half_reg_size)>>2), live_regs[i].vmreg->next());
    }
    offset += reg_size;
  }
  return map;
}


// Pop the current frame and restore all the registers that we saved.
void RegisterSaver::restore_live_registers(MacroAssembler* masm, RegisterSet reg_set) {
  int offset;
  const int register_save_offset = live_reg_frame_size(reg_set) - live_reg_save_size(reg_set);

  Register first = noreg;
  Register last = noreg;
  int      first_offset = -1;
  bool     float_spilled = false;

  int regstosave_num = 0;
  const RegisterSaver::LiveRegType* live_regs = NULL;

  switch (reg_set) {
    case all_registers:
      regstosave_num = sizeof(RegisterSaver_LiveRegs)/sizeof(RegisterSaver::LiveRegType);
      live_regs      = RegisterSaver_LiveRegs;
      break;
    case all_registers_except_r2:
      regstosave_num = sizeof(RegisterSaver_LiveRegsWithoutR2)/sizeof(RegisterSaver::LiveRegType);
      live_regs      = RegisterSaver_LiveRegsWithoutR2;
      break;
    case all_integer_registers:
      regstosave_num = sizeof(RegisterSaver_LiveIntRegs)/sizeof(RegisterSaver::LiveRegType);
      live_regs      = RegisterSaver_LiveIntRegs;
      break;
    case all_volatile_registers:
      regstosave_num = sizeof(RegisterSaver_LiveVolatileRegs)/sizeof(RegisterSaver::LiveRegType);
      live_regs      = RegisterSaver_LiveVolatileRegs;
      break;
    case arg_registers:
      regstosave_num = sizeof(RegisterSaver_LiveArgRegs)/sizeof(RegisterSaver::LiveRegType);
      live_regs      = RegisterSaver_LiveArgRegs;
      break;
    default: ShouldNotReachHere();
  }

  // Restore all registers (ints and floats).

  // Register save area in new frame starts above z_abi_160 area.
  offset = register_save_offset;

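  // As in save_live_registers(), runs of consecutive integer registers are
  // reloaded with a single LMG (load multiple) below.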
  for (int i = 0; i < regstosave_num; i++, offset += reg_size) {
    int reg_num  = live_regs[i].reg_num;
    int reg_type = live_regs[i].reg_type;

    switch (reg_type) {
      case RegisterSaver::excluded_reg:
        continue; // Continue with next loop iteration.

      case RegisterSaver::int_reg: {
        Register reg = as_Register(reg_num);
        if (last != reg->predecessor()) {
          if (first != noreg) {
            __ z_lmg(first, last, first_offset, Z_SP);
          }
          first = reg;
          first_offset = offset;
          DEBUG_ONLY(float_spilled = false);
        }
        last = reg;
        assert(last != Z_R0, "r0 would require special treatment");
        assert(!float_spilled, "for simplicity, do not mix up ints and floats in RegisterSaver_LiveRegs[]");
        break;
      }

      case RegisterSaver::float_reg: {
        FloatRegister freg = as_FloatRegister(reg_num);
        __ z_ld(freg, offset, Z_SP);
        DEBUG_ONLY(float_spilled = true);
        break;
      }

      default:
        ShouldNotReachHere();
    }
  }
  assert(first != noreg, "Should spill at least one int reg.");
  __ z_lmg(first, last, first_offset, Z_SP);

  // Pop the frame.
  __ pop_frame();

  // Restore the return pc.
  __ restore_return_pc();
}


// Pop the current frame and restore the registers that might be holding a result.
void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
  int offset;
  const int regstosave_num       = sizeof(RegisterSaver_LiveRegs) /
                                   sizeof(RegisterSaver::LiveRegType);
  const int register_save_offset = live_reg_frame_size(all_registers) - live_reg_save_size(all_registers);

  // Restore all result registers (ints and floats).
  offset = register_save_offset;
  for (int i = 0; i < regstosave_num; i++, offset += reg_size) {
    int reg_num = RegisterSaver_LiveRegs[i].reg_num;
    int reg_type = RegisterSaver_LiveRegs[i].reg_type;
    switch (reg_type) {
      case RegisterSaver::excluded_reg:
        continue; // Continue with next loop iteration.
      case RegisterSaver::int_reg: {
        if (as_Register(reg_num) == Z_RET) { // int result_reg
          __ z_lg(as_Register(reg_num), offset, Z_SP);
        }
        break;
      }
      case RegisterSaver::float_reg: {
        if (as_FloatRegister(reg_num) == Z_FRET) { // float result_reg
          __ z_ld(as_FloatRegister(reg_num), offset, Z_SP);
        }
        break;
      }
      default:
        ShouldNotReachHere();
    }
  }
}

// ---------------------------------------------------------------------------
void SharedRuntime::save_native_result(MacroAssembler * masm,
                                       BasicType ret_type,
                                       int frame_slots) {
  Address memaddr(Z_SP, frame_slots * VMRegImpl::stack_slot_size);

  switch (ret_type) {
    case T_BOOLEAN:  // Save shorter types as int. Do we need sign extension at restore??
    case T_BYTE:
    case T_CHAR:
    case T_SHORT:
    case T_INT:
      __ reg2mem_opt(Z_RET, memaddr, false);
      break;
    case T_OBJECT:   // Save pointer types as long.
    case T_ARRAY:
    case T_ADDRESS:
    case T_VOID:
    case T_LONG:
      __ reg2mem_opt(Z_RET, memaddr);
      break;
    case T_FLOAT:
      __ freg2mem_opt(Z_FRET, memaddr, false);
      break;
    case T_DOUBLE:
      __ freg2mem_opt(Z_FRET, memaddr);
      break;
    default:
      ShouldNotReachHere();
      break;
  }
}

void SharedRuntime::restore_native_result(MacroAssembler *masm,
                                          BasicType       ret_type,
                                          int             frame_slots) {
  Address memaddr(Z_SP, frame_slots * VMRegImpl::stack_slot_size);

  switch (ret_type) {
    case T_BOOLEAN:  // Restore shorter types as int. Do we need sign extension at restore??
    case T_BYTE:
    case T_CHAR:
    case T_SHORT:
    case T_INT:
      __ mem2reg_opt(Z_RET, memaddr, false);
      break;
    case T_OBJECT:   // Restore pointer types as long.
    case T_ARRAY:
    case T_ADDRESS:
    case T_VOID:
    case T_LONG:
      __ mem2reg_opt(Z_RET, memaddr);
      break;
    case T_FLOAT:
      __ mem2freg_opt(Z_FRET, memaddr, false);
      break;
    case T_DOUBLE:
      __ mem2freg_opt(Z_FRET, memaddr);
      break;
    default:
      ShouldNotReachHere();
      break;
  }
}

// ---------------------------------------------------------------------------
// Read the array of BasicTypes from a signature, and compute where the
// arguments should go. Values in the VMRegPair regs array refer to 4-byte
// quantities. Values less than VMRegImpl::stack0 are registers, those above
// refer to 4-byte stack slots. All stack slots are based off of the stack pointer
// as framesizes are fixed.
// VMRegImpl::stack0 refers to the first slot 0(sp).
// VMRegImpl::stack0+1 refers to the memory word 4 bytes higher. Registers
// up to RegisterImpl::number_of_registers are the 64-bit integer registers.

// Note: the INPUTS in sig_bt are in units of Java argument words, which are
// either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit
// units regardless of build.
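
// For illustration: VMRegImpl::stack2reg(3) denotes the 4-byte stack slot at
// byte offset 12 from the stack pointer, while Z_R2->as_VMReg() encodes below
// VMRegImpl::stack0 and thus denotes a register location.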

// The Java calling convention is a "shifted" version of the C ABI.
// By skipping the first C ABI register we can call non-static jni methods
// with small numbers of arguments without having to shuffle the arguments
// at all. Since we control the java ABI we ought to at least get some
// advantage out of it.
int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
                                           VMRegPair *regs,
                                           int total_args_passed) {
  // c2c calling conventions for compiled-compiled calls.

  // An int/float occupies 1 slot here.
  const int inc_stk_for_intfloat   = 1; // 1 slot for ints and floats.
  const int inc_stk_for_longdouble = 2; // 2 slots for longs and doubles.

  const VMReg z_iarg_reg[5] = {
    Z_R2->as_VMReg(),
    Z_R3->as_VMReg(),
    Z_R4->as_VMReg(),
    Z_R5->as_VMReg(),
    Z_R6->as_VMReg()
  };
  const VMReg z_farg_reg[4] = {
    Z_F0->as_VMReg(),
    Z_F2->as_VMReg(),
    Z_F4->as_VMReg(),
    Z_F6->as_VMReg()
  };
  const int z_num_iarg_registers = sizeof(z_iarg_reg) / sizeof(z_iarg_reg[0]);
  const int z_num_farg_registers = sizeof(z_farg_reg) / sizeof(z_farg_reg[0]);

  assert(RegisterImpl::number_of_arg_registers == z_num_iarg_registers, "iarg reg count mismatch");
  assert(FloatRegisterImpl::number_of_arg_registers == z_num_farg_registers, "farg reg count mismatch");

  int stk = 0;
  int ireg = 0;
  int freg = 0;

  for (int i = 0; i < total_args_passed; ++i) {
    switch (sig_bt[i]) {
      case T_BOOLEAN:
      case T_CHAR:
      case T_BYTE:
      case T_SHORT:
      case T_INT:
        if (ireg < z_num_iarg_registers) {
          // Put int/ptr in register.
          regs[i].set1(z_iarg_reg[ireg]);
          ++ireg;
        } else {
          // Put int/ptr on stack.
          regs[i].set1(VMRegImpl::stack2reg(stk));
          stk += inc_stk_for_intfloat;
        }
        break;
      case T_LONG:
        assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half");
        if (ireg < z_num_iarg_registers) {
          // Put long in register.
          regs[i].set2(z_iarg_reg[ireg]);
          ++ireg;
        } else {
          // Put long on stack and align to 2 slots.
          if (stk & 0x1) { ++stk; }
          regs[i].set2(VMRegImpl::stack2reg(stk));
          stk += inc_stk_for_longdouble;
        }
        break;
      case T_OBJECT:
      case T_ARRAY:
      case T_ADDRESS:
        if (ireg < z_num_iarg_registers) {
          // Put ptr in register.
          regs[i].set2(z_iarg_reg[ireg]);
          ++ireg;
        } else {
          // Put ptr on stack and align to 2 slots, because
          // "64-bit pointers record oop-ishness on 2 aligned adjacent
          // registers." (see OopFlow::build_oop_map).
          if (stk & 0x1) { ++stk; }
          regs[i].set2(VMRegImpl::stack2reg(stk));
          stk += inc_stk_for_longdouble;
        }
        break;
      case T_FLOAT:
        if (freg < z_num_farg_registers) {
          // Put float in register.
          regs[i].set1(z_farg_reg[freg]);
          ++freg;
        } else {
          // Put float on stack.
          regs[i].set1(VMRegImpl::stack2reg(stk));
          stk += inc_stk_for_intfloat;
        }
        break;
      case T_DOUBLE:
        assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half");
        if (freg < z_num_farg_registers) {
          // Put double in register.
          regs[i].set2(z_farg_reg[freg]);
          ++freg;
        } else {
          // Put double on stack and align to 2 slots.
          if (stk & 0x1) { ++stk; }
          regs[i].set2(VMRegImpl::stack2reg(stk));
          stk += inc_stk_for_longdouble;
        }
        break;
      case T_VOID:
        assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
        // Do not count halves.
        regs[i].set_bad();
        break;
      default:
        ShouldNotReachHere();
    }
  }
  return align_up(stk, 2);
}
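
// Worked example for the convention above: the signature (int, long, float,
// double), i.e. total_args_passed == 6 including the two T_VOID halves,
// maps to
//   int -> Z_R2, long -> Z_R3, float -> Z_F0, double -> Z_F2,
// with the T_VOID halves set_bad(), and returns 0 stack slots since all
// arguments fit in registers.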

int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
                                        VMRegPair *regs,
                                        VMRegPair *regs2,
                                        int total_args_passed) {
  assert(regs2 == NULL, "second VMRegPair array not used on this platform");

  // Calling conventions for C runtime calls and calls to JNI native methods.
  const VMReg z_iarg_reg[5] = {
    Z_R2->as_VMReg(),
    Z_R3->as_VMReg(),
    Z_R4->as_VMReg(),
    Z_R5->as_VMReg(),
    Z_R6->as_VMReg()
  };
  const VMReg z_farg_reg[4] = {
    Z_F0->as_VMReg(),
    Z_F2->as_VMReg(),
    Z_F4->as_VMReg(),
    Z_F6->as_VMReg()
  };
  const int z_num_iarg_registers = sizeof(z_iarg_reg) / sizeof(z_iarg_reg[0]);
  const int z_num_farg_registers = sizeof(z_farg_reg) / sizeof(z_farg_reg[0]);

  // Check calling conventions consistency.
  assert(RegisterImpl::number_of_arg_registers == z_num_iarg_registers, "iarg reg count mismatch");
  assert(FloatRegisterImpl::number_of_arg_registers == z_num_farg_registers, "farg reg count mismatch");

  // Avoid passing C arguments in the wrong stack slots.

  // 'Stk' counts stack slots. Due to alignment, 32 bit values occupy
  // 2 such slots, like 64 bit values do.
  const int inc_stk_for_intfloat   = 2; // 2 slots for ints and floats.
  const int inc_stk_for_longdouble = 2; // 2 slots for longs and doubles.

  // Leave room for C-compatible ABI
  int stk = (frame::z_abi_160_size - frame::z_jit_out_preserve_size) / VMRegImpl::stack_slot_size;
  int freg = 0;
  int ireg = 0;

  // We put the first 5 arguments into registers and the rest on the
  // stack. Float arguments are already in their argument registers
  // due to c2c calling conventions (see calling_convention).
  for (int i = 0; i < total_args_passed; ++i) {
    switch (sig_bt[i]) {
      case T_BOOLEAN:
      case T_CHAR:
      case T_BYTE:
      case T_SHORT:
      case T_INT:
        // Fall through, handle as long.
      case T_LONG:
      case T_OBJECT:
      case T_ARRAY:
      case T_ADDRESS:
      case T_METADATA:
        // Oops are already boxed if required (JNI).
        if (ireg < z_num_iarg_registers) {
          regs[i].set2(z_iarg_reg[ireg]);
          ++ireg;
        } else {
          regs[i].set2(VMRegImpl::stack2reg(stk));
          stk += inc_stk_for_longdouble;
        }
        break;
      case T_FLOAT:
        if (freg < z_num_farg_registers) {
          regs[i].set1(z_farg_reg[freg]);
          ++freg;
        } else {
          regs[i].set1(VMRegImpl::stack2reg(stk+1));
          stk += inc_stk_for_intfloat;
        }
        break;
      case T_DOUBLE:
        assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half");
        if (freg < z_num_farg_registers) {
          regs[i].set2(z_farg_reg[freg]);
          ++freg;
        } else {
          // Put double on stack.
          regs[i].set2(VMRegImpl::stack2reg(stk));
          stk += inc_stk_for_longdouble;
        }
        break;
      case T_VOID:
        // Do not count halves.
        regs[i].set_bad();
        break;
      default:
        ShouldNotReachHere();
    }
  }
  return align_up(stk, 2);
}
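
// The same signature under the C convention: every argument that goes to the
// stack occupies 2 slots, ints are widened to longs (int -> Z_R2,
// long -> Z_R3), floats stay in their Java argument registers
// (float -> Z_F0, double -> Z_F2), and stack slots start above the
// C-compatible ABI area instead of at slot 0.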

int SharedRuntime::vector_calling_convention(VMRegPair *regs,
                                             uint num_bits,
                                             uint total_args_passed) {
  Unimplemented();
  return 0;
}

////////////////////////////////////////////////////////////////////////
//
//  Argument shufflers
//
////////////////////////////////////////////////////////////////////////

//----------------------------------------------------------------------
// The java_calling_convention describes stack locations as ideal slots on
// a frame with no abi restrictions. Since we must observe abi restrictions
// (like the placement of the register window) the slots must be biased by
// the following value.
//----------------------------------------------------------------------
static int reg2slot(VMReg r) {
  return r->reg2stack() + SharedRuntime::out_preserve_stack_slots();
}

static int reg2offset(VMReg r) {
  return reg2slot(r) * VMRegImpl::stack_slot_size;
}
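
// E.g. a VMReg in stack slot s maps to byte offset
// (s + out_preserve_stack_slots()) * VMRegImpl::stack_slot_size from SP.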

static void verify_oop_args(MacroAssembler *masm,
                            int total_args_passed,
                            const BasicType *sig_bt,
                            const VMRegPair *regs) {
  if (!VerifyOops) { return; }

  for (int i = 0; i < total_args_passed; i++) {
    if (is_reference_type(sig_bt[i])) {
      VMReg r = regs[i].first();
      assert(r->is_valid(), "bad oop arg");

      if (r->is_stack()) {
        __ z_lg(Z_R0_scratch,
                Address(Z_SP, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
        __ verify_oop(Z_R0_scratch, FILE_AND_LINE);
      } else {
        __ verify_oop(r->as_Register(), FILE_AND_LINE);
      }
    }
  }
}

static void gen_special_dispatch(MacroAssembler *masm,
                                 int total_args_passed,
                                 vmIntrinsics::ID special_dispatch,
                                 const BasicType *sig_bt,
                                 const VMRegPair *regs) {
  verify_oop_args(masm, total_args_passed, sig_bt, regs);

  // Now write the args into the outgoing interpreter space.
  bool     has_receiver   = false;
  Register receiver_reg   = noreg;
  int      member_arg_pos = -1;
  Register member_reg     = noreg;
  int      ref_kind       = MethodHandles::signature_polymorphic_intrinsic_ref_kind(special_dispatch);

  if (ref_kind != 0) {
    member_arg_pos = total_args_passed - 1;  // trailing MemberName argument
    member_reg = Z_R9;                       // Known to be free at this point.
    has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind);
  } else {
    guarantee(special_dispatch == vmIntrinsics::_invokeBasic || special_dispatch == vmIntrinsics::_linkToNative,
              "special_dispatch=%d", vmIntrinsics::as_int(special_dispatch));
    has_receiver = true;
  }

  if (member_reg != noreg) {
    // Load the member_arg into register, if necessary.
    assert(member_arg_pos >= 0 && member_arg_pos < total_args_passed, "oob");
    assert(sig_bt[member_arg_pos] == T_OBJECT, "dispatch argument must be an object");

    VMReg r = regs[member_arg_pos].first();
    assert(r->is_valid(), "bad member arg");

    if (r->is_stack()) {
      __ z_lg(member_reg, Address(Z_SP, reg2offset(r)));
    } else {
      // No data motion is needed.
      member_reg = r->as_Register();
    }
  }

  if (has_receiver) {
    // Make sure the receiver is loaded into a register.
    assert(total_args_passed > 0, "oob");
    assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object");

    VMReg r = regs[0].first();
    assert(r->is_valid(), "bad receiver arg");

    if (r->is_stack()) {
      // Porting note: This assumes that compiled calling conventions always
      // pass the receiver oop in a register. If this is not true on some
      // platform, pick a temp and load the receiver from stack.
      assert(false, "receiver always in a register");
      receiver_reg = Z_R13;  // Known to be free at this point.
      __ z_lg(receiver_reg, Address(Z_SP, reg2offset(r)));
    } else {
      // No data motion is needed.
      receiver_reg = r->as_Register();
    }
  }

  // Figure out which address we are really jumping to:
  MethodHandles::generate_method_handle_dispatch(masm, special_dispatch,
                                                 receiver_reg, member_reg,
                                                 /*for_compiler_entry:*/ true);
}


// Is the given vector size (in bytes) bigger than the size saved by default?
// 8-byte registers are saved by default on z/Architecture.
bool SharedRuntime::is_wide_vector(int size) {
  // Note, MaxVectorSize == 8 on this platform.
  assert(size <= 8, "%d bytes vectors are not supported", size);
  return size > 8;
}

//----------------------------------------------------------------------
// An oop arg. Must pass a handle, not the oop itself.
//----------------------------------------------------------------------
static void object_move(MacroAssembler *masm,
                        OopMap *map,
                        int oop_handle_offset,
                        int framesize_in_slots,
                        VMRegPair src,
                        VMRegPair dst,
                        bool is_receiver,
                        int *receiver_offset) {
  int frame_offset = framesize_in_slots*VMRegImpl::stack_slot_size;

  assert(!is_receiver || (is_receiver && (*receiver_offset == -1)), "only one receiving object per call, please.");

  // Must pass a handle. First figure out the location we use as a handle.

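  // In both cases below, rHandle ends up holding either the address of the
  // stack slot containing the oop, or NULL if the oop itself is NULL
  // (JNI convention: a NULL oop is passed as a NULL handle, not as a pointer
  // to a slot containing NULL).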
  if (src.first()->is_stack()) {
    // Oop is already on the stack, put handle on stack or in register.
    // If handle will be on the stack, use temp reg to calculate it.
    Register rHandle = dst.first()->is_stack() ? Z_R1 : dst.first()->as_Register();
    Label    skip;
    int      slot_in_older_frame = reg2slot(src.first());

    guarantee(!is_receiver, "expecting receiver in register");
    map->set_oop(VMRegImpl::stack2reg(slot_in_older_frame + framesize_in_slots));

    __ add2reg(rHandle, reg2offset(src.first())+frame_offset, Z_SP);
    __ load_and_test_long(Z_R0, Address(rHandle));
    __ z_brne(skip);
    // Use a NULL handle if oop is NULL.
    __ clear_reg(rHandle, true, false);
    __ bind(skip);

    // Copy handle to the right place (register or stack).
    if (dst.first()->is_stack()) {
      __ z_stg(rHandle, reg2offset(dst.first()), Z_SP);
    } // else
      // nothing to do. rHandle uses the correct register
  } else {
    // Oop is passed in an input register. We must flush it to the stack.
    const Register rOop = src.first()->as_Register();
    const Register rHandle = dst.first()->is_stack() ? Z_R1 : dst.first()->as_Register();
    int            oop_slot = (rOop->encoding()-Z_ARG1->encoding()) * VMRegImpl::slots_per_word + oop_handle_offset;
    int            oop_slot_offset = oop_slot*VMRegImpl::stack_slot_size;
    NearLabel skip;

    if (is_receiver) {
      *receiver_offset = oop_slot_offset;
    }
    map->set_oop(VMRegImpl::stack2reg(oop_slot));

    // Flush Oop to stack, calculate handle.
    __ z_stg(rOop, oop_slot_offset, Z_SP);
    __ add2reg(rHandle, oop_slot_offset, Z_SP);

    // If Oop == NULL, use a NULL handle.
    __ compare64_and_branch(rOop, (RegisterOrConstant)0L, Assembler::bcondNotEqual, skip);
    __ clear_reg(rHandle, true, false);
    __ bind(skip);

    // Copy handle to the right place (register or stack).
    if (dst.first()->is_stack()) {
      __ z_stg(rHandle, reg2offset(dst.first()), Z_SP);
    } // else
      // nothing to do here, since rHandle = dst.first()->as_Register in this case.
  }
}

//----------------------------------------------------------------------
// A float arg. May have to do float reg to int reg conversion
//----------------------------------------------------------------------
static void float_move(MacroAssembler *masm,
                       VMRegPair src,
                       VMRegPair dst,
                       int framesize_in_slots,
                       int workspace_slot_offset) {
  int frame_offset = framesize_in_slots * VMRegImpl::stack_slot_size;
  int workspace_offset = workspace_slot_offset * VMRegImpl::stack_slot_size;

  // We do not accept an argument in a VMRegPair to be spread over two slots,
  // no matter what physical location (reg or stack) the slots may have.
  // We just check for the unaccepted slot to be invalid.
  assert(!src.second()->is_valid(), "float in arg spread over two slots");
  assert(!dst.second()->is_valid(), "float out arg spread over two slots");

  if (src.first()->is_stack()) {
    if (dst.first()->is_stack()) {
      // stack -> stack. The easiest of the bunch.
      __ z_mvc(Address(Z_SP, reg2offset(dst.first())),
               Address(Z_SP, reg2offset(src.first()) + frame_offset), sizeof(float));
    } else {
      // stack to reg
      Address memaddr(Z_SP, reg2offset(src.first()) + frame_offset);
      if (dst.first()->is_Register()) {
        __ mem2reg_opt(dst.first()->as_Register(), memaddr, false);
      } else {
        __ mem2freg_opt(dst.first()->as_FloatRegister(), memaddr, false);
      }
    }
  } else if (src.first()->is_Register()) {
    if (dst.first()->is_stack()) {
      // gpr -> stack
      __ reg2mem_opt(src.first()->as_Register(),
                     Address(Z_SP, reg2offset(dst.first())), false);
    } else {
      if (dst.first()->is_Register()) {
        // gpr -> gpr
        __ move_reg_if_needed(dst.first()->as_Register(), T_INT,
                              src.first()->as_Register(), T_INT);
      } else {
        if (VM_Version::has_FPSupportEnhancements()) {
          // gpr -> fpr. Exploit z10 capability of direct transfer.
          __ z_ldgr(dst.first()->as_FloatRegister(), src.first()->as_Register());
        } else {
          // gpr -> fpr. Use work space on stack to transfer data.
          Address   stackaddr(Z_SP, workspace_offset);

          __ reg2mem_opt(src.first()->as_Register(), stackaddr, false);
          __ mem2freg_opt(dst.first()->as_FloatRegister(), stackaddr, false);
        }
      }
    }
  } else {
    if (dst.first()->is_stack()) {
      // fpr -> stack
      __ freg2mem_opt(src.first()->as_FloatRegister(),
                      Address(Z_SP, reg2offset(dst.first())), false);
    } else {
      if (dst.first()->is_Register()) {
        if (VM_Version::has_FPSupportEnhancements()) {
          // fpr -> gpr.
          __ z_lgdr(dst.first()->as_Register(), src.first()->as_FloatRegister());
        } else {
          // fpr -> gpr. Use work space on stack to transfer data.
          Address   stackaddr(Z_SP, workspace_offset);

          __ freg2mem_opt(src.first()->as_FloatRegister(), stackaddr, false);
          __ mem2reg_opt(dst.first()->as_Register(), stackaddr, false);
        }
      } else {
        // fpr -> fpr
        __ move_freg_if_needed(dst.first()->as_FloatRegister(), T_FLOAT,
                               src.first()->as_FloatRegister(), T_FLOAT);
      }
    }
  }
}

//----------------------------------------------------------------------
// A double arg. May have to do double reg to long reg conversion
//----------------------------------------------------------------------
static void double_move(MacroAssembler *masm,
                        VMRegPair src,
                        VMRegPair dst,
                        int framesize_in_slots,
                        int workspace_slot_offset) {
  int frame_offset = framesize_in_slots*VMRegImpl::stack_slot_size;
  int workspace_offset = workspace_slot_offset*VMRegImpl::stack_slot_size;

  // Since src is always a java calling convention we know that the
  // src pair is always either all registers or all stack (and aligned?)

  if (src.first()->is_stack()) {
    if (dst.first()->is_stack()) {
      // stack -> stack. The easiest of the bunch.
      __ z_mvc(Address(Z_SP, reg2offset(dst.first())),
               Address(Z_SP, reg2offset(src.first()) + frame_offset), sizeof(double));
    } else {
      // stack to reg
      Address stackaddr(Z_SP, reg2offset(src.first()) + frame_offset);

      if (dst.first()->is_Register()) {
        __ mem2reg_opt(dst.first()->as_Register(), stackaddr);
      } else {
        __ mem2freg_opt(dst.first()->as_FloatRegister(), stackaddr);
      }
    }
  } else if (src.first()->is_Register()) {
    if (dst.first()->is_stack()) {
      // gpr -> stack
      __ reg2mem_opt(src.first()->as_Register(),
                     Address(Z_SP, reg2offset(dst.first())));
    } else {
      if (dst.first()->is_Register()) {
        // gpr -> gpr
        __ move_reg_if_needed(dst.first()->as_Register(), T_LONG,
                              src.first()->as_Register(), T_LONG);
      } else {
        if (VM_Version::has_FPSupportEnhancements()) {
          // gpr -> fpr. Exploit z10 capability of direct transfer.
          __ z_ldgr(dst.first()->as_FloatRegister(), src.first()->as_Register());
        } else {
          // gpr -> fpr. Use work space on stack to transfer data.
          Address stackaddr(Z_SP, workspace_offset);
          __ reg2mem_opt(src.first()->as_Register(), stackaddr);
          __ mem2freg_opt(dst.first()->as_FloatRegister(), stackaddr);
        }
      }
    }
  } else {
    if (dst.first()->is_stack()) {
      // fpr -> stack
      __ freg2mem_opt(src.first()->as_FloatRegister(),
                      Address(Z_SP, reg2offset(dst.first())));
    } else {
      if (dst.first()->is_Register()) {
        if (VM_Version::has_FPSupportEnhancements()) {
          // fpr -> gpr. Exploit z10 capability of direct transfer.
          __ z_lgdr(dst.first()->as_Register(), src.first()->as_FloatRegister());
        } else {
          // fpr -> gpr. Use work space on stack to transfer data.
          Address stackaddr(Z_SP, workspace_offset);

          __ freg2mem_opt(src.first()->as_FloatRegister(), stackaddr);
          __ mem2reg_opt(dst.first()->as_Register(), stackaddr);
        }
      } else {
        // fpr -> fpr
        // In theory these overlap but the ordering is such that this is likely a nop.
        __ move_freg_if_needed(dst.first()->as_FloatRegister(), T_DOUBLE,
                               src.first()->as_FloatRegister(), T_DOUBLE);
      }
    }
  }
}

//----------------------------------------------------------------------
// A long arg.
//----------------------------------------------------------------------
static void long_move(MacroAssembler *masm,
                      VMRegPair src,
                      VMRegPair dst,
                      int framesize_in_slots) {
  int frame_offset = framesize_in_slots*VMRegImpl::stack_slot_size;

  if (src.first()->is_stack()) {
    if (dst.first()->is_stack()) {
      // stack -> stack. The easiest of the bunch.
      __ z_mvc(Address(Z_SP, reg2offset(dst.first())),
               Address(Z_SP, reg2offset(src.first()) + frame_offset), sizeof(long));
    } else {
      // stack to reg
      assert(dst.first()->is_Register(), "long dst value must be in GPR");
      __ mem2reg_opt(dst.first()->as_Register(),
                      Address(Z_SP, reg2offset(src.first()) + frame_offset));
    }
  } else {
    // reg to reg
    assert(src.first()->is_Register(), "long src value must be in GPR");
    if (dst.first()->is_stack()) {
      // reg -> stack
      __ reg2mem_opt(src.first()->as_Register(),
                     Address(Z_SP, reg2offset(dst.first())));
    } else {
      // reg -> reg
      assert(dst.first()->is_Register(), "long dst value must be in GPR");
      __ move_reg_if_needed(dst.first()->as_Register(),
                            T_LONG, src.first()->as_Register(), T_LONG);
    }
  }
}


//----------------------------------------------------------------------
// An int-like arg.
//----------------------------------------------------------------------
// On z/Architecture we will store integer-like items to the stack as 64-bit
// items, according to the z/Architecture ABI, even though Java would only store
// 32 bits for a parameter.
// We do sign extension for all base types. That is ok since the only
// unsigned base type is T_CHAR, and T_CHAR uses only 16 bits of an int.
// Sign extension 32->64 bit will thus not affect the value.
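// Example: the T_INT value 0x80000000 (-2147483648) is stored as the 64-bit
// value 0xffffffff80000000, while a T_CHAR value (at most 0xffff) never has
// bit 31 set and is therefore unchanged by the extension.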
//----------------------------------------------------------------------
static void move32_64(MacroAssembler *masm,
                      VMRegPair src,
                      VMRegPair dst,
                      int framesize_in_slots) {
  int frame_offset = framesize_in_slots * VMRegImpl::stack_slot_size;

  if (src.first()->is_stack()) {
    Address memaddr(Z_SP, reg2offset(src.first()) + frame_offset);
    if (dst.first()->is_stack()) {
      // stack -> stack. MVC not possible due to sign extension.
      Address firstaddr(Z_SP, reg2offset(dst.first()));
      __ mem2reg_signed_opt(Z_R0_scratch, memaddr);
      __ reg2mem_opt(Z_R0_scratch, firstaddr);
    } else {
      // stack -> reg, sign extended
      __ mem2reg_signed_opt(dst.first()->as_Register(), memaddr);
    }
  } else {
    if (dst.first()->is_stack()) {
      // reg -> stack, sign extended
      Address firstaddr(Z_SP, reg2offset(dst.first()));
      __ z_lgfr(src.first()->as_Register(), src.first()->as_Register());
      __ reg2mem_opt(src.first()->as_Register(), firstaddr);
    } else {
      // reg -> reg, sign extended
      __ z_lgfr(dst.first()->as_Register(), src.first()->as_Register());
    }
  }
}
1288 
1289 //----------------------------------------------------------------------
1290 // Wrap a JNI call.
1291 //----------------------------------------------------------------------
1292 #undef USE_RESIZE_FRAME
1293 nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
1294                                                 const methodHandle& method,
1295                                                 int compile_id,
1296                                                 BasicType *in_sig_bt,
1297                                                 VMRegPair *in_regs,
1298                                                 BasicType ret_type) {
1299   int total_in_args = method->size_of_parameters();
1300   if (method->is_method_handle_intrinsic()) {
1301     vmIntrinsics::ID iid = method->intrinsic_id();
1302     intptr_t start = (intptr_t) __ pc();
1303     int vep_offset = ((intptr_t) __ pc()) - start;
1304 
1305     gen_special_dispatch(masm, total_in_args,
1306                          method->intrinsic_id(), in_sig_bt, in_regs);
1307 
1308     int frame_complete = ((intptr_t)__ pc()) - start; // Not complete, period.
1309 
1310     __ flush();
1311 
1312     int stack_slots = SharedRuntime::out_preserve_stack_slots();  // No out slots at all, actually.
1313 
1314     return nmethod::new_native_nmethod(method,
1315                                        compile_id,
1316                                        masm->code(),
1317                                        vep_offset,
1318                                        frame_complete,
1319                                        stack_slots / VMRegImpl::slots_per_word,
1320                                        in_ByteSize(-1),
1321                                        in_ByteSize(-1),
1322                                        (OopMapSet *) NULL);
1323   }
1324 
1325 
1326   ///////////////////////////////////////////////////////////////////////
1327   //
1328   //  Precalculations before generating any code
1329   //
1330   ///////////////////////////////////////////////////////////////////////
1331 
1332   address native_func = method->native_function();
1333   assert(native_func != NULL, "must have function");
1334 
1335   //---------------------------------------------------------------------
1336   // We have received a description of where all the java args are located
1337   // on entry to the wrapper. We need to convert these args to where
1338   // the jni function will expect them. To figure out where they go
1339   // we convert the java signature to a C signature by inserting
1340   // the hidden arguments as arg[0] and possibly arg[1] (static method).
1341   //
1342   // The first hidden argument arg[0] is a pointer to the JNI environment.
1343   // It is generated for every call.
1344   // The second argument arg[1] to the JNI call, a hidden argument for
1345   // static methods, is the boxed lock object. For static methods, the lock
1346   // object is the class mirror of the method's holder; the oop is constructed
1347   // here. For instance calls, the lock is performed on the receiver object
1348   // itself, whose pointer is passed as the first visible argument.
1349   //---------------------------------------------------------------------
1350 
1351   // Additionally, on z/Architecture we must convert integers
1352   // to longs in the C signature. We do this in advance in order to have
1353   // no trouble with indexes into the bt-arrays.
1354   // So convert the signature and registers now, and adjust the total number
1355   // of in-arguments accordingly.
1356   bool method_is_static = method->is_static();
1357   int  total_c_args     = total_in_args + (method_is_static ? 2 : 1);
1358 
1359   BasicType *out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
1360   VMRegPair *out_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);
1361   BasicType* in_elem_bt = NULL;
1362 
1363   // Create the signature for the C call:
1364   //   1) add the JNIEnv*
1365   //   2) add the class if the method is static
1366   //   3) copy the rest of the incoming signature (shifted by the number of
1367   //      hidden arguments)
1368 
1369   int argc = 0;
1370   out_sig_bt[argc++] = T_ADDRESS;
1371   if (method->is_static()) {
1372     out_sig_bt[argc++] = T_OBJECT;
1373   }
1374 
1375   for (int i = 0; i < total_in_args; i++) {
1376     out_sig_bt[argc++] = in_sig_bt[i];
1377   }
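       // Illustrative example: for a static native method taking a single int,
       // out_sig_bt becomes { T_ADDRESS /* JNIEnv* */, T_OBJECT /* class mirror */,
       // T_INT }; the return type is not part of this array.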
1378 
1379   ///////////////////////////////////////////////////////////////////////
1380   // Now figure out where the args must be stored and how much stack space
1381   // they require (neglecting out_preserve_stack_slots but providing space
1382   // for storing the first five register arguments).
1383   // It's weird, see int_stk_helper.
1384   ///////////////////////////////////////////////////////////////////////
1385 
1386   //---------------------------------------------------------------------
1387   // Compute framesize for the wrapper.
1388   //
1389   // - We need to handlize all oops passed in registers.
1390   // - We must create space for them here that is disjoint from the save area.
1391   // - We always just allocate 5 words for storing down these objects.
1392   //   This allows us to simply record the base and use the Ireg number to
1393   //   decide which slot to use.
1394   // - Note that the reg number used to index the stack slot is the inbound
1395   //   number, not the outbound number.
1396   // - We must shuffle args to match the native convention,
1397   //   and to include var-args space.
1398   //---------------------------------------------------------------------
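       // Illustrative reading of the scheme above: an oop that arrives in the
       // i-th integer argument register is spilled to a slot in the oop handle
       // area selected by that inbound register number i, and the address of
       // that slot is what gets passed on to the native code (see the
       // handlizing of the class mirror below for the same pattern).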
1399 
1400   //---------------------------------------------------------------------
1401   // Calculate the total number of stack slots we will need:
1402   // - 1) abi requirements
1403   // - 2) outgoing args
1404   // - 3) space for inbound oop handle area
1405   // - 4) space for handlizing a klass if static method
1406   // - 5) space for a lock if synchronized method
1407   // - 6) workspace (save rtn value, int<->float reg moves, ...)
1408   // - 7) filler slots for alignment
1409   //---------------------------------------------------------------------
1410   // Here is what the space we have allocated will look like.
1411   // If USE_RESIZE_FRAME is defined, we do not create a new stack frame
1412   // but just extend the one we got with our own data area.
1413   //
1414   // If an offset or pointer name points to a separator line, it is
1415   // assumed that addressing with offset 0 selects storage starting
1416   // at the first byte above the separator line.
1417   //
1418   //
1419   //     ...                   ...
1420   //      | caller's frame      |
1421   // FP-> |---------------------|
1422   //      | filler slots, if any|
1423   //     7| #slots == mult of 2 |
1424   //      |---------------------|
1425   //      | work space          |
1426   //     6| 2 slots = 8 bytes   |
1427   //      |---------------------|
1428   //     5| lock box (if sync)  |
1429   //      |---------------------| <- lock_slot_offset
1430   //     4| klass (if static)   |
1431   //      |---------------------| <- klass_slot_offset
1432   //     3| oopHandle area      |
1433   //      |                     |
1434   //      |                     |
1435   //      |---------------------| <- oop_handle_offset
1436   //     2| outbound memory     |
1437   //     ...                   ...
1438   //      | based arguments     |
1439   //      |---------------------|
1440   //      | vararg              |
1441   //     ...                   ...
1442   //      | area                |
1443   //      |---------------------| <- out_arg_slot_offset
1444   //     1| out_preserved_slots |
1445   //     ...                   ...
1446   //      | (z_abi spec)        |
1447   // SP-> |---------------------| <- FP_slot_offset (back chain)
1448   //     ...                   ...
1449   //
1450   //---------------------------------------------------------------------
1451 
1452   // *_slot_offset indicates offset from SP in #stack slots
1453   // *_offset      indicates offset from SP in #bytes
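       // Illustrative example: lock_offset (in bytes) will be computed below as
       // lock_slot_offset * VMRegImpl::stack_slot_size.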
1454 
1455   int stack_slots = c_calling_convention(out_sig_bt, out_regs, /*regs2=*/NULL, total_c_args) + // 1+2
1456                     SharedRuntime::out_preserve_stack_slots(); // see c_calling_convention
1457 
1458   // Now the space for the inbound oop handle area.
1459   int total_save_slots = RegisterImpl::number_of_arg_registers * VMRegImpl::slots_per_word;
1460 
1461   int oop_handle_slot_offset = stack_slots;
1462   stack_slots += total_save_slots;                                        // 3)
1463 
1464   int klass_slot_offset = 0;
1465   int klass_offset      = -1;
1466   if (method_is_static) {                                                 // 4)
1467     klass_slot_offset  = stack_slots;
1468     klass_offset       = klass_slot_offset * VMRegImpl::stack_slot_size;
1469     stack_slots       += VMRegImpl::slots_per_word;
1470   }
1471 
1472   int lock_slot_offset = 0;
1473   int lock_offset      = -1;
1474   if (method->is_synchronized()) {                                        // 5)
1475     lock_slot_offset   = stack_slots;
1476     lock_offset        = lock_slot_offset * VMRegImpl::stack_slot_size;
1477     stack_slots       += VMRegImpl::slots_per_word;
1478   }
1479 
1480   int workspace_slot_offset = stack_slots;                                // 6)
1481   stack_slots         += 2;
1482 
1483   // Now compute actual number of stack words we need.
1484   // Round to align stack properly.
1485   stack_slots = align_up(stack_slots,                                     // 7)
1486                          frame::alignment_in_bytes / VMRegImpl::stack_slot_size);
1487   int frame_size_in_bytes = stack_slots * VMRegImpl::stack_slot_size;
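       // Illustrative example: with 4-byte stack slots and 8-byte frame alignment,
       // align_up rounds stack_slots to a multiple of 2, which corresponds to the
       // "#slots == mult of 2" filler (item 7) in the layout above.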
1488 
1489 
1490   ///////////////////////////////////////////////////////////////////////
1491   // Now we can start generating code
1492   ///////////////////////////////////////////////////////////////////////
1493 
1494   unsigned int wrapper_CodeStart  = __ offset();
1495   unsigned int wrapper_UEPStart;
1496   unsigned int wrapper_VEPStart;
1497   unsigned int wrapper_FrameDone;
1498   unsigned int wrapper_CRegsSet;
1499   Label     handle_pending_exception;
1500   Label     ic_miss;
1501 
1502   //---------------------------------------------------------------------
1503   // Unverified entry point (UEP)
1504   //---------------------------------------------------------------------
1505   wrapper_UEPStart = __ offset();
1506 
1507   // check ic: object class <-> cached class
1508   if (!method_is_static) __ nmethod_UEP(ic_miss);
1509   // Fill with nops (alignment of verified entry point).
1510   __ align(CodeEntryAlignment);
1511 
1512   //---------------------------------------------------------------------
1513   // Verified entry point (VEP)
1514   //---------------------------------------------------------------------
1515   wrapper_VEPStart = __ offset();
1516 
1517   if (VM_Version::supports_fast_class_init_checks() && method->needs_clinit_barrier()) {
1518     Label L_skip_barrier;
1519     Register klass = Z_R1_scratch;
1520     // Notify OOP recorder (don't need the relocation)
1521     AddressLiteral md = __ constant_metadata_address(method->method_holder());
1522     __ load_const_optimized(klass, md.value());
1523     __ clinit_barrier(klass, Z_thread, &L_skip_barrier /*L_fast_path*/);
1524 
1525     __ load_const_optimized(klass, SharedRuntime::get_handle_wrong_method_stub());
1526     __ z_br(klass);
1527 
1528     __ bind(L_skip_barrier);
1529   }
1530 
1531   __ save_return_pc();
1532   __ generate_stack_overflow_check(frame_size_in_bytes);  // Check before creating frame.
1533 #ifndef USE_RESIZE_FRAME
1534   __ push_frame(frame_size_in_bytes);                     // Create a new frame for the wrapper.
1535 #else
1536   __ resize_frame(-frame_size_in_bytes, Z_R0_scratch);    // No new frame for the wrapper.
1537                                                           // Just resize the existing one.
1538 #endif
1539 
1540   wrapper_FrameDone = __ offset();
1541 
1542   __ verify_thread();
1543 
1544   // Native nmethod wrappers never take possession of the oop arguments.
1545   // So the caller will gc the arguments.
1546   // The only thing we need an oopMap for is if the call is static.
1547   //
1548   // An OopMap for lock (and class if static), and one for the VM call itself
1549   OopMapSet  *oop_maps        = new OopMapSet();
1550   OopMap     *map             = new OopMap(stack_slots * 2, 0 /* arg_slots */);
1551 
1552   //////////////////////////////////////////////////////////////////////
1553   //
1554   // The Grand Shuffle
1555   //
1556   //////////////////////////////////////////////////////////////////////
1557   //
1558   // We immediately shuffle the arguments so that for any vm call we have
1559   // to make from here on out (sync slow path, jvmti, etc.) we will have
1560   // captured the oops from our caller and have a valid oopMap for them.
1561   //
1562   //--------------------------------------------------------------------
1563   // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv*
1564   // (derived from JavaThread* which is in Z_thread) and, if static,
1565   // the class mirror instead of a receiver. This pretty much guarantees that
1566   // register layout will not match. We ignore these extra arguments during
1567   // the shuffle. The shuffle is described by the two calling convention
1568   // vectors we have in our possession. We simply walk the java vector to
1569   // get the source locations and the c vector to get the destinations.
1570   //
1571   // This is a trick. We double the stack slots so we can claim
1572   // the oops in the caller's frame. Since we are sure to have
1573   // more args than the caller, doubling is enough to make
1574   // sure we can capture all the incoming oop args from the caller.
1575   //--------------------------------------------------------------------
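       // (This doubling is why the OopMap above was created with stack_slots * 2.)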
1576 
1577   // Record sp-based slot for receiver on stack for non-static methods.
1578   int receiver_offset = -1;
1579 
1580   //--------------------------------------------------------------------
1581   // We move the arguments backwards because a floating point argument's
1582   // destination will always be a register with a greater or equal
1583   // register number, or a stack slot.
1584   //   jix is the index of the incoming Java arguments.
1585   //   cix is the index of the outgoing C arguments.
1586   //--------------------------------------------------------------------
1587 
1588 #ifdef ASSERT
1589   bool reg_destroyed[RegisterImpl::number_of_registers];
1590   bool freg_destroyed[FloatRegisterImpl::number_of_registers];
1591   for (int r = 0; r < RegisterImpl::number_of_registers; r++) {
1592     reg_destroyed[r] = false;
1593   }
1594   for (int f = 0; f < FloatRegisterImpl::number_of_registers; f++) {
1595     freg_destroyed[f] = false;
1596   }
1597 #endif // ASSERT
1598 
1599   for (int jix = total_in_args - 1, cix = total_c_args - 1; jix >= 0; jix--, cix--) {
1600 #ifdef ASSERT
1601     if (in_regs[jix].first()->is_Register()) {
1602       assert(!reg_destroyed[in_regs[jix].first()->as_Register()->encoding()], "ack!");
1603     } else {
1604       if (in_regs[jix].first()->is_FloatRegister()) {
1605         assert(!freg_destroyed[in_regs[jix].first()->as_FloatRegister()->encoding()], "ack!");
1606       }
1607     }
1608     if (out_regs[cix].first()->is_Register()) {
1609       reg_destroyed[out_regs[cix].first()->as_Register()->encoding()] = true;
1610     } else {
1611       if (out_regs[cix].first()->is_FloatRegister()) {
1612         freg_destroyed[out_regs[cix].first()->as_FloatRegister()->encoding()] = true;
1613       }
1614     }
1615 #endif // ASSERT
1616 
1617     switch (in_sig_bt[jix]) {
1618       // Due to casting, small integers should only occur in pairs with type T_LONG.
1619       case T_BOOLEAN:
1620       case T_CHAR:
1621       case T_BYTE:
1622       case T_SHORT:
1623       case T_INT:
1624         // Move int and do sign extension.
1625         move32_64(masm, in_regs[jix], out_regs[cix], stack_slots);
1626         break;
1627 
1628       case T_LONG :
1629         long_move(masm, in_regs[jix], out_regs[cix], stack_slots);
1630         break;
1631 
1632       case T_ARRAY:
1633       case T_OBJECT:
1634         object_move(masm, map, oop_handle_slot_offset, stack_slots, in_regs[jix], out_regs[cix],
1635                     ((jix == 0) && (!method_is_static)),
1636                     &receiver_offset);
1637         break;
1638       case T_VOID:
1639         break;
1640 
1641       case T_FLOAT:
1642         float_move(masm, in_regs[jix], out_regs[cix], stack_slots, workspace_slot_offset);
1643         break;
1644 
1645       case T_DOUBLE:
1646         assert(jix+1 <  total_in_args && in_sig_bt[jix+1]  == T_VOID && out_sig_bt[cix+1] == T_VOID, "bad arg list");
1647         double_move(masm, in_regs[jix], out_regs[cix], stack_slots, workspace_slot_offset);
1648         break;
1649 
1650       case T_ADDRESS:
1651         assert(false, "found T_ADDRESS in java args");
1652         break;
1653 
1654       default:
1655         ShouldNotReachHere();
1656     }
1657   }
1658 
1659   //--------------------------------------------------------------------
1660   // Pre-load a static method's oop into ARG2.
1661   // Used both by locking code and the normal JNI call code.
1662   //--------------------------------------------------------------------
1663   if (method_is_static) {
1664     __ set_oop_constant(JNIHandles::make_local(method->method_holder()->java_mirror()), Z_ARG2);
1665 
1666     // Now handlize the static class mirror in ARG2. It's known not-null.
1667     __ z_stg(Z_ARG2, klass_offset, Z_SP);
1668     map->set_oop(VMRegImpl::stack2reg(klass_slot_offset));
1669     __ add2reg(Z_ARG2, klass_offset, Z_SP);
1670   }
1671 
1672   // Get JNIEnv* which is first argument to native.
1673   __ add2reg(Z_ARG1, in_bytes(JavaThread::jni_environment_offset()), Z_thread);
1674 
1675   //////////////////////////////////////////////////////////////////////
1676   // We have all of the arguments setup at this point.
1677   // We MUST NOT touch any outgoing regs from this point on.
1678   // So if we must call out we must push a new frame.
1679   //////////////////////////////////////////////////////////////////////
1680 
1681 
1682   // Calc the current pc into Z_R10 and into wrapper_CRegsSet.
1683   // Both values represent the same position.
1684   __ get_PC(Z_R10);                // PC into register
1685     wrapper_CRegsSet = __ offset();  // and into the variable.
1686 
1687   // Z_R10 now has the pc loaded that we will use when we finally call to native.
1688 
1689   // We use the same pc/oopMap repeatedly when we call out.
1690   oop_maps->add_gc_map((int)(wrapper_CRegsSet-wrapper_CodeStart), map);
1691 
1692   // Lock a synchronized method.
1693 
1694   if (method->is_synchronized()) {
1695 
1696     // ATTENTION: args and Z_R10 must be preserved.
1697     Register r_oop  = Z_R11;
1698     Register r_box  = Z_R12;
1699     Register r_tmp1 = Z_R13;
1700     Register r_tmp2 = Z_R7;
1701     Label done;
1702 
1703     // Load the oop for the object or class. R_carg2_classorobject contains
1704     // either the handlized oop from the incoming arguments or the handlized
1705     // class mirror (if the method is static).
1706     __ z_lg(r_oop, 0, Z_ARG2);
1707 
1708     lock_offset = (lock_slot_offset * VMRegImpl::stack_slot_size);
1709     // Get the lock box slot's address.
1710     __ add2reg(r_box, lock_offset, Z_SP);
1711 
1712     // Try fastpath for locking.
1713     // Fast_lock kills r_tmp1, r_tmp2. (Don't use R1 as temp, won't work!)
1714     __ compiler_fast_lock_object(r_oop, r_box, r_tmp1, r_tmp2);
1715     __ z_bre(done);
1716 
1717     //-------------------------------------------------------------------------
1718     // None of the above fast optimizations worked so we have to get into the
1719     // slow case of monitor enter. Inline a special case of call_VM that
1720     // disallows any pending_exception.
1721     //-------------------------------------------------------------------------
1722 
1723     Register oldSP = Z_R11;
1724 
1725     __ z_lgr(oldSP, Z_SP);
1726 
1727     RegisterSaver::save_live_registers(masm, RegisterSaver::arg_registers);
1728 
1729     // Prepare arguments for call.
1730     __ z_lg(Z_ARG1, 0, Z_ARG2); // Unboxed class mirror or unboxed object.
1731     __ add2reg(Z_ARG2, lock_offset, oldSP);
1732     __ z_lgr(Z_ARG3, Z_thread);
1733 
1734     __ set_last_Java_frame(oldSP, Z_R10 /* gc map pc */);
1735 
1736     // Do the call.
1737     __ load_const_optimized(Z_R1_scratch, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C));
1738     __ call(Z_R1_scratch);
1739 
1740     __ reset_last_Java_frame();
1741 
1742     RegisterSaver::restore_live_registers(masm, RegisterSaver::arg_registers);
1743 #ifdef ASSERT
1744     { Label L;
1745       __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset()));
1746       __ z_bre(L);
1747       __ stop("no pending exception allowed on exit from IR::monitorenter");
1748       __ bind(L);
1749     }
1750 #endif
1751     __ bind(done);
1752   } // lock for synchronized methods
1753 
1754 
1755   //////////////////////////////////////////////////////////////////////
1756   // Finally just about ready to make the JNI call.
1757   //////////////////////////////////////////////////////////////////////
1758 
1759   // Use that pc we placed in Z_R10 a while back as the current frame anchor.
1760   __ set_last_Java_frame(Z_SP, Z_R10);
1761 
1762   // Transition from _thread_in_Java to _thread_in_native.
1763   __ set_thread_state(_thread_in_native);
1764 
1765   //////////////////////////////////////////////////////////////////////
1766   // This is the JNI call.
1767   //////////////////////////////////////////////////////////////////////
1768 
1769   __ call_c(native_func);
1770 
1771 
1772   //////////////////////////////////////////////////////////////////////
1773   // We have survived the call once we reach here.
1774   //////////////////////////////////////////////////////////////////////
1775 
1776 
1777   //--------------------------------------------------------------------
1778   // Unpack native results.
1779   //--------------------------------------------------------------------
1780   // For int-types, we do any needed sign-extension required.
1781   // Care must be taken that the return value (in Z_ARG1 = Z_RET = Z_R2
1782   // or in Z_FARG0 = Z_FRET = Z_F0) will survive any VM calls for
1783   // blocking or unlocking.
1784   // An OOP result (handle) is done specially in the slow-path code.
1785   //--------------------------------------------------------------------
1786   switch (ret_type) {
1787     case T_VOID:    break;         // Nothing to do!
1788     case T_FLOAT:   break;         // Got it where we want it (unless slow-path)
1789     case T_DOUBLE:  break;         // Got it where we want it (unless slow-path)
1790     case T_LONG:    break;         // Got it where we want it (unless slow-path)
1791     case T_OBJECT:  break;         // Really a handle.
1792                                    // Cannot de-handlize until after reclaiming jvm_lock.
1793     case T_ARRAY:   break;
1794 
1795     case T_BOOLEAN:                // 0 -> false(0); !0 -> true(1)
1796       __ z_lngfr(Z_RET, Z_RET);    // Force sign bit on except for zero.
1797       __ z_srlg(Z_RET, Z_RET, 63); // Shift sign bit into least significant pos.
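           // Illustrative example: Z_RET == 5 -> LNGFR forces the sign bit on,
           // SRLG by 63 then leaves 1; Z_RET == 0 -> both steps leave 0.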
1798       break;
1799     case T_BYTE:    __ z_lgbr(Z_RET, Z_RET);  break; // sign extension
1800     case T_CHAR:    __ z_llghr(Z_RET, Z_RET); break; // unsigned result
1801     case T_SHORT:   __ z_lghr(Z_RET, Z_RET);  break; // sign extension
1802     case T_INT:     __ z_lgfr(Z_RET, Z_RET);  break; // sign-extend for beauty.
1803 
1804     default:
1805       ShouldNotReachHere();
1806       break;
1807   }
1808 
1809   Label after_transition;
1810 
1811   // Switch thread to "native transition" state before reading the synchronization state.
1812   // This additional state is necessary because reading and testing the synchronization
1813   // state is not atomic w.r.t. GC, as this scenario demonstrates:
1814   //   - Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
1815   //   - VM thread changes sync state to synchronizing and suspends threads for GC.
1816   //   - Thread A is resumed to finish this native method, but doesn't block here since it
1817   //     didn't see any synchronization in progress, and escapes.
1818 
1819   // Transition from _thread_in_native to _thread_in_native_trans.
1820   __ set_thread_state(_thread_in_native_trans);
1821 
1822   // Safepoint synchronization
1823   //--------------------------------------------------------------------
1824   // Must we block?
1825   //--------------------------------------------------------------------
1826   // Block, if necessary, before resuming in _thread_in_Java state.
1827   // In order for GC to work, don't clear the last_Java_sp until after blocking.
1828   //--------------------------------------------------------------------
1829   {
1830     Label no_block, sync;
1831 
1832     save_native_result(masm, ret_type, workspace_slot_offset); // Make Z_R2 available as work reg.
1833 
1834     // Force this write out before the read below.
1835     __ z_fence();
1836 
1837     __ safepoint_poll(sync, Z_R1);
1838 
1839     __ load_and_test_int(Z_R0, Address(Z_thread, JavaThread::suspend_flags_offset()));
1840     __ z_bre(no_block);
1841 
1842     // Block. Save any potential method result value before the operation and
1843     // use a leaf call to leave the last_Java_frame setup undisturbed. Doing this
1844     // lets us share the oopMap we used when we went native rather than create
1845     // a distinct one for this pc.
1846     //
1847     __ bind(sync);
1848     __ z_acquire();
1849 
1850     address entry_point = CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans);
1851 
1852     __ call_VM_leaf(entry_point, Z_thread);
1853 
1854     __ bind(no_block);
1855     restore_native_result(masm, ret_type, workspace_slot_offset);
1856   }
1857 
1858   //--------------------------------------------------------------------
1859   // Thread state is thread_in_native_trans. Any safepoint blocking has
1860   // already happened so we can now change state to _thread_in_Java.
1861   //--------------------------------------------------------------------
1862   // Transition from _thread_in_native_trans to _thread_in_Java.
1863   __ set_thread_state(_thread_in_Java);
1864   __ bind(after_transition);
1865 
1866   //--------------------------------------------------------------------
1867   // Reguard any pages if necessary.
1868   // Protect native result from being destroyed.
1869   //--------------------------------------------------------------------
1870 
1871   Label no_reguard;
1872 
1873   __ z_cli(Address(Z_thread, JavaThread::stack_guard_state_offset() + in_ByteSize(sizeof(StackOverflow::StackGuardState) - 1)),
1874            StackOverflow::stack_guard_yellow_reserved_disabled);
1875 
1876   __ z_bre(no_reguard);
1877 
1878   save_native_result(masm, ret_type, workspace_slot_offset);
1879   __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), Z_method);
1880   restore_native_result(masm, ret_type, workspace_slot_offset);
1881 
1882   __ bind(no_reguard);
1883 
1884 
1885   // Synchronized methods (slow path only)
1886   // No pending exceptions for now.
1887   //--------------------------------------------------------------------
1888   // Handle possibly pending exception (will unlock if necessary).
1889   // Native result is, if any is live, in Z_FRES or Z_RES.
1890   //--------------------------------------------------------------------
1891   // Unlock
1892   //--------------------------------------------------------------------
1893   if (method->is_synchronized()) {
1894     const Register r_oop        = Z_R11;
1895     const Register r_box        = Z_R12;
1896     const Register r_tmp1       = Z_R13;
1897     const Register r_tmp2       = Z_R7;
1898     Label done;
1899 
1900     // Get unboxed oop of class mirror or object ...
1901     int   offset = method_is_static ? klass_offset : receiver_offset;
1902 
1903     assert(offset != -1, "");
1904     __ z_lg(r_oop, offset, Z_SP);
1905 
1906     // ... and address of lock object box.
1907     __ add2reg(r_box, lock_offset, Z_SP);
1908 
1909     // Try fastpath for unlocking.
1910     __ compiler_fast_unlock_object(r_oop, r_box, r_tmp1, r_tmp2); // Don't use R1 as temp.
1911     __ z_bre(done);
1912 
1913     // Slow path for unlocking.
1914     // Save and restore any potential method result value around the unlocking operation.
1915     const Register R_exc = Z_R11;
1916 
1917     save_native_result(masm, ret_type, workspace_slot_offset);
1918 
1919     // Must save pending exception around the slow-path VM call. Since it's a
1920     // leaf call, the pending exception (if any) can be kept in a register.
1921     __ z_lg(R_exc, Address(Z_thread, Thread::pending_exception_offset()));
1922     assert(R_exc->is_nonvolatile(), "exception register must be non-volatile");
1923 
1924     // Must clear pending-exception before re-entering the VM. Since this is
1925     // a leaf call, pending-exception-oop can be safely kept in a register.
1926     __ clear_mem(Address(Z_thread, Thread::pending_exception_offset()), sizeof(intptr_t));
1927 
1928     // Inline a special case of call_VM that disallows any pending_exception.
1929 
1930     // Get locked oop from the handle we passed to jni.
1931     __ z_lg(Z_ARG1, offset, Z_SP);
1932     __ add2reg(Z_ARG2, lock_offset, Z_SP);
1933     __ z_lgr(Z_ARG3, Z_thread);
1934 
1935     __ load_const_optimized(Z_R1_scratch, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C));
1936 
1937     __ call(Z_R1_scratch);
1938 
1939 #ifdef ASSERT
1940     {
1941       Label L;
1942       __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset()));
1943       __ z_bre(L);
1944       __ stop("no pending exception allowed on exit from IR::monitorexit");
1945       __ bind(L);
1946     }
1947 #endif
1948 
1949     // Check_forward_pending_exception jump to forward_exception if any pending
1950     // exception is set. The forward_exception routine expects to see the
1951     // exception in pending_exception and not in a register. Kind of clumsy,
1952     // since all folks who branch to forward_exception must have tested
1953     // pending_exception first and hence have it in a register already.
1954     __ z_stg(R_exc, Address(Z_thread, Thread::pending_exception_offset()));
1955     restore_native_result(masm, ret_type, workspace_slot_offset);
1956     __ z_bru(done);
1957     __ z_illtrap(0x66);
1958 
1959     __ bind(done);
1960   }
1961 
1962 
1963   //--------------------------------------------------------------------
1964   // Clear "last Java frame" SP and PC.
1965   //--------------------------------------------------------------------
1966   __ verify_thread(); // Z_thread must be correct.
1967 
1968   __ reset_last_Java_frame();
1969 
1970   // Unpack oop result, e.g. JNIHandles::resolve result.
1971   if (is_reference_type(ret_type)) {
1972     __ resolve_jobject(Z_RET, /* tmp1 */ Z_R13, /* tmp2 */ Z_R7);
1973   }
1974 
1975   if (CheckJNICalls) {
1976     // clear_pending_jni_exception_check
1977     __ clear_mem(Address(Z_thread, JavaThread::pending_jni_exception_check_fn_offset()), sizeof(oop));
1978   }
1979 
1980   // Reset handle block.
1981   __ z_lg(Z_R1_scratch, Address(Z_thread, JavaThread::active_handles_offset()));
1982   __ clear_mem(Address(Z_R1_scratch, JNIHandleBlock::top_offset_in_bytes()), 4);
1983 
1984   // Check for pending exceptions.
1985   __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset()));
1986   __ z_brne(handle_pending_exception);
1987 
1988 
1989   //////////////////////////////////////////////////////////////////////
1990   // Return
1991   //////////////////////////////////////////////////////////////////////
1992 
1993 
1994 #ifndef USE_RESIZE_FRAME
1995   __ pop_frame();                     // Pop wrapper frame.
1996 #else
1997   __ resize_frame(frame_size_in_bytes, Z_R0_scratch);  // Revert stack extension.
1998 #endif
1999   __ restore_return_pc();             // This is the way back to the caller.
2000   __ z_br(Z_R14);
2001 
2002 
2003   //////////////////////////////////////////////////////////////////////
2004   // Out-of-line calls to the runtime.
2005   //////////////////////////////////////////////////////////////////////
2006 
2007 
2008   //---------------------------------------------------------------------
2009   // Handler for pending exceptions (out-of-line).
2010   //---------------------------------------------------------------------
2011   // Since this is a native call, we know the proper exception handler
2012   // is the empty function. We just pop this frame and then jump to
2013   // forward_exception_entry. Z_R14 will contain the native caller's
2014   // return PC.
2015   __ bind(handle_pending_exception);
2016   __ pop_frame();
2017   __ load_const_optimized(Z_R1_scratch, StubRoutines::forward_exception_entry());
2018   __ restore_return_pc();
2019   __ z_br(Z_R1_scratch);
2020 
2021   //---------------------------------------------------------------------
2022   // Handler for a cache miss (out-of-line)
2023   //---------------------------------------------------------------------
2024   __ call_ic_miss_handler(ic_miss, 0x77, 0, Z_R1_scratch);
2025   __ flush();
2026 
2027 
2028   //////////////////////////////////////////////////////////////////////
2029   // end of code generation
2030   //////////////////////////////////////////////////////////////////////
2031 
2032 
2033   nmethod *nm = nmethod::new_native_nmethod(method,
2034                                             compile_id,
2035                                             masm->code(),
2036                                             (int)(wrapper_VEPStart-wrapper_CodeStart),
2037                                             (int)(wrapper_FrameDone-wrapper_CodeStart),
2038                                             stack_slots / VMRegImpl::slots_per_word,
2039                                             (method_is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
2040                                             in_ByteSize(lock_offset),
2041                                             oop_maps);
2042 
2043   return nm;
2044 }
2045 
2046 static address gen_c2i_adapter(MacroAssembler  *masm,
2047                                int total_args_passed,
2048                                int comp_args_on_stack,
2049                                const BasicType *sig_bt,
2050                                const VMRegPair *regs,
2051                                Label &skip_fixup) {
2052   // Before we get into the guts of the C2I adapter, see if we should be here
2053   // at all. We've come from compiled code and are attempting to jump to the
2054   // interpreter, which means the caller made a static call to get here
2055   // (vcalls always get a compiled target if there is one). Check for a
2056   // compiled target. If there is one, we need to patch the caller's call.
2057 
2058   // These two defs MUST MATCH code in gen_i2c2i_adapter!
2059   const Register ientry = Z_R11;
2060   const Register code   = Z_R11;
2061 
2062   address c2i_entrypoint;
2063   Label   patch_callsite;
2064 
2065   // Regular (verified) c2i entry point.
2066   c2i_entrypoint = __ pc();
2067 
2068   // Call patching needed?
2069   __ load_and_test_long(Z_R0_scratch, method_(code));
2070   __ z_lg(ientry, method_(interpreter_entry));  // Preload interpreter entry (also if patching).
2071   __ z_brne(patch_callsite);                    // Patch required if code != NULL (compiled target exists).
2072 
2073   __ bind(skip_fixup);  // Return point from patch_callsite.
2074 
2075   // Since all args are passed on the stack, total_args_passed*wordSize is the
2076   // space we need. We need an ABI scratch area, but we use the caller's since
2077   // it has already been allocated.
2078 
2079   const int abi_scratch = frame::z_top_ijava_frame_abi_size;
2080   int       extraspace  = align_up(total_args_passed, 2)*wordSize + abi_scratch;
2081   Register  sender_SP   = Z_R10;
2082   Register  value       = Z_R12;
2083 
2084   // Remember the senderSP so we can pop the interpreter arguments off of the stack.
2085   // In addition, frame manager expects initial_caller_sp in Z_R10.
2086   __ z_lgr(sender_SP, Z_SP);
2087 
2088   // This should always fit in a 14-bit immediate.
2089   __ resize_frame(-extraspace, Z_R0_scratch);
2090 
2091   // We use the caller's ABI scratch area (out_preserved_stack_slots) for the initial
2092   // args. This essentially moves the caller's ABI scratch area from the top to the
2093   // bottom of the arg area.
2094 
2095   int st_off =  extraspace - wordSize;
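       // st_off starts at the top word of the new arg area and is decremented
       // by one word per interpreter stack slot as the loop below stores the
       // args top-down.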
2096 
2097   // Now write the args into the outgoing interpreter space.
2098   for (int i = 0; i < total_args_passed; i++) {
2099     VMReg r_1 = regs[i].first();
2100     VMReg r_2 = regs[i].second();
2101     if (!r_1->is_valid()) {
2102       assert(!r_2->is_valid(), "");
2103       continue;
2104     }
2105     if (r_1->is_stack()) {
2106       // The calling convention produces OptoRegs that ignore the preserve area (abi scratch).
2107       // We must account for it here.
2108       int ld_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
2109 
2110       if (!r_2->is_valid()) {
2111         __ z_mvc(Address(Z_SP, st_off), Address(sender_SP, ld_off), sizeof(void*));
2112       } else {
2113         // longs are given 2 64-bit slots in the interpreter,
2114         // but the data is passed in only 1 slot.
2115         if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
2116 #ifdef ASSERT
2117           __ clear_mem(Address(Z_SP, st_off), sizeof(void *));
2118 #endif
2119           st_off -= wordSize;
2120         }
2121         __ z_mvc(Address(Z_SP, st_off), Address(sender_SP, ld_off), sizeof(void*));
2122       }
2123     } else {
2124       if (r_1->is_Register()) {
2125         if (!r_2->is_valid()) {
2126           __ z_st(r_1->as_Register(), st_off, Z_SP);
2127         } else {
2128           // longs are given 2 64-bit slots in the interpreter, but the
2129           // data is passed in only 1 slot.
2130           if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
2131 #ifdef ASSERT
2132             __ clear_mem(Address(Z_SP, st_off), sizeof(void *));
2133 #endif
2134             st_off -= wordSize;
2135           }
2136           __ z_stg(r_1->as_Register(), st_off, Z_SP);
2137         }
2138       } else {
2139         assert(r_1->is_FloatRegister(), "");
2140         if (!r_2->is_valid()) {
2141           __ z_ste(r_1->as_FloatRegister(), st_off, Z_SP);
2142         } else {
2143           // In 64bit, doubles are given 2 64-bit slots in the interpreter, but the
2144           // data is passed in only 1 slot.
2145           // One of these should get known junk...
2146 #ifdef ASSERT
2147           __ z_lzdr(Z_F1);
2148           __ z_std(Z_F1, st_off, Z_SP);
2149 #endif
2150           st_off-=wordSize;
2151           __ z_std(r_1->as_FloatRegister(), st_off, Z_SP);
2152         }
2153       }
2154     }
2155     st_off -= wordSize;
2156   }
2157 
2158 
2159   // Jump to the interpreter just as if interpreter was doing it.
2160   __ add2reg(Z_esp, st_off, Z_SP);
2161 
2162   // Frame_manager expects initial_caller_sp (= SP without resize by c2i) in Z_R10.
2163   __ z_br(ientry);
2164 
2165 
2166   // Prevent illegal entry to out-of-line code.
2167   __ z_illtrap(0x22);
2168 
2169   // Generate out-of-line runtime call to patch caller,
2170   // then continue as interpreted.
2171 
2172   // IF you lose the race you go interpreted.
2173   // We don't see any possibility of endless c2i -> i2c -> c2i ...
2174   // transitions, no matter how rare.
2175   __ bind(patch_callsite);
2176 
2177   RegisterSaver::save_live_registers(masm, RegisterSaver::arg_registers);
2178   __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), Z_method, Z_R14);
2179   RegisterSaver::restore_live_registers(masm, RegisterSaver::arg_registers);
2180   __ z_bru(skip_fixup);
2181 
2182   // end of out-of-line code
2183 
2184   return c2i_entrypoint;
2185 }
2186 
2187 // On entry, the following registers are set
2188 //
2189 //    Z_thread  r8  - JavaThread*
2190 //    Z_method  r9  - callee's method (method to be invoked)
2191 //    Z_esp     r7  - operand (or expression) stack pointer of caller. one slot above last arg.
2192 //    Z_SP      r15 - SP prepared by call stub such that caller's outgoing args are near top
2193 //
2194 void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
2195                                     int total_args_passed,
2196                                     int comp_args_on_stack,
2197                                     const BasicType *sig_bt,
2198                                     const VMRegPair *regs) {
2199   const Register value = Z_R12;
2200   const Register ld_ptr= Z_esp;
2201 
2202   int ld_offset = total_args_passed * wordSize;
2203 
2204   // Cut-out for having no stack args.
2205   if (comp_args_on_stack) {
2206     // Sig words on the stack are greater than VMRegImpl::stack0. Those in
2207     // registers are below. By subtracting stack0, we either get a negative
2208     // number (all values in registers) or the maximum stack slot accessed.
2209     // Convert VMRegImpl (4 byte) stack slots to words.
2210     int comp_words_on_stack = align_up(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord;
2211     // Round up to minimum stack alignment, in wordSize.
2212     comp_words_on_stack = align_up(comp_words_on_stack, 2);
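         // Illustrative example: comp_args_on_stack == 3 (4-byte slots, 12 bytes)
         // gives 2 words after rounding up to wordSize, and stays 2 after the
         // 2-word alignment.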
2213 
2214     __ resize_frame(-comp_words_on_stack*wordSize, Z_R0_scratch);
2215   }
2216 
2217   // Now generate the shuffle code. Pick up all register args and move the
2218   // rest through register value=Z_R12.
2219   for (int i = 0; i < total_args_passed; i++) {
2220     if (sig_bt[i] == T_VOID) {
2221       assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
2222       continue;
2223     }
2224 
2225     // Pick up 0, 1 or 2 words from ld_ptr.
2226     assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
2227            "scrambled load targets?");
2228     VMReg r_1 = regs[i].first();
2229     VMReg r_2 = regs[i].second();
2230     if (!r_1->is_valid()) {
2231       assert(!r_2->is_valid(), "");
2232       continue;
2233     }
2234     if (r_1->is_FloatRegister()) {
2235       if (!r_2->is_valid()) {
2236         __ z_le(r_1->as_FloatRegister(), ld_offset, ld_ptr);
2237         ld_offset-=wordSize;
2238       } else {
2239         // Skip the unused interpreter slot.
2240         __ z_ld(r_1->as_FloatRegister(), ld_offset - wordSize, ld_ptr);
2241         ld_offset -= 2 * wordSize;
2242       }
2243     } else {
2244       if (r_1->is_stack()) {
2245         // Must do a memory to memory move.
2246         int st_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
2247 
2248         if (!r_2->is_valid()) {
2249           __ z_mvc(Address(Z_SP, st_off), Address(ld_ptr, ld_offset), sizeof(void*));
2250         } else {
2251           // In 64bit, longs are given 2 64-bit slots in the interpreter, but the
2252           // data is passed in only 1 slot.
2253           if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
2254             ld_offset -= wordSize;
2255           }
2256           __ z_mvc(Address(Z_SP, st_off), Address(ld_ptr, ld_offset), sizeof(void*));
2257         }
2258       } else {
2259         if (!r_2->is_valid()) {
2260           // Not sure we need to do this but it shouldn't hurt.
2261           if (is_reference_type(sig_bt[i]) || sig_bt[i] == T_ADDRESS) {
2262             __ z_lg(r_1->as_Register(), ld_offset, ld_ptr);
2263           } else {
2264             __ z_l(r_1->as_Register(), ld_offset, ld_ptr);
2265           }
2266         } else {
2267           // In 64bit, longs are given 2 64-bit slots in the interpreter, but the
2268           // data is passed in only 1 slot.
2269           if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
2270             ld_offset -= wordSize;
2271           }
2272           __ z_lg(r_1->as_Register(), ld_offset, ld_ptr);
2273         }
2274       }
2275       ld_offset -= wordSize;
2276     }
2277   }
2278 
2279   // Jump to the compiled code just as if compiled code was doing it.
2280   // load target address from method:
2281   __ z_lg(Z_R1_scratch, Address(Z_method, Method::from_compiled_offset()));
2282 
2283   // Store method into thread->callee_target.
2284   // 6243940: We might end up in handle_wrong_method if
2285   // the callee is deoptimized as we race thru here. If that
2286   // happens we don't want to take a safepoint because the
2287   // caller frame will look interpreted and arguments are now
2288   // "compiled" so it is much better to make this transition
2289   // invisible to the stack walking code. Unfortunately, if
2290   // we try and find the callee by normal means a safepoint
2291   // is possible. So we stash the desired callee in the thread
2292   // and the vm will find it there should this case occur.
2293   __ z_stg(Z_method, thread_(callee_target));
2294 
2295   __ z_br(Z_R1_scratch);
2296 }
2297 
2298 AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
2299                                                             int total_args_passed,
2300                                                             int comp_args_on_stack,
2301                                                             const BasicType *sig_bt,
2302                                                             const VMRegPair *regs,
2303                                                             AdapterFingerPrint* fingerprint) {
2304   __ align(CodeEntryAlignment);
2305   address i2c_entry = __ pc();
2306   gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);
2307 
2308   address c2i_unverified_entry;
2309 
2310   Label skip_fixup;
2311   {
2312     Label ic_miss;
2313     const int klass_offset           = oopDesc::klass_offset_in_bytes();
2314     const int holder_klass_offset    = CompiledICHolder::holder_klass_offset();
2315     const int holder_metadata_offset = CompiledICHolder::holder_metadata_offset();
2316 
2317     // Out-of-line call to ic_miss handler.
2318     __ call_ic_miss_handler(ic_miss, 0x11, 0, Z_R1_scratch);
2319 
2320     // Unverified Entry Point UEP
2321     __ align(CodeEntryAlignment);
2322     c2i_unverified_entry = __ pc();
2323 
2324     // Check the pointers.
2325     if (!ImplicitNullChecks || MacroAssembler::needs_explicit_null_check(klass_offset)) {
2326       __ z_ltgr(Z_ARG1, Z_ARG1);
2327       __ z_bre(ic_miss);
2328     }
2329     __ verify_oop(Z_ARG1, FILE_AND_LINE);
2330 
2331     // Check ic: object class <-> cached class
2332     // Compress cached class for comparison. That's more efficient.
2333     if (UseCompressedClassPointers) {
2334       __ z_lg(Z_R11, holder_klass_offset, Z_method);             // Z_R11 is overwritten a few instructions down anyway.
2335       __ compare_klass_ptr(Z_R11, klass_offset, Z_ARG1, false); // Cached class can't be zero.
2336     } else {
2337       __ z_clc(klass_offset, sizeof(void *)-1, Z_ARG1, holder_klass_offset, Z_method);
2338     }
2339     __ z_brne(ic_miss);  // Cache miss: call runtime to handle this.
2340 
2341     // This def MUST MATCH code in gen_c2i_adapter!
2342     const Register code = Z_R11;
2343 
2344     __ z_lg(Z_method, holder_metadata_offset, Z_method);
2345     __ load_and_test_long(Z_R0, method_(code));
2346     __ z_brne(ic_miss);  // Cache miss: call runtime to handle this.
2347 
2348     // Fallthru to VEP. Duplicate LTG, but saves a taken branch.
2349   }
2350 
2351   address c2i_entry = __ pc();
2352 
2353   // Class initialization barrier for static methods
2354   address c2i_no_clinit_check_entry = NULL;
2355   if (VM_Version::supports_fast_class_init_checks()) {
2356     Label L_skip_barrier;
2357 
2358     { // Bypass the barrier for non-static methods
2359       __ testbit(Address(Z_method, Method::access_flags_offset()), JVM_ACC_STATIC_BIT);
2360       __ z_bfalse(L_skip_barrier); // non-static
2361     }
2362 
2363     Register klass = Z_R11;
2364     __ load_method_holder(klass, Z_method);
2365     __ clinit_barrier(klass, Z_thread, &L_skip_barrier /*L_fast_path*/);
2366 
2367     __ load_const_optimized(klass, SharedRuntime::get_handle_wrong_method_stub());
2368     __ z_br(klass);
2369 
2370     __ bind(L_skip_barrier);
2371     c2i_no_clinit_check_entry = __ pc();
2372   }
2373 
2374   gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);
2375 
2376   return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry, c2i_no_clinit_check_entry);
2377 }
2378 
2379 // This function returns the adjustment size (in number of words) to a c2i adapter
2380 // activation for use during deoptimization.
2381 //
2382 // Actually only compiled frames need to be adjusted, but it
2383 // does no harm to adjust entry and interpreter frames, too.
2384 //
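     // A minimal illustrative sketch of the arithmetic (assuming
     // Interpreter::stackElementWords == 1): a callee with 2 parameters and
     // 5 locals yields 3 + frame::z_parent_ijava_frame_abi_size / BytesPerWord
     // words of adjustment.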
2385 int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
2386   assert(callee_locals >= callee_parameters,
2387           "test and remove; got more parms than locals");
2388   // Handle the abi adjustment here instead of doing it in push_skeleton_frames.
2389   return (callee_locals - callee_parameters) * Interpreter::stackElementWords +
2390          frame::z_parent_ijava_frame_abi_size / BytesPerWord;
2391 }
2392 
2393 uint SharedRuntime::in_preserve_stack_slots() {
2394   return frame::jit_in_preserve_size_in_4_byte_units;
2395 }
2396 
2397 uint SharedRuntime::out_preserve_stack_slots() {
2398   return frame::z_jit_out_preserve_size/VMRegImpl::stack_slot_size;
2399 }
2400 
2401 //
2402 // Frame generation for deopt and uncommon trap blobs.
2403 //
2404 static void push_skeleton_frame(MacroAssembler* masm,
2405                           /* Unchanged */
2406                           Register frame_sizes_reg,
2407                           Register pcs_reg,
2408                           /* Invalidate */
2409                           Register frame_size_reg,
2410                           Register pc_reg) {
2411   BLOCK_COMMENT("  push_skeleton_frame {");
2412    __ z_lg(pc_reg, 0, pcs_reg);
2413    __ z_lg(frame_size_reg, 0, frame_sizes_reg);
2414    __ z_stg(pc_reg, _z_abi(return_pc), Z_SP);
2415    Register fp = pc_reg;
2416    __ push_frame(frame_size_reg, fp);
2417 #ifdef ASSERT
2418    // The magic is required for successfully walking skeletal frames.
2419    __ load_const_optimized(frame_size_reg/*tmp*/, frame::z_istate_magic_number);
2420    __ z_stg(frame_size_reg, _z_ijava_state_neg(magic), fp);
2421    // Fill other slots that are supposedly not necessary with eye catchers.
2422    __ load_const_optimized(frame_size_reg/*use as tmp*/, 0xdeadbad1);
2423    __ z_stg(frame_size_reg, _z_ijava_state_neg(top_frame_sp), fp);
2424    // The sender_sp of the bottom frame is set before pushing it.
2425    // The sender_sp of non-bottom frames is their caller's top_frame_sp, which
2426    // is unknown here. Luckily it is not needed before the frame is filled in
2427    // by layout_activation(); we assert this by setting an eye catcher (see
2428    // comments on sender_sp in frame_s390.hpp).
2429    __ z_stg(frame_size_reg, _z_ijava_state_neg(sender_sp), Z_SP);
2430 #endif // ASSERT
2431   BLOCK_COMMENT("  } push_skeleton_frame");
2432 }
2433 
2434 // Loop through the UnrollBlock info and create new frames.
2435 static void push_skeleton_frames(MacroAssembler* masm, bool deopt,
2436                             /* read */
2437                             Register unroll_block_reg,
2438                             /* invalidate */
2439                             Register frame_sizes_reg,
2440                             Register number_of_frames_reg,
2441                             Register pcs_reg,
2442                             Register tmp1,
2443                             Register tmp2) {
2444   BLOCK_COMMENT("push_skeleton_frames {");
2445   // _number_of_frames is of type int (deoptimization.hpp).
2446   __ z_lgf(number_of_frames_reg,
2447            Address(unroll_block_reg, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()));
2448   __ z_lg(pcs_reg,
2449           Address(unroll_block_reg, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
2450   __ z_lg(frame_sizes_reg,
2451           Address(unroll_block_reg, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()));
2452 
2453   // stack: (caller_of_deoptee, ...).
2454 
2455   // If caller_of_deoptee is a compiled frame, then we extend it to make
2456   // room for the callee's locals and the frame::z_parent_ijava_frame_abi.
2457   // See also Deoptimization::last_frame_adjust() above.
2458   // Note: entry and interpreted frames are adjusted, too, but this does no harm.
2459 
2460   __ z_lgf(Z_R1_scratch,
2461            Address(unroll_block_reg, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()));
2462   __ z_lgr(tmp1, Z_SP);  // Save the sender sp before extending the frame.
2463   __ resize_frame_sub(Z_R1_scratch, tmp2/*tmp*/);
2464   // The oldest skeletal frame requires a valid sender_sp to make it walkable
2465   // (it is required to find the original pc of caller_of_deoptee if it is marked
2466   // for deoptimization - see nmethod::orig_pc_addr()).
2467   __ z_stg(tmp1, _z_ijava_state_neg(sender_sp), Z_SP);
2468 
2469   // Now push the new interpreter frames.
2470   Label loop, loop_entry;
2471 
2472   // Make sure that there is at least one entry in the array.
2473   DEBUG_ONLY(__ z_ltgr(number_of_frames_reg, number_of_frames_reg));
2474   __ asm_assert_ne("array_size must be > 0", 0x205);
2475 
2476   __ z_bru(loop_entry);
2477 
2478   __ bind(loop);
2479 
2480   __ add2reg(frame_sizes_reg, wordSize);
2481   __ add2reg(pcs_reg, wordSize);
2482 
2483   __ bind(loop_entry);
2484 
2485   // Allocate a new frame, fill in the pc.
2486   push_skeleton_frame(masm, frame_sizes_reg, pcs_reg, tmp1, tmp2);
2487 
2488   __ z_aghi(number_of_frames_reg, -1);  // Emit AGHI, because it sets the condition code
2489   __ z_brne(loop);
2490 
2491   // Set the top frame's return pc.
2492   __ add2reg(pcs_reg, wordSize);
2493   __ z_lg(Z_R0_scratch, 0, pcs_reg);
2494   __ z_stg(Z_R0_scratch, _z_abi(return_pc), Z_SP);
2495   BLOCK_COMMENT("} push_skeleton_frames");
2496 }
2497 
2498 //------------------------------generate_deopt_blob----------------------------
2499 void SharedRuntime::generate_deopt_blob() {
2500   // Allocate space for the code.
2501   ResourceMark rm;
2502   // Setup code generation tools.
2503   CodeBuffer buffer("deopt_blob", 2048, 1024);
2504   InterpreterMacroAssembler* masm = new InterpreterMacroAssembler(&buffer);
2505   Label exec_mode_initialized;
2506   OopMap* map = NULL;
2507   OopMapSet *oop_maps = new OopMapSet();
2508 
2509   unsigned int start_off = __ offset();
2510   Label cont;
2511 
2512   // --------------------------------------------------------------------------
2513   // Normal entry (non-exception case)
2514   //
2515   // We have been called from the deopt handler of the deoptee.
2516   // Z_R14 points behind the call in the deopt handler. We adjust
2517   // it such that it points to the start of the deopt handler.
2518   // The return_pc has been stored in the frame of the deoptee and
2519   // will replace the address of the deopt_handler in the call
2520   // to Deoptimization::fetch_unroll_info below.
2521   // The (int) cast is necessary, because -((unsigned int)14)
2522   // is an unsigned int.
2523   __ add2reg(Z_R14, -(int)NativeCall::max_instruction_size());
2524 
2525   const Register   exec_mode_reg = Z_tmp_1;
2526 
2527   // stack: (deoptee, caller of deoptee, ...)
2528 
2529   // Push an "unpack" frame.
2530   // R14 contains the return address pointing into the deoptimized
2531   // nmethod that was valid just before the nmethod was deoptimized.
2532   // Save R14 into the deoptee frame. The `fetch_unroll_info'
2533   // procedure called below will read it from there.
2534   map = RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers);
2535 
2536   // note the entry point.
2537   __ load_const_optimized(exec_mode_reg, Deoptimization::Unpack_deopt);
2538   __ z_bru(exec_mode_initialized);
2539 
2540 #ifndef COMPILER1
2541   int reexecute_offset = 1; // odd offset will produce odd pc, which triggers a hardware trap
2542 #else
2543   // --------------------------------------------------------------------------
2544   // Reexecute entry
2545   // - Z_R14 = Deopt Handler in nmethod
2546 
2547   int reexecute_offset = __ offset() - start_off;
2548 
2549   // No need to update map as each call to save_live_registers will produce an identical oopmap.
2550   (void) RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers);
2551 
2552   __ load_const_optimized(exec_mode_reg, Deoptimization::Unpack_reexecute);
2553   __ z_bru(exec_mode_initialized);
2554 #endif
2555 
2556 
2557   // --------------------------------------------------------------------------
2558   // Exception entry. We reached here via a branch. Registers on entry:
2559   // - Z_EXC_OOP (Z_ARG1) = exception oop
2560   // - Z_EXC_PC  (Z_ARG2) = the exception pc.
2561 
2562   int exception_offset = __ offset() - start_off;
2563 
2564   // All registers are dead at this entry point, except for Z_EXC_OOP and
2565   // Z_EXC_PC, which contain the exception oop and exception pc,
2566   // respectively. Set them in TLS and fall thru to the
2567   // unpack_with_exception_in_tls entry point.
2568 
2569   // Store exception oop and pc in thread (location known to GC).
2570   // Need this since the call to "fetch_unroll_info()" may safepoint.
2571   __ z_stg(Z_EXC_OOP, Address(Z_thread, JavaThread::exception_oop_offset()));
2572   __ z_stg(Z_EXC_PC,  Address(Z_thread, JavaThread::exception_pc_offset()));
2573 
2574   // fall through
2575 
2576   int exception_in_tls_offset = __ offset() - start_off;
2577 
  // New implementation: the exception oop is now passed in JavaThread.
2579 
2580   // Prolog for exception case
2581   // All registers must be preserved because they might be used by LinearScan
  // Exception oop and throwing PC are passed in JavaThread.
2583 
  // Load the throwing pc from JavaThread and use it as the return address of the current frame.
2585   __ z_lg(Z_R1_scratch, Address(Z_thread, JavaThread::exception_pc_offset()));
2586 
2587   // Save everything in sight.
2588   (void) RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers, Z_R1_scratch);
2589 
2590   // Now it is safe to overwrite any register
2591 
2592   // Clear the exception pc field in JavaThread
2593   __ clear_mem(Address(Z_thread, JavaThread::exception_pc_offset()), 8);
2594 
2595   // Deopt during an exception.  Save exec mode for unpack_frames.
2596   __ load_const_optimized(exec_mode_reg, Deoptimization::Unpack_exception);
2597 
2598 
2599 #ifdef ASSERT
2600   // verify that there is really an exception oop in JavaThread
2601   __ z_lg(Z_ARG1, Address(Z_thread, JavaThread::exception_oop_offset()));
2602   __ MacroAssembler::verify_oop(Z_ARG1, FILE_AND_LINE);
2603 
2604   // verify that there is no pending exception
2605   __ asm_assert_mem8_is_zero(in_bytes(Thread::pending_exception_offset()), Z_thread,
2606                              "must not have pending exception here", __LINE__);
2607 #endif
2608 
2609   // --------------------------------------------------------------------------
2610   // At this point, the live registers are saved and
2611   // the exec_mode_reg has been set up correctly.
2612   __ bind(exec_mode_initialized);
2613 
2614   // stack: ("unpack" frame, deoptee, caller_of_deoptee, ...).
2615 
2616   {
2617   const Register unroll_block_reg  = Z_tmp_2;
2618 
  // We need to set `last_Java_frame' because `fetch_unroll_info' will
  // call `last_Java_frame()'. However, we can't block, and no GC will
  // occur, so we don't need an oopmap. The value of the pc in the
  // frame is not particularly important; it just needs to identify the blob.
2623 
2624   // Don't set last_Java_pc anymore here (is implicitly NULL then).
2625   // the correct PC is retrieved in pd_last_frame() in that case.
2626   __ set_last_Java_frame(/*sp*/Z_SP, noreg);
  // With EscapeAnalysis turned on, this call may safepoint,
  // even though it is marked as a "leaf call"!
2629   __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), Z_thread, exec_mode_reg);
  // Set an oopmap for the call site. This describes all our saved volatile registers.
2631   int offs = __ offset();
2632   oop_maps->add_gc_map(offs, map);
2633 
2634   __ reset_last_Java_frame();
2635   // save the return value.
2636   __ z_lgr(unroll_block_reg, Z_RET);
2637   // restore the return registers that have been saved
2638   // (among other registers) by save_live_registers(...).
2639   RegisterSaver::restore_result_registers(masm);
2640 
2641   // reload the exec mode from the UnrollBlock (it might have changed)
2642   __ z_llgf(exec_mode_reg, Address(unroll_block_reg, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));
2643 
2644   // In excp_deopt_mode, restore and clear exception oop which we
2645   // stored in the thread during exception entry above. The exception
2646   // oop will be the return value of this stub.
2647   NearLabel skip_restore_excp;
2648   __ compare64_and_branch(exec_mode_reg, Deoptimization::Unpack_exception, Assembler::bcondNotEqual, skip_restore_excp);
2649   __ z_lg(Z_RET, thread_(exception_oop));
2650   __ clear_mem(thread_(exception_oop), 8);
2651   __ bind(skip_restore_excp);
2652 
2653   // remove the "unpack" frame
2654   __ pop_frame();
2655 
2656   // stack: (deoptee, caller of deoptee, ...).
2657 
2658   // pop the deoptee's frame
2659   __ pop_frame();
2660 
2661   // stack: (caller_of_deoptee, ...).
2662 
2663   // loop through the `UnrollBlock' info and create interpreter frames.
2664   push_skeleton_frames(masm, true/*deopt*/,
2665                   unroll_block_reg,
2666                   Z_tmp_3,
2667                   Z_tmp_4,
2668                   Z_ARG5,
2669                   Z_ARG4,
2670                   Z_ARG3);
2671 
2672   // stack: (skeletal interpreter frame, ..., optional skeletal
2673   // interpreter frame, caller of deoptee, ...).
2674   }
2675 
2676   // push an "unpack" frame taking care of float / int return values.
2677   __ push_frame(RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers));
2678 
2679   // stack: (unpack frame, skeletal interpreter frame, ..., optional
2680   // skeletal interpreter frame, caller of deoptee, ...).
2681 
2682   // spill live volatile registers since we'll do a call.
2683   __ z_stg(Z_RET, offset_of(frame::z_abi_160_spill, spill[0]), Z_SP);
2684   __ z_std(Z_FRET, offset_of(frame::z_abi_160_spill, spill[1]), Z_SP);
2685 
  // Let the unpacker lay out information in the skeletal frames just allocated.
2687   __ get_PC(Z_RET);
2688   __ set_last_Java_frame(/*sp*/Z_SP, /*pc*/Z_RET);
2689   __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames),
2690                   Z_thread/*thread*/, exec_mode_reg/*exec_mode*/);
2691 
2692   __ reset_last_Java_frame();
2693 
2694   // restore the volatiles saved above.
2695   __ z_lg(Z_RET, offset_of(frame::z_abi_160_spill, spill[0]), Z_SP);
2696   __ z_ld(Z_FRET, offset_of(frame::z_abi_160_spill, spill[1]), Z_SP);
2697 
2698   // pop the "unpack" frame.
2699   __ pop_frame();
2700   __ restore_return_pc();
2701 
2702   // stack: (top interpreter frame, ..., optional interpreter frame,
2703   // caller of deoptee, ...).
2704 
2705   __ z_lg(Z_fp, _z_abi(callers_sp), Z_SP); // restore frame pointer
2706   __ restore_bcp();
2707   __ restore_locals();
2708   __ restore_esp();
2709 
2710   // return to the interpreter entry point.
2711   __ z_br(Z_R14);
2712 
2713   // Make sure all code is generated
2714   masm->flush();
2715 
2716   _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers)/wordSize);
2717   _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
2718 }
2719 
2720 
2721 #ifdef COMPILER2
2722 //------------------------------generate_uncommon_trap_blob--------------------
2723 void SharedRuntime::generate_uncommon_trap_blob() {
2724   // Allocate space for the code
2725   ResourceMark rm;
2726   // Setup code generation tools
2727   CodeBuffer buffer("uncommon_trap_blob", 2048, 1024);
2728   InterpreterMacroAssembler* masm = new InterpreterMacroAssembler(&buffer);
2729 
2730   Register unroll_block_reg = Z_tmp_1;
2731   Register klass_index_reg  = Z_ARG2;
2732   Register unc_trap_reg     = Z_ARG2;
2733 
2734   // stack: (deoptee, caller_of_deoptee, ...).
2735 
2736   // push a dummy "unpack" frame and call
2737   // `Deoptimization::uncommon_trap' to pack the compiled frame into a
2738   // vframe array and return the `UnrollBlock' information.
2739 
2740   // save R14 to compiled frame.
2741   __ save_return_pc();
2742   // push the "unpack_frame".
2743   __ push_frame_abi160(0);
2744 
2745   // stack: (unpack frame, deoptee, caller_of_deoptee, ...).
2746 
2747   // set the "unpack" frame as last_Java_frame.
2748   // `Deoptimization::uncommon_trap' expects it and considers its
2749   // sender frame as the deoptee frame.
2750   __ get_PC(Z_R1_scratch);
2751   __ set_last_Java_frame(/*sp*/Z_SP, /*pc*/Z_R1_scratch);
2752 
2753   __ z_lgr(klass_index_reg, Z_ARG1);  // passed implicitly as ARG2
2754   __ z_lghi(Z_ARG3, Deoptimization::Unpack_uncommon_trap);  // passed implicitly as ARG3
2755   BLOCK_COMMENT("call Deoptimization::uncommon_trap()");
2756   __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap), Z_thread);
2757 
2758   __ reset_last_Java_frame();
2759 
2760   // pop the "unpack" frame
2761   __ pop_frame();
2762 
2763   // stack: (deoptee, caller_of_deoptee, ...).
2764 
2765   // save the return value.
2766   __ z_lgr(unroll_block_reg, Z_RET);
2767 
2768   // pop the deoptee frame.
2769   __ pop_frame();
2770 
2771   // stack: (caller_of_deoptee, ...).
2772 
2773 #ifdef ASSERT
2774   assert(Immediate::is_uimm8(Deoptimization::Unpack_LIMIT), "Code not fit for larger immediates");
2775   assert(Immediate::is_uimm8(Deoptimization::Unpack_uncommon_trap), "Code not fit for larger immediates");
2776   const int unpack_kind_byte_offset = Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()
2777 #ifndef VM_LITTLE_ENDIAN
2778   + 3
2779 #endif
2780   ;
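  // CLI/CLIY below compare a single byte. unpack_kind is a 4-byte int, so
  // on big-endian machines its least significant byte is at offset + 3.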
2781   if (Displacement::is_shortDisp(unpack_kind_byte_offset)) {
2782     __ z_cli(unpack_kind_byte_offset, unroll_block_reg, Deoptimization::Unpack_uncommon_trap);
2783   } else {
2784     __ z_cliy(unpack_kind_byte_offset, unroll_block_reg, Deoptimization::Unpack_uncommon_trap);
2785   }
  __ asm_assert_eq("SharedRuntime::generate_uncommon_trap_blob: expected Unpack_uncommon_trap", 0);
2787 #endif
2788 
2789   __ zap_from_to(Z_SP, Z_SP, Z_R0_scratch, Z_R1, 500, -1);
2790 
  // Allocate new interpreter frame(s) and possibly resize the caller's frame
  // (no more adapters!).
2793   push_skeleton_frames(masm, false/*deopt*/,
2794                   unroll_block_reg,
2795                   Z_tmp_2,
2796                   Z_tmp_3,
2797                   Z_tmp_4,
2798                   Z_ARG5,
2799                   Z_ARG4);
2800 
2801   // stack: (skeletal interpreter frame, ..., optional skeletal
2802   // interpreter frame, (resized) caller of deoptee, ...).
2803 
  // Push a dummy "unpack" frame taking care of float return values and
  // call `Deoptimization::unpack_frames' to lay out information in the
  // interpreter frames just created.
2807 
2808   // push the "unpack" frame
  const unsigned int framesize_in_bytes = __ push_frame_abi160(0);
2810 
2811   // stack: (unpack frame, skeletal interpreter frame, ..., optional
2812   // skeletal interpreter frame, (resized) caller of deoptee, ...).
2813 
2814   // set the "unpack" frame as last_Java_frame
2815   __ get_PC(Z_R1_scratch);
2816   __ set_last_Java_frame(/*sp*/Z_SP, /*pc*/Z_R1_scratch);
2817 
2818   // indicate it is the uncommon trap case
2819   BLOCK_COMMENT("call Deoptimization::Unpack_uncommon_trap()");
2820   __ load_const_optimized(unc_trap_reg, Deoptimization::Unpack_uncommon_trap);
  // Let the unpacker lay out information in the skeletal frames just allocated.
2822   __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), Z_thread);
2823 
2824   __ reset_last_Java_frame();
2825   // pop the "unpack" frame
2826   __ pop_frame();
2827   // restore LR from top interpreter frame
2828   __ restore_return_pc();
2829 
2830   // stack: (top interpreter frame, ..., optional interpreter frame,
2831   // (resized) caller of deoptee, ...).
2832 
2833   __ z_lg(Z_fp, _z_abi(callers_sp), Z_SP); // restore frame pointer
2834   __ restore_bcp();
2835   __ restore_locals();
2836   __ restore_esp();
2837 
2838   // return to the interpreter entry point
2839   __ z_br(Z_R14);
2840 
2841   masm->flush();
2842   _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, NULL, framesize_in_bytes/wordSize);
2843 }
2844 #endif // COMPILER2
2845 
2846 
2847 //------------------------------generate_handler_blob------
2848 //
// Generate a special Compile2Runtime blob that saves all registers
// and sets up an oopmap.
2851 SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) {
2852   assert(StubRoutines::forward_exception_entry() != NULL,
2853          "must be generated before");
2854 
2855   ResourceMark rm;
2856   OopMapSet *oop_maps = new OopMapSet();
2857   OopMap* map;
2858 
2859   // Allocate space for the code. Setup code generation tools.
2860   CodeBuffer buffer("handler_blob", 2048, 1024);
2861   MacroAssembler* masm = new MacroAssembler(&buffer);
2862 
2863   unsigned int start_off = __ offset();
2864   address call_pc = NULL;
2865   int frame_size_in_bytes;
2866 
2867   bool cause_return = (poll_type == POLL_AT_RETURN);
2868   // Make room for return address (or push it again)
2869   if (!cause_return) {
2870     __ z_lg(Z_R14, Address(Z_thread, JavaThread::saved_exception_pc_offset()));
2871   }
2872 
2873   // Save registers, fpu state, and flags
2874   map = RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers);
2875 
2876   if (!cause_return) {
2877     // Keep a copy of the return pc to detect if it gets modified.
2878     __ z_lgr(Z_R6, Z_R14);
2879   }
2880 
  // The following is basically a call_VM. However, we need the precise
  // address of the call in order to generate an oopmap. Hence, we do all the
  // work ourselves.
2884   __ set_last_Java_frame(Z_SP, noreg);
2885 
2886   // call into the runtime to handle the safepoint poll
2887   __ call_VM_leaf(call_ptr, Z_thread);
2888 
2889 
2890   // Set an oopmap for the call site. This oopmap will map all
2891   // oop-registers and debug-info registers as callee-saved. This
2892   // will allow deoptimization at this safepoint to find all possible
2893   // debug-info recordings, as well as let GC find all oops.
2894 
2895   oop_maps->add_gc_map((int)(__ offset()-start_off), map);
2896 
2897   Label noException;
2898 
2899   __ reset_last_Java_frame();
2900 
2901   __ load_and_test_long(Z_R1, thread_(pending_exception));
2902   __ z_bre(noException);
2903 
2904   // Pending exception case, used (sporadically) by
2905   // api/java_lang/Thread.State/index#ThreadState et al.
2906   RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers);
2907 
2908   // Jump to forward_exception_entry, with the issuing PC in Z_R14
2909   // so it looks like the original nmethod called forward_exception_entry.
2910   __ load_const_optimized(Z_R1_scratch, StubRoutines::forward_exception_entry());
2911   __ z_br(Z_R1_scratch);
2912 
2913   // No exception case
2914   __ bind(noException);
2915 
2916   if (!cause_return) {
2917     Label no_adjust;
    // If our stashed return pc was modified by the runtime, we avoid touching it.
2919     const int offset_of_return_pc = _z_abi16(return_pc) + RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers);
2920     __ z_cg(Z_R6, offset_of_return_pc, Z_SP);
2921     __ z_brne(no_adjust);
2922 
2923     // Adjust return pc forward to step over the safepoint poll instruction
2924     __ instr_size(Z_R1_scratch, Z_R6);
2925     __ z_agr(Z_R6, Z_R1_scratch);
2926     __ z_stg(Z_R6, offset_of_return_pc, Z_SP);
2927 
2928     __ bind(no_adjust);
2929   }
2930 
2931   // Normal exit, restore registers and exit.
2932   RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers);
2933 
2934   __ z_br(Z_R14);
2935 
2936   // Make sure all code is generated
2937   masm->flush();
2938 
2939   // Fill-out other meta info
2940   return SafepointBlob::create(&buffer, oop_maps, RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers)/wordSize);
2941 }
2942 
2943 
2944 //
// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss)
2946 //
2947 // Generate a stub that calls into vm to find out the proper destination
2948 // of a Java call. All the argument registers are live at this point
2949 // but since this is generic code we don't know what they are and the caller
2950 // must do any gc of the args.
2951 //
2952 RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) {
2953   assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before");
2954 
2955   // allocate space for the code
2956   ResourceMark rm;
2957 
2958   CodeBuffer buffer(name, 1000, 512);
2959   MacroAssembler* masm                = new MacroAssembler(&buffer);
2960 
2961   OopMapSet *oop_maps = new OopMapSet();
2962   OopMap* map = NULL;
2963 
2964   unsigned int start_off = __ offset();
2965 
2966   map = RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers);
2967 
  // We must save a PC from within the stub as the return PC:
  // C code doesn't store the LR where we expect the PC,
  // so we would run into trouble upon stack walking.
2971   __ get_PC(Z_R1_scratch);
2972 
2973   unsigned int frame_complete = __ offset();
2974 
2975   __ set_last_Java_frame(/*sp*/Z_SP, Z_R1_scratch);
2976 
2977   __ call_VM_leaf(destination, Z_thread, Z_method);
2978 
2979 
2980   // Set an oopmap for the call site.
2981   // We need this not only for callee-saved registers, but also for volatile
2982   // registers that the compiler might be keeping live across a safepoint.
2983 
2984   oop_maps->add_gc_map((int)(frame_complete-start_off), map);
2985 
2986   // clear last_Java_sp
2987   __ reset_last_Java_frame();
2988 
2989   // check for pending exceptions
2990   Label pending;
2991   __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset()));
2992   __ z_brne(pending);
2993 
2994   __ z_lgr(Z_R1_scratch, Z_R2); // r1 is neither saved nor restored, r2 contains the continuation.
2995   RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers);
2996 
2997   // get the returned method
2998   __ get_vm_result_2(Z_method);
2999 
  // We are back to the original state on entry and ready to go.
3001   __ z_br(Z_R1_scratch);
3002 
3003   // Pending exception after the safepoint
3004 
3005   __ bind(pending);
3006 
3007   RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers);
3008 
3009   // exception pending => remove activation and forward to exception handler
3010 
3011   __ z_lgr(Z_R2, Z_R0); // pending_exception
3012   __ clear_mem(Address(Z_thread, JavaThread::vm_result_offset()), sizeof(jlong));
3013   __ load_const_optimized(Z_R1_scratch, StubRoutines::forward_exception_entry());
3014   __ z_br(Z_R1_scratch);
3015 
3016   // -------------
3017   // make sure all code is generated
3018   masm->flush();
3019 
  // Return the blob.
  // The frame size is passed in words (live_reg_frame_size() returns bytes).
3022   return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers)/wordSize,
3023                                        oop_maps, true);
3024 
3025 }
3026 
3027 //------------------------------Montgomery multiplication------------------------
3028 //
3029 
3030 // Subtract 0:b from carry:a. Return carry.
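// Equivalent C sketch (commentary only; assumes 64-bit limbs in
// little-endian limb order, while the inline asm below keeps the running
// borrow in the condition code):
//   unsigned long borrow = 0;
//   for (long i = 0; i < len; i++) {
//     unsigned long bi = b[i] + borrow;                       // may wrap
//     unsigned long new_borrow = (a[i] < bi) || (bi < borrow); // borrow-out
//     a[i] = a[i] - bi;
//     borrow = new_borrow;
//   }
//   return carry - borrow;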
3031 static unsigned long
3032 sub(unsigned long a[], unsigned long b[], unsigned long carry, long len) {
3033   unsigned long i, c = 8 * (unsigned long)(len - 1);
3034   __asm__ __volatile__ (
3035     "SLGR   %[i], %[i]         \n" // initialize to 0 and pre-set carry
3036     "LGHI   0, 8               \n" // index increment (for BRXLG)
3037     "LGR    1, %[c]            \n" // index limit (for BRXLG)
3038     "0:                        \n"
3039     "LG     %[c], 0(%[i],%[a]) \n"
3040     "SLBG   %[c], 0(%[i],%[b]) \n" // subtract with borrow
3041     "STG    %[c], 0(%[i],%[a]) \n"
3042     "BRXLG  %[i], 0, 0b        \n" // while ((i+=8)<limit);
3043     "SLBGR  %[c], %[c]         \n" // save carry - 1
3044     : [i]"=&a"(i), [c]"+r"(c)
3045     : [a]"a"(a), [b]"a"(b)
3046     : "cc", "memory", "r0", "r1"
3047  );
3048   return carry + c;
3049 }
3050 
3051 // Multiply (unsigned) Long A by Long B, accumulating the double-
3052 // length result into the accumulator formed of T0, T1, and T2.
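// In effect (commentary only):
//   (T2:T1:T0) += (unsigned __int128)A[A_ind] * B[B_ind];
// where T0 is the least significant word of the 192-bit accumulator.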
3053 inline void MACC(unsigned long A[], long A_ind,
3054                  unsigned long B[], long B_ind,
3055                  unsigned long &T0, unsigned long &T1, unsigned long &T2) {
3056   long A_si = 8 * A_ind,
3057        B_si = 8 * B_ind;
3058   __asm__ __volatile__ (
3059     "LG     1, 0(%[A_si],%[A]) \n"
3060     "MLG    0, 0(%[B_si],%[B]) \n" // r0r1 = A * B
3061     "ALGR   %[T0], 1           \n"
3062     "LGHI   1, 0               \n" // r1 = 0
3063     "ALCGR  %[T1], 0           \n"
3064     "ALCGR  %[T2], 1           \n"
3065     : [T0]"+r"(T0), [T1]"+r"(T1), [T2]"+r"(T2)
3066     : [A]"r"(A), [A_si]"r"(A_si), [B]"r"(B), [B_si]"r"(B_si)
3067     : "cc", "r0", "r1"
3068  );
3069 }
3070 
3071 // As above, but add twice the double-length result into the
3072 // accumulator.
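// In effect (commentary only):
//   (T2:T1:T0) += 2 * (unsigned __int128)A[A_ind] * B[B_ind];
// implemented as two separate 128-bit additions so that each carry
// propagates into T2.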
3073 inline void MACC2(unsigned long A[], long A_ind,
3074                   unsigned long B[], long B_ind,
3075                   unsigned long &T0, unsigned long &T1, unsigned long &T2) {
3076   const unsigned long zero = 0;
3077   long A_si = 8 * A_ind,
3078        B_si = 8 * B_ind;
3079   __asm__ __volatile__ (
3080     "LG     1, 0(%[A_si],%[A]) \n"
3081     "MLG    0, 0(%[B_si],%[B]) \n" // r0r1 = A * B
3082     "ALGR   %[T0], 1           \n"
3083     "ALCGR  %[T1], 0           \n"
3084     "ALCGR  %[T2], %[zero]     \n"
3085     "ALGR   %[T0], 1           \n"
3086     "ALCGR  %[T1], 0           \n"
3087     "ALCGR  %[T2], %[zero]     \n"
3088     : [T0]"+r"(T0), [T1]"+r"(T1), [T2]"+r"(T2)
3089     : [A]"r"(A), [A_si]"r"(A_si), [B]"r"(B), [B_si]"r"(B_si), [zero]"r"(zero)
3090     : "cc", "r0", "r1"
3091  );
3092 }
3093 
3094 // Fast Montgomery multiplication. The derivation of the algorithm is
3095 // in "A Cryptographic Library for the Motorola DSP56000,
3096 // Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237".
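// Contract (commentary): on exit, m[0..len) holds a value congruent to
// a * b * R^-1 (mod n), where R == 2^(64*len) and inv == -n^-1 mod 2^64;
// all arrays are in little-endian limb order.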
3097 static void
3098 montgomery_multiply(unsigned long a[], unsigned long b[], unsigned long n[],
3099                     unsigned long m[], unsigned long inv, int len) {
3100   unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
3101   int i;
3102 
3103   assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");
3104 
3105   for (i = 0; i < len; i++) {
3106     int j;
3107     for (j = 0; j < i; j++) {
3108       MACC(a, j, b, i-j, t0, t1, t2);
3109       MACC(m, j, n, i-j, t0, t1, t2);
3110     }
3111     MACC(a, i, b, 0, t0, t1, t2);
3112     m[i] = t0 * inv;
3113     MACC(m, i, n, 0, t0, t1, t2);
3114 
3115     assert(t0 == 0, "broken Montgomery multiply");
3116 
3117     t0 = t1; t1 = t2; t2 = 0;
3118   }
3119 
3120   for (i = len; i < 2 * len; i++) {
3121     int j;
3122     for (j = i - len + 1; j < len; j++) {
3123       MACC(a, j, b, i-j, t0, t1, t2);
3124       MACC(m, j, n, i-j, t0, t1, t2);
3125     }
3126     m[i-len] = t0;
3127     t0 = t1; t1 = t2; t2 = 0;
3128   }
3129 
3130   while (t0) {
3131     t0 = sub(m, n, t0, len);
3132   }
3133 }
3134 
3135 // Fast Montgomery squaring. This uses asymptotically 25% fewer
3136 // multiplies so it should be up to 25% faster than Montgomery
3137 // multiplication. However, its loop control is more complex and it
3138 // may actually run slower on some machines.
3139 static void
3140 montgomery_square(unsigned long a[], unsigned long n[],
3141                   unsigned long m[], unsigned long inv, int len) {
3142   unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
3143   int i;
3144 
  assert(inv * n[0] == -1UL, "broken inverse in Montgomery square");
3146 
3147   for (i = 0; i < len; i++) {
3148     int j;
3149     int end = (i+1)/2;
3150     for (j = 0; j < end; j++) {
3151       MACC2(a, j, a, i-j, t0, t1, t2);
3152       MACC(m, j, n, i-j, t0, t1, t2);
3153     }
3154     if ((i & 1) == 0) {
3155       MACC(a, j, a, j, t0, t1, t2);
3156     }
3157     for (; j < i; j++) {
3158       MACC(m, j, n, i-j, t0, t1, t2);
3159     }
3160     m[i] = t0 * inv;
3161     MACC(m, i, n, 0, t0, t1, t2);
3162 
3163     assert(t0 == 0, "broken Montgomery square");
3164 
3165     t0 = t1; t1 = t2; t2 = 0;
3166   }
3167 
3168   for (i = len; i < 2*len; i++) {
3169     int start = i-len+1;
3170     int end = start + (len - start)/2;
3171     int j;
3172     for (j = start; j < end; j++) {
3173       MACC2(a, j, a, i-j, t0, t1, t2);
3174       MACC(m, j, n, i-j, t0, t1, t2);
3175     }
3176     if ((i & 1) == 0) {
3177       MACC(a, j, a, j, t0, t1, t2);
3178     }
3179     for (; j < len; j++) {
3180       MACC(m, j, n, i-j, t0, t1, t2);
3181     }
3182     m[i-len] = t0;
3183     t0 = t1; t1 = t2; t2 = 0;
3184   }
3185 
3186   while (t0) {
3187     t0 = sub(m, n, t0, len);
3188   }
3189 }
3190 
3191 // The threshold at which squaring is advantageous was determined
// experimentally on an i7-3930K (Sandy Bridge-E) CPU @ 3.5GHz.
3193 // Value seems to be ok for other platforms, too.
3194 #define MONTGOMERY_SQUARING_THRESHOLD 64
3195 
// Copy len longwords from s to d in reverse order. On little-endian
// machines the two 32-bit words within each longword would also have
// to be swapped (not implemented, hence the guard below).
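// Example (commentary, len == 2 on big-endian s390):
//   s = { s0, s1 }  ==>  d = { s1, s0 }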
3198 static void reverse_words(unsigned long *s, unsigned long *d, int len) {
3199   d += len;
3200   while(len-- > 0) {
3201     d--;
3202     unsigned long s_val = *s;
3203     // Swap words in a longword on little endian machines.
3204 #ifdef VM_LITTLE_ENDIAN
3205      Unimplemented();
3206 #endif
3207     *d = s_val;
3208     s++;
3209   }
3210 }
3211 
3212 void SharedRuntime::montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints,
3213                                         jint len, jlong inv,
3214                                         jint *m_ints) {
3215   len = len & 0x7fffFFFF; // C2 does not respect int to long conversion for stub calls.
3216   assert(len % 2 == 0, "array length in montgomery_multiply must be even");
3217   int longwords = len/2;
3218 
  // Make very sure we don't use so much space that the stack might
  // overflow. 512 jints corresponds to a 16384-bit integer; with four
  // scratch arrays of 256 longwords each, that uses a total of 8K bytes
  // of stack space here.
3222   int total_allocation = longwords * sizeof (unsigned long) * 4;
3223   guarantee(total_allocation <= 8192, "must be");
3224   unsigned long *scratch = (unsigned long *)alloca(total_allocation);
3225 
3226   // Local scratch arrays
3227   unsigned long
3228     *a = scratch + 0 * longwords,
3229     *b = scratch + 1 * longwords,
3230     *n = scratch + 2 * longwords,
3231     *m = scratch + 3 * longwords;
3232 
3233   reverse_words((unsigned long *)a_ints, a, longwords);
3234   reverse_words((unsigned long *)b_ints, b, longwords);
3235   reverse_words((unsigned long *)n_ints, n, longwords);
3236 
3237   ::montgomery_multiply(a, b, n, m, (unsigned long)inv, longwords);
3238 
3239   reverse_words(m, (unsigned long *)m_ints, longwords);
3240 }
3241 
3242 void SharedRuntime::montgomery_square(jint *a_ints, jint *n_ints,
3243                                       jint len, jlong inv,
3244                                       jint *m_ints) {
3245   len = len & 0x7fffFFFF; // C2 does not respect int to long conversion for stub calls.
3246   assert(len % 2 == 0, "array length in montgomery_square must be even");
3247   int longwords = len/2;
3248 
  // Make very sure we don't use so much space that the stack might
  // overflow. 512 jints corresponds to a 16384-bit integer; with three
  // scratch arrays of 256 longwords each, that uses a total of 6K bytes
  // of stack space here.
3252   int total_allocation = longwords * sizeof (unsigned long) * 3;
3253   guarantee(total_allocation <= 8192, "must be");
3254   unsigned long *scratch = (unsigned long *)alloca(total_allocation);
3255 
3256   // Local scratch arrays
3257   unsigned long
3258     *a = scratch + 0 * longwords,
3259     *n = scratch + 1 * longwords,
3260     *m = scratch + 2 * longwords;
3261 
3262   reverse_words((unsigned long *)a_ints, a, longwords);
3263   reverse_words((unsigned long *)n_ints, n, longwords);
3264 
3265   if (len >= MONTGOMERY_SQUARING_THRESHOLD) {
3266     ::montgomery_square(a, n, m, (unsigned long)inv, longwords);
3267   } else {
3268     ::montgomery_multiply(a, a, n, m, (unsigned long)inv, longwords);
3269   }
3270 
3271   reverse_words(m, (unsigned long *)m_ints, longwords);
3272 }
3273 
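// No spin-pause support is implemented here; returning 0 indicates that
// no pause was performed (commentary, based on the convention used by
// other platforms).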
3274 extern "C"
3275 int SpinPause() {
3276   return 0;
3277 }
3278 
3279 #ifdef COMPILER2
3280 RuntimeStub* SharedRuntime::make_native_invoker(address call_target,
3281                                                 int shadow_space_bytes,
3282                                                 const GrowableArray<VMReg>& input_registers,
3283                                                 const GrowableArray<VMReg>& output_registers) {
3284   Unimplemented();
3285   return nullptr;
3286 }
3287 #endif