1 /*
   2  * Copyright (c) 2016, 2022, Oracle and/or its affiliates. All rights reserved.
   3  * Copyright (c) 2016, 2019 SAP SE. All rights reserved.
   4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5  *
   6  * This code is free software; you can redistribute it and/or modify it
   7  * under the terms of the GNU General Public License version 2 only, as
   8  * published by the Free Software Foundation.
   9  *
  10  * This code is distributed in the hope that it will be useful, but WITHOUT
  11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13  * version 2 for more details (a copy is included in the LICENSE file that
  14  * accompanied this code).
  15  *
  16  * You should have received a copy of the GNU General Public License version
  17  * 2 along with this work; if not, write to the Free Software Foundation,
  18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19  *
  20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21  * or visit www.oracle.com if you need additional information or have any
  22  * questions.
  23  *
  24  */
  25 
  26 #include "precompiled.hpp"
  27 #include "asm/macroAssembler.inline.hpp"
  28 #include "code/debugInfoRec.hpp"
  29 #include "code/icBuffer.hpp"
  30 #include "code/vtableStubs.hpp"
  31 #include "compiler/oopMap.hpp"
  32 #include "gc/shared/gcLocker.hpp"
  33 #include "interpreter/interpreter.hpp"
  34 #include "interpreter/interp_masm.hpp"
  35 #include "memory/resourceArea.hpp"
  36 #include "nativeInst_s390.hpp"
  37 #include "oops/compiledICHolder.hpp"
  38 #include "oops/klass.inline.hpp"
  39 #include "prims/methodHandles.hpp"
  40 #include "registerSaver_s390.hpp"
  41 #include "runtime/jniHandles.hpp"
  42 #include "runtime/safepointMechanism.hpp"
  43 #include "runtime/sharedRuntime.hpp"
  44 #include "runtime/signature.hpp"
  45 #include "runtime/stubRoutines.hpp"
  46 #include "runtime/vframeArray.hpp"
  47 #include "utilities/align.hpp"
  48 #include "utilities/macros.hpp"
  49 #include "vmreg_s390.inline.hpp"
  50 #ifdef COMPILER1
  51 #include "c1/c1_Runtime1.hpp"
  52 #endif
  53 #ifdef COMPILER2
  54 #include "opto/ad.hpp"
  55 #include "opto/runtime.hpp"
  56 #endif
  57 
  58 #ifdef PRODUCT
  59 #define __ masm->
  60 #else
  61 #define __ (Verbose ? (masm->block_comment(FILE_AND_LINE),masm):masm)->
  62 #endif
  63 
  64 #define BLOCK_COMMENT(str) __ block_comment(str)
  65 #define BIND(label)        bind(label); BLOCK_COMMENT(#label ":")
  66 
  67 #define RegisterSaver_LiveIntReg(regname) \
  68   { RegisterSaver::int_reg,   regname->encoding(), regname->as_VMReg() }
  69 
  70 #define RegisterSaver_LiveFloatReg(regname) \
  71   { RegisterSaver::float_reg, regname->encoding(), regname->as_VMReg() }
  72 
  73 // Registers which are not saved/restored, but still they have got a frame slot.
  74 // Used to get same frame size for RegisterSaver_LiveRegs and RegisterSaver_LiveRegsWithoutR2
  75 #define RegisterSaver_ExcludedIntReg(regname) \
  76   { RegisterSaver::excluded_reg, regname->encoding(), regname->as_VMReg() }
  77 
  78 // Registers which are not saved/restored, but still they have got a frame slot.
  79 // Used to get same frame size for RegisterSaver_LiveRegs and RegisterSaver_LiveRegsWithoutR2.
  80 #define RegisterSaver_ExcludedFloatReg(regname) \
  81   { RegisterSaver::excluded_reg, regname->encoding(), regname->as_VMReg() }
  82 
  83 static const RegisterSaver::LiveRegType RegisterSaver_LiveRegs[] = {
  84   // Live registers which get spilled to the stack. Register positions
  85   // in this array correspond directly to the stack layout.
  86   //
  87   // live float registers:
  88   //
  89   RegisterSaver_LiveFloatReg(Z_F0 ),
  90   // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1)
  91   RegisterSaver_LiveFloatReg(Z_F2 ),
  92   RegisterSaver_LiveFloatReg(Z_F3 ),
  93   RegisterSaver_LiveFloatReg(Z_F4 ),
  94   RegisterSaver_LiveFloatReg(Z_F5 ),
  95   RegisterSaver_LiveFloatReg(Z_F6 ),
  96   RegisterSaver_LiveFloatReg(Z_F7 ),
  97   RegisterSaver_LiveFloatReg(Z_F8 ),
  98   RegisterSaver_LiveFloatReg(Z_F9 ),
  99   RegisterSaver_LiveFloatReg(Z_F10),
 100   RegisterSaver_LiveFloatReg(Z_F11),
 101   RegisterSaver_LiveFloatReg(Z_F12),
 102   RegisterSaver_LiveFloatReg(Z_F13),
 103   RegisterSaver_LiveFloatReg(Z_F14),
 104   RegisterSaver_LiveFloatReg(Z_F15),
 105   //
 106   // RegisterSaver_ExcludedIntReg(Z_R0), // scratch
 107   // RegisterSaver_ExcludedIntReg(Z_R1), // scratch
 108   RegisterSaver_LiveIntReg(Z_R2 ),
 109   RegisterSaver_LiveIntReg(Z_R3 ),
 110   RegisterSaver_LiveIntReg(Z_R4 ),
 111   RegisterSaver_LiveIntReg(Z_R5 ),
 112   RegisterSaver_LiveIntReg(Z_R6 ),
 113   RegisterSaver_LiveIntReg(Z_R7 ),
 114   RegisterSaver_LiveIntReg(Z_R8 ),
 115   RegisterSaver_LiveIntReg(Z_R9 ),
 116   RegisterSaver_LiveIntReg(Z_R10),
 117   RegisterSaver_LiveIntReg(Z_R11),
 118   RegisterSaver_LiveIntReg(Z_R12),
 119   RegisterSaver_LiveIntReg(Z_R13),
 120   // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.)
 121   // RegisterSaver_ExcludedIntReg(Z_R15)  // stack pointer
 122 };
 123 
 124 static const RegisterSaver::LiveRegType RegisterSaver_LiveIntRegs[] = {
 125   // Live registers which get spilled to the stack. Register positions
 126   // in this array correspond directly to the stack layout.
 127   //
 128   // live float registers: All excluded, but still they get a stack slot to get same frame size.
 129   //
 130   RegisterSaver_ExcludedFloatReg(Z_F0 ),
 131   // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1)
 132   RegisterSaver_ExcludedFloatReg(Z_F2 ),
 133   RegisterSaver_ExcludedFloatReg(Z_F3 ),
 134   RegisterSaver_ExcludedFloatReg(Z_F4 ),
 135   RegisterSaver_ExcludedFloatReg(Z_F5 ),
 136   RegisterSaver_ExcludedFloatReg(Z_F6 ),
 137   RegisterSaver_ExcludedFloatReg(Z_F7 ),
 138   RegisterSaver_ExcludedFloatReg(Z_F8 ),
 139   RegisterSaver_ExcludedFloatReg(Z_F9 ),
 140   RegisterSaver_ExcludedFloatReg(Z_F10),
 141   RegisterSaver_ExcludedFloatReg(Z_F11),
 142   RegisterSaver_ExcludedFloatReg(Z_F12),
 143   RegisterSaver_ExcludedFloatReg(Z_F13),
 144   RegisterSaver_ExcludedFloatReg(Z_F14),
 145   RegisterSaver_ExcludedFloatReg(Z_F15),
 146   //
 147   // RegisterSaver_ExcludedIntReg(Z_R0), // scratch
 148   // RegisterSaver_ExcludedIntReg(Z_R1), // scratch
 149   RegisterSaver_LiveIntReg(Z_R2 ),
 150   RegisterSaver_LiveIntReg(Z_R3 ),
 151   RegisterSaver_LiveIntReg(Z_R4 ),
 152   RegisterSaver_LiveIntReg(Z_R5 ),
 153   RegisterSaver_LiveIntReg(Z_R6 ),
 154   RegisterSaver_LiveIntReg(Z_R7 ),
 155   RegisterSaver_LiveIntReg(Z_R8 ),
 156   RegisterSaver_LiveIntReg(Z_R9 ),
 157   RegisterSaver_LiveIntReg(Z_R10),
 158   RegisterSaver_LiveIntReg(Z_R11),
 159   RegisterSaver_LiveIntReg(Z_R12),
 160   RegisterSaver_LiveIntReg(Z_R13),
 161   // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.)
 162   // RegisterSaver_ExcludedIntReg(Z_R15)  // stack pointer
 163 };
 164 
 165 static const RegisterSaver::LiveRegType RegisterSaver_LiveRegsWithoutR2[] = {
 166   // Live registers which get spilled to the stack. Register positions
 167   // in this array correspond directly to the stack layout.
 168   //
 169   // live float registers:
 170   //
 171   RegisterSaver_LiveFloatReg(Z_F0 ),
 172   // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1)
 173   RegisterSaver_LiveFloatReg(Z_F2 ),
 174   RegisterSaver_LiveFloatReg(Z_F3 ),
 175   RegisterSaver_LiveFloatReg(Z_F4 ),
 176   RegisterSaver_LiveFloatReg(Z_F5 ),
 177   RegisterSaver_LiveFloatReg(Z_F6 ),
 178   RegisterSaver_LiveFloatReg(Z_F7 ),
 179   RegisterSaver_LiveFloatReg(Z_F8 ),
 180   RegisterSaver_LiveFloatReg(Z_F9 ),
 181   RegisterSaver_LiveFloatReg(Z_F10),
 182   RegisterSaver_LiveFloatReg(Z_F11),
 183   RegisterSaver_LiveFloatReg(Z_F12),
 184   RegisterSaver_LiveFloatReg(Z_F13),
 185   RegisterSaver_LiveFloatReg(Z_F14),
 186   RegisterSaver_LiveFloatReg(Z_F15),
 187   //
 188   // RegisterSaver_ExcludedIntReg(Z_R0), // scratch
 189   // RegisterSaver_ExcludedIntReg(Z_R1), // scratch
 190   RegisterSaver_ExcludedIntReg(Z_R2), // Omit saving R2.
 191   RegisterSaver_LiveIntReg(Z_R3 ),
 192   RegisterSaver_LiveIntReg(Z_R4 ),
 193   RegisterSaver_LiveIntReg(Z_R5 ),
 194   RegisterSaver_LiveIntReg(Z_R6 ),
 195   RegisterSaver_LiveIntReg(Z_R7 ),
 196   RegisterSaver_LiveIntReg(Z_R8 ),
 197   RegisterSaver_LiveIntReg(Z_R9 ),
 198   RegisterSaver_LiveIntReg(Z_R10),
 199   RegisterSaver_LiveIntReg(Z_R11),
 200   RegisterSaver_LiveIntReg(Z_R12),
 201   RegisterSaver_LiveIntReg(Z_R13),
 202   // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.)
 203   // RegisterSaver_ExcludedIntReg(Z_R15)  // stack pointer
 204 };
 205 
 206 // Live argument registers which get spilled to the stack.
 207 static const RegisterSaver::LiveRegType RegisterSaver_LiveArgRegs[] = {
 208   RegisterSaver_LiveFloatReg(Z_FARG1),
 209   RegisterSaver_LiveFloatReg(Z_FARG2),
 210   RegisterSaver_LiveFloatReg(Z_FARG3),
 211   RegisterSaver_LiveFloatReg(Z_FARG4),
 212   RegisterSaver_LiveIntReg(Z_ARG1),
 213   RegisterSaver_LiveIntReg(Z_ARG2),
 214   RegisterSaver_LiveIntReg(Z_ARG3),
 215   RegisterSaver_LiveIntReg(Z_ARG4),
 216   RegisterSaver_LiveIntReg(Z_ARG5)
 217 };
 218 
 219 static const RegisterSaver::LiveRegType RegisterSaver_LiveVolatileRegs[] = {
 220   // Live registers which get spilled to the stack. Register positions
 221   // in this array correspond directly to the stack layout.
 222   //
 223   // live float registers:
 224   //
 225   RegisterSaver_LiveFloatReg(Z_F0 ),
 226   // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1)
 227   RegisterSaver_LiveFloatReg(Z_F2 ),
 228   RegisterSaver_LiveFloatReg(Z_F3 ),
 229   RegisterSaver_LiveFloatReg(Z_F4 ),
 230   RegisterSaver_LiveFloatReg(Z_F5 ),
 231   RegisterSaver_LiveFloatReg(Z_F6 ),
 232   RegisterSaver_LiveFloatReg(Z_F7 ),
 233   // RegisterSaver_LiveFloatReg(Z_F8 ), // non-volatile
 234   // RegisterSaver_LiveFloatReg(Z_F9 ), // non-volatile
 235   // RegisterSaver_LiveFloatReg(Z_F10), // non-volatile
 236   // RegisterSaver_LiveFloatReg(Z_F11), // non-volatile
 237   // RegisterSaver_LiveFloatReg(Z_F12), // non-volatile
 238   // RegisterSaver_LiveFloatReg(Z_F13), // non-volatile
 239   // RegisterSaver_LiveFloatReg(Z_F14), // non-volatile
 240   // RegisterSaver_LiveFloatReg(Z_F15), // non-volatile
 241   //
 242   // RegisterSaver_ExcludedIntReg(Z_R0), // scratch
 243   // RegisterSaver_ExcludedIntReg(Z_R1), // scratch
 244   RegisterSaver_LiveIntReg(Z_R2 ),
 245   RegisterSaver_LiveIntReg(Z_R3 ),
 246   RegisterSaver_LiveIntReg(Z_R4 ),
 247   RegisterSaver_LiveIntReg(Z_R5 ),
 248   // RegisterSaver_LiveIntReg(Z_R6 ), // non-volatile
 249   // RegisterSaver_LiveIntReg(Z_R7 ), // non-volatile
 250   // RegisterSaver_LiveIntReg(Z_R8 ), // non-volatile
 251   // RegisterSaver_LiveIntReg(Z_R9 ), // non-volatile
 252   // RegisterSaver_LiveIntReg(Z_R10), // non-volatile
 253   // RegisterSaver_LiveIntReg(Z_R11), // non-volatile
 254   // RegisterSaver_LiveIntReg(Z_R12), // non-volatile
 255   // RegisterSaver_LiveIntReg(Z_R13), // non-volatile
 256   // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.)
 257   // RegisterSaver_ExcludedIntReg(Z_R15)  // stack pointer
 258 };
 259 
 260 int RegisterSaver::live_reg_save_size(RegisterSet reg_set) {
 261   int reg_space = -1;
 262   switch (reg_set) {
 263     case all_registers:           reg_space = sizeof(RegisterSaver_LiveRegs); break;
 264     case all_registers_except_r2: reg_space = sizeof(RegisterSaver_LiveRegsWithoutR2); break;
 265     case all_integer_registers:   reg_space = sizeof(RegisterSaver_LiveIntRegs); break;
 266     case all_volatile_registers:  reg_space = sizeof(RegisterSaver_LiveVolatileRegs); break;
 267     case arg_registers:           reg_space = sizeof(RegisterSaver_LiveArgRegs); break;
 268     default: ShouldNotReachHere();
 269   }
 270   return (reg_space / sizeof(RegisterSaver::LiveRegType)) * reg_size;
 271 }
 272 
 273 
 274 int RegisterSaver::live_reg_frame_size(RegisterSet reg_set) {
 275   return live_reg_save_size(reg_set) + frame::z_abi_160_size;
 276 }
 277 
 278 
 279 // return_pc: Specify the register that should be stored as the return pc in the current frame.
 280 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, RegisterSet reg_set, Register return_pc) {
 281   // Record volatile registers as callee-save values in an OopMap so
 282   // their save locations will be propagated to the caller frame's
 283   // RegisterMap during StackFrameStream construction (needed for
 284   // deoptimization; see compiledVFrame::create_stack_value).
 285 
 286   // Calculate frame size.
 287   const int frame_size_in_bytes  = live_reg_frame_size(reg_set);
 288   const int frame_size_in_slots  = frame_size_in_bytes / sizeof(jint);
 289   const int register_save_offset = frame_size_in_bytes - live_reg_save_size(reg_set);
 290 
 291   // OopMap frame size is in c2 stack slots (sizeof(jint)) not bytes or words.
 292   OopMap* map = new OopMap(frame_size_in_slots, 0);
 293 
 294   int regstosave_num = 0;
 295   const RegisterSaver::LiveRegType* live_regs = NULL;
 296 
 297   switch (reg_set) {
 298     case all_registers:
 299       regstosave_num = sizeof(RegisterSaver_LiveRegs)/sizeof(RegisterSaver::LiveRegType);
 300       live_regs      = RegisterSaver_LiveRegs;
 301       break;
 302     case all_registers_except_r2:
 303       regstosave_num = sizeof(RegisterSaver_LiveRegsWithoutR2)/sizeof(RegisterSaver::LiveRegType);;
 304       live_regs      = RegisterSaver_LiveRegsWithoutR2;
 305       break;
 306     case all_integer_registers:
 307       regstosave_num = sizeof(RegisterSaver_LiveIntRegs)/sizeof(RegisterSaver::LiveRegType);
 308       live_regs      = RegisterSaver_LiveIntRegs;
 309       break;
 310     case all_volatile_registers:
 311       regstosave_num = sizeof(RegisterSaver_LiveVolatileRegs)/sizeof(RegisterSaver::LiveRegType);
 312       live_regs      = RegisterSaver_LiveVolatileRegs;
 313       break;
 314     case arg_registers:
 315       regstosave_num = sizeof(RegisterSaver_LiveArgRegs)/sizeof(RegisterSaver::LiveRegType);;
 316       live_regs      = RegisterSaver_LiveArgRegs;
 317       break;
 318     default: ShouldNotReachHere();
 319   }
 320 
 321   // Save return pc in old frame.
 322   __ save_return_pc(return_pc);
 323 
 324   // Push a new frame (includes stack linkage).
 325   // Use return_pc as scratch for push_frame. Z_R0_scratch (the default) and Z_R1_scratch are
 326   // illegally used to pass parameters by RangeCheckStub::emit_code().
 327   __ push_frame(frame_size_in_bytes, return_pc);
 328   // We have to restore return_pc right away.
 329   // Nobody else will. Furthermore, return_pc isn't necessarily the default (Z_R14).
 330   // Nobody else knows which register we saved.
 331   __ z_lg(return_pc, _z_abi16(return_pc) + frame_size_in_bytes, Z_SP);
 332 
 333   // Register save area in new frame starts above z_abi_160 area.
 334   int offset = register_save_offset;
 335 
 336   Register first = noreg;
 337   Register last  = noreg;
 338   int      first_offset = -1;
 339   bool     float_spilled = false;
 340 
 341   for (int i = 0; i < regstosave_num; i++, offset += reg_size) {
 342     int reg_num  = live_regs[i].reg_num;
 343     int reg_type = live_regs[i].reg_type;
 344 
 345     switch (reg_type) {
 346       case RegisterSaver::int_reg: {
 347         Register reg = as_Register(reg_num);
 348         if (last != reg->predecessor()) {
 349           if (first != noreg) {
 350             __ z_stmg(first, last, first_offset, Z_SP);
 351           }
 352           first = reg;
 353           first_offset = offset;
 354           DEBUG_ONLY(float_spilled = false);
 355         }
 356         last = reg;
 357         assert(last != Z_R0, "r0 would require special treatment");
 358         assert(!float_spilled, "for simplicity, do not mix up ints and floats in RegisterSaver_LiveRegs[]");
 359         break;
 360       }
 361 
 362       case RegisterSaver::excluded_reg: // Not saved/restored, but with dedicated slot.
 363         continue; // Continue with next loop iteration.
 364 
 365       case RegisterSaver::float_reg: {
 366         FloatRegister freg = as_FloatRegister(reg_num);
 367         __ z_std(freg, offset, Z_SP);
 368         DEBUG_ONLY(float_spilled = true);
 369         break;
 370       }
 371 
 372       default:
 373         ShouldNotReachHere();
 374         break;
 375     }
 376 
 377     // Second set_callee_saved is really a waste but we'll keep things as they were for now
 378     map->set_callee_saved(VMRegImpl::stack2reg(offset >> 2), live_regs[i].vmreg);
 379     map->set_callee_saved(VMRegImpl::stack2reg((offset + half_reg_size) >> 2), live_regs[i].vmreg->next());
 380   }
 381   assert(first != noreg, "Should spill at least one int reg.");
 382   __ z_stmg(first, last, first_offset, Z_SP);
 383 
 384   // And we're done.
 385   return map;
 386 }
 387 
 388 
 389 // Generate the OopMap (again, regs where saved before).
 390 OopMap* RegisterSaver::generate_oop_map(MacroAssembler* masm, RegisterSet reg_set) {
 391   // Calculate frame size.
 392   const int frame_size_in_bytes  = live_reg_frame_size(reg_set);
 393   const int frame_size_in_slots  = frame_size_in_bytes / sizeof(jint);
 394   const int register_save_offset = frame_size_in_bytes - live_reg_save_size(reg_set);
 395 
 396   // OopMap frame size is in c2 stack slots (sizeof(jint)) not bytes or words.
 397   OopMap* map = new OopMap(frame_size_in_slots, 0);
 398 
 399   int regstosave_num = 0;
 400   const RegisterSaver::LiveRegType* live_regs = NULL;
 401 
 402   switch (reg_set) {
 403     case all_registers:
 404       regstosave_num = sizeof(RegisterSaver_LiveRegs)/sizeof(RegisterSaver::LiveRegType);
 405       live_regs      = RegisterSaver_LiveRegs;
 406       break;
 407     case all_registers_except_r2:
 408       regstosave_num = sizeof(RegisterSaver_LiveRegsWithoutR2)/sizeof(RegisterSaver::LiveRegType);;
 409       live_regs      = RegisterSaver_LiveRegsWithoutR2;
 410       break;
 411     case all_integer_registers:
 412       regstosave_num = sizeof(RegisterSaver_LiveIntRegs)/sizeof(RegisterSaver::LiveRegType);
 413       live_regs      = RegisterSaver_LiveIntRegs;
 414       break;
 415     case all_volatile_registers:
 416       regstosave_num = sizeof(RegisterSaver_LiveVolatileRegs)/sizeof(RegisterSaver::LiveRegType);
 417       live_regs      = RegisterSaver_LiveVolatileRegs;
 418       break;
 419     case arg_registers:
 420       regstosave_num = sizeof(RegisterSaver_LiveArgRegs)/sizeof(RegisterSaver::LiveRegType);;
 421       live_regs      = RegisterSaver_LiveArgRegs;
 422       break;
 423     default: ShouldNotReachHere();
 424   }
 425 
 426   // Register save area in new frame starts above z_abi_160 area.
 427   int offset = register_save_offset;
 428   for (int i = 0; i < regstosave_num; i++) {
 429     if (live_regs[i].reg_type < RegisterSaver::excluded_reg) {
 430       map->set_callee_saved(VMRegImpl::stack2reg(offset>>2), live_regs[i].vmreg);
 431       map->set_callee_saved(VMRegImpl::stack2reg((offset + half_reg_size)>>2), live_regs[i].vmreg->next());
 432     }
 433     offset += reg_size;
 434   }
 435   return map;
 436 }
 437 
 438 
 439 // Pop the current frame and restore all the registers that we saved.
 440 void RegisterSaver::restore_live_registers(MacroAssembler* masm, RegisterSet reg_set) {
 441   int offset;
 442   const int register_save_offset = live_reg_frame_size(reg_set) - live_reg_save_size(reg_set);
 443 
 444   Register first = noreg;
 445   Register last = noreg;
 446   int      first_offset = -1;
 447   bool     float_spilled = false;
 448 
 449   int regstosave_num = 0;
 450   const RegisterSaver::LiveRegType* live_regs = NULL;
 451 
 452   switch (reg_set) {
 453     case all_registers:
 454       regstosave_num = sizeof(RegisterSaver_LiveRegs)/sizeof(RegisterSaver::LiveRegType);;
 455       live_regs      = RegisterSaver_LiveRegs;
 456       break;
 457     case all_registers_except_r2:
 458       regstosave_num = sizeof(RegisterSaver_LiveRegsWithoutR2)/sizeof(RegisterSaver::LiveRegType);;
 459       live_regs      = RegisterSaver_LiveRegsWithoutR2;
 460       break;
 461     case all_integer_registers:
 462       regstosave_num = sizeof(RegisterSaver_LiveIntRegs)/sizeof(RegisterSaver::LiveRegType);
 463       live_regs      = RegisterSaver_LiveIntRegs;
 464       break;
 465     case all_volatile_registers:
 466       regstosave_num = sizeof(RegisterSaver_LiveVolatileRegs)/sizeof(RegisterSaver::LiveRegType);;
 467       live_regs      = RegisterSaver_LiveVolatileRegs;
 468       break;
 469     case arg_registers:
 470       regstosave_num = sizeof(RegisterSaver_LiveArgRegs)/sizeof(RegisterSaver::LiveRegType);;
 471       live_regs      = RegisterSaver_LiveArgRegs;
 472       break;
 473     default: ShouldNotReachHere();
 474   }
 475 
 476   // Restore all registers (ints and floats).
 477 
 478   // Register save area in new frame starts above z_abi_160 area.
 479   offset = register_save_offset;
 480 
 481   for (int i = 0; i < regstosave_num; i++, offset += reg_size) {
 482     int reg_num  = live_regs[i].reg_num;
 483     int reg_type = live_regs[i].reg_type;
 484 
 485     switch (reg_type) {
 486       case RegisterSaver::excluded_reg:
 487         continue; // Continue with next loop iteration.
 488 
 489       case RegisterSaver::int_reg: {
 490         Register reg = as_Register(reg_num);
 491         if (last != reg->predecessor()) {
 492           if (first != noreg) {
 493             __ z_lmg(first, last, first_offset, Z_SP);
 494           }
 495           first = reg;
 496           first_offset = offset;
 497           DEBUG_ONLY(float_spilled = false);
 498         }
 499         last = reg;
 500         assert(last != Z_R0, "r0 would require special treatment");
 501         assert(!float_spilled, "for simplicity, do not mix up ints and floats in RegisterSaver_LiveRegs[]");
 502         break;
 503       }
 504 
 505       case RegisterSaver::float_reg: {
 506         FloatRegister freg = as_FloatRegister(reg_num);
 507         __ z_ld(freg, offset, Z_SP);
 508         DEBUG_ONLY(float_spilled = true);
 509         break;
 510       }
 511 
 512       default:
 513         ShouldNotReachHere();
 514     }
 515   }
 516   assert(first != noreg, "Should spill at least one int reg.");
 517   __ z_lmg(first, last, first_offset, Z_SP);
 518 
 519   // Pop the frame.
 520   __ pop_frame();
 521 
 522   // Restore the flags.
 523   __ restore_return_pc();
 524 }
 525 
 526 
 527 // Pop the current frame and restore the registers that might be holding a result.
 528 void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
 529   int i;
 530   int offset;
 531   const int regstosave_num       = sizeof(RegisterSaver_LiveRegs) /
 532                                    sizeof(RegisterSaver::LiveRegType);
 533   const int register_save_offset = live_reg_frame_size(all_registers) - live_reg_save_size(all_registers);
 534 
 535   // Restore all result registers (ints and floats).
 536   offset = register_save_offset;
 537   for (int i = 0; i < regstosave_num; i++, offset += reg_size) {
 538     int reg_num = RegisterSaver_LiveRegs[i].reg_num;
 539     int reg_type = RegisterSaver_LiveRegs[i].reg_type;
 540     switch (reg_type) {
 541       case RegisterSaver::excluded_reg:
 542         continue; // Continue with next loop iteration.
 543       case RegisterSaver::int_reg: {
 544         if (as_Register(reg_num) == Z_RET) { // int result_reg
 545           __ z_lg(as_Register(reg_num), offset, Z_SP);
 546         }
 547         break;
 548       }
 549       case RegisterSaver::float_reg: {
 550         if (as_FloatRegister(reg_num) == Z_FRET) { // float result_reg
 551           __ z_ld(as_FloatRegister(reg_num), offset, Z_SP);
 552         }
 553         break;
 554       }
 555       default:
 556         ShouldNotReachHere();
 557     }
 558   }
 559 }
 560 
 561 // ---------------------------------------------------------------------------
 562 void SharedRuntime::save_native_result(MacroAssembler * masm,
 563                                        BasicType ret_type,
 564                                        int frame_slots) {
 565   Address memaddr(Z_SP, frame_slots * VMRegImpl::stack_slot_size);
 566 
 567   switch (ret_type) {
 568     case T_BOOLEAN:  // Save shorter types as int. Do we need sign extension at restore??
 569     case T_BYTE:
 570     case T_CHAR:
 571     case T_SHORT:
 572     case T_INT:
 573       __ reg2mem_opt(Z_RET, memaddr, false);
 574       break;
 575     case T_OBJECT:   // Save pointer types as long.
 576     case T_ARRAY:
 577     case T_ADDRESS:
 578     case T_VOID:
 579     case T_LONG:
 580       __ reg2mem_opt(Z_RET, memaddr);
 581       break;
 582     case T_FLOAT:
 583       __ freg2mem_opt(Z_FRET, memaddr, false);
 584       break;
 585     case T_DOUBLE:
 586       __ freg2mem_opt(Z_FRET, memaddr);
 587       break;
 588     default:
 589       ShouldNotReachHere();
 590       break;
 591   }
 592 }
 593 
 594 void SharedRuntime::restore_native_result(MacroAssembler *masm,
 595                                           BasicType       ret_type,
 596                                           int             frame_slots) {
 597   Address memaddr(Z_SP, frame_slots * VMRegImpl::stack_slot_size);
 598 
 599   switch (ret_type) {
 600     case T_BOOLEAN:  // Restore shorter types as int. Do we need sign extension at restore??
 601     case T_BYTE:
 602     case T_CHAR:
 603     case T_SHORT:
 604     case T_INT:
 605       __ mem2reg_opt(Z_RET, memaddr, false);
 606       break;
 607     case T_OBJECT:   // Restore pointer types as long.
 608     case T_ARRAY:
 609     case T_ADDRESS:
 610     case T_VOID:
 611     case T_LONG:
 612       __ mem2reg_opt(Z_RET, memaddr);
 613       break;
 614     case T_FLOAT:
 615       __ mem2freg_opt(Z_FRET, memaddr, false);
 616       break;
 617     case T_DOUBLE:
 618       __ mem2freg_opt(Z_FRET, memaddr);
 619       break;
 620     default:
 621       ShouldNotReachHere();
 622       break;
 623   }
 624 }
 625 
 626 // ---------------------------------------------------------------------------
 627 // Read the array of BasicTypes from a signature, and compute where the
 628 // arguments should go. Values in the VMRegPair regs array refer to 4-byte
 629 // quantities. Values less than VMRegImpl::stack0 are registers, those above
 630 // refer to 4-byte stack slots. All stack slots are based off of the stack pointer
 631 // as framesizes are fixed.
 632 // VMRegImpl::stack0 refers to the first slot 0(sp).
 633 // VMRegImpl::stack0+1 refers to the memory word 4-byes higher. Registers
 634 // up to RegisterImpl::number_of_registers are the 64-bit integer registers.
 635 
 636 // Note: the INPUTS in sig_bt are in units of Java argument words, which are
 637 // either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit
 638 // units regardless of build.
 639 
 640 // The Java calling convention is a "shifted" version of the C ABI.
 641 // By skipping the first C ABI register we can call non-static jni methods
 642 // with small numbers of arguments without having to shuffle the arguments
 643 // at all. Since we control the java ABI we ought to at least get some
 644 // advantage out of it.
 645 int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
 646                                            VMRegPair *regs,
 647                                            int total_args_passed) {
 648   // c2c calling conventions for compiled-compiled calls.
 649 
 650   // An int/float occupies 1 slot here.
 651   const int inc_stk_for_intfloat   = 1; // 1 slots for ints and floats.
 652   const int inc_stk_for_longdouble = 2; // 2 slots for longs and doubles.
 653 
 654   const VMReg z_iarg_reg[5] = {
 655     Z_R2->as_VMReg(),
 656     Z_R3->as_VMReg(),
 657     Z_R4->as_VMReg(),
 658     Z_R5->as_VMReg(),
 659     Z_R6->as_VMReg()
 660   };
 661   const VMReg z_farg_reg[4] = {
 662     Z_F0->as_VMReg(),
 663     Z_F2->as_VMReg(),
 664     Z_F4->as_VMReg(),
 665     Z_F6->as_VMReg()
 666   };
 667   const int z_num_iarg_registers = sizeof(z_iarg_reg) / sizeof(z_iarg_reg[0]);
 668   const int z_num_farg_registers = sizeof(z_farg_reg) / sizeof(z_farg_reg[0]);
 669 
 670   assert(RegisterImpl::number_of_arg_registers == z_num_iarg_registers, "iarg reg count mismatch");
 671   assert(FloatRegisterImpl::number_of_arg_registers == z_num_farg_registers, "farg reg count mismatch");
 672 
 673   int i;
 674   int stk = 0;
 675   int ireg = 0;
 676   int freg = 0;
 677 
 678   for (int i = 0; i < total_args_passed; ++i) {
 679     switch (sig_bt[i]) {
 680       case T_BOOLEAN:
 681       case T_CHAR:
 682       case T_BYTE:
 683       case T_SHORT:
 684       case T_INT:
 685         if (ireg < z_num_iarg_registers) {
 686           // Put int/ptr in register.
 687           regs[i].set1(z_iarg_reg[ireg]);
 688           ++ireg;
 689         } else {
 690           // Put int/ptr on stack.
 691           regs[i].set1(VMRegImpl::stack2reg(stk));
 692           stk += inc_stk_for_intfloat;
 693         }
 694         break;
 695       case T_LONG:
 696         assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half");
 697         if (ireg < z_num_iarg_registers) {
 698           // Put long in register.
 699           regs[i].set2(z_iarg_reg[ireg]);
 700           ++ireg;
 701         } else {
 702           // Put long on stack and align to 2 slots.
 703           if (stk & 0x1) { ++stk; }
 704           regs[i].set2(VMRegImpl::stack2reg(stk));
 705           stk += inc_stk_for_longdouble;
 706         }
 707         break;
 708       case T_OBJECT:
 709       case T_ARRAY:
 710       case T_ADDRESS:
 711         if (ireg < z_num_iarg_registers) {
 712           // Put ptr in register.
 713           regs[i].set2(z_iarg_reg[ireg]);
 714           ++ireg;
 715         } else {
 716           // Put ptr on stack and align to 2 slots, because
 717           // "64-bit pointers record oop-ishness on 2 aligned adjacent
 718           // registers." (see OopFlow::build_oop_map).
 719           if (stk & 0x1) { ++stk; }
 720           regs[i].set2(VMRegImpl::stack2reg(stk));
 721           stk += inc_stk_for_longdouble;
 722         }
 723         break;
 724       case T_FLOAT:
 725         if (freg < z_num_farg_registers) {
 726           // Put float in register.
 727           regs[i].set1(z_farg_reg[freg]);
 728           ++freg;
 729         } else {
 730           // Put float on stack.
 731           regs[i].set1(VMRegImpl::stack2reg(stk));
 732           stk += inc_stk_for_intfloat;
 733         }
 734         break;
 735       case T_DOUBLE:
 736         assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half");
 737         if (freg < z_num_farg_registers) {
 738           // Put double in register.
 739           regs[i].set2(z_farg_reg[freg]);
 740           ++freg;
 741         } else {
 742           // Put double on stack and align to 2 slots.
 743           if (stk & 0x1) { ++stk; }
 744           regs[i].set2(VMRegImpl::stack2reg(stk));
 745           stk += inc_stk_for_longdouble;
 746         }
 747         break;
 748       case T_VOID:
 749         assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
 750         // Do not count halves.
 751         regs[i].set_bad();
 752         break;
 753       default:
 754         ShouldNotReachHere();
 755     }
 756   }
 757   return align_up(stk, 2);
 758 }
 759 
 760 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
 761                                         VMRegPair *regs,
 762                                         VMRegPair *regs2,
 763                                         int total_args_passed) {
 764   assert(regs2 == NULL, "second VMRegPair array not used on this platform");
 765 
 766   // Calling conventions for C runtime calls and calls to JNI native methods.
 767   const VMReg z_iarg_reg[5] = {
 768     Z_R2->as_VMReg(),
 769     Z_R3->as_VMReg(),
 770     Z_R4->as_VMReg(),
 771     Z_R5->as_VMReg(),
 772     Z_R6->as_VMReg()
 773   };
 774   const VMReg z_farg_reg[4] = {
 775     Z_F0->as_VMReg(),
 776     Z_F2->as_VMReg(),
 777     Z_F4->as_VMReg(),
 778     Z_F6->as_VMReg()
 779   };
 780   const int z_num_iarg_registers = sizeof(z_iarg_reg) / sizeof(z_iarg_reg[0]);
 781   const int z_num_farg_registers = sizeof(z_farg_reg) / sizeof(z_farg_reg[0]);
 782 
 783   // Check calling conventions consistency.
 784   assert(RegisterImpl::number_of_arg_registers == z_num_iarg_registers, "iarg reg count mismatch");
 785   assert(FloatRegisterImpl::number_of_arg_registers == z_num_farg_registers, "farg reg count mismatch");
 786 
 787   // Avoid passing C arguments in the wrong stack slots.
 788 
 789   // 'Stk' counts stack slots. Due to alignment, 32 bit values occupy
 790   // 2 such slots, like 64 bit values do.
 791   const int inc_stk_for_intfloat   = 2; // 2 slots for ints and floats.
 792   const int inc_stk_for_longdouble = 2; // 2 slots for longs and doubles.
 793 
 794   int i;
 795   // Leave room for C-compatible ABI
 796   int stk = (frame::z_abi_160_size - frame::z_jit_out_preserve_size) / VMRegImpl::stack_slot_size;
 797   int freg = 0;
 798   int ireg = 0;
 799 
 800   // We put the first 5 arguments into registers and the rest on the
 801   // stack. Float arguments are already in their argument registers
 802   // due to c2c calling conventions (see calling_convention).
 803   for (int i = 0; i < total_args_passed; ++i) {
 804     switch (sig_bt[i]) {
 805       case T_BOOLEAN:
 806       case T_CHAR:
 807       case T_BYTE:
 808       case T_SHORT:
 809       case T_INT:
 810         // Fall through, handle as long.
 811       case T_LONG:
 812       case T_OBJECT:
 813       case T_ARRAY:
 814       case T_ADDRESS:
 815       case T_METADATA:
 816         // Oops are already boxed if required (JNI).
 817         if (ireg < z_num_iarg_registers) {
 818           regs[i].set2(z_iarg_reg[ireg]);
 819           ++ireg;
 820         } else {
 821           regs[i].set2(VMRegImpl::stack2reg(stk));
 822           stk += inc_stk_for_longdouble;
 823         }
 824         break;
 825       case T_FLOAT:
 826         if (freg < z_num_farg_registers) {
 827           regs[i].set1(z_farg_reg[freg]);
 828           ++freg;
 829         } else {
 830           regs[i].set1(VMRegImpl::stack2reg(stk+1));
 831           stk +=  inc_stk_for_intfloat;
 832         }
 833         break;
 834       case T_DOUBLE:
 835         assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half");
 836         if (freg < z_num_farg_registers) {
 837           regs[i].set2(z_farg_reg[freg]);
 838           ++freg;
 839         } else {
 840           // Put double on stack.
 841           regs[i].set2(VMRegImpl::stack2reg(stk));
 842           stk += inc_stk_for_longdouble;
 843         }
 844         break;
 845       case T_VOID:
 846         // Do not count halves.
 847         regs[i].set_bad();
 848         break;
 849       default:
 850         ShouldNotReachHere();
 851     }
 852   }
 853   return align_up(stk, 2);
 854 }
 855 
 856 int SharedRuntime::vector_calling_convention(VMRegPair *regs,
 857                                              uint num_bits,
 858                                              uint total_args_passed) {
 859   Unimplemented();
 860   return 0;
 861 }
 862 
 863 ////////////////////////////////////////////////////////////////////////
 864 //
 865 //  Argument shufflers
 866 //
 867 ////////////////////////////////////////////////////////////////////////
 868 
 869 //----------------------------------------------------------------------
 870 // The java_calling_convention describes stack locations as ideal slots on
 871 // a frame with no abi restrictions. Since we must observe abi restrictions
 872 // (like the placement of the register window) the slots must be biased by
 873 // the following value.
 874 //----------------------------------------------------------------------
 875 static int reg2slot(VMReg r) {
 876   return r->reg2stack() + SharedRuntime::out_preserve_stack_slots();
 877 }
 878 
 879 static int reg2offset(VMReg r) {
 880   return reg2slot(r) * VMRegImpl::stack_slot_size;
 881 }
 882 
 883 static void verify_oop_args(MacroAssembler *masm,
 884                             int total_args_passed,
 885                             const BasicType *sig_bt,
 886                             const VMRegPair *regs) {
 887   if (!VerifyOops) { return; }
 888 
 889   for (int i = 0; i < total_args_passed; i++) {
 890     if (is_reference_type(sig_bt[i])) {
 891       VMReg r = regs[i].first();
 892       assert(r->is_valid(), "bad oop arg");
 893 
 894       if (r->is_stack()) {
 895         __ z_lg(Z_R0_scratch,
 896                 Address(Z_SP, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
 897         __ verify_oop(Z_R0_scratch, FILE_AND_LINE);
 898       } else {
 899         __ verify_oop(r->as_Register(), FILE_AND_LINE);
 900       }
 901     }
 902   }
 903 }
 904 
 905 static void gen_special_dispatch(MacroAssembler *masm,
 906                                  int total_args_passed,
 907                                  vmIntrinsics::ID special_dispatch,
 908                                  const BasicType *sig_bt,
 909                                  const VMRegPair *regs) {
 910   verify_oop_args(masm, total_args_passed, sig_bt, regs);
 911 
 912   // Now write the args into the outgoing interpreter space.
 913   bool     has_receiver   = false;
 914   Register receiver_reg   = noreg;
 915   int      member_arg_pos = -1;
 916   Register member_reg     = noreg;
 917   int      ref_kind       = MethodHandles::signature_polymorphic_intrinsic_ref_kind(special_dispatch);
 918 
 919   if (ref_kind != 0) {
 920     member_arg_pos = total_args_passed - 1;  // trailing MemberName argument
 921     member_reg = Z_R9;                       // Known to be free at this point.
 922     has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind);
 923   } else if (special_dispatch == vmIntrinsics::_linkToNative) {
 924     member_arg_pos = total_args_passed - 1;  // trailing NativeEntryPoint argument
 925     member_reg = Z_R9;  // known to be free at this point
 926   } else {
 927     guarantee(special_dispatch == vmIntrinsics::_invokeBasic,
 928               "special_dispatch=%d", vmIntrinsics::as_int(special_dispatch));
 929     has_receiver = true;
 930   }
 931 
 932   if (member_reg != noreg) {
 933     // Load the member_arg into register, if necessary.
 934     assert(member_arg_pos >= 0 && member_arg_pos < total_args_passed, "oob");
 935     assert(sig_bt[member_arg_pos] == T_OBJECT, "dispatch argument must be an object");
 936 
 937     VMReg r = regs[member_arg_pos].first();
 938     assert(r->is_valid(), "bad member arg");
 939 
 940     if (r->is_stack()) {
 941       __ z_lg(member_reg, Address(Z_SP, reg2offset(r)));
 942     } else {
 943       // No data motion is needed.
 944       member_reg = r->as_Register();
 945     }
 946   }
 947 
 948   if (has_receiver) {
 949     // Make sure the receiver is loaded into a register.
 950     assert(total_args_passed > 0, "oob");
 951     assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object");
 952 
 953     VMReg r = regs[0].first();
 954     assert(r->is_valid(), "bad receiver arg");
 955 
 956     if (r->is_stack()) {
 957       // Porting note: This assumes that compiled calling conventions always
 958       // pass the receiver oop in a register. If this is not true on some
 959       // platform, pick a temp and load the receiver from stack.
 960       assert(false, "receiver always in a register");
 961       receiver_reg = Z_R13;  // Known to be free at this point.
 962       __ z_lg(receiver_reg, Address(Z_SP, reg2offset(r)));
 963     } else {
 964       // No data motion is needed.
 965       receiver_reg = r->as_Register();
 966     }
 967   }
 968 
 969   // Figure out which address we are really jumping to:
 970   MethodHandles::generate_method_handle_dispatch(masm, special_dispatch,
 971                                                  receiver_reg, member_reg,
 972                                                  /*for_compiler_entry:*/ true);
 973 }
 974 
 975 ////////////////////////////////////////////////////////////////////////
 976 //
 977 //  Argument shufflers
 978 //
 979 ////////////////////////////////////////////////////////////////////////
 980 
 981 // Is the size of a vector size (in bytes) bigger than a size saved by default?
 982 // 8 bytes registers are saved by default on z/Architecture.
 983 bool SharedRuntime::is_wide_vector(int size) {
 984   // Note, MaxVectorSize == 8 on this platform.
 985   assert(size <= 8, "%d bytes vectors are not supported", size);
 986   return size > 8;
 987 }
 988 
 989 //----------------------------------------------------------------------
 990 // An oop arg. Must pass a handle not the oop itself
 991 //----------------------------------------------------------------------
 992 static void object_move(MacroAssembler *masm,
 993                         OopMap *map,
 994                         int oop_handle_offset,
 995                         int framesize_in_slots,
 996                         VMRegPair src,
 997                         VMRegPair dst,
 998                         bool is_receiver,
 999                         int *receiver_offset) {
1000   int frame_offset = framesize_in_slots*VMRegImpl::stack_slot_size;
1001 
1002   assert(!is_receiver || (is_receiver && (*receiver_offset == -1)), "only one receiving object per call, please.");
1003 
1004   // Must pass a handle. First figure out the location we use as a handle.
1005 
1006   if (src.first()->is_stack()) {
1007     // Oop is already on the stack, put handle on stack or in register
1008     // If handle will be on the stack, use temp reg to calculate it.
1009     Register rHandle = dst.first()->is_stack() ? Z_R1 : dst.first()->as_Register();
1010     Label    skip;
1011     int      slot_in_older_frame = reg2slot(src.first());
1012 
1013     guarantee(!is_receiver, "expecting receiver in register");
1014     map->set_oop(VMRegImpl::stack2reg(slot_in_older_frame + framesize_in_slots));
1015 
1016     __ add2reg(rHandle, reg2offset(src.first())+frame_offset, Z_SP);
1017     __ load_and_test_long(Z_R0, Address(rHandle));
1018     __ z_brne(skip);
1019     // Use a NULL handle if oop is NULL.
1020     __ clear_reg(rHandle, true, false);
1021     __ bind(skip);
1022 
1023     // Copy handle to the right place (register or stack).
1024     if (dst.first()->is_stack()) {
1025       __ z_stg(rHandle, reg2offset(dst.first()), Z_SP);
1026     } // else
1027       // nothing to do. rHandle uses the correct register
1028   } else {
1029     // Oop is passed in an input register. We must flush it to the stack.
1030     const Register rOop = src.first()->as_Register();
1031     const Register rHandle = dst.first()->is_stack() ? Z_R1 : dst.first()->as_Register();
1032     int            oop_slot = (rOop->encoding()-Z_ARG1->encoding()) * VMRegImpl::slots_per_word + oop_handle_offset;
1033     int            oop_slot_offset = oop_slot*VMRegImpl::stack_slot_size;
1034     NearLabel skip;
1035 
1036     if (is_receiver) {
1037       *receiver_offset = oop_slot_offset;
1038     }
1039     map->set_oop(VMRegImpl::stack2reg(oop_slot));
1040 
1041     // Flush Oop to stack, calculate handle.
1042     __ z_stg(rOop, oop_slot_offset, Z_SP);
1043     __ add2reg(rHandle, oop_slot_offset, Z_SP);
1044 
1045     // If Oop == NULL, use a NULL handle.
1046     __ compare64_and_branch(rOop, (RegisterOrConstant)0L, Assembler::bcondNotEqual, skip);
1047     __ clear_reg(rHandle, true, false);
1048     __ bind(skip);
1049 
1050     // Copy handle to the right place (register or stack).
1051     if (dst.first()->is_stack()) {
1052       __ z_stg(rHandle, reg2offset(dst.first()), Z_SP);
1053     } // else
1054       // nothing to do here, since rHandle = dst.first()->as_Register in this case.
1055   }
1056 }
1057 
1058 //----------------------------------------------------------------------
1059 // A float arg. May have to do float reg to int reg conversion
1060 //----------------------------------------------------------------------
1061 static void float_move(MacroAssembler *masm,
1062                        VMRegPair src,
1063                        VMRegPair dst,
1064                        int framesize_in_slots,
1065                        int workspace_slot_offset) {
1066   int frame_offset = framesize_in_slots * VMRegImpl::stack_slot_size;
1067   int workspace_offset = workspace_slot_offset * VMRegImpl::stack_slot_size;
1068 
1069   // We do not accept an argument in a VMRegPair to be spread over two slots,
1070   // no matter what physical location (reg or stack) the slots may have.
1071   // We just check for the unaccepted slot to be invalid.
1072   assert(!src.second()->is_valid(), "float in arg spread over two slots");
1073   assert(!dst.second()->is_valid(), "float out arg spread over two slots");
1074 
1075   if (src.first()->is_stack()) {
1076     if (dst.first()->is_stack()) {
1077       // stack -> stack. The easiest of the bunch.
1078       __ z_mvc(Address(Z_SP, reg2offset(dst.first())),
1079                Address(Z_SP, reg2offset(src.first()) + frame_offset), sizeof(float));
1080     } else {
1081       // stack to reg
1082       Address memaddr(Z_SP, reg2offset(src.first()) + frame_offset);
1083       if (dst.first()->is_Register()) {
1084         __ mem2reg_opt(dst.first()->as_Register(), memaddr, false);
1085       } else {
1086         __ mem2freg_opt(dst.first()->as_FloatRegister(), memaddr, false);
1087       }
1088     }
1089   } else if (src.first()->is_Register()) {
1090     if (dst.first()->is_stack()) {
1091       // gpr -> stack
1092       __ reg2mem_opt(src.first()->as_Register(),
1093                      Address(Z_SP, reg2offset(dst.first()), false ));
1094     } else {
1095       if (dst.first()->is_Register()) {
1096         // gpr -> gpr
1097         __ move_reg_if_needed(dst.first()->as_Register(), T_INT,
1098                               src.first()->as_Register(), T_INT);
1099       } else {
1100         if (VM_Version::has_FPSupportEnhancements()) {
1101           // gpr -> fpr. Exploit z10 capability of direct transfer.
1102           __ z_ldgr(dst.first()->as_FloatRegister(), src.first()->as_Register());
1103         } else {
1104           // gpr -> fpr. Use work space on stack to transfer data.
1105           Address   stackaddr(Z_SP, workspace_offset);
1106 
1107           __ reg2mem_opt(src.first()->as_Register(), stackaddr, false);
1108           __ mem2freg_opt(dst.first()->as_FloatRegister(), stackaddr, false);
1109         }
1110       }
1111     }
1112   } else {
1113     if (dst.first()->is_stack()) {
1114       // fpr -> stack
1115       __ freg2mem_opt(src.first()->as_FloatRegister(),
1116                       Address(Z_SP, reg2offset(dst.first())), false);
1117     } else {
1118       if (dst.first()->is_Register()) {
1119         if (VM_Version::has_FPSupportEnhancements()) {
1120           // fpr -> gpr.
1121           __ z_lgdr(dst.first()->as_Register(), src.first()->as_FloatRegister());
1122         } else {
1123           // fpr -> gpr. Use work space on stack to transfer data.
1124           Address   stackaddr(Z_SP, workspace_offset);
1125 
1126           __ freg2mem_opt(src.first()->as_FloatRegister(), stackaddr, false);
1127           __ mem2reg_opt(dst.first()->as_Register(), stackaddr, false);
1128         }
1129       } else {
1130         // fpr -> fpr
1131         __ move_freg_if_needed(dst.first()->as_FloatRegister(), T_FLOAT,
1132                                src.first()->as_FloatRegister(), T_FLOAT);
1133       }
1134     }
1135   }
1136 }
1137 
1138 //----------------------------------------------------------------------
1139 // A double arg. May have to do double reg to long reg conversion
1140 //----------------------------------------------------------------------
1141 static void double_move(MacroAssembler *masm,
1142                         VMRegPair src,
1143                         VMRegPair dst,
1144                         int framesize_in_slots,
1145                         int workspace_slot_offset) {
1146   int frame_offset = framesize_in_slots*VMRegImpl::stack_slot_size;
1147   int workspace_offset = workspace_slot_offset*VMRegImpl::stack_slot_size;
1148 
1149   // Since src is always a java calling convention we know that the
1150   // src pair is always either all registers or all stack (and aligned?)
1151 
1152   if (src.first()->is_stack()) {
1153     if (dst.first()->is_stack()) {
1154       // stack -> stack. The easiest of the bunch.
1155       __ z_mvc(Address(Z_SP, reg2offset(dst.first())),
1156                Address(Z_SP, reg2offset(src.first()) + frame_offset), sizeof(double));
1157     } else {
1158       // stack to reg
1159       Address stackaddr(Z_SP, reg2offset(src.first()) + frame_offset);
1160 
1161       if (dst.first()->is_Register()) {
1162         __ mem2reg_opt(dst.first()->as_Register(), stackaddr);
1163       } else {
1164         __ mem2freg_opt(dst.first()->as_FloatRegister(), stackaddr);
1165       }
1166     }
1167   } else if (src.first()->is_Register()) {
1168     if (dst.first()->is_stack()) {
1169       // gpr -> stack
1170       __ reg2mem_opt(src.first()->as_Register(),
1171                      Address(Z_SP, reg2offset(dst.first())));
1172     } else {
1173       if (dst.first()->is_Register()) {
1174         // gpr -> gpr
1175         __ move_reg_if_needed(dst.first()->as_Register(), T_LONG,
1176                               src.first()->as_Register(), T_LONG);
1177       } else {
1178         if (VM_Version::has_FPSupportEnhancements()) {
1179           // gpr -> fpr. Exploit z10 capability of direct transfer.
1180           __ z_ldgr(dst.first()->as_FloatRegister(), src.first()->as_Register());
1181         } else {
1182           // gpr -> fpr. Use work space on stack to transfer data.
1183           Address stackaddr(Z_SP, workspace_offset);
1184           __ reg2mem_opt(src.first()->as_Register(), stackaddr);
1185           __ mem2freg_opt(dst.first()->as_FloatRegister(), stackaddr);
1186         }
1187       }
1188     }
1189   } else {
1190     if (dst.first()->is_stack()) {
1191       // fpr -> stack
1192       __ freg2mem_opt(src.first()->as_FloatRegister(),
1193                       Address(Z_SP, reg2offset(dst.first())));
1194     } else {
1195       if (dst.first()->is_Register()) {
1196         if (VM_Version::has_FPSupportEnhancements()) {
1197           // fpr -> gpr. Exploit z10 capability of direct transfer.
1198           __ z_lgdr(dst.first()->as_Register(), src.first()->as_FloatRegister());
1199         } else {
1200           // fpr -> gpr. Use work space on stack to transfer data.
1201           Address stackaddr(Z_SP, workspace_offset);
1202 
1203           __ freg2mem_opt(src.first()->as_FloatRegister(), stackaddr);
1204           __ mem2reg_opt(dst.first()->as_Register(), stackaddr);
1205         }
1206       } else {
1207         // fpr -> fpr
1208         // In theory these overlap but the ordering is such that this is likely a nop.
1209         __ move_freg_if_needed(dst.first()->as_FloatRegister(), T_DOUBLE,
1210                                src.first()->as_FloatRegister(), T_DOUBLE);
1211       }
1212     }
1213   }
1214 }
1215 
1216 //----------------------------------------------------------------------
1217 // A long arg.
1218 //----------------------------------------------------------------------
1219 static void long_move(MacroAssembler *masm,
1220                       VMRegPair src,
1221                       VMRegPair dst,
1222                       int framesize_in_slots) {
1223   int frame_offset = framesize_in_slots*VMRegImpl::stack_slot_size;
1224 
1225   if (src.first()->is_stack()) {
1226     if (dst.first()->is_stack()) {
1227       // stack -> stack. The easiest of the bunch.
1228       __ z_mvc(Address(Z_SP, reg2offset(dst.first())),
1229                Address(Z_SP, reg2offset(src.first()) + frame_offset), sizeof(long));
1230     } else {
1231       // stack to reg
1232       assert(dst.first()->is_Register(), "long dst value must be in GPR");
1233       __ mem2reg_opt(dst.first()->as_Register(),
1234                       Address(Z_SP, reg2offset(src.first()) + frame_offset));
1235     }
1236   } else {
1237     // reg to reg
1238     assert(src.first()->is_Register(), "long src value must be in GPR");
1239     if (dst.first()->is_stack()) {
1240       // reg -> stack
1241       __ reg2mem_opt(src.first()->as_Register(),
1242                      Address(Z_SP, reg2offset(dst.first())));
1243     } else {
1244       // reg -> reg
1245       assert(dst.first()->is_Register(), "long dst value must be in GPR");
1246       __ move_reg_if_needed(dst.first()->as_Register(),
1247                             T_LONG, src.first()->as_Register(), T_LONG);
1248     }
1249   }
1250 }
1251 
1252 
1253 //----------------------------------------------------------------------
1254 // A int-like arg.
1255 //----------------------------------------------------------------------
1256 // On z/Architecture we will store integer like items to the stack as 64 bit
1257 // items, according to the z/Architecture ABI, even though Java would only store
1258 // 32 bits for a parameter.
1259 // We do sign extension for all base types. That is ok since the only
1260 // unsigned base type is T_CHAR, and T_CHAR uses only 16 bits of an int.
1261 // Sign extension 32->64 bit will thus not affect the value.
1262 //----------------------------------------------------------------------
1263 static void move32_64(MacroAssembler *masm,
1264                       VMRegPair src,
1265                       VMRegPair dst,
1266                       int framesize_in_slots) {
1267   int frame_offset = framesize_in_slots * VMRegImpl::stack_slot_size;
1268 
1269   if (src.first()->is_stack()) {
1270     Address memaddr(Z_SP, reg2offset(src.first()) + frame_offset);
1271     if (dst.first()->is_stack()) {
1272       // stack -> stack. MVC not possible due to sign extension.
1273       Address firstaddr(Z_SP, reg2offset(dst.first()));
1274       __ mem2reg_signed_opt(Z_R0_scratch, memaddr);
1275       __ reg2mem_opt(Z_R0_scratch, firstaddr);
1276     } else {
1277       // stack -> reg, sign extended
1278       __ mem2reg_signed_opt(dst.first()->as_Register(), memaddr);
1279     }
1280   } else {
1281     if (dst.first()->is_stack()) {
1282       // reg -> stack, sign extended
1283       Address firstaddr(Z_SP, reg2offset(dst.first()));
1284       __ z_lgfr(src.first()->as_Register(), src.first()->as_Register());
1285       __ reg2mem_opt(src.first()->as_Register(), firstaddr);
1286     } else {
1287       // reg -> reg, sign extended
1288       __ z_lgfr(dst.first()->as_Register(), src.first()->as_Register());
1289     }
1290   }
1291 }
1292 
1293 //----------------------------------------------------------------------
1294 // Wrap a JNI call.
1295 //----------------------------------------------------------------------
1296 #undef USE_RESIZE_FRAME
1297 nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
1298                                                 const methodHandle& method,
1299                                                 int compile_id,
1300                                                 BasicType *in_sig_bt,
1301                                                 VMRegPair *in_regs,
1302                                                 BasicType ret_type) {
1303   int total_in_args = method->size_of_parameters();
1304   if (method->is_method_handle_intrinsic()) {
1305     vmIntrinsics::ID iid = method->intrinsic_id();
1306     intptr_t start = (intptr_t) __ pc();
1307     int vep_offset = ((intptr_t) __ pc()) - start;
1308 
1309     gen_special_dispatch(masm, total_in_args,
1310                          method->intrinsic_id(), in_sig_bt, in_regs);
1311 
1312     int frame_complete = ((intptr_t)__ pc()) - start; // Not complete, period.
1313 
1314     __ flush();
1315 
1316     int stack_slots = SharedRuntime::out_preserve_stack_slots();  // No out slots at all, actually.
1317 
1318     return nmethod::new_native_nmethod(method,
1319                                        compile_id,
1320                                        masm->code(),
1321                                        vep_offset,
1322                                        frame_complete,
1323                                        stack_slots / VMRegImpl::slots_per_word,
1324                                        in_ByteSize(-1),
1325                                        in_ByteSize(-1),
1326                                        (OopMapSet *) NULL);
1327   }
1328 
1329 
1330   ///////////////////////////////////////////////////////////////////////
1331   //
1332   //  Precalculations before generating any code
1333   //
1334   ///////////////////////////////////////////////////////////////////////
1335 
1336   address native_func = method->native_function();
1337   assert(native_func != NULL, "must have function");
1338 
1339   //---------------------------------------------------------------------
1340   // We have received a description of where all the java args are located
1341   // on entry to the wrapper. We need to convert these args to where
1342   // the jni function will expect them. To figure out where they go
1343   // we convert the java signature to a C signature by inserting
1344   // the hidden arguments as arg[0] and possibly arg[1] (static method).
1345   //
1346   // The first hidden argument arg[0] is a pointer to the JNI environment.
1347   // It is generated for every call.
1348   // The second argument arg[1] to the JNI call, which is hidden for static
1349   // methods, is the boxed lock object. For static calls, the lock object
1350   // is the static method itself. The oop is constructed here. for instance
1351   // calls, the lock is performed on the object itself, the pointer of
1352   // which is passed as the first visible argument.
1353   //---------------------------------------------------------------------
1354 
1355   // Additionally, on z/Architecture we must convert integers
1356   // to longs in the C signature. We do this in advance in order to have
1357   // no trouble with indexes into the bt-arrays.
1358   // So convert the signature and registers now, and adjust the total number
1359   // of in-arguments accordingly.
1360   bool method_is_static = method->is_static();
1361   int  total_c_args     = total_in_args + (method_is_static ? 2 : 1);
1362 
1363   BasicType *out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
1364   VMRegPair *out_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);
1365   BasicType* in_elem_bt = NULL;
1366 
1367   // Create the signature for the C call:
1368   //   1) add the JNIEnv*
1369   //   2) add the class if the method is static
1370   //   3) copy the rest of the incoming signature (shifted by the number of
1371   //      hidden arguments)
1372 
1373   int argc = 0;
1374   out_sig_bt[argc++] = T_ADDRESS;
1375   if (method->is_static()) {
1376     out_sig_bt[argc++] = T_OBJECT;
1377   }
1378 
1379   for (int i = 0; i < total_in_args; i++) {
1380     out_sig_bt[argc++] = in_sig_bt[i];
1381   }
1382 
1383   ///////////////////////////////////////////////////////////////////////
1384   // Now figure out where the args must be stored and how much stack space
1385   // they require (neglecting out_preserve_stack_slots but providing space
1386   // for storing the first five register arguments).
1387   // It's weird, see int_stk_helper.
1388   ///////////////////////////////////////////////////////////////////////
1389 
1390   //---------------------------------------------------------------------
1391   // Compute framesize for the wrapper.
1392   //
1393   // - We need to handlize all oops passed in registers.
1394   // - We must create space for them here that is disjoint from the save area.
1395   // - We always just allocate 5 words for storing down these object.
1396   //   This allows us to simply record the base and use the Ireg number to
1397   //   decide which slot to use.
1398   // - Note that the reg number used to index the stack slot is the inbound
1399   //   number, not the outbound number.
1400   // - We must shuffle args to match the native convention,
1401   //   and to include var-args space.
1402   //---------------------------------------------------------------------
1403 
1404   //---------------------------------------------------------------------
1405   // Calculate the total number of stack slots we will need:
1406   // - 1) abi requirements
1407   // - 2) outgoing args
1408   // - 3) space for inbound oop handle area
1409   // - 4) space for handlizing a klass if static method
1410   // - 5) space for a lock if synchronized method
1411   // - 6) workspace (save rtn value, int<->float reg moves, ...)
1412   // - 7) filler slots for alignment
1413   //---------------------------------------------------------------------
1414   // Here is how the space we have allocated will look like.
1415   // Since we use resize_frame, we do not create a new stack frame,
1416   // but just extend the one we got with our own data area.
1417   //
1418   // If an offset or pointer name points to a separator line, it is
1419   // assumed that addressing with offset 0 selects storage starting
1420   // at the first byte above the separator line.
1421   //
1422   //
1423   //     ...                   ...
1424   //      | caller's frame      |
1425   // FP-> |---------------------|
1426   //      | filler slots, if any|
1427   //     7| #slots == mult of 2 |
1428   //      |---------------------|
1429   //      | work space          |
1430   //     6| 2 slots = 8 bytes   |
1431   //      |---------------------|
1432   //     5| lock box (if sync)  |
1433   //      |---------------------| <- lock_slot_offset
1434   //     4| klass (if static)   |
1435   //      |---------------------| <- klass_slot_offset
1436   //     3| oopHandle area      |
1437   //      |                     |
1438   //      |                     |
1439   //      |---------------------| <- oop_handle_offset
1440   //     2| outbound memory     |
1441   //     ...                   ...
1442   //      | based arguments     |
1443   //      |---------------------|
1444   //      | vararg              |
1445   //     ...                   ...
1446   //      | area                |
1447   //      |---------------------| <- out_arg_slot_offset
1448   //     1| out_preserved_slots |
1449   //     ...                   ...
1450   //      | (z_abi spec)        |
1451   // SP-> |---------------------| <- FP_slot_offset (back chain)
1452   //     ...                   ...
1453   //
1454   //---------------------------------------------------------------------
1455 
1456   // *_slot_offset indicates offset from SP in #stack slots
1457   // *_offset      indicates offset from SP in #bytes
1458 
1459   int stack_slots = c_calling_convention(out_sig_bt, out_regs, /*regs2=*/NULL, total_c_args) + // 1+2
1460                     SharedRuntime::out_preserve_stack_slots(); // see c_calling_convention
1461 
1462   // Now the space for the inbound oop handle area.
1463   int total_save_slots = RegisterImpl::number_of_arg_registers * VMRegImpl::slots_per_word;
1464 
1465   int oop_handle_slot_offset = stack_slots;
1466   stack_slots += total_save_slots;                                        // 3)
1467 
1468   int klass_slot_offset = 0;
1469   int klass_offset      = -1;
1470   if (method_is_static) {                                                 // 4)
1471     klass_slot_offset  = stack_slots;
1472     klass_offset       = klass_slot_offset * VMRegImpl::stack_slot_size;
1473     stack_slots       += VMRegImpl::slots_per_word;
1474   }
1475 
1476   int lock_slot_offset = 0;
1477   int lock_offset      = -1;
1478   if (method->is_synchronized()) {                                        // 5)
1479     lock_slot_offset   = stack_slots;
1480     lock_offset        = lock_slot_offset * VMRegImpl::stack_slot_size;
1481     stack_slots       += VMRegImpl::slots_per_word;
1482   }
1483 
1484   int workspace_slot_offset= stack_slots;                                 // 6)
1485   stack_slots         += 2;
1486 
1487   // Now compute actual number of stack words we need.
1488   // Round to align stack properly.
1489   stack_slots = align_up(stack_slots,                                     // 7)
1490                          frame::alignment_in_bytes / VMRegImpl::stack_slot_size);
1491   int frame_size_in_bytes = stack_slots * VMRegImpl::stack_slot_size;
1492 
1493 
1494   ///////////////////////////////////////////////////////////////////////
1495   // Now we can start generating code
1496   ///////////////////////////////////////////////////////////////////////
1497 
1498   unsigned int wrapper_CodeStart  = __ offset();
1499   unsigned int wrapper_UEPStart;
1500   unsigned int wrapper_VEPStart;
1501   unsigned int wrapper_FrameDone;
1502   unsigned int wrapper_CRegsSet;
1503   Label     handle_pending_exception;
1504   Label     ic_miss;
1505 
1506   //---------------------------------------------------------------------
1507   // Unverified entry point (UEP)
1508   //---------------------------------------------------------------------
1509   wrapper_UEPStart = __ offset();
1510 
1511   // check ic: object class <-> cached class
1512   if (!method_is_static) __ nmethod_UEP(ic_miss);
1513   // Fill with nops (alignment of verified entry point).
1514   __ align(CodeEntryAlignment);
1515 
1516   //---------------------------------------------------------------------
1517   // Verified entry point (VEP)
1518   //---------------------------------------------------------------------
1519   wrapper_VEPStart = __ offset();
1520 
1521   if (VM_Version::supports_fast_class_init_checks() && method->needs_clinit_barrier()) {
1522     Label L_skip_barrier;
1523     Register klass = Z_R1_scratch;
1524     // Notify OOP recorder (don't need the relocation)
1525     AddressLiteral md = __ constant_metadata_address(method->method_holder());
1526     __ load_const_optimized(klass, md.value());
1527     __ clinit_barrier(klass, Z_thread, &L_skip_barrier /*L_fast_path*/);
1528 
1529     __ load_const_optimized(klass, SharedRuntime::get_handle_wrong_method_stub());
1530     __ z_br(klass);
1531 
1532     __ bind(L_skip_barrier);
1533   }
1534 
1535   __ save_return_pc();
1536   __ generate_stack_overflow_check(frame_size_in_bytes);  // Check before creating frame.
1537 #ifndef USE_RESIZE_FRAME
1538   __ push_frame(frame_size_in_bytes);                     // Create a new frame for the wrapper.
1539 #else
1540   __ resize_frame(-frame_size_in_bytes, Z_R0_scratch);    // No new frame for the wrapper.
1541                                                           // Just resize the existing one.
1542 #endif
1543 
1544   wrapper_FrameDone = __ offset();
1545 
1546   __ verify_thread();
1547 
1548   // Native nmethod wrappers never take possession of the oop arguments.
1549   // So the caller will gc the arguments.
1550   // The only thing we need an oopMap for is if the call is static.
1551   //
1552   // An OopMap for lock (and class if static), and one for the VM call itself
1553   OopMapSet  *oop_maps        = new OopMapSet();
1554   OopMap     *map             = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
1555 
1556   //////////////////////////////////////////////////////////////////////
1557   //
1558   // The Grand Shuffle
1559   //
1560   //////////////////////////////////////////////////////////////////////
1561   //
1562   // We immediately shuffle the arguments so that for any vm call we have
1563   // to make from here on out (sync slow path, jvmti, etc.) we will have
1564   // captured the oops from our caller and have a valid oopMap for them.
1565   //
1566   //--------------------------------------------------------------------
1567   // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv*
1568   // (derived from JavaThread* which is in Z_thread) and, if static,
1569   // the class mirror instead of a receiver. This pretty much guarantees that
1570   // register layout will not match. We ignore these extra arguments during
1571   // the shuffle. The shuffle is described by the two calling convention
1572   // vectors we have in our possession. We simply walk the java vector to
1573   // get the source locations and the c vector to get the destinations.
1574   //
1575   // This is a trick. We double the stack slots so we can claim
1576   // the oops in the caller's frame. Since we are sure to have
1577   // more args than the caller doubling is enough to make
1578   // sure we can capture all the incoming oop args from the caller.
1579   //--------------------------------------------------------------------
1580 
1581   // Record sp-based slot for receiver on stack for non-static methods.
1582   int receiver_offset = -1;
1583 
1584   //--------------------------------------------------------------------
1585   // We move the arguments backwards because the floating point registers
1586   // destination will always be to a register with a greater or equal
1587   // register number or the stack.
1588   //   jix is the index of the incoming Java arguments.
1589   //   cix is the index of the outgoing C arguments.
1590   //--------------------------------------------------------------------
1591 
1592 #ifdef ASSERT
1593   bool reg_destroyed[RegisterImpl::number_of_registers];
1594   bool freg_destroyed[FloatRegisterImpl::number_of_registers];
1595   for (int r = 0; r < RegisterImpl::number_of_registers; r++) {
1596     reg_destroyed[r] = false;
1597   }
1598   for (int f = 0; f < FloatRegisterImpl::number_of_registers; f++) {
1599     freg_destroyed[f] = false;
1600   }
1601 #endif // ASSERT
1602 
1603   for (int jix = total_in_args - 1, cix = total_c_args - 1; jix >= 0; jix--, cix--) {
1604 #ifdef ASSERT
1605     if (in_regs[jix].first()->is_Register()) {
1606       assert(!reg_destroyed[in_regs[jix].first()->as_Register()->encoding()], "ack!");
1607     } else {
1608       if (in_regs[jix].first()->is_FloatRegister()) {
1609         assert(!freg_destroyed[in_regs[jix].first()->as_FloatRegister()->encoding()], "ack!");
1610       }
1611     }
1612     if (out_regs[cix].first()->is_Register()) {
1613       reg_destroyed[out_regs[cix].first()->as_Register()->encoding()] = true;
1614     } else {
1615       if (out_regs[cix].first()->is_FloatRegister()) {
1616         freg_destroyed[out_regs[cix].first()->as_FloatRegister()->encoding()] = true;
1617       }
1618     }
1619 #endif // ASSERT
1620 
1621     switch (in_sig_bt[jix]) {
1622       // Due to casting, small integers should only occur in pairs with type T_LONG.
1623       case T_BOOLEAN:
1624       case T_CHAR:
1625       case T_BYTE:
1626       case T_SHORT:
1627       case T_INT:
1628         // Move int and do sign extension.
1629         move32_64(masm, in_regs[jix], out_regs[cix], stack_slots);
1630         break;
1631 
1632       case T_LONG :
1633         long_move(masm, in_regs[jix], out_regs[cix], stack_slots);
1634         break;
1635 
1636       case T_ARRAY:
1637       case T_OBJECT:
1638         object_move(masm, map, oop_handle_slot_offset, stack_slots, in_regs[jix], out_regs[cix],
1639                     ((jix == 0) && (!method_is_static)),
1640                     &receiver_offset);
1641         break;
1642       case T_VOID:
1643         break;
1644 
1645       case T_FLOAT:
1646         float_move(masm, in_regs[jix], out_regs[cix], stack_slots, workspace_slot_offset);
1647         break;
1648 
1649       case T_DOUBLE:
1650         assert(jix+1 <  total_in_args && in_sig_bt[jix+1]  == T_VOID && out_sig_bt[cix+1] == T_VOID, "bad arg list");
1651         double_move(masm, in_regs[jix], out_regs[cix], stack_slots, workspace_slot_offset);
1652         break;
1653 
1654       case T_ADDRESS:
1655         assert(false, "found T_ADDRESS in java args");
1656         break;
1657 
1658       default:
1659         ShouldNotReachHere();
1660     }
1661   }
1662 
1663   //--------------------------------------------------------------------
1664   // Pre-load a static method's oop into ARG2.
1665   // Used both by locking code and the normal JNI call code.
1666   //--------------------------------------------------------------------
1667   if (method_is_static) {
1668     __ set_oop_constant(JNIHandles::make_local(method->method_holder()->java_mirror()), Z_ARG2);
1669 
1670     // Now handlize the static class mirror in ARG2. It's known not-null.
1671     __ z_stg(Z_ARG2, klass_offset, Z_SP);
1672     map->set_oop(VMRegImpl::stack2reg(klass_slot_offset));
1673     __ add2reg(Z_ARG2, klass_offset, Z_SP);
1674   }
1675 
1676   // Get JNIEnv* which is first argument to native.
1677   __ add2reg(Z_ARG1, in_bytes(JavaThread::jni_environment_offset()), Z_thread);
1678 
1679   //////////////////////////////////////////////////////////////////////
1680   // We have all of the arguments setup at this point.
1681   // We MUST NOT touch any outgoing regs from this point on.
1682   // So if we must call out we must push a new frame.
1683   //////////////////////////////////////////////////////////////////////
1684 
1685 
1686   // Calc the current pc into Z_R10 and into wrapper_CRegsSet.
1687   // Both values represent the same position.
1688   __ get_PC(Z_R10);                // PC into register
1689   wrapper_CRegsSet = __ offset();  // and into into variable.
1690 
1691   // Z_R10 now has the pc loaded that we will use when we finally call to native.
1692 
1693   // We use the same pc/oopMap repeatedly when we call out.
1694   oop_maps->add_gc_map((int)(wrapper_CRegsSet-wrapper_CodeStart), map);
1695 
1696   // Lock a synchronized method.
1697 
1698   if (method->is_synchronized()) {
1699 
1700     // ATTENTION: args and Z_R10 must be preserved.
1701     Register r_oop  = Z_R11;
1702     Register r_box  = Z_R12;
1703     Register r_tmp1 = Z_R13;
1704     Register r_tmp2 = Z_R7;
1705     Label done;
1706 
1707     // Load the oop for the object or class. R_carg2_classorobject contains
1708     // either the handlized oop from the incoming arguments or the handlized
1709     // class mirror (if the method is static).
1710     __ z_lg(r_oop, 0, Z_ARG2);
1711 
1712     lock_offset = (lock_slot_offset * VMRegImpl::stack_slot_size);
1713     // Get the lock box slot's address.
1714     __ add2reg(r_box, lock_offset, Z_SP);
1715 
1716     // Try fastpath for locking.
1717     // Fast_lock kills r_temp_1, r_temp_2. (Don't use R1 as temp, won't work!)
1718     __ compiler_fast_lock_object(r_oop, r_box, r_tmp1, r_tmp2);
1719     __ z_bre(done);
1720 
1721     //-------------------------------------------------------------------------
1722     // None of the above fast optimizations worked so we have to get into the
1723     // slow case of monitor enter. Inline a special case of call_VM that
1724     // disallows any pending_exception.
1725     //-------------------------------------------------------------------------
1726 
1727     Register oldSP = Z_R11;
1728 
1729     __ z_lgr(oldSP, Z_SP);
1730 
1731     RegisterSaver::save_live_registers(masm, RegisterSaver::arg_registers);
1732 
1733     // Prepare arguments for call.
1734     __ z_lg(Z_ARG1, 0, Z_ARG2); // Ynboxed class mirror or unboxed object.
1735     __ add2reg(Z_ARG2, lock_offset, oldSP);
1736     __ z_lgr(Z_ARG3, Z_thread);
1737 
1738     __ set_last_Java_frame(oldSP, Z_R10 /* gc map pc */);
1739 
1740     // Do the call.
1741     __ load_const_optimized(Z_R1_scratch, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C));
1742     __ call(Z_R1_scratch);
1743 
1744     __ reset_last_Java_frame();
1745 
1746     RegisterSaver::restore_live_registers(masm, RegisterSaver::arg_registers);
1747 #ifdef ASSERT
1748     { Label L;
1749       __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset()));
1750       __ z_bre(L);
1751       __ stop("no pending exception allowed on exit from IR::monitorenter");
1752       __ bind(L);
1753     }
1754 #endif
1755     __ bind(done);
1756   } // lock for synchronized methods
1757 
1758 
1759   //////////////////////////////////////////////////////////////////////
1760   // Finally just about ready to make the JNI call.
1761   //////////////////////////////////////////////////////////////////////
1762 
1763   // Use that pc we placed in Z_R10 a while back as the current frame anchor.
1764   __ set_last_Java_frame(Z_SP, Z_R10);
1765 
1766   // Transition from _thread_in_Java to _thread_in_native.
1767   __ set_thread_state(_thread_in_native);
1768 
1769   //////////////////////////////////////////////////////////////////////
1770   // This is the JNI call.
1771   //////////////////////////////////////////////////////////////////////
1772 
1773   __ call_c(native_func);
1774 
1775 
1776   //////////////////////////////////////////////////////////////////////
1777   // We have survived the call once we reach here.
1778   //////////////////////////////////////////////////////////////////////
1779 
1780 
1781   //--------------------------------------------------------------------
1782   // Unpack native results.
1783   //--------------------------------------------------------------------
1784   // For int-types, we do any needed sign-extension required.
1785   // Care must be taken that the return value (in Z_ARG1 = Z_RET = Z_R2
1786   // or in Z_FARG0 = Z_FRET = Z_F0) will survive any VM calls for
1787   // blocking or unlocking.
1788   // An OOP result (handle) is done specially in the slow-path code.
1789   //--------------------------------------------------------------------
1790   switch (ret_type) {
1791     case T_VOID:    break;         // Nothing to do!
1792     case T_FLOAT:   break;         // Got it where we want it (unless slow-path)
1793     case T_DOUBLE:  break;         // Got it where we want it (unless slow-path)
1794     case T_LONG:    break;         // Got it where we want it (unless slow-path)
1795     case T_OBJECT:  break;         // Really a handle.
1796                                    // Cannot de-handlize until after reclaiming jvm_lock.
1797     case T_ARRAY:   break;
1798 
1799     case T_BOOLEAN:                // 0 -> false(0); !0 -> true(1)
1800       __ z_lngfr(Z_RET, Z_RET);    // Force sign bit on except for zero.
1801       __ z_srlg(Z_RET, Z_RET, 63); // Shift sign bit into least significant pos.
1802       break;
1803     case T_BYTE:    __ z_lgbr(Z_RET, Z_RET);  break; // sign extension
1804     case T_CHAR:    __ z_llghr(Z_RET, Z_RET); break; // unsigned result
1805     case T_SHORT:   __ z_lghr(Z_RET, Z_RET);  break; // sign extension
1806     case T_INT:     __ z_lgfr(Z_RET, Z_RET);  break; // sign-extend for beauty.
1807 
1808     default:
1809       ShouldNotReachHere();
1810       break;
1811   }
1812 
1813   Label after_transition;
1814 
1815   // Switch thread to "native transition" state before reading the synchronization state.
1816   // This additional state is necessary because reading and testing the synchronization
1817   // state is not atomic w.r.t. GC, as this scenario demonstrates:
1818   //   - Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
1819   //   - VM thread changes sync state to synchronizing and suspends threads for GC.
1820   //   - Thread A is resumed to finish this native method, but doesn't block here since it
1821   //     didn't see any synchronization in progress, and escapes.
1822 
1823   // Transition from _thread_in_native to _thread_in_native_trans.
1824   __ set_thread_state(_thread_in_native_trans);
1825 
1826   // Safepoint synchronization
1827   //--------------------------------------------------------------------
1828   // Must we block?
1829   //--------------------------------------------------------------------
1830   // Block, if necessary, before resuming in _thread_in_Java state.
1831   // In order for GC to work, don't clear the last_Java_sp until after blocking.
1832   //--------------------------------------------------------------------
1833   {
1834     Label no_block, sync;
1835 
1836     save_native_result(masm, ret_type, workspace_slot_offset); // Make Z_R2 available as work reg.
1837 
1838     // Force this write out before the read below.
1839     __ z_fence();
1840 
1841     __ safepoint_poll(sync, Z_R1);
1842 
1843     __ load_and_test_int(Z_R0, Address(Z_thread, JavaThread::suspend_flags_offset()));
1844     __ z_bre(no_block);
1845 
1846     // Block. Save any potential method result value before the operation and
1847     // use a leaf call to leave the last_Java_frame setup undisturbed. Doing this
1848     // lets us share the oopMap we used when we went native rather than create
1849     // a distinct one for this pc.
1850     //
1851     __ bind(sync);
1852     __ z_acquire();
1853 
1854     address entry_point = CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans);
1855 
1856     __ call_VM_leaf(entry_point, Z_thread);
1857 
1858     __ bind(no_block);
1859     restore_native_result(masm, ret_type, workspace_slot_offset);
1860   }
1861 
1862   //--------------------------------------------------------------------
1863   // Thread state is thread_in_native_trans. Any safepoint blocking has
1864   // already happened so we can now change state to _thread_in_Java.
1865   //--------------------------------------------------------------------
1866   // Transition from _thread_in_native_trans to _thread_in_Java.
1867   __ set_thread_state(_thread_in_Java);
1868   __ bind(after_transition);
1869 
1870   //--------------------------------------------------------------------
1871   // Reguard any pages if necessary.
1872   // Protect native result from being destroyed.
1873   //--------------------------------------------------------------------
1874 
1875   Label no_reguard;
1876 
1877   __ z_cli(Address(Z_thread, JavaThread::stack_guard_state_offset() + in_ByteSize(sizeof(StackOverflow::StackGuardState) - 1)),
1878            StackOverflow::stack_guard_yellow_reserved_disabled);
1879 
1880   __ z_bre(no_reguard);
1881 
1882   save_native_result(masm, ret_type, workspace_slot_offset);
1883   __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), Z_method);
1884   restore_native_result(masm, ret_type, workspace_slot_offset);
1885 
1886   __ bind(no_reguard);
1887 
1888 
1889   // Synchronized methods (slow path only)
1890   // No pending exceptions for now.
1891   //--------------------------------------------------------------------
1892   // Handle possibly pending exception (will unlock if necessary).
1893   // Native result is, if any is live, in Z_FRES or Z_RES.
1894   //--------------------------------------------------------------------
1895   // Unlock
1896   //--------------------------------------------------------------------
1897   if (method->is_synchronized()) {
1898     const Register r_oop        = Z_R11;
1899     const Register r_box        = Z_R12;
1900     const Register r_tmp1       = Z_R13;
1901     const Register r_tmp2       = Z_R7;
1902     Label done;
1903 
1904     // Get unboxed oop of class mirror or object ...
1905     int   offset = method_is_static ? klass_offset : receiver_offset;
1906 
1907     assert(offset != -1, "");
1908     __ z_lg(r_oop, offset, Z_SP);
1909 
1910     // ... and address of lock object box.
1911     __ add2reg(r_box, lock_offset, Z_SP);
1912 
1913     // Try fastpath for unlocking.
1914     __ compiler_fast_unlock_object(r_oop, r_box, r_tmp1, r_tmp2); // Don't use R1 as temp.
1915     __ z_bre(done);
1916 
1917     // Slow path for unlocking.
1918     // Save and restore any potential method result value around the unlocking operation.
1919     const Register R_exc = Z_R11;
1920 
1921     save_native_result(masm, ret_type, workspace_slot_offset);
1922 
1923     // Must save pending exception around the slow-path VM call. Since it's a
1924     // leaf call, the pending exception (if any) can be kept in a register.
1925     __ z_lg(R_exc, Address(Z_thread, Thread::pending_exception_offset()));
1926     assert(R_exc->is_nonvolatile(), "exception register must be non-volatile");
1927 
1928     // Must clear pending-exception before re-entering the VM. Since this is
1929     // a leaf call, pending-exception-oop can be safely kept in a register.
1930     __ clear_mem(Address(Z_thread, Thread::pending_exception_offset()), sizeof(intptr_t));
1931 
1932     // Inline a special case of call_VM that disallows any pending_exception.
1933 
1934     // Get locked oop from the handle we passed to jni.
1935     __ z_lg(Z_ARG1, offset, Z_SP);
1936     __ add2reg(Z_ARG2, lock_offset, Z_SP);
1937     __ z_lgr(Z_ARG3, Z_thread);
1938 
1939     __ load_const_optimized(Z_R1_scratch, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C));
1940 
1941     __ call(Z_R1_scratch);
1942 
1943 #ifdef ASSERT
1944     {
1945       Label L;
1946       __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset()));
1947       __ z_bre(L);
1948       __ stop("no pending exception allowed on exit from IR::monitorexit");
1949       __ bind(L);
1950     }
1951 #endif
1952 
1953     // Check_forward_pending_exception jump to forward_exception if any pending
1954     // exception is set. The forward_exception routine expects to see the
1955     // exception in pending_exception and not in a register. Kind of clumsy,
1956     // since all folks who branch to forward_exception must have tested
1957     // pending_exception first and hence have it in a register already.
1958     __ z_stg(R_exc, Address(Z_thread, Thread::pending_exception_offset()));
1959     restore_native_result(masm, ret_type, workspace_slot_offset);
1960     __ z_bru(done);
1961     __ z_illtrap(0x66);
1962 
1963     __ bind(done);
1964   }
1965 
1966 
1967   //--------------------------------------------------------------------
1968   // Clear "last Java frame" SP and PC.
1969   //--------------------------------------------------------------------
1970   __ verify_thread(); // Z_thread must be correct.
1971 
1972   __ reset_last_Java_frame();
1973 
1974   // Unpack oop result, e.g. JNIHandles::resolve result.
1975   if (is_reference_type(ret_type)) {
1976     __ resolve_jobject(Z_RET, /* tmp1 */ Z_R13, /* tmp2 */ Z_R7);
1977   }
1978 
1979   if (CheckJNICalls) {
1980     // clear_pending_jni_exception_check
1981     __ clear_mem(Address(Z_thread, JavaThread::pending_jni_exception_check_fn_offset()), sizeof(oop));
1982   }
1983 
1984   // Reset handle block.
1985   __ z_lg(Z_R1_scratch, Address(Z_thread, JavaThread::active_handles_offset()));
1986   __ clear_mem(Address(Z_R1_scratch, JNIHandleBlock::top_offset_in_bytes()), 4);
1987 
1988   // Check for pending exceptions.
1989   __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset()));
1990   __ z_brne(handle_pending_exception);
1991 
1992 
1993   //////////////////////////////////////////////////////////////////////
1994   // Return
1995   //////////////////////////////////////////////////////////////////////
1996 
1997 
1998 #ifndef USE_RESIZE_FRAME
1999   __ pop_frame();                     // Pop wrapper frame.
2000 #else
2001   __ resize_frame(frame_size_in_bytes, Z_R0_scratch);  // Revert stack extension.
2002 #endif
2003   __ restore_return_pc();             // This is the way back to the caller.
2004   __ z_br(Z_R14);
2005 
2006 
2007   //////////////////////////////////////////////////////////////////////
2008   // Out-of-line calls to the runtime.
2009   //////////////////////////////////////////////////////////////////////
2010 
2011 
2012   //---------------------------------------------------------------------
2013   // Handler for pending exceptions (out-of-line).
2014   //---------------------------------------------------------------------
2015   // Since this is a native call, we know the proper exception handler
2016   // is the empty function. We just pop this frame and then jump to
2017   // forward_exception_entry. Z_R14 will contain the native caller's
2018   // return PC.
2019   __ bind(handle_pending_exception);
2020   __ pop_frame();
2021   __ load_const_optimized(Z_R1_scratch, StubRoutines::forward_exception_entry());
2022   __ restore_return_pc();
2023   __ z_br(Z_R1_scratch);
2024 
2025   //---------------------------------------------------------------------
2026   // Handler for a cache miss (out-of-line)
2027   //---------------------------------------------------------------------
2028   __ call_ic_miss_handler(ic_miss, 0x77, 0, Z_R1_scratch);
2029   __ flush();
2030 
2031 
2032   //////////////////////////////////////////////////////////////////////
2033   // end of code generation
2034   //////////////////////////////////////////////////////////////////////
2035 
2036 
2037   nmethod *nm = nmethod::new_native_nmethod(method,
2038                                             compile_id,
2039                                             masm->code(),
2040                                             (int)(wrapper_VEPStart-wrapper_CodeStart),
2041                                             (int)(wrapper_FrameDone-wrapper_CodeStart),
2042                                             stack_slots / VMRegImpl::slots_per_word,
2043                                             (method_is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
2044                                             in_ByteSize(lock_offset),
2045                                             oop_maps);
2046 
2047   return nm;
2048 }
2049 
2050 static address gen_c2i_adapter(MacroAssembler  *masm,
2051                                int total_args_passed,
2052                                int comp_args_on_stack,
2053                                const BasicType *sig_bt,
2054                                const VMRegPair *regs,
2055                                Label &skip_fixup) {
2056   // Before we get into the guts of the C2I adapter, see if we should be here
2057   // at all. We've come from compiled code and are attempting to jump to the
2058   // interpreter, which means the caller made a static call to get here
2059   // (vcalls always get a compiled target if there is one). Check for a
2060   // compiled target. If there is one, we need to patch the caller's call.
2061 
2062   // These two defs MUST MATCH code in gen_i2c2i_adapter!
2063   const Register ientry = Z_R11;
2064   const Register code   = Z_R11;
2065 
2066   address c2i_entrypoint;
2067   Label   patch_callsite;
2068 
2069   // Regular (verified) c2i entry point.
2070   c2i_entrypoint = __ pc();
2071 
2072   // Call patching needed?
2073   __ load_and_test_long(Z_R0_scratch, method_(code));
2074   __ z_lg(ientry, method_(interpreter_entry));  // Preload interpreter entry (also if patching).
2075   __ z_brne(patch_callsite);                    // Patch required if code != NULL (compiled target exists).
2076 
2077   __ bind(skip_fixup);  // Return point from patch_callsite.
2078 
2079   // Since all args are passed on the stack, total_args_passed*wordSize is the
2080   // space we need. We need ABI scratch area but we use the caller's since
2081   // it has already been allocated.
2082 
2083   const int abi_scratch = frame::z_top_ijava_frame_abi_size;
2084   int       extraspace  = align_up(total_args_passed, 2)*wordSize + abi_scratch;
2085   Register  sender_SP   = Z_R10;
2086   Register  value       = Z_R12;
2087 
2088   // Remember the senderSP so we can pop the interpreter arguments off of the stack.
2089   // In addition, frame manager expects initial_caller_sp in Z_R10.
2090   __ z_lgr(sender_SP, Z_SP);
2091 
2092   // This should always fit in 14 bit immediate.
2093   __ resize_frame(-extraspace, Z_R0_scratch);
2094 
2095   // We use the caller's ABI scratch area (out_preserved_stack_slots) for the initial
2096   // args. This essentially moves the callers ABI scratch area from the top to the
2097   // bottom of the arg area.
2098 
2099   int st_off =  extraspace - wordSize;
2100 
2101   // Now write the args into the outgoing interpreter space.
2102   for (int i = 0; i < total_args_passed; i++) {
2103     VMReg r_1 = regs[i].first();
2104     VMReg r_2 = regs[i].second();
2105     if (!r_1->is_valid()) {
2106       assert(!r_2->is_valid(), "");
2107       continue;
2108     }
2109     if (r_1->is_stack()) {
2110       // The calling convention produces OptoRegs that ignore the preserve area (abi scratch).
2111       // We must account for it here.
2112       int ld_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
2113 
2114       if (!r_2->is_valid()) {
2115         __ z_mvc(Address(Z_SP, st_off), Address(sender_SP, ld_off), sizeof(void*));
2116       } else {
2117         // longs are given 2 64-bit slots in the interpreter,
2118         // but the data is passed in only 1 slot.
2119         if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
2120 #ifdef ASSERT
2121           __ clear_mem(Address(Z_SP, st_off), sizeof(void *));
2122 #endif
2123           st_off -= wordSize;
2124         }
2125         __ z_mvc(Address(Z_SP, st_off), Address(sender_SP, ld_off), sizeof(void*));
2126       }
2127     } else {
2128       if (r_1->is_Register()) {
2129         if (!r_2->is_valid()) {
2130           __ z_st(r_1->as_Register(), st_off, Z_SP);
2131         } else {
2132           // longs are given 2 64-bit slots in the interpreter, but the
2133           // data is passed in only 1 slot.
2134           if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
2135 #ifdef ASSERT
2136             __ clear_mem(Address(Z_SP, st_off), sizeof(void *));
2137 #endif
2138             st_off -= wordSize;
2139           }
2140           __ z_stg(r_1->as_Register(), st_off, Z_SP);
2141         }
2142       } else {
2143         assert(r_1->is_FloatRegister(), "");
2144         if (!r_2->is_valid()) {
2145           __ z_ste(r_1->as_FloatRegister(), st_off, Z_SP);
2146         } else {
2147           // In 64bit, doubles are given 2 64-bit slots in the interpreter, but the
2148           // data is passed in only 1 slot.
2149           // One of these should get known junk...
2150 #ifdef ASSERT
2151           __ z_lzdr(Z_F1);
2152           __ z_std(Z_F1, st_off, Z_SP);
2153 #endif
2154           st_off-=wordSize;
2155           __ z_std(r_1->as_FloatRegister(), st_off, Z_SP);
2156         }
2157       }
2158     }
2159     st_off -= wordSize;
2160   }
2161 
2162 
2163   // Jump to the interpreter just as if interpreter was doing it.
2164   __ add2reg(Z_esp, st_off, Z_SP);
2165 
2166   // Frame_manager expects initial_caller_sp (= SP without resize by c2i) in Z_R10.
2167   __ z_br(ientry);
2168 
2169 
2170   // Prevent illegal entry to out-of-line code.
2171   __ z_illtrap(0x22);
2172 
2173   // Generate out-of-line runtime call to patch caller,
2174   // then continue as interpreted.
2175 
2176   // IF you lose the race you go interpreted.
2177   // We don't see any possible endless c2i -> i2c -> c2i ...
2178   // transitions no matter how rare.
2179   __ bind(patch_callsite);
2180 
2181   RegisterSaver::save_live_registers(masm, RegisterSaver::arg_registers);
2182   __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), Z_method, Z_R14);
2183   RegisterSaver::restore_live_registers(masm, RegisterSaver::arg_registers);
2184   __ z_bru(skip_fixup);
2185 
2186   // end of out-of-line code
2187 
2188   return c2i_entrypoint;
2189 }
2190 
2191 // On entry, the following registers are set
2192 //
2193 //    Z_thread  r8  - JavaThread*
2194 //    Z_method  r9  - callee's method (method to be invoked)
2195 //    Z_esp     r7  - operand (or expression) stack pointer of caller. one slot above last arg.
2196 //    Z_SP      r15 - SP prepared by call stub such that caller's outgoing args are near top
2197 //
2198 void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
2199                                     int total_args_passed,
2200                                     int comp_args_on_stack,
2201                                     const BasicType *sig_bt,
2202                                     const VMRegPair *regs) {
2203   const Register value = Z_R12;
2204   const Register ld_ptr= Z_esp;
2205 
2206   int ld_offset = total_args_passed * wordSize;
2207 
2208   // Cut-out for having no stack args.
2209   if (comp_args_on_stack) {
2210     // Sig words on the stack are greater than VMRegImpl::stack0. Those in
2211     // registers are below. By subtracting stack0, we either get a negative
2212     // number (all values in registers) or the maximum stack slot accessed.
2213     // Convert VMRegImpl (4 byte) stack slots to words.
2214     int comp_words_on_stack = align_up(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord;
2215     // Round up to miminum stack alignment, in wordSize
2216     comp_words_on_stack = align_up(comp_words_on_stack, 2);
2217 
2218     __ resize_frame(-comp_words_on_stack*wordSize, Z_R0_scratch);
2219   }
2220 
2221   // Now generate the shuffle code. Pick up all register args and move the
2222   // rest through register value=Z_R12.
2223   for (int i = 0; i < total_args_passed; i++) {
2224     if (sig_bt[i] == T_VOID) {
2225       assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
2226       continue;
2227     }
2228 
2229     // Pick up 0, 1 or 2 words from ld_ptr.
2230     assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
2231            "scrambled load targets?");
2232     VMReg r_1 = regs[i].first();
2233     VMReg r_2 = regs[i].second();
2234     if (!r_1->is_valid()) {
2235       assert(!r_2->is_valid(), "");
2236       continue;
2237     }
2238     if (r_1->is_FloatRegister()) {
2239       if (!r_2->is_valid()) {
2240         __ z_le(r_1->as_FloatRegister(), ld_offset, ld_ptr);
2241         ld_offset-=wordSize;
2242       } else {
2243         // Skip the unused interpreter slot.
2244         __ z_ld(r_1->as_FloatRegister(), ld_offset - wordSize, ld_ptr);
2245         ld_offset -= 2 * wordSize;
2246       }
2247     } else {
2248       if (r_1->is_stack()) {
2249         // Must do a memory to memory move.
2250         int st_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
2251 
2252         if (!r_2->is_valid()) {
2253           __ z_mvc(Address(Z_SP, st_off), Address(ld_ptr, ld_offset), sizeof(void*));
2254         } else {
2255           // In 64bit, longs are given 2 64-bit slots in the interpreter, but the
2256           // data is passed in only 1 slot.
2257           if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
2258             ld_offset -= wordSize;
2259           }
2260           __ z_mvc(Address(Z_SP, st_off), Address(ld_ptr, ld_offset), sizeof(void*));
2261         }
2262       } else {
2263         if (!r_2->is_valid()) {
2264           // Not sure we need to do this but it shouldn't hurt.
2265           if (is_reference_type(sig_bt[i]) || sig_bt[i] == T_ADDRESS) {
2266             __ z_lg(r_1->as_Register(), ld_offset, ld_ptr);
2267           } else {
2268             __ z_l(r_1->as_Register(), ld_offset, ld_ptr);
2269           }
2270         } else {
2271           // In 64bit, longs are given 2 64-bit slots in the interpreter, but the
2272           // data is passed in only 1 slot.
2273           if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) {
2274             ld_offset -= wordSize;
2275           }
2276           __ z_lg(r_1->as_Register(), ld_offset, ld_ptr);
2277         }
2278       }
2279       ld_offset -= wordSize;
2280     }
2281   }
2282 
2283   // Jump to the compiled code just as if compiled code was doing it.
2284   // load target address from method:
2285   __ z_lg(Z_R1_scratch, Address(Z_method, Method::from_compiled_offset()));
2286 
2287   // Store method into thread->callee_target.
2288   // 6243940: We might end up in handle_wrong_method if
2289   // the callee is deoptimized as we race thru here. If that
2290   // happens we don't want to take a safepoint because the
2291   // caller frame will look interpreted and arguments are now
2292   // "compiled" so it is much better to make this transition
2293   // invisible to the stack walking code. Unfortunately, if
2294   // we try and find the callee by normal means a safepoint
2295   // is possible. So we stash the desired callee in the thread
2296   // and the vm will find it there should this case occur.
2297   __ z_stg(Z_method, thread_(callee_target));
2298 
2299   __ z_br(Z_R1_scratch);
2300 }
2301 
2302 AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
2303                                                             int total_args_passed,
2304                                                             int comp_args_on_stack,
2305                                                             const BasicType *sig_bt,
2306                                                             const VMRegPair *regs,
2307                                                             AdapterFingerPrint* fingerprint) {
2308   __ align(CodeEntryAlignment);
2309   address i2c_entry = __ pc();
2310   gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);
2311 
2312   address c2i_unverified_entry;
2313 
2314   Label skip_fixup;
2315   {
2316     Label ic_miss;
2317     const int klass_offset           = oopDesc::klass_offset_in_bytes();
2318     const int holder_klass_offset    = CompiledICHolder::holder_klass_offset();
2319     const int holder_metadata_offset = CompiledICHolder::holder_metadata_offset();
2320 
2321     // Out-of-line call to ic_miss handler.
2322     __ call_ic_miss_handler(ic_miss, 0x11, 0, Z_R1_scratch);
2323 
2324     // Unverified Entry Point UEP
2325     __ align(CodeEntryAlignment);
2326     c2i_unverified_entry = __ pc();
2327 
2328     // Check the pointers.
2329     if (!ImplicitNullChecks || MacroAssembler::needs_explicit_null_check(klass_offset)) {
2330       __ z_ltgr(Z_ARG1, Z_ARG1);
2331       __ z_bre(ic_miss);
2332     }
2333     __ verify_oop(Z_ARG1, FILE_AND_LINE);
2334 
2335     // Check ic: object class <-> cached class
2336     // Compress cached class for comparison. That's more efficient.
2337     if (UseCompressedClassPointers) {
2338       __ z_lg(Z_R11, holder_klass_offset, Z_method);             // Z_R11 is overwritten a few instructions down anyway.
2339       __ compare_klass_ptr(Z_R11, klass_offset, Z_ARG1, false); // Cached class can't be zero.
2340     } else {
2341       __ z_clc(klass_offset, sizeof(void *)-1, Z_ARG1, holder_klass_offset, Z_method);
2342     }
2343     __ z_brne(ic_miss);  // Cache miss: call runtime to handle this.
2344 
2345     // This def MUST MATCH code in gen_c2i_adapter!
2346     const Register code = Z_R11;
2347 
2348     __ z_lg(Z_method, holder_metadata_offset, Z_method);
2349     __ load_and_test_long(Z_R0, method_(code));
2350     __ z_brne(ic_miss);  // Cache miss: call runtime to handle this.
2351 
2352     // Fallthru to VEP. Duplicate LTG, but saved taken branch.
2353   }
2354 
2355   address c2i_entry = __ pc();
2356 
2357   // Class initialization barrier for static methods
2358   address c2i_no_clinit_check_entry = NULL;
2359   if (VM_Version::supports_fast_class_init_checks()) {
2360     Label L_skip_barrier;
2361 
2362     { // Bypass the barrier for non-static methods
2363       __ testbit(Address(Z_method, Method::access_flags_offset()), JVM_ACC_STATIC_BIT);
2364       __ z_bfalse(L_skip_barrier); // non-static
2365     }
2366 
2367     Register klass = Z_R11;
2368     __ load_method_holder(klass, Z_method);
2369     __ clinit_barrier(klass, Z_thread, &L_skip_barrier /*L_fast_path*/);
2370 
2371     __ load_const_optimized(klass, SharedRuntime::get_handle_wrong_method_stub());
2372     __ z_br(klass);
2373 
2374     __ bind(L_skip_barrier);
2375     c2i_no_clinit_check_entry = __ pc();
2376   }
2377 
2378   gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);
2379 
2380   return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry, c2i_no_clinit_check_entry);
2381 }
2382 
2383 // This function returns the adjust size (in number of words) to a c2i adapter
2384 // activation for use during deoptimization.
2385 //
2386 // Actually only compiled frames need to be adjusted, but it
2387 // doesn't harm to adjust entry and interpreter frames, too.
2388 //
2389 int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
2390   assert(callee_locals >= callee_parameters,
2391           "test and remove; got more parms than locals");
2392   // Handle the abi adjustment here instead of doing it in push_skeleton_frames.
2393   return (callee_locals - callee_parameters) * Interpreter::stackElementWords +
2394          frame::z_parent_ijava_frame_abi_size / BytesPerWord;
2395 }
2396 
2397 uint SharedRuntime::in_preserve_stack_slots() {
2398   return frame::jit_in_preserve_size_in_4_byte_units;
2399 }
2400 
2401 uint SharedRuntime::out_preserve_stack_slots() {
2402   return frame::z_jit_out_preserve_size/VMRegImpl::stack_slot_size;
2403 }
2404 
2405 //
2406 // Frame generation for deopt and uncommon trap blobs.
2407 //
2408 static void push_skeleton_frame(MacroAssembler* masm,
2409                           /* Unchanged */
2410                           Register frame_sizes_reg,
2411                           Register pcs_reg,
2412                           /* Invalidate */
2413                           Register frame_size_reg,
2414                           Register pc_reg) {
2415   BLOCK_COMMENT("  push_skeleton_frame {");
2416    __ z_lg(pc_reg, 0, pcs_reg);
2417    __ z_lg(frame_size_reg, 0, frame_sizes_reg);
2418    __ z_stg(pc_reg, _z_abi(return_pc), Z_SP);
2419    Register fp = pc_reg;
2420    __ push_frame(frame_size_reg, fp);
2421 #ifdef ASSERT
2422    // The magic is required for successful walking skeletal frames.
2423    __ load_const_optimized(frame_size_reg/*tmp*/, frame::z_istate_magic_number);
2424    __ z_stg(frame_size_reg, _z_ijava_state_neg(magic), fp);
2425    // Fill other slots that are supposedly not necessary with eye catchers.
2426    __ load_const_optimized(frame_size_reg/*use as tmp*/, 0xdeadbad1);
2427    __ z_stg(frame_size_reg, _z_ijava_state_neg(top_frame_sp), fp);
2428    // The sender_sp of the bottom frame is set before pushing it.
2429    // The sender_sp of non bottom frames is their caller's top_frame_sp, which
2430    // is unknown here. Luckily it is not needed before filling the frame in
2431    // layout_activation(), we assert this by setting an eye catcher (see
2432    // comments on sender_sp in frame_s390.hpp).
2433    __ z_stg(frame_size_reg, _z_ijava_state_neg(sender_sp), Z_SP);
2434 #endif // ASSERT
2435   BLOCK_COMMENT("  } push_skeleton_frame");
2436 }
2437 
2438 // Loop through the UnrollBlock info and create new frames.
2439 static void push_skeleton_frames(MacroAssembler* masm, bool deopt,
2440                             /* read */
2441                             Register unroll_block_reg,
2442                             /* invalidate */
2443                             Register frame_sizes_reg,
2444                             Register number_of_frames_reg,
2445                             Register pcs_reg,
2446                             Register tmp1,
2447                             Register tmp2) {
2448   BLOCK_COMMENT("push_skeleton_frames {");
2449   // _number_of_frames is of type int (deoptimization.hpp).
2450   __ z_lgf(number_of_frames_reg,
2451            Address(unroll_block_reg, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()));
2452   __ z_lg(pcs_reg,
2453           Address(unroll_block_reg, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
2454   __ z_lg(frame_sizes_reg,
2455           Address(unroll_block_reg, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()));
2456 
2457   // stack: (caller_of_deoptee, ...).
2458 
2459   // If caller_of_deoptee is a compiled frame, then we extend it to make
2460   // room for the callee's locals and the frame::z_parent_ijava_frame_abi.
2461   // See also Deoptimization::last_frame_adjust() above.
2462   // Note: entry and interpreted frames are adjusted, too. But this doesn't harm.
2463 
2464   __ z_lgf(Z_R1_scratch,
2465            Address(unroll_block_reg, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()));
2466   __ z_lgr(tmp1, Z_SP);  // Save the sender sp before extending the frame.
2467   __ resize_frame_sub(Z_R1_scratch, tmp2/*tmp*/);
2468   // The oldest skeletal frame requires a valid sender_sp to make it walkable
2469   // (it is required to find the original pc of caller_of_deoptee if it is marked
2470   // for deoptimization - see nmethod::orig_pc_addr()).
2471   __ z_stg(tmp1, _z_ijava_state_neg(sender_sp), Z_SP);
2472 
2473   // Now push the new interpreter frames.
2474   Label loop, loop_entry;
2475 
2476   // Make sure that there is at least one entry in the array.
2477   DEBUG_ONLY(__ z_ltgr(number_of_frames_reg, number_of_frames_reg));
2478   __ asm_assert_ne("array_size must be > 0", 0x205);
2479 
2480   __ z_bru(loop_entry);
2481 
2482   __ bind(loop);
2483 
2484   __ add2reg(frame_sizes_reg, wordSize);
2485   __ add2reg(pcs_reg, wordSize);
2486 
2487   __ bind(loop_entry);
2488 
2489   // Allocate a new frame, fill in the pc.
2490   push_skeleton_frame(masm, frame_sizes_reg, pcs_reg, tmp1, tmp2);
2491 
2492   __ z_aghi(number_of_frames_reg, -1);  // Emit AGHI, because it sets the condition code
2493   __ z_brne(loop);
2494 
2495   // Set the top frame's return pc.
2496   __ add2reg(pcs_reg, wordSize);
2497   __ z_lg(Z_R0_scratch, 0, pcs_reg);
2498   __ z_stg(Z_R0_scratch, _z_abi(return_pc), Z_SP);
2499   BLOCK_COMMENT("} push_skeleton_frames");
2500 }
2501 
2502 //------------------------------generate_deopt_blob----------------------------
2503 void SharedRuntime::generate_deopt_blob() {
2504   // Allocate space for the code.
2505   ResourceMark rm;
2506   // Setup code generation tools.
2507   CodeBuffer buffer("deopt_blob", 2048, 1024);
2508   InterpreterMacroAssembler* masm = new InterpreterMacroAssembler(&buffer);
2509   Label exec_mode_initialized;
2510   OopMap* map = NULL;
2511   OopMapSet *oop_maps = new OopMapSet();
2512 
2513   unsigned int start_off = __ offset();
2514   Label cont;
2515 
2516   // --------------------------------------------------------------------------
2517   // Normal entry (non-exception case)
2518   //
2519   // We have been called from the deopt handler of the deoptee.
2520   // Z_R14 points behind the call in the deopt handler. We adjust
2521   // it such that it points to the start of the deopt handler.
2522   // The return_pc has been stored in the frame of the deoptee and
2523   // will replace the address of the deopt_handler in the call
2524   // to Deoptimization::fetch_unroll_info below.
2525   // The (int) cast is necessary, because -((unsigned int)14)
2526   // is an unsigned int.
2527   __ add2reg(Z_R14, -(int)NativeCall::max_instruction_size());
2528 
2529   const Register   exec_mode_reg = Z_tmp_1;
2530 
2531   // stack: (deoptee, caller of deoptee, ...)
2532 
2533   // pushes an "unpack" frame
2534   // R14 contains the return address pointing into the deoptimized
2535   // nmethod that was valid just before the nmethod was deoptimized.
2536   // save R14 into the deoptee frame.  the `fetch_unroll_info'
2537   // procedure called below will read it from there.
2538   map = RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers);
2539 
2540   // note the entry point.
2541   __ load_const_optimized(exec_mode_reg, Deoptimization::Unpack_deopt);
2542   __ z_bru(exec_mode_initialized);
2543 
2544 #ifndef COMPILER1
2545   int reexecute_offset = 1; // odd offset will produce odd pc, which triggers an hardware trap
2546 #else
2547   // --------------------------------------------------------------------------
2548   // Reexecute entry
2549   // - Z_R14 = Deopt Handler in nmethod
2550 
2551   int reexecute_offset = __ offset() - start_off;
2552 
2553   // No need to update map as each call to save_live_registers will produce identical oopmap
2554   (void) RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers);
2555 
2556   __ load_const_optimized(exec_mode_reg, Deoptimization::Unpack_reexecute);
2557   __ z_bru(exec_mode_initialized);
2558 #endif
2559 
2560 
2561   // --------------------------------------------------------------------------
2562   // Exception entry. We reached here via a branch. Registers on entry:
2563   // - Z_EXC_OOP (Z_ARG1) = exception oop
2564   // - Z_EXC_PC  (Z_ARG2) = the exception pc.
2565 
2566   int exception_offset = __ offset() - start_off;
2567 
2568   // all registers are dead at this entry point, except for Z_EXC_OOP, and
2569   // Z_EXC_PC which contain the exception oop and exception pc
2570   // respectively.  Set them in TLS and fall thru to the
2571   // unpack_with_exception_in_tls entry point.
2572 
2573   // Store exception oop and pc in thread (location known to GC).
2574   // Need this since the call to "fetch_unroll_info()" may safepoint.
2575   __ z_stg(Z_EXC_OOP, Address(Z_thread, JavaThread::exception_oop_offset()));
2576   __ z_stg(Z_EXC_PC,  Address(Z_thread, JavaThread::exception_pc_offset()));
2577 
2578   // fall through
2579 
2580   int exception_in_tls_offset = __ offset() - start_off;
2581 
2582   // new implementation because exception oop is now passed in JavaThread
2583 
2584   // Prolog for exception case
2585   // All registers must be preserved because they might be used by LinearScan
2586   // Exceptiop oop and throwing PC are passed in JavaThread
2587 
2588   // load throwing pc from JavaThread and us it as the return address of the current frame.
2589   __ z_lg(Z_R1_scratch, Address(Z_thread, JavaThread::exception_pc_offset()));
2590 
2591   // Save everything in sight.
2592   (void) RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers, Z_R1_scratch);
2593 
2594   // Now it is safe to overwrite any register
2595 
2596   // Clear the exception pc field in JavaThread
2597   __ clear_mem(Address(Z_thread, JavaThread::exception_pc_offset()), 8);
2598 
2599   // Deopt during an exception.  Save exec mode for unpack_frames.
2600   __ load_const_optimized(exec_mode_reg, Deoptimization::Unpack_exception);
2601 
2602 
2603 #ifdef ASSERT
2604   // verify that there is really an exception oop in JavaThread
2605   __ z_lg(Z_ARG1, Address(Z_thread, JavaThread::exception_oop_offset()));
2606   __ MacroAssembler::verify_oop(Z_ARG1, FILE_AND_LINE);
2607 
2608   // verify that there is no pending exception
2609   __ asm_assert_mem8_is_zero(in_bytes(Thread::pending_exception_offset()), Z_thread,
2610                              "must not have pending exception here", __LINE__);
2611 #endif
2612 
2613   // --------------------------------------------------------------------------
2614   // At this point, the live registers are saved and
2615   // the exec_mode_reg has been set up correctly.
2616   __ bind(exec_mode_initialized);
2617 
2618   // stack: ("unpack" frame, deoptee, caller_of_deoptee, ...).
2619 
2620   {
2621   const Register unroll_block_reg  = Z_tmp_2;
2622 
2623   // we need to set `last_Java_frame' because `fetch_unroll_info' will
2624   // call `last_Java_frame()'.  however we can't block and no gc will
2625   // occur so we don't need an oopmap. the value of the pc in the
2626   // frame is not particularly important.  it just needs to identify the blob.
2627 
2628   // Don't set last_Java_pc anymore here (is implicitly NULL then).
2629   // the correct PC is retrieved in pd_last_frame() in that case.
2630   __ set_last_Java_frame(/*sp*/Z_SP, noreg);
2631   // With EscapeAnalysis turned on, this call may safepoint
2632   // despite it's marked as "leaf call"!
2633   __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), Z_thread, exec_mode_reg);
2634   // Set an oopmap for the call site this describes all our saved volatile registers
2635   int offs = __ offset();
2636   oop_maps->add_gc_map(offs, map);
2637 
2638   __ reset_last_Java_frame();
2639   // save the return value.
2640   __ z_lgr(unroll_block_reg, Z_RET);
2641   // restore the return registers that have been saved
2642   // (among other registers) by save_live_registers(...).
2643   RegisterSaver::restore_result_registers(masm);
2644 
2645   // reload the exec mode from the UnrollBlock (it might have changed)
2646   __ z_llgf(exec_mode_reg, Address(unroll_block_reg, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));
2647 
2648   // In excp_deopt_mode, restore and clear exception oop which we
2649   // stored in the thread during exception entry above. The exception
2650   // oop will be the return value of this stub.
2651   NearLabel skip_restore_excp;
2652   __ compare64_and_branch(exec_mode_reg, Deoptimization::Unpack_exception, Assembler::bcondNotEqual, skip_restore_excp);
2653   __ z_lg(Z_RET, thread_(exception_oop));
2654   __ clear_mem(thread_(exception_oop), 8);
2655   __ bind(skip_restore_excp);
2656 
2657   // remove the "unpack" frame
2658   __ pop_frame();
2659 
2660   // stack: (deoptee, caller of deoptee, ...).
2661 
2662   // pop the deoptee's frame
2663   __ pop_frame();
2664 
2665   // stack: (caller_of_deoptee, ...).
2666 
2667   // loop through the `UnrollBlock' info and create interpreter frames.
2668   push_skeleton_frames(masm, true/*deopt*/,
2669                   unroll_block_reg,
2670                   Z_tmp_3,
2671                   Z_tmp_4,
2672                   Z_ARG5,
2673                   Z_ARG4,
2674                   Z_ARG3);
2675 
2676   // stack: (skeletal interpreter frame, ..., optional skeletal
2677   // interpreter frame, caller of deoptee, ...).
2678   }
2679 
2680   // push an "unpack" frame taking care of float / int return values.
2681   __ push_frame(RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers));
2682 
2683   // stack: (unpack frame, skeletal interpreter frame, ..., optional
2684   // skeletal interpreter frame, caller of deoptee, ...).
2685 
2686   // spill live volatile registers since we'll do a call.
2687   __ z_stg(Z_RET, offset_of(frame::z_abi_160_spill, spill[0]), Z_SP);
2688   __ z_std(Z_FRET, offset_of(frame::z_abi_160_spill, spill[1]), Z_SP);
2689 
2690   // let the unpacker layout information in the skeletal frames just allocated.
2691   __ get_PC(Z_RET);
2692   __ set_last_Java_frame(/*sp*/Z_SP, /*pc*/Z_RET);
2693   __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames),
2694                   Z_thread/*thread*/, exec_mode_reg/*exec_mode*/);
2695 
2696   __ reset_last_Java_frame();
2697 
2698   // restore the volatiles saved above.
2699   __ z_lg(Z_RET, offset_of(frame::z_abi_160_spill, spill[0]), Z_SP);
2700   __ z_ld(Z_FRET, offset_of(frame::z_abi_160_spill, spill[1]), Z_SP);
2701 
2702   // pop the "unpack" frame.
2703   __ pop_frame();
2704   __ restore_return_pc();
2705 
2706   // stack: (top interpreter frame, ..., optional interpreter frame,
2707   // caller of deoptee, ...).
2708 
2709   __ z_lg(Z_fp, _z_abi(callers_sp), Z_SP); // restore frame pointer
2710   __ restore_bcp();
2711   __ restore_locals();
2712   __ restore_esp();
2713 
2714   // return to the interpreter entry point.
2715   __ z_br(Z_R14);
2716 
2717   // Make sure all code is generated
2718   masm->flush();
2719 
2720   _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers)/wordSize);
2721   _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
2722 }
2723 
2724 
2725 #ifdef COMPILER2
2726 //------------------------------generate_uncommon_trap_blob--------------------
2727 void SharedRuntime::generate_uncommon_trap_blob() {
2728   // Allocate space for the code
2729   ResourceMark rm;
2730   // Setup code generation tools
2731   CodeBuffer buffer("uncommon_trap_blob", 2048, 1024);
2732   InterpreterMacroAssembler* masm = new InterpreterMacroAssembler(&buffer);
2733 
2734   Register unroll_block_reg = Z_tmp_1;
2735   Register klass_index_reg  = Z_ARG2;
2736   Register unc_trap_reg     = Z_ARG2;
2737 
2738   // stack: (deoptee, caller_of_deoptee, ...).
2739 
2740   // push a dummy "unpack" frame and call
2741   // `Deoptimization::uncommon_trap' to pack the compiled frame into a
2742   // vframe array and return the `UnrollBlock' information.
2743 
2744   // save R14 to compiled frame.
2745   __ save_return_pc();
2746   // push the "unpack_frame".
2747   __ push_frame_abi160(0);
2748 
2749   // stack: (unpack frame, deoptee, caller_of_deoptee, ...).
2750 
2751   // set the "unpack" frame as last_Java_frame.
2752   // `Deoptimization::uncommon_trap' expects it and considers its
2753   // sender frame as the deoptee frame.
2754   __ get_PC(Z_R1_scratch);
2755   __ set_last_Java_frame(/*sp*/Z_SP, /*pc*/Z_R1_scratch);
2756 
2757   __ z_lgr(klass_index_reg, Z_ARG1);  // passed implicitly as ARG2
2758   __ z_lghi(Z_ARG3, Deoptimization::Unpack_uncommon_trap);  // passed implicitly as ARG3
2759   BLOCK_COMMENT("call Deoptimization::uncommon_trap()");
2760   __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap), Z_thread);
2761 
2762   __ reset_last_Java_frame();
2763 
2764   // pop the "unpack" frame
2765   __ pop_frame();
2766 
2767   // stack: (deoptee, caller_of_deoptee, ...).
2768 
2769   // save the return value.
2770   __ z_lgr(unroll_block_reg, Z_RET);
2771 
2772   // pop the deoptee frame.
2773   __ pop_frame();
2774 
2775   // stack: (caller_of_deoptee, ...).
2776 
2777 #ifdef ASSERT
2778   assert(Immediate::is_uimm8(Deoptimization::Unpack_LIMIT), "Code not fit for larger immediates");
2779   assert(Immediate::is_uimm8(Deoptimization::Unpack_uncommon_trap), "Code not fit for larger immediates");
2780   const int unpack_kind_byte_offset = Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()
2781 #ifndef VM_LITTLE_ENDIAN
2782   + 3
2783 #endif
2784   ;
2785   if (Displacement::is_shortDisp(unpack_kind_byte_offset)) {
2786     __ z_cli(unpack_kind_byte_offset, unroll_block_reg, Deoptimization::Unpack_uncommon_trap);
2787   } else {
2788     __ z_cliy(unpack_kind_byte_offset, unroll_block_reg, Deoptimization::Unpack_uncommon_trap);
2789   }
2790   __ asm_assert_eq("SharedRuntime::generate_deopt_blob: expected Unpack_uncommon_trap", 0);
2791 #endif
2792 
2793   __ zap_from_to(Z_SP, Z_SP, Z_R0_scratch, Z_R1, 500, -1);
2794 
2795   // allocate new interpreter frame(s) and possibly resize the caller's frame
2796   // (no more adapters !)
2797   push_skeleton_frames(masm, false/*deopt*/,
2798                   unroll_block_reg,
2799                   Z_tmp_2,
2800                   Z_tmp_3,
2801                   Z_tmp_4,
2802                   Z_ARG5,
2803                   Z_ARG4);
2804 
2805   // stack: (skeletal interpreter frame, ..., optional skeletal
2806   // interpreter frame, (resized) caller of deoptee, ...).
2807 
2808   // push a dummy "unpack" frame taking care of float return values.
2809   // call `Deoptimization::unpack_frames' to layout information in the
2810   // interpreter frames just created
2811 
2812   // push the "unpack" frame
2813    const unsigned int framesize_in_bytes = __ push_frame_abi160(0);
2814 
2815   // stack: (unpack frame, skeletal interpreter frame, ..., optional
2816   // skeletal interpreter frame, (resized) caller of deoptee, ...).
2817 
2818   // set the "unpack" frame as last_Java_frame
2819   __ get_PC(Z_R1_scratch);
2820   __ set_last_Java_frame(/*sp*/Z_SP, /*pc*/Z_R1_scratch);
2821 
2822   // indicate it is the uncommon trap case
2823   BLOCK_COMMENT("call Deoptimization::Unpack_uncommon_trap()");
2824   __ load_const_optimized(unc_trap_reg, Deoptimization::Unpack_uncommon_trap);
2825   // let the unpacker layout information in the skeletal frames just allocated.
2826   __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), Z_thread);
2827 
2828   __ reset_last_Java_frame();
2829   // pop the "unpack" frame
2830   __ pop_frame();
2831   // restore LR from top interpreter frame
2832   __ restore_return_pc();
2833 
2834   // stack: (top interpreter frame, ..., optional interpreter frame,
2835   // (resized) caller of deoptee, ...).
2836 
2837   __ z_lg(Z_fp, _z_abi(callers_sp), Z_SP); // restore frame pointer
2838   __ restore_bcp();
2839   __ restore_locals();
2840   __ restore_esp();
2841 
2842   // return to the interpreter entry point
2843   __ z_br(Z_R14);
2844 
2845   masm->flush();
2846   _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, NULL, framesize_in_bytes/wordSize);
2847 }
2848 #endif // COMPILER2
2849 
2850 
2851 //------------------------------generate_handler_blob------
2852 //
2853 // Generate a special Compile2Runtime blob that saves all registers,
2854 // and setup oopmap.
2855 SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) {
2856   assert(StubRoutines::forward_exception_entry() != NULL,
2857          "must be generated before");
2858 
2859   ResourceMark rm;
2860   OopMapSet *oop_maps = new OopMapSet();
2861   OopMap* map;
2862 
2863   // Allocate space for the code. Setup code generation tools.
2864   CodeBuffer buffer("handler_blob", 2048, 1024);
2865   MacroAssembler* masm = new MacroAssembler(&buffer);
2866 
2867   unsigned int start_off = __ offset();
2868   address call_pc = NULL;
2869   int frame_size_in_bytes;
2870 
2871   bool cause_return = (poll_type == POLL_AT_RETURN);
2872   // Make room for return address (or push it again)
2873   if (!cause_return) {
2874     __ z_lg(Z_R14, Address(Z_thread, JavaThread::saved_exception_pc_offset()));
2875   }
2876 
2877   // Save registers, fpu state, and flags
2878   map = RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers);
2879 
2880   if (!cause_return) {
2881     // Keep a copy of the return pc to detect if it gets modified.
2882     __ z_lgr(Z_R6, Z_R14);
2883   }
2884 
2885   // The following is basically a call_VM. However, we need the precise
2886   // address of the call in order to generate an oopmap. Hence, we do all the
2887   // work ourselves.
2888   __ set_last_Java_frame(Z_SP, noreg);
2889 
2890   // call into the runtime to handle the safepoint poll
2891   __ call_VM_leaf(call_ptr, Z_thread);
2892 
2893 
2894   // Set an oopmap for the call site. This oopmap will map all
2895   // oop-registers and debug-info registers as callee-saved. This
2896   // will allow deoptimization at this safepoint to find all possible
2897   // debug-info recordings, as well as let GC find all oops.
2898 
2899   oop_maps->add_gc_map((int)(__ offset()-start_off), map);
2900 
2901   Label noException;
2902 
2903   __ reset_last_Java_frame();
2904 
2905   __ load_and_test_long(Z_R1, thread_(pending_exception));
2906   __ z_bre(noException);
2907 
2908   // Pending exception case, used (sporadically) by
2909   // api/java_lang/Thread.State/index#ThreadState et al.
2910   RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers);
2911 
2912   // Jump to forward_exception_entry, with the issuing PC in Z_R14
2913   // so it looks like the original nmethod called forward_exception_entry.
2914   __ load_const_optimized(Z_R1_scratch, StubRoutines::forward_exception_entry());
2915   __ z_br(Z_R1_scratch);
2916 
2917   // No exception case
2918   __ bind(noException);
2919 
2920   if (!cause_return) {
2921     Label no_adjust;
2922      // If our stashed return pc was modified by the runtime we avoid touching it
2923     const int offset_of_return_pc = _z_abi16(return_pc) + RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers);
2924     __ z_cg(Z_R6, offset_of_return_pc, Z_SP);
2925     __ z_brne(no_adjust);
2926 
2927     // Adjust return pc forward to step over the safepoint poll instruction
2928     __ instr_size(Z_R1_scratch, Z_R6);
2929     __ z_agr(Z_R6, Z_R1_scratch);
2930     __ z_stg(Z_R6, offset_of_return_pc, Z_SP);
2931 
2932     __ bind(no_adjust);
2933   }
2934 
2935   // Normal exit, restore registers and exit.
2936   RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers);
2937 
2938   __ z_br(Z_R14);
2939 
2940   // Make sure all code is generated
2941   masm->flush();
2942 
2943   // Fill-out other meta info
2944   return SafepointBlob::create(&buffer, oop_maps, RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers)/wordSize);
2945 }
2946 
2947 
2948 //
2949 // generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss
2950 //
2951 // Generate a stub that calls into vm to find out the proper destination
2952 // of a Java call. All the argument registers are live at this point
2953 // but since this is generic code we don't know what they are and the caller
2954 // must do any gc of the args.
2955 //
2956 RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) {
2957   assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before");
2958 
2959   // allocate space for the code
2960   ResourceMark rm;
2961 
2962   CodeBuffer buffer(name, 1000, 512);
2963   MacroAssembler* masm                = new MacroAssembler(&buffer);
2964 
2965   OopMapSet *oop_maps = new OopMapSet();
2966   OopMap* map = NULL;
2967 
2968   unsigned int start_off = __ offset();
2969 
2970   map = RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers);
2971 
2972   // We must save a PC from within the stub as return PC
2973   // C code doesn't store the LR where we expect the PC,
2974   // so we would run into trouble upon stack walking.
2975   __ get_PC(Z_R1_scratch);
2976 
2977   unsigned int frame_complete = __ offset();
2978 
2979   __ set_last_Java_frame(/*sp*/Z_SP, Z_R1_scratch);
2980 
2981   __ call_VM_leaf(destination, Z_thread, Z_method);
2982 
2983 
2984   // Set an oopmap for the call site.
2985   // We need this not only for callee-saved registers, but also for volatile
2986   // registers that the compiler might be keeping live across a safepoint.
2987 
2988   oop_maps->add_gc_map((int)(frame_complete-start_off), map);
2989 
2990   // clear last_Java_sp
2991   __ reset_last_Java_frame();
2992 
2993   // check for pending exceptions
2994   Label pending;
2995   __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset()));
2996   __ z_brne(pending);
2997 
2998   __ z_lgr(Z_R1_scratch, Z_R2); // r1 is neither saved nor restored, r2 contains the continuation.
2999   RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers);
3000 
3001   // get the returned method
3002   __ get_vm_result_2(Z_method);
3003 
3004   // We are back to the original state on entry and ready to go.
3005   __ z_br(Z_R1_scratch);
3006 
3007   // Pending exception after the safepoint
3008 
3009   __ bind(pending);
3010 
3011   RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers);
3012 
3013   // exception pending => remove activation and forward to exception handler
3014 
3015   __ z_lgr(Z_R2, Z_R0); // pending_exception
3016   __ clear_mem(Address(Z_thread, JavaThread::vm_result_offset()), sizeof(jlong));
3017   __ load_const_optimized(Z_R1_scratch, StubRoutines::forward_exception_entry());
3018   __ z_br(Z_R1_scratch);
3019 
3020   // -------------
3021   // make sure all code is generated
3022   masm->flush();
3023 
3024   // return the blob
3025   // frame_size_words or bytes??
3026   return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers)/wordSize,
3027                                        oop_maps, true);
3028 
3029 }
3030 
3031 //------------------------------Montgomery multiplication------------------------
3032 //
3033 
3034 // Subtract 0:b from carry:a. Return carry.
3035 static unsigned long
3036 sub(unsigned long a[], unsigned long b[], unsigned long carry, long len) {
3037   unsigned long i, c = 8 * (unsigned long)(len - 1);
3038   __asm__ __volatile__ (
3039     "SLGR   %[i], %[i]         \n" // initialize to 0 and pre-set carry
3040     "LGHI   0, 8               \n" // index increment (for BRXLG)
3041     "LGR    1, %[c]            \n" // index limit (for BRXLG)
3042     "0:                        \n"
3043     "LG     %[c], 0(%[i],%[a]) \n"
3044     "SLBG   %[c], 0(%[i],%[b]) \n" // subtract with borrow
3045     "STG    %[c], 0(%[i],%[a]) \n"
3046     "BRXLG  %[i], 0, 0b        \n" // while ((i+=8)<limit);
3047     "SLBGR  %[c], %[c]         \n" // save carry - 1
3048     : [i]"=&a"(i), [c]"+r"(c)
3049     : [a]"a"(a), [b]"a"(b)
3050     : "cc", "memory", "r0", "r1"
3051  );
3052   return carry + c;
3053 }
3054 
3055 // Multiply (unsigned) Long A by Long B, accumulating the double-
3056 // length result into the accumulator formed of T0, T1, and T2.
3057 inline void MACC(unsigned long A[], long A_ind,
3058                  unsigned long B[], long B_ind,
3059                  unsigned long &T0, unsigned long &T1, unsigned long &T2) {
3060   long A_si = 8 * A_ind,
3061        B_si = 8 * B_ind;
3062   __asm__ __volatile__ (
3063     "LG     1, 0(%[A_si],%[A]) \n"
3064     "MLG    0, 0(%[B_si],%[B]) \n" // r0r1 = A * B
3065     "ALGR   %[T0], 1           \n"
3066     "LGHI   1, 0               \n" // r1 = 0
3067     "ALCGR  %[T1], 0           \n"
3068     "ALCGR  %[T2], 1           \n"
3069     : [T0]"+r"(T0), [T1]"+r"(T1), [T2]"+r"(T2)
3070     : [A]"r"(A), [A_si]"r"(A_si), [B]"r"(B), [B_si]"r"(B_si)
3071     : "cc", "r0", "r1"
3072  );
3073 }
3074 
3075 // As above, but add twice the double-length result into the
3076 // accumulator.
3077 inline void MACC2(unsigned long A[], long A_ind,
3078                   unsigned long B[], long B_ind,
3079                   unsigned long &T0, unsigned long &T1, unsigned long &T2) {
3080   const unsigned long zero = 0;
3081   long A_si = 8 * A_ind,
3082        B_si = 8 * B_ind;
3083   __asm__ __volatile__ (
3084     "LG     1, 0(%[A_si],%[A]) \n"
3085     "MLG    0, 0(%[B_si],%[B]) \n" // r0r1 = A * B
3086     "ALGR   %[T0], 1           \n"
3087     "ALCGR  %[T1], 0           \n"
3088     "ALCGR  %[T2], %[zero]     \n"
3089     "ALGR   %[T0], 1           \n"
3090     "ALCGR  %[T1], 0           \n"
3091     "ALCGR  %[T2], %[zero]     \n"
3092     : [T0]"+r"(T0), [T1]"+r"(T1), [T2]"+r"(T2)
3093     : [A]"r"(A), [A_si]"r"(A_si), [B]"r"(B), [B_si]"r"(B_si), [zero]"r"(zero)
3094     : "cc", "r0", "r1"
3095  );
3096 }
3097 
3098 // Fast Montgomery multiplication. The derivation of the algorithm is
3099 // in "A Cryptographic Library for the Motorola DSP56000,
3100 // Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237".
3101 static void
3102 montgomery_multiply(unsigned long a[], unsigned long b[], unsigned long n[],
3103                     unsigned long m[], unsigned long inv, int len) {
3104   unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
3105   int i;
3106 
3107   assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");
3108 
3109   for (i = 0; i < len; i++) {
3110     int j;
3111     for (j = 0; j < i; j++) {
3112       MACC(a, j, b, i-j, t0, t1, t2);
3113       MACC(m, j, n, i-j, t0, t1, t2);
3114     }
3115     MACC(a, i, b, 0, t0, t1, t2);
3116     m[i] = t0 * inv;
3117     MACC(m, i, n, 0, t0, t1, t2);
3118 
3119     assert(t0 == 0, "broken Montgomery multiply");
3120 
3121     t0 = t1; t1 = t2; t2 = 0;
3122   }
3123 
3124   for (i = len; i < 2 * len; i++) {
3125     int j;
3126     for (j = i - len + 1; j < len; j++) {
3127       MACC(a, j, b, i-j, t0, t1, t2);
3128       MACC(m, j, n, i-j, t0, t1, t2);
3129     }
3130     m[i-len] = t0;
3131     t0 = t1; t1 = t2; t2 = 0;
3132   }
3133 
3134   while (t0) {
3135     t0 = sub(m, n, t0, len);
3136   }
3137 }
3138 
3139 // Fast Montgomery squaring. This uses asymptotically 25% fewer
3140 // multiplies so it should be up to 25% faster than Montgomery
3141 // multiplication. However, its loop control is more complex and it
3142 // may actually run slower on some machines.
3143 static void
3144 montgomery_square(unsigned long a[], unsigned long n[],
3145                   unsigned long m[], unsigned long inv, int len) {
3146   unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
3147   int i;
3148 
3149   assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");
3150 
3151   for (i = 0; i < len; i++) {
3152     int j;
3153     int end = (i+1)/2;
3154     for (j = 0; j < end; j++) {
3155       MACC2(a, j, a, i-j, t0, t1, t2);
3156       MACC(m, j, n, i-j, t0, t1, t2);
3157     }
3158     if ((i & 1) == 0) {
3159       MACC(a, j, a, j, t0, t1, t2);
3160     }
3161     for (; j < i; j++) {
3162       MACC(m, j, n, i-j, t0, t1, t2);
3163     }
3164     m[i] = t0 * inv;
3165     MACC(m, i, n, 0, t0, t1, t2);
3166 
3167     assert(t0 == 0, "broken Montgomery square");
3168 
3169     t0 = t1; t1 = t2; t2 = 0;
3170   }
3171 
3172   for (i = len; i < 2*len; i++) {
3173     int start = i-len+1;
3174     int end = start + (len - start)/2;
3175     int j;
3176     for (j = start; j < end; j++) {
3177       MACC2(a, j, a, i-j, t0, t1, t2);
3178       MACC(m, j, n, i-j, t0, t1, t2);
3179     }
3180     if ((i & 1) == 0) {
3181       MACC(a, j, a, j, t0, t1, t2);
3182     }
3183     for (; j < len; j++) {
3184       MACC(m, j, n, i-j, t0, t1, t2);
3185     }
3186     m[i-len] = t0;
3187     t0 = t1; t1 = t2; t2 = 0;
3188   }
3189 
3190   while (t0) {
3191     t0 = sub(m, n, t0, len);
3192   }
3193 }
3194 
3195 // The threshold at which squaring is advantageous was determined
3196 // experimentally on an i7-3930K (Ivy Bridge) CPU @ 3.5GHz.
3197 // Value seems to be ok for other platforms, too.
3198 #define MONTGOMERY_SQUARING_THRESHOLD 64
3199 
3200 // Copy len longwords from s to d, word-swapping as we go. The
3201 // destination array is reversed.
3202 static void reverse_words(unsigned long *s, unsigned long *d, int len) {
3203   d += len;
3204   while(len-- > 0) {
3205     d--;
3206     unsigned long s_val = *s;
3207     // Swap words in a longword on little endian machines.
3208 #ifdef VM_LITTLE_ENDIAN
3209      Unimplemented();
3210 #endif
3211     *d = s_val;
3212     s++;
3213   }
3214 }
3215 
3216 void SharedRuntime::montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints,
3217                                         jint len, jlong inv,
3218                                         jint *m_ints) {
3219   len = len & 0x7fffFFFF; // C2 does not respect int to long conversion for stub calls.
3220   assert(len % 2 == 0, "array length in montgomery_multiply must be even");
3221   int longwords = len/2;
3222 
3223   // Make very sure we don't use so much space that the stack might
3224   // overflow. 512 jints corresponds to an 16384-bit integer and
3225   // will use here a total of 8k bytes of stack space.
3226   int divisor = sizeof(unsigned long) * 4;
3227   guarantee(longwords <= 8192 / divisor, "must be");
3228   int total_allocation = longwords * sizeof (unsigned long) * 4;
3229   unsigned long *scratch = (unsigned long *)alloca(total_allocation);
3230 
3231   // Local scratch arrays
3232   unsigned long
3233     *a = scratch + 0 * longwords,
3234     *b = scratch + 1 * longwords,
3235     *n = scratch + 2 * longwords,
3236     *m = scratch + 3 * longwords;
3237 
3238   reverse_words((unsigned long *)a_ints, a, longwords);
3239   reverse_words((unsigned long *)b_ints, b, longwords);
3240   reverse_words((unsigned long *)n_ints, n, longwords);
3241 
3242   ::montgomery_multiply(a, b, n, m, (unsigned long)inv, longwords);
3243 
3244   reverse_words(m, (unsigned long *)m_ints, longwords);
3245 }
3246 
3247 void SharedRuntime::montgomery_square(jint *a_ints, jint *n_ints,
3248                                       jint len, jlong inv,
3249                                       jint *m_ints) {
3250   len = len & 0x7fffFFFF; // C2 does not respect int to long conversion for stub calls.
3251   assert(len % 2 == 0, "array length in montgomery_square must be even");
3252   int longwords = len/2;
3253 
3254   // Make very sure we don't use so much space that the stack might
3255   // overflow. 512 jints corresponds to an 16384-bit integer and
3256   // will use here a total of 6k bytes of stack space.
3257   int divisor = sizeof(unsigned long) * 3;
3258   guarantee(longwords <= (8192 / divisor), "must be");
3259   int total_allocation = longwords * sizeof (unsigned long) * 3;
3260   unsigned long *scratch = (unsigned long *)alloca(total_allocation);
3261 
3262   // Local scratch arrays
3263   unsigned long
3264     *a = scratch + 0 * longwords,
3265     *n = scratch + 1 * longwords,
3266     *m = scratch + 2 * longwords;
3267 
3268   reverse_words((unsigned long *)a_ints, a, longwords);
3269   reverse_words((unsigned long *)n_ints, n, longwords);
3270 
3271   if (len >= MONTGOMERY_SQUARING_THRESHOLD) {
3272     ::montgomery_square(a, n, m, (unsigned long)inv, longwords);
3273   } else {
3274     ::montgomery_multiply(a, a, n, m, (unsigned long)inv, longwords);
3275   }
3276 
3277   reverse_words(m, (unsigned long *)m_ints, longwords);
3278 }
3279 
3280 extern "C"
3281 int SpinPause() {
3282   return 0;
3283 }