/*
 * Copyright (c) 2016, 2026, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2016, 2024 SAP SE. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "asm/macroAssembler.inline.hpp"
#include "code/debugInfoRec.hpp"
#include "code/vtableStubs.hpp"
#include "code/compiledIC.hpp"
#include "compiler/oopMap.hpp"
#include "gc/shared/barrierSetAssembler.hpp"
#include "gc/shared/gcLocker.hpp"
#include "interpreter/interpreter.hpp"
#include "interpreter/interp_masm.hpp"
#include "memory/resourceArea.hpp"
#include "nativeInst_s390.hpp"
#include "oops/klass.inline.hpp"
#include "prims/methodHandles.hpp"
#include "registerSaver_s390.hpp"
#include "runtime/jniHandles.hpp"
#include "runtime/safepointMechanism.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/signature.hpp"
#include "runtime/stubRoutines.hpp"
#include "runtime/timerTrace.hpp"
#include "runtime/vframeArray.hpp"
#include "utilities/align.hpp"
#include "utilities/macros.hpp"
#include "vmreg_s390.inline.hpp"
#ifdef COMPILER1
#include "c1/c1_Runtime1.hpp"
#endif
#ifdef COMPILER2
#include "opto/ad.hpp"
#include "opto/runtime.hpp"
#endif

#ifdef PRODUCT
#define __ masm->
#else
#define __ (Verbose ? (masm->block_comment(FILE_AND_LINE),masm):masm)->
#endif

#define BLOCK_COMMENT(str) __ block_comment(str)
#define BIND(label)        bind(label); BLOCK_COMMENT(#label ":")

#define RegisterSaver_LiveIntReg(regname) \
  { RegisterSaver::int_reg,   regname->encoding(), regname->as_VMReg() }

#define RegisterSaver_LiveFloatReg(regname) \
  { RegisterSaver::float_reg, regname->encoding(), regname->as_VMReg() }

// Registers which are not saved/restored, but still they have got a frame slot.
// Used to get same frame size for RegisterSaver_LiveRegs and RegisterSaver_LiveRegsWithoutR2
#define RegisterSaver_ExcludedIntReg(regname) \
  { RegisterSaver::excluded_reg, regname->encoding(), regname->as_VMReg() }

// Registers which are not saved/restored, but still they have got a frame slot.
// Used to get same frame size for RegisterSaver_LiveRegs and RegisterSaver_LiveRegsWithoutR2.
#define RegisterSaver_ExcludedFloatReg(regname) \
  { RegisterSaver::excluded_reg, regname->encoding(), regname->as_VMReg() }

#define RegisterSaver_LiveVReg(regname) \
  { RegisterSaver::v_reg,      regname->encoding(), regname->as_VMReg() }

static const RegisterSaver::LiveRegType RegisterSaver_LiveRegs[] = {
  // Live registers which get spilled to the stack. Register positions
  // in this array correspond directly to the stack layout.
  //
  // live float registers:
  //
  RegisterSaver_LiveFloatReg(Z_F0 ),
  // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1)
  RegisterSaver_LiveFloatReg(Z_F2 ),
  RegisterSaver_LiveFloatReg(Z_F3 ),
  RegisterSaver_LiveFloatReg(Z_F4 ),
  RegisterSaver_LiveFloatReg(Z_F5 ),
  RegisterSaver_LiveFloatReg(Z_F6 ),
  RegisterSaver_LiveFloatReg(Z_F7 ),
  RegisterSaver_LiveFloatReg(Z_F8 ),
  RegisterSaver_LiveFloatReg(Z_F9 ),
  RegisterSaver_LiveFloatReg(Z_F10),
  RegisterSaver_LiveFloatReg(Z_F11),
  RegisterSaver_LiveFloatReg(Z_F12),
  RegisterSaver_LiveFloatReg(Z_F13),
  RegisterSaver_LiveFloatReg(Z_F14),
  RegisterSaver_LiveFloatReg(Z_F15),
  //
  // RegisterSaver_ExcludedIntReg(Z_R0), // scratch
  // RegisterSaver_ExcludedIntReg(Z_R1), // scratch
  RegisterSaver_LiveIntReg(Z_R2 ),
  RegisterSaver_LiveIntReg(Z_R3 ),
  RegisterSaver_LiveIntReg(Z_R4 ),
  RegisterSaver_LiveIntReg(Z_R5 ),
  RegisterSaver_LiveIntReg(Z_R6 ),
  RegisterSaver_LiveIntReg(Z_R7 ),
  RegisterSaver_LiveIntReg(Z_R8 ),
  RegisterSaver_LiveIntReg(Z_R9 ),
  RegisterSaver_LiveIntReg(Z_R10),
  RegisterSaver_LiveIntReg(Z_R11),
  RegisterSaver_LiveIntReg(Z_R12),
  RegisterSaver_LiveIntReg(Z_R13),
  // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.)
  // RegisterSaver_ExcludedIntReg(Z_R15)  // stack pointer
};

static const RegisterSaver::LiveRegType RegisterSaver_LiveIntRegs[] = {
  // Live registers which get spilled to the stack. Register positions
  // in this array correspond directly to the stack layout.
  //
  // live float registers: All excluded, but still they get a stack slot to get same frame size.
  //
  RegisterSaver_ExcludedFloatReg(Z_F0 ),
  // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1)
  RegisterSaver_ExcludedFloatReg(Z_F2 ),
  RegisterSaver_ExcludedFloatReg(Z_F3 ),
  RegisterSaver_ExcludedFloatReg(Z_F4 ),
  RegisterSaver_ExcludedFloatReg(Z_F5 ),
  RegisterSaver_ExcludedFloatReg(Z_F6 ),
  RegisterSaver_ExcludedFloatReg(Z_F7 ),
  RegisterSaver_ExcludedFloatReg(Z_F8 ),
  RegisterSaver_ExcludedFloatReg(Z_F9 ),
  RegisterSaver_ExcludedFloatReg(Z_F10),
  RegisterSaver_ExcludedFloatReg(Z_F11),
  RegisterSaver_ExcludedFloatReg(Z_F12),
  RegisterSaver_ExcludedFloatReg(Z_F13),
  RegisterSaver_ExcludedFloatReg(Z_F14),
  RegisterSaver_ExcludedFloatReg(Z_F15),
  //
  // RegisterSaver_ExcludedIntReg(Z_R0), // scratch
  // RegisterSaver_ExcludedIntReg(Z_R1), // scratch
  RegisterSaver_LiveIntReg(Z_R2 ),
  RegisterSaver_LiveIntReg(Z_R3 ),
  RegisterSaver_LiveIntReg(Z_R4 ),
  RegisterSaver_LiveIntReg(Z_R5 ),
  RegisterSaver_LiveIntReg(Z_R6 ),
  RegisterSaver_LiveIntReg(Z_R7 ),
  RegisterSaver_LiveIntReg(Z_R8 ),
  RegisterSaver_LiveIntReg(Z_R9 ),
  RegisterSaver_LiveIntReg(Z_R10),
  RegisterSaver_LiveIntReg(Z_R11),
  RegisterSaver_LiveIntReg(Z_R12),
  RegisterSaver_LiveIntReg(Z_R13),
  // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.)
  // RegisterSaver_ExcludedIntReg(Z_R15)  // stack pointer
};

static const RegisterSaver::LiveRegType RegisterSaver_LiveRegsWithoutR2[] = {
  // Live registers which get spilled to the stack. Register positions
  // in this array correspond directly to the stack layout.
  //
  // live float registers:
  //
  RegisterSaver_LiveFloatReg(Z_F0 ),
  // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1)
  RegisterSaver_LiveFloatReg(Z_F2 ),
  RegisterSaver_LiveFloatReg(Z_F3 ),
  RegisterSaver_LiveFloatReg(Z_F4 ),
  RegisterSaver_LiveFloatReg(Z_F5 ),
  RegisterSaver_LiveFloatReg(Z_F6 ),
  RegisterSaver_LiveFloatReg(Z_F7 ),
  RegisterSaver_LiveFloatReg(Z_F8 ),
  RegisterSaver_LiveFloatReg(Z_F9 ),
  RegisterSaver_LiveFloatReg(Z_F10),
  RegisterSaver_LiveFloatReg(Z_F11),
  RegisterSaver_LiveFloatReg(Z_F12),
  RegisterSaver_LiveFloatReg(Z_F13),
  RegisterSaver_LiveFloatReg(Z_F14),
  RegisterSaver_LiveFloatReg(Z_F15),
  //
  // RegisterSaver_ExcludedIntReg(Z_R0), // scratch
  // RegisterSaver_ExcludedIntReg(Z_R1), // scratch
  RegisterSaver_ExcludedIntReg(Z_R2), // Omit saving R2.
  RegisterSaver_LiveIntReg(Z_R3 ),
  RegisterSaver_LiveIntReg(Z_R4 ),
  RegisterSaver_LiveIntReg(Z_R5 ),
  RegisterSaver_LiveIntReg(Z_R6 ),
  RegisterSaver_LiveIntReg(Z_R7 ),
  RegisterSaver_LiveIntReg(Z_R8 ),
  RegisterSaver_LiveIntReg(Z_R9 ),
  RegisterSaver_LiveIntReg(Z_R10),
  RegisterSaver_LiveIntReg(Z_R11),
  RegisterSaver_LiveIntReg(Z_R12),
  RegisterSaver_LiveIntReg(Z_R13),
  // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.)
  // RegisterSaver_ExcludedIntReg(Z_R15)  // stack pointer
};

// Live argument registers which get spilled to the stack.
static const RegisterSaver::LiveRegType RegisterSaver_LiveArgRegs[] = {
  RegisterSaver_LiveFloatReg(Z_FARG1),
  RegisterSaver_LiveFloatReg(Z_FARG2),
  RegisterSaver_LiveFloatReg(Z_FARG3),
  RegisterSaver_LiveFloatReg(Z_FARG4),
  RegisterSaver_LiveIntReg(Z_ARG1),
  RegisterSaver_LiveIntReg(Z_ARG2),
  RegisterSaver_LiveIntReg(Z_ARG3),
  RegisterSaver_LiveIntReg(Z_ARG4),
  RegisterSaver_LiveIntReg(Z_ARG5)
};

static const RegisterSaver::LiveRegType RegisterSaver_LiveVolatileRegs[] = {
  // Live registers which get spilled to the stack. Register positions
  // in this array correspond directly to the stack layout.
  //
  // live float registers:
  //
  RegisterSaver_LiveFloatReg(Z_F0 ),
  // RegisterSaver_ExcludedFloatReg(Z_F1 ), // scratch (Z_fscratch_1)
  RegisterSaver_LiveFloatReg(Z_F2 ),
  RegisterSaver_LiveFloatReg(Z_F3 ),
  RegisterSaver_LiveFloatReg(Z_F4 ),
  RegisterSaver_LiveFloatReg(Z_F5 ),
  RegisterSaver_LiveFloatReg(Z_F6 ),
  RegisterSaver_LiveFloatReg(Z_F7 ),
  // RegisterSaver_LiveFloatReg(Z_F8 ), // non-volatile
  // RegisterSaver_LiveFloatReg(Z_F9 ), // non-volatile
  // RegisterSaver_LiveFloatReg(Z_F10), // non-volatile
  // RegisterSaver_LiveFloatReg(Z_F11), // non-volatile
  // RegisterSaver_LiveFloatReg(Z_F12), // non-volatile
  // RegisterSaver_LiveFloatReg(Z_F13), // non-volatile
  // RegisterSaver_LiveFloatReg(Z_F14), // non-volatile
  // RegisterSaver_LiveFloatReg(Z_F15), // non-volatile
  //
  // RegisterSaver_ExcludedIntReg(Z_R0), // scratch
  // RegisterSaver_ExcludedIntReg(Z_R1), // scratch
  RegisterSaver_LiveIntReg(Z_R2 ),
  RegisterSaver_LiveIntReg(Z_R3 ),
  RegisterSaver_LiveIntReg(Z_R4 ),
  RegisterSaver_LiveIntReg(Z_R5 ),
  // RegisterSaver_LiveIntReg(Z_R6 ), // non-volatile
  // RegisterSaver_LiveIntReg(Z_R7 ), // non-volatile
  // RegisterSaver_LiveIntReg(Z_R8 ), // non-volatile
  // RegisterSaver_LiveIntReg(Z_R9 ), // non-volatile
  // RegisterSaver_LiveIntReg(Z_R10), // non-volatile
  // RegisterSaver_LiveIntReg(Z_R11), // non-volatile
  // RegisterSaver_LiveIntReg(Z_R12), // non-volatile
  // RegisterSaver_LiveIntReg(Z_R13), // non-volatile
  // RegisterSaver_ExcludedIntReg(Z_R14), // return pc (Saved in caller frame.)
  // RegisterSaver_ExcludedIntReg(Z_R15)  // stack pointer
};

static const RegisterSaver::LiveRegType RegisterSaver_LiveVRegs[] = {
  // live vector registers (optional, only these are used by C2):
  RegisterSaver_LiveVReg( Z_V16 ),
  RegisterSaver_LiveVReg( Z_V17 ),
  RegisterSaver_LiveVReg( Z_V18 ),
  RegisterSaver_LiveVReg( Z_V19 ),
  RegisterSaver_LiveVReg( Z_V20 ),
  RegisterSaver_LiveVReg( Z_V21 ),
  RegisterSaver_LiveVReg( Z_V22 ),
  RegisterSaver_LiveVReg( Z_V23 ),
  RegisterSaver_LiveVReg( Z_V24 ),
  RegisterSaver_LiveVReg( Z_V25 ),
  RegisterSaver_LiveVReg( Z_V26 ),
  RegisterSaver_LiveVReg( Z_V27 ),
  RegisterSaver_LiveVReg( Z_V28 ),
  RegisterSaver_LiveVReg( Z_V29 ),
  RegisterSaver_LiveVReg( Z_V30 ),
  RegisterSaver_LiveVReg( Z_V31 )
};

int RegisterSaver::live_reg_save_size(RegisterSet reg_set) {
  int reg_space = -1;
  switch (reg_set) {
    case all_registers:           reg_space = sizeof(RegisterSaver_LiveRegs); break;
    case all_registers_except_r2: reg_space = sizeof(RegisterSaver_LiveRegsWithoutR2); break;
    case all_integer_registers:   reg_space = sizeof(RegisterSaver_LiveIntRegs); break;
    case all_volatile_registers:  reg_space = sizeof(RegisterSaver_LiveVolatileRegs); break;
    case arg_registers:           reg_space = sizeof(RegisterSaver_LiveArgRegs); break;
    default: ShouldNotReachHere();
  }
  return (reg_space / sizeof(RegisterSaver::LiveRegType)) * reg_size;
}

int RegisterSaver::calculate_vregstosave_num() {
  return (sizeof(RegisterSaver_LiveVRegs) / sizeof(RegisterSaver::LiveRegType));
}

int RegisterSaver::live_reg_frame_size(RegisterSet reg_set, bool save_vectors) {
  const int vregstosave_num = save_vectors ? calculate_vregstosave_num() : 0;
  return live_reg_save_size(reg_set) + vregstosave_num * v_reg_size + frame::z_abi_160_size;
}


// return_pc: Specify the register that should be stored as the return pc in the current frame.
OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, RegisterSet reg_set, Register return_pc, bool save_vectors) {
  // Record volatile registers as callee-save values in an OopMap so
  // their save locations will be propagated to the caller frame's
  // RegisterMap during StackFrameStream construction (needed for
  // deoptimization; see compiledVFrame::create_stack_value).

  // Calculate frame size.
  const int frame_size_in_bytes  = live_reg_frame_size(reg_set, save_vectors);
  const int frame_size_in_slots  = frame_size_in_bytes / sizeof(jint);
  const int vregstosave_num = save_vectors ? calculate_vregstosave_num() : 0;
  const int register_save_offset = frame_size_in_bytes - (live_reg_save_size(reg_set) + vregstosave_num * v_reg_size);

  // OopMap frame size is in c2 stack slots (sizeof(jint)) not bytes or words.
  OopMap* map = new OopMap(frame_size_in_slots, 0);

  int regstosave_num = 0;
  const RegisterSaver::LiveRegType* live_regs = nullptr;

  switch (reg_set) {
    case all_registers:
      regstosave_num = sizeof(RegisterSaver_LiveRegs)/sizeof(RegisterSaver::LiveRegType);
      live_regs      = RegisterSaver_LiveRegs;
      break;
    case all_registers_except_r2:
      regstosave_num = sizeof(RegisterSaver_LiveRegsWithoutR2)/sizeof(RegisterSaver::LiveRegType);;
      live_regs      = RegisterSaver_LiveRegsWithoutR2;
      break;
    case all_integer_registers:
      regstosave_num = sizeof(RegisterSaver_LiveIntRegs)/sizeof(RegisterSaver::LiveRegType);
      live_regs      = RegisterSaver_LiveIntRegs;
      break;
    case all_volatile_registers:
      regstosave_num = sizeof(RegisterSaver_LiveVolatileRegs)/sizeof(RegisterSaver::LiveRegType);
      live_regs      = RegisterSaver_LiveVolatileRegs;
      break;
    case arg_registers:
      regstosave_num = sizeof(RegisterSaver_LiveArgRegs)/sizeof(RegisterSaver::LiveRegType);;
      live_regs      = RegisterSaver_LiveArgRegs;
      break;
    default: ShouldNotReachHere();
  }

  // Save return pc in old frame.
  __ save_return_pc(return_pc);

  // Push a new frame (includes stack linkage).
  // Use return_pc as scratch for push_frame. Z_R0_scratch (the default) and Z_R1_scratch are
  // illegally used to pass parameters by RangeCheckStub::emit_code().
  __ push_frame(frame_size_in_bytes, return_pc);
  // We have to restore return_pc right away.
  // Nobody else will. Furthermore, return_pc isn't necessarily the default (Z_R14).
  // Nobody else knows which register we saved.
  __ z_lg(return_pc, _z_common_abi(return_pc) + frame_size_in_bytes, Z_SP);

  // Register save area in new frame starts above z_abi_160 area.
  int offset = register_save_offset;

  Register first = noreg;
  Register last  = noreg;
  int      first_offset = -1;
  bool     float_spilled = false;

  for (int i = 0; i < regstosave_num; i++, offset += reg_size) {
    int reg_num  = live_regs[i].reg_num;
    int reg_type = live_regs[i].reg_type;

    switch (reg_type) {
      case RegisterSaver::int_reg: {
        Register reg = as_Register(reg_num);
        if (last != reg->predecessor()) {
          if (first != noreg) {
            __ z_stmg(first, last, first_offset, Z_SP);
          }
          first = reg;
          first_offset = offset;
          DEBUG_ONLY(float_spilled = false);
        }
        last = reg;
        assert(last != Z_R0, "r0 would require special treatment");
        assert(!float_spilled, "for simplicity, do not mix up ints and floats in RegisterSaver_LiveRegs[]");
        break;
      }

      case RegisterSaver::excluded_reg: // Not saved/restored, but with dedicated slot.
        continue; // Continue with next loop iteration.

      case RegisterSaver::float_reg: {
        FloatRegister freg = as_FloatRegister(reg_num);
        __ z_std(freg, offset, Z_SP);
        DEBUG_ONLY(float_spilled = true);
        break;
      }

      default:
        ShouldNotReachHere();
        break;
    }

    // Second set_callee_saved is really a waste but we'll keep things as they were for now
    map->set_callee_saved(VMRegImpl::stack2reg(offset >> 2), live_regs[i].vmreg);
    map->set_callee_saved(VMRegImpl::stack2reg((offset + half_reg_size) >> 2), live_regs[i].vmreg->next());
  }
  assert(first != noreg, "Should spill at least one int reg.");
  __ z_stmg(first, last, first_offset, Z_SP);

  for (int i = 0; i < vregstosave_num; i++, offset += v_reg_size) {
    int reg_num  = RegisterSaver_LiveVRegs[i].reg_num;

    __ z_vst(as_VectorRegister(reg_num), Address(Z_SP, offset));

    map->set_callee_saved(VMRegImpl::stack2reg(offset>>2),
                   RegisterSaver_LiveVRegs[i].vmreg);
    map->set_callee_saved(VMRegImpl::stack2reg((offset + half_reg_size ) >> 2),
                   RegisterSaver_LiveVRegs[i].vmreg->next());
    map->set_callee_saved(VMRegImpl::stack2reg((offset + (half_reg_size * 2)) >> 2),
                   RegisterSaver_LiveVRegs[i].vmreg->next(2));
    map->set_callee_saved(VMRegImpl::stack2reg((offset + (half_reg_size * 3)) >> 2),
                   RegisterSaver_LiveVRegs[i].vmreg->next(3));
  }

  assert(offset == frame_size_in_bytes, "consistency check");

  // And we're done.
  return map;
}


// Generate the OopMap (again, regs where saved before).
OopMap* RegisterSaver::generate_oop_map(MacroAssembler* masm, RegisterSet reg_set) {
  // Calculate frame size.
  const int frame_size_in_bytes  = live_reg_frame_size(reg_set);
  const int frame_size_in_slots  = frame_size_in_bytes / sizeof(jint);
  const int register_save_offset = frame_size_in_bytes - live_reg_save_size(reg_set);

  // OopMap frame size is in c2 stack slots (sizeof(jint)) not bytes or words.
  OopMap* map = new OopMap(frame_size_in_slots, 0);

  int regstosave_num = 0;
  const RegisterSaver::LiveRegType* live_regs = nullptr;

  switch (reg_set) {
    case all_registers:
      regstosave_num = sizeof(RegisterSaver_LiveRegs)/sizeof(RegisterSaver::LiveRegType);
      live_regs      = RegisterSaver_LiveRegs;
      break;
    case all_registers_except_r2:
      regstosave_num = sizeof(RegisterSaver_LiveRegsWithoutR2)/sizeof(RegisterSaver::LiveRegType);;
      live_regs      = RegisterSaver_LiveRegsWithoutR2;
      break;
    case all_integer_registers:
      regstosave_num = sizeof(RegisterSaver_LiveIntRegs)/sizeof(RegisterSaver::LiveRegType);
      live_regs      = RegisterSaver_LiveIntRegs;
      break;
    case all_volatile_registers:
      regstosave_num = sizeof(RegisterSaver_LiveVolatileRegs)/sizeof(RegisterSaver::LiveRegType);
      live_regs      = RegisterSaver_LiveVolatileRegs;
      break;
    case arg_registers:
      regstosave_num = sizeof(RegisterSaver_LiveArgRegs)/sizeof(RegisterSaver::LiveRegType);;
      live_regs      = RegisterSaver_LiveArgRegs;
      break;
    default: ShouldNotReachHere();
  }

  // Register save area in new frame starts above z_abi_160 area.
  int offset = register_save_offset;
  for (int i = 0; i < regstosave_num; i++) {
    if (live_regs[i].reg_type < RegisterSaver::excluded_reg) {
      map->set_callee_saved(VMRegImpl::stack2reg(offset>>2), live_regs[i].vmreg);
      map->set_callee_saved(VMRegImpl::stack2reg((offset + half_reg_size)>>2), live_regs[i].vmreg->next());
    }
    offset += reg_size;
  }
#ifdef ASSERT
  assert(offset == frame_size_in_bytes, "consistency check");
#endif
  return map;
}


// Pop the current frame and restore all the registers that we saved.
void RegisterSaver::restore_live_registers(MacroAssembler* masm, RegisterSet reg_set, bool save_vectors) {
  int offset;
  const int vregstosave_num = save_vectors ? calculate_vregstosave_num() : 0;
  const int register_save_offset = live_reg_frame_size(reg_set, save_vectors) - (live_reg_save_size(reg_set) + vregstosave_num * v_reg_size);

  Register first = noreg;
  Register last = noreg;
  int      first_offset = -1;
  bool     float_spilled = false;

  int regstosave_num = 0;
  const RegisterSaver::LiveRegType* live_regs = nullptr;

  switch (reg_set) {
    case all_registers:
      regstosave_num = sizeof(RegisterSaver_LiveRegs)/sizeof(RegisterSaver::LiveRegType);;
      live_regs      = RegisterSaver_LiveRegs;
      break;
    case all_registers_except_r2:
      regstosave_num = sizeof(RegisterSaver_LiveRegsWithoutR2)/sizeof(RegisterSaver::LiveRegType);;
      live_regs      = RegisterSaver_LiveRegsWithoutR2;
      break;
    case all_integer_registers:
      regstosave_num = sizeof(RegisterSaver_LiveIntRegs)/sizeof(RegisterSaver::LiveRegType);
      live_regs      = RegisterSaver_LiveIntRegs;
      break;
    case all_volatile_registers:
      regstosave_num = sizeof(RegisterSaver_LiveVolatileRegs)/sizeof(RegisterSaver::LiveRegType);;
      live_regs      = RegisterSaver_LiveVolatileRegs;
      break;
    case arg_registers:
      regstosave_num = sizeof(RegisterSaver_LiveArgRegs)/sizeof(RegisterSaver::LiveRegType);;
      live_regs      = RegisterSaver_LiveArgRegs;
      break;
    default: ShouldNotReachHere();
  }

  // Restore all registers (ints and floats).

  // Register save area in new frame starts above z_abi_160 area.
  offset = register_save_offset;

  for (int i = 0; i < regstosave_num; i++, offset += reg_size) {
    int reg_num  = live_regs[i].reg_num;
    int reg_type = live_regs[i].reg_type;

    switch (reg_type) {
      case RegisterSaver::excluded_reg:
        continue; // Continue with next loop iteration.

      case RegisterSaver::int_reg: {
        Register reg = as_Register(reg_num);
        if (last != reg->predecessor()) {
          if (first != noreg) {
            __ z_lmg(first, last, first_offset, Z_SP);
          }
          first = reg;
          first_offset = offset;
          DEBUG_ONLY(float_spilled = false);
        }
        last = reg;
        assert(last != Z_R0, "r0 would require special treatment");
        assert(!float_spilled, "for simplicity, do not mix up ints and floats in RegisterSaver_LiveRegs[]");
        break;
      }

      case RegisterSaver::float_reg: {
        FloatRegister freg = as_FloatRegister(reg_num);
        __ z_ld(freg, offset, Z_SP);
        DEBUG_ONLY(float_spilled = true);
        break;
      }

      default:
        ShouldNotReachHere();
    }
  }
  assert(first != noreg, "Should spill at least one int reg.");
  __ z_lmg(first, last, first_offset, Z_SP);

  for (int i = 0; i < vregstosave_num; i++, offset += v_reg_size) {
    int reg_num  = RegisterSaver_LiveVRegs[i].reg_num;

    __ z_vl(as_VectorRegister(reg_num), Address(Z_SP, offset));
  }

  // Pop the frame.
  __ pop_frame();

  // Restore the flags.
  __ restore_return_pc();
}


// Pop the current frame and restore the registers that might be holding a result.
void RegisterSaver::restore_result_registers(MacroAssembler* masm) {
  const int regstosave_num       = sizeof(RegisterSaver_LiveRegs) /
                                   sizeof(RegisterSaver::LiveRegType);
  const int register_save_offset = live_reg_frame_size(all_registers) - live_reg_save_size(all_registers);

  // Restore all result registers (ints and floats).
  int offset = register_save_offset;
  for (int i = 0; i < regstosave_num; i++, offset += reg_size) {
    int reg_num = RegisterSaver_LiveRegs[i].reg_num;
    int reg_type = RegisterSaver_LiveRegs[i].reg_type;
    switch (reg_type) {
      case RegisterSaver::excluded_reg:
        continue; // Continue with next loop iteration.
      case RegisterSaver::int_reg: {
        if (as_Register(reg_num) == Z_RET) { // int result_reg
          __ z_lg(as_Register(reg_num), offset, Z_SP);
        }
        break;
      }
      case RegisterSaver::float_reg: {
        if (as_FloatRegister(reg_num) == Z_FRET) { // float result_reg
          __ z_ld(as_FloatRegister(reg_num), offset, Z_SP);
        }
        break;
      }
      default:
        ShouldNotReachHere();
    }
  }
  assert(offset == live_reg_frame_size(all_registers), "consistency check");
}

// ---------------------------------------------------------------------------
void SharedRuntime::save_native_result(MacroAssembler * masm,
                                       BasicType ret_type,
                                       int frame_slots) {
  Address memaddr(Z_SP, frame_slots * VMRegImpl::stack_slot_size);

  switch (ret_type) {
    case T_BOOLEAN:  // Save shorter types as int. Do we need sign extension at restore??
    case T_BYTE:
    case T_CHAR:
    case T_SHORT:
    case T_INT:
      __ reg2mem_opt(Z_RET, memaddr, false);
      break;
    case T_OBJECT:   // Save pointer types as long.
    case T_ARRAY:
    case T_ADDRESS:
    case T_VOID:
    case T_LONG:
      __ reg2mem_opt(Z_RET, memaddr);
      break;
    case T_FLOAT:
      __ freg2mem_opt(Z_FRET, memaddr, false);
      break;
    case T_DOUBLE:
      __ freg2mem_opt(Z_FRET, memaddr);
      break;
    default:
      ShouldNotReachHere();
      break;
  }
}

void SharedRuntime::restore_native_result(MacroAssembler *masm,
                                          BasicType       ret_type,
                                          int             frame_slots) {
  Address memaddr(Z_SP, frame_slots * VMRegImpl::stack_slot_size);

  switch (ret_type) {
    case T_BOOLEAN:  // Restore shorter types as int. Do we need sign extension at restore??
    case T_BYTE:
    case T_CHAR:
    case T_SHORT:
    case T_INT:
      __ mem2reg_opt(Z_RET, memaddr, false);
      break;
    case T_OBJECT:   // Restore pointer types as long.
    case T_ARRAY:
    case T_ADDRESS:
    case T_VOID:
    case T_LONG:
      __ mem2reg_opt(Z_RET, memaddr);
      break;
    case T_FLOAT:
      __ mem2freg_opt(Z_FRET, memaddr, false);
      break;
    case T_DOUBLE:
      __ mem2freg_opt(Z_FRET, memaddr);
      break;
    default:
      ShouldNotReachHere();
      break;
  }
}

// ---------------------------------------------------------------------------
// Read the array of BasicTypes from a signature, and compute where the
// arguments should go. Values in the VMRegPair regs array refer to 4-byte
// quantities. Values less than VMRegImpl::stack0 are registers, those above
// refer to 4-byte stack slots. All stack slots are based off of the stack pointer
// as framesizes are fixed.
// VMRegImpl::stack0 refers to the first slot 0(sp).
// VMRegImpl::stack0+1 refers to the memory word 4-byes higher. Registers
// up to Register::number_of_registers are the 64-bit integer registers.

// Note: the INPUTS in sig_bt are in units of Java argument words, which are
// either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit
// units regardless of build.

// The Java calling convention is a "shifted" version of the C ABI.
// By skipping the first C ABI register we can call non-static jni methods
// with small numbers of arguments without having to shuffle the arguments
// at all. Since we control the java ABI we ought to at least get some
// advantage out of it.
int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
                                           VMRegPair *regs,
                                           int total_args_passed) {
  // c2c calling conventions for compiled-compiled calls.

  // An int/float occupies 1 slot here.
  const int inc_stk_for_intfloat   = 1; // 1 slots for ints and floats.
  const int inc_stk_for_longdouble = 2; // 2 slots for longs and doubles.

  const VMReg z_iarg_reg[5] = {
    Z_R2->as_VMReg(),
    Z_R3->as_VMReg(),
    Z_R4->as_VMReg(),
    Z_R5->as_VMReg(),
    Z_R6->as_VMReg()
  };
  const VMReg z_farg_reg[4] = {
    Z_F0->as_VMReg(),
    Z_F2->as_VMReg(),
    Z_F4->as_VMReg(),
    Z_F6->as_VMReg()
  };
  const int z_num_iarg_registers = sizeof(z_iarg_reg) / sizeof(z_iarg_reg[0]);
  const int z_num_farg_registers = sizeof(z_farg_reg) / sizeof(z_farg_reg[0]);

  assert(Register::number_of_arg_registers == z_num_iarg_registers, "iarg reg count mismatch");
  assert(FloatRegister::number_of_arg_registers == z_num_farg_registers, "farg reg count mismatch");

  int i;
  int stk = 0;
  int ireg = 0;
  int freg = 0;

  for (int i = 0; i < total_args_passed; ++i) {
    switch (sig_bt[i]) {
      case T_BOOLEAN:
      case T_CHAR:
      case T_BYTE:
      case T_SHORT:
      case T_INT:
        if (ireg < z_num_iarg_registers) {
          // Put int/ptr in register.
          regs[i].set1(z_iarg_reg[ireg]);
          ++ireg;
        } else {
          // Put int/ptr on stack.
          regs[i].set1(VMRegImpl::stack2reg(stk));
          stk += inc_stk_for_intfloat;
        }
        break;
      case T_LONG:
        assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half");
        if (ireg < z_num_iarg_registers) {
          // Put long in register.
          regs[i].set2(z_iarg_reg[ireg]);
          ++ireg;
        } else {
          // Put long on stack and align to 2 slots.
          if (stk & 0x1) { ++stk; }
          regs[i].set2(VMRegImpl::stack2reg(stk));
          stk += inc_stk_for_longdouble;
        }
        break;
      case T_OBJECT:
      case T_ARRAY:
      case T_ADDRESS:
        if (ireg < z_num_iarg_registers) {
          // Put ptr in register.
          regs[i].set2(z_iarg_reg[ireg]);
          ++ireg;
        } else {
          // Put ptr on stack and align to 2 slots, because
          // "64-bit pointers record oop-ishness on 2 aligned adjacent
          // registers." (see OopFlow::build_oop_map).
          if (stk & 0x1) { ++stk; }
          regs[i].set2(VMRegImpl::stack2reg(stk));
          stk += inc_stk_for_longdouble;
        }
        break;
      case T_FLOAT:
        if (freg < z_num_farg_registers) {
          // Put float in register.
          regs[i].set1(z_farg_reg[freg]);
          ++freg;
        } else {
          // Put float on stack.
          regs[i].set1(VMRegImpl::stack2reg(stk));
          stk += inc_stk_for_intfloat;
        }
        break;
      case T_DOUBLE:
        assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half");
        if (freg < z_num_farg_registers) {
          // Put double in register.
          regs[i].set2(z_farg_reg[freg]);
          ++freg;
        } else {
          // Put double on stack and align to 2 slots.
          if (stk & 0x1) { ++stk; }
          regs[i].set2(VMRegImpl::stack2reg(stk));
          stk += inc_stk_for_longdouble;
        }
        break;
      case T_VOID:
        assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half");
        // Do not count halves.
        regs[i].set_bad();
        break;
      default:
        ShouldNotReachHere();
    }
  }
  return stk;
}

int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
                                        VMRegPair *regs,
                                        int total_args_passed) {

  // Calling conventions for C runtime calls and calls to JNI native methods.
  const VMReg z_iarg_reg[5] = {
    Z_R2->as_VMReg(),
    Z_R3->as_VMReg(),
    Z_R4->as_VMReg(),
    Z_R5->as_VMReg(),
    Z_R6->as_VMReg()
  };
  const VMReg z_farg_reg[4] = {
    Z_F0->as_VMReg(),
    Z_F2->as_VMReg(),
    Z_F4->as_VMReg(),
    Z_F6->as_VMReg()
  };
  const int z_num_iarg_registers = sizeof(z_iarg_reg) / sizeof(z_iarg_reg[0]);
  const int z_num_farg_registers = sizeof(z_farg_reg) / sizeof(z_farg_reg[0]);

  // Check calling conventions consistency.
  assert(Register::number_of_arg_registers == z_num_iarg_registers, "iarg reg count mismatch");
  assert(FloatRegister::number_of_arg_registers == z_num_farg_registers, "farg reg count mismatch");

  // Avoid passing C arguments in the wrong stack slots.

  // 'Stk' counts stack slots. Due to alignment, 32 bit values occupy
  // 2 such slots, like 64 bit values do.
  const int inc_stk_for_intfloat   = 2; // 2 slots for ints and floats.
  const int inc_stk_for_longdouble = 2; // 2 slots for longs and doubles.

  int i;
  // Leave room for C-compatible ABI
  int stk = (frame::z_abi_160_size - frame::z_jit_out_preserve_size) / VMRegImpl::stack_slot_size;
  int freg = 0;
  int ireg = 0;

  // We put the first 5 arguments into registers and the rest on the
  // stack. Float arguments are already in their argument registers
  // due to c2c calling conventions (see calling_convention).
  for (int i = 0; i < total_args_passed; ++i) {
    switch (sig_bt[i]) {
      case T_BOOLEAN:
      case T_CHAR:
      case T_BYTE:
      case T_SHORT:
      case T_INT:
        // Fall through, handle as long.
      case T_LONG:
      case T_OBJECT:
      case T_ARRAY:
      case T_ADDRESS:
      case T_METADATA:
        // Oops are already boxed if required (JNI).
        if (ireg < z_num_iarg_registers) {
          regs[i].set2(z_iarg_reg[ireg]);
          ++ireg;
        } else {
          regs[i].set2(VMRegImpl::stack2reg(stk));
          stk += inc_stk_for_longdouble;
        }
        break;
      case T_FLOAT:
        if (freg < z_num_farg_registers) {
          regs[i].set1(z_farg_reg[freg]);
          ++freg;
        } else {
          regs[i].set1(VMRegImpl::stack2reg(stk+1));
          stk +=  inc_stk_for_intfloat;
        }
        break;
      case T_DOUBLE:
        assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "expecting half");
        if (freg < z_num_farg_registers) {
          regs[i].set2(z_farg_reg[freg]);
          ++freg;
        } else {
          // Put double on stack.
          regs[i].set2(VMRegImpl::stack2reg(stk));
          stk += inc_stk_for_longdouble;
        }
        break;
      case T_VOID:
        // Do not count halves.
        regs[i].set_bad();
        break;
      default:
        ShouldNotReachHere();
    }
  }
  return align_up(stk, 2);
}

int SharedRuntime::vector_calling_convention(VMRegPair *regs,
                                             uint num_bits,
                                             uint total_args_passed) {
  Unimplemented();
  return 0;
}

////////////////////////////////////////////////////////////////////////
//
//  Argument shufflers
//
////////////////////////////////////////////////////////////////////////

//----------------------------------------------------------------------
// The java_calling_convention describes stack locations as ideal slots on
// a frame with no abi restrictions. Since we must observe abi restrictions
// (like the placement of the register window) the slots must be biased by
// the following value.
//----------------------------------------------------------------------
static int reg2slot(VMReg r) {
  return r->reg2stack() + SharedRuntime::out_preserve_stack_slots();
}

static int reg2offset(VMReg r) {
  return reg2slot(r) * VMRegImpl::stack_slot_size;
}

static void verify_oop_args(MacroAssembler *masm,
                            int total_args_passed,
                            const BasicType *sig_bt,
                            const VMRegPair *regs) {
  if (!VerifyOops) { return; }

  for (int i = 0; i < total_args_passed; i++) {
    if (is_reference_type(sig_bt[i])) {
      VMReg r = regs[i].first();
      assert(r->is_valid(), "bad oop arg");

      if (r->is_stack()) {
        __ z_lg(Z_R0_scratch,
                Address(Z_SP, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize));
        __ verify_oop(Z_R0_scratch, FILE_AND_LINE);
      } else {
        __ verify_oop(r->as_Register(), FILE_AND_LINE);
      }
    }
  }
}

static void gen_special_dispatch(MacroAssembler *masm,
                                 int total_args_passed,
                                 vmIntrinsics::ID special_dispatch,
                                 const BasicType *sig_bt,
                                 const VMRegPair *regs) {
  verify_oop_args(masm, total_args_passed, sig_bt, regs);

  // Now write the args into the outgoing interpreter space.
  bool     has_receiver   = false;
  Register receiver_reg   = noreg;
  int      member_arg_pos = -1;
  Register member_reg     = noreg;
  int      ref_kind       = MethodHandles::signature_polymorphic_intrinsic_ref_kind(special_dispatch);

  if (ref_kind != 0) {
    member_arg_pos = total_args_passed - 1;  // trailing MemberName argument
    member_reg = Z_R9;                       // Known to be free at this point.
    has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind);
  } else if (special_dispatch == vmIntrinsics::_linkToNative) {
    member_arg_pos = total_args_passed - 1;  // trailing NativeEntryPoint argument
    member_reg = Z_R9;  // known to be free at this point
  } else {
    guarantee(special_dispatch == vmIntrinsics::_invokeBasic,
              "special_dispatch=%d", vmIntrinsics::as_int(special_dispatch));
    has_receiver = true;
  }

  if (member_reg != noreg) {
    // Load the member_arg into register, if necessary.
    assert(member_arg_pos >= 0 && member_arg_pos < total_args_passed, "oob");
    assert(sig_bt[member_arg_pos] == T_OBJECT, "dispatch argument must be an object");

    VMReg r = regs[member_arg_pos].first();
    assert(r->is_valid(), "bad member arg");

    if (r->is_stack()) {
      __ z_lg(member_reg, Address(Z_SP, reg2offset(r)));
    } else {
      // No data motion is needed.
      member_reg = r->as_Register();
    }
  }

  if (has_receiver) {
    // Make sure the receiver is loaded into a register.
    assert(total_args_passed > 0, "oob");
    assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object");

    VMReg r = regs[0].first();
    assert(r->is_valid(), "bad receiver arg");

    if (r->is_stack()) {
      // Porting note: This assumes that compiled calling conventions always
      // pass the receiver oop in a register. If this is not true on some
      // platform, pick a temp and load the receiver from stack.
      assert(false, "receiver always in a register");
      receiver_reg = Z_R13;  // Known to be free at this point.
      __ z_lg(receiver_reg, Address(Z_SP, reg2offset(r)));
    } else {
      // No data motion is needed.
      receiver_reg = r->as_Register();
    }
  }

  // Figure out which address we are really jumping to:
  MethodHandles::generate_method_handle_dispatch(masm, special_dispatch,
                                                 receiver_reg, member_reg,
                                                 /*for_compiler_entry:*/ true);
}

////////////////////////////////////////////////////////////////////////
//
//  Argument shufflers
//
////////////////////////////////////////////////////////////////////////

// Is the size of a vector size (in bytes) bigger than a size saved by default?
// 8 bytes registers are saved by default on z/Architecture.
bool SharedRuntime::is_wide_vector(int size) {
  // Note, MaxVectorSize == 8/16 on this platform.
  assert(size <= (SuperwordUseVX ? 16 : 8), "%d bytes vectors are not supported", size);
  return size > 8;
}

//----------------------------------------------------------------------
// An oop arg. Must pass a handle not the oop itself
//----------------------------------------------------------------------
static void object_move(MacroAssembler *masm,
                        OopMap *map,
                        int oop_handle_offset,
                        int framesize_in_slots,
                        VMRegPair src,
                        VMRegPair dst,
                        bool is_receiver,
                        int *receiver_offset) {
  int frame_offset = framesize_in_slots*VMRegImpl::stack_slot_size;

  assert(!is_receiver || (is_receiver && (*receiver_offset == -1)), "only one receiving object per call, please.");

  // Must pass a handle. First figure out the location we use as a handle.

  if (src.first()->is_stack()) {
    // Oop is already on the stack, put handle on stack or in register
    // If handle will be on the stack, use temp reg to calculate it.
    Register rHandle = dst.first()->is_stack() ? Z_R1 : dst.first()->as_Register();
    Label    skip;
    int      slot_in_older_frame = reg2slot(src.first());

    guarantee(!is_receiver, "expecting receiver in register");
    map->set_oop(VMRegImpl::stack2reg(slot_in_older_frame + framesize_in_slots));

    __ add2reg(rHandle, reg2offset(src.first())+frame_offset, Z_SP);
    __ load_and_test_long(Z_R0, Address(rHandle));
    __ z_brne(skip);
    // Use a null handle if oop is null.
    __ clear_reg(rHandle, true, false);
    __ bind(skip);

    // Copy handle to the right place (register or stack).
    if (dst.first()->is_stack()) {
      __ z_stg(rHandle, reg2offset(dst.first()), Z_SP);
    } // else
      // nothing to do. rHandle uses the correct register
  } else {
    // Oop is passed in an input register. We must flush it to the stack.
    const Register rOop = src.first()->as_Register();
    const Register rHandle = dst.first()->is_stack() ? Z_R1 : dst.first()->as_Register();
    int            oop_slot = (rOop->encoding()-Z_ARG1->encoding()) * VMRegImpl::slots_per_word + oop_handle_offset;
    int            oop_slot_offset = oop_slot*VMRegImpl::stack_slot_size;
    NearLabel skip;

    if (is_receiver) {
      *receiver_offset = oop_slot_offset;
    }
    map->set_oop(VMRegImpl::stack2reg(oop_slot));

    // Flush Oop to stack, calculate handle.
    __ z_stg(rOop, oop_slot_offset, Z_SP);
    __ add2reg(rHandle, oop_slot_offset, Z_SP);

    // If Oop is null, use a null handle.
    __ compare64_and_branch(rOop, (RegisterOrConstant)0L, Assembler::bcondNotEqual, skip);
    __ clear_reg(rHandle, true, false);
    __ bind(skip);

    // Copy handle to the right place (register or stack).
    if (dst.first()->is_stack()) {
      __ z_stg(rHandle, reg2offset(dst.first()), Z_SP);
    } // else
      // nothing to do here, since rHandle = dst.first()->as_Register in this case.
  }
}

//----------------------------------------------------------------------
// A float arg. May have to do float reg to int reg conversion
//----------------------------------------------------------------------
static void float_move(MacroAssembler *masm,
                       VMRegPair src,
                       VMRegPair dst,
                       int framesize_in_slots,
                       int workspace_slot_offset) {
  int frame_offset = framesize_in_slots * VMRegImpl::stack_slot_size;
  int workspace_offset = workspace_slot_offset * VMRegImpl::stack_slot_size;

  // We do not accept an argument in a VMRegPair to be spread over two slots,
  // no matter what physical location (reg or stack) the slots may have.
  // We just check for the unaccepted slot to be invalid.
  assert(!src.second()->is_valid(), "float in arg spread over two slots");
  assert(!dst.second()->is_valid(), "float out arg spread over two slots");

  if (src.first()->is_stack()) {
    if (dst.first()->is_stack()) {
      // stack -> stack. The easiest of the bunch.
      __ z_mvc(Address(Z_SP, reg2offset(dst.first())),
               Address(Z_SP, reg2offset(src.first()) + frame_offset), sizeof(float));
    } else {
      // stack to reg
      Address memaddr(Z_SP, reg2offset(src.first()) + frame_offset);
      if (dst.first()->is_Register()) {
        __ mem2reg_opt(dst.first()->as_Register(), memaddr, false);
      } else {
        __ mem2freg_opt(dst.first()->as_FloatRegister(), memaddr, false);
      }
    }
  } else if (src.first()->is_Register()) {
    if (dst.first()->is_stack()) {
      // gpr -> stack
      __ reg2mem_opt(src.first()->as_Register(),
                     Address(Z_SP, reg2offset(dst.first()), false ));
    } else {
      if (dst.first()->is_Register()) {
        // gpr -> gpr
        __ move_reg_if_needed(dst.first()->as_Register(), T_INT,
                              src.first()->as_Register(), T_INT);
      } else {
        if (VM_Version::has_FPSupportEnhancements()) {
          // gpr -> fpr. Exploit z10 capability of direct transfer.
          __ z_ldgr(dst.first()->as_FloatRegister(), src.first()->as_Register());
        } else {
          // gpr -> fpr. Use work space on stack to transfer data.
          Address   stackaddr(Z_SP, workspace_offset);

          __ reg2mem_opt(src.first()->as_Register(), stackaddr, false);
          __ mem2freg_opt(dst.first()->as_FloatRegister(), stackaddr, false);
        }
      }
    }
  } else {
    if (dst.first()->is_stack()) {
      // fpr -> stack
      __ freg2mem_opt(src.first()->as_FloatRegister(),
                      Address(Z_SP, reg2offset(dst.first())), false);
    } else {
      if (dst.first()->is_Register()) {
        if (VM_Version::has_FPSupportEnhancements()) {
          // fpr -> gpr.
          __ z_lgdr(dst.first()->as_Register(), src.first()->as_FloatRegister());
        } else {
          // fpr -> gpr. Use work space on stack to transfer data.
          Address   stackaddr(Z_SP, workspace_offset);

          __ freg2mem_opt(src.first()->as_FloatRegister(), stackaddr, false);
          __ mem2reg_opt(dst.first()->as_Register(), stackaddr, false);
        }
      } else {
        // fpr -> fpr
        __ move_freg_if_needed(dst.first()->as_FloatRegister(), T_FLOAT,
                               src.first()->as_FloatRegister(), T_FLOAT);
      }
    }
  }
}

//----------------------------------------------------------------------
// A double arg. May have to do double reg to long reg conversion
//----------------------------------------------------------------------
static void double_move(MacroAssembler *masm,
                        VMRegPair src,
                        VMRegPair dst,
                        int framesize_in_slots,
                        int workspace_slot_offset) {
  int frame_offset = framesize_in_slots*VMRegImpl::stack_slot_size;
  int workspace_offset = workspace_slot_offset*VMRegImpl::stack_slot_size;

  // Since src is always a java calling convention we know that the
  // src pair is always either all registers or all stack (and aligned?)

  if (src.first()->is_stack()) {
    if (dst.first()->is_stack()) {
      // stack -> stack. The easiest of the bunch.
      __ z_mvc(Address(Z_SP, reg2offset(dst.first())),
               Address(Z_SP, reg2offset(src.first()) + frame_offset), sizeof(double));
    } else {
      // stack to reg
      Address stackaddr(Z_SP, reg2offset(src.first()) + frame_offset);

      if (dst.first()->is_Register()) {
        __ mem2reg_opt(dst.first()->as_Register(), stackaddr);
      } else {
        __ mem2freg_opt(dst.first()->as_FloatRegister(), stackaddr);
      }
    }
  } else if (src.first()->is_Register()) {
    if (dst.first()->is_stack()) {
      // gpr -> stack
      __ reg2mem_opt(src.first()->as_Register(),
                     Address(Z_SP, reg2offset(dst.first())));
    } else {
      if (dst.first()->is_Register()) {
        // gpr -> gpr
        __ move_reg_if_needed(dst.first()->as_Register(), T_LONG,
                              src.first()->as_Register(), T_LONG);
      } else {
        if (VM_Version::has_FPSupportEnhancements()) {
          // gpr -> fpr. Exploit z10 capability of direct transfer.
          __ z_ldgr(dst.first()->as_FloatRegister(), src.first()->as_Register());
        } else {
          // gpr -> fpr. Use work space on stack to transfer data.
          Address stackaddr(Z_SP, workspace_offset);
          __ reg2mem_opt(src.first()->as_Register(), stackaddr);
          __ mem2freg_opt(dst.first()->as_FloatRegister(), stackaddr);
        }
      }
    }
  } else {
    if (dst.first()->is_stack()) {
      // fpr -> stack
      __ freg2mem_opt(src.first()->as_FloatRegister(),
                      Address(Z_SP, reg2offset(dst.first())));
    } else {
      if (dst.first()->is_Register()) {
        if (VM_Version::has_FPSupportEnhancements()) {
          // fpr -> gpr. Exploit z10 capability of direct transfer.
          __ z_lgdr(dst.first()->as_Register(), src.first()->as_FloatRegister());
        } else {
          // fpr -> gpr. Use work space on stack to transfer data.
          Address stackaddr(Z_SP, workspace_offset);

          __ freg2mem_opt(src.first()->as_FloatRegister(), stackaddr);
          __ mem2reg_opt(dst.first()->as_Register(), stackaddr);
        }
      } else {
        // fpr -> fpr
        // In theory these overlap but the ordering is such that this is likely a nop.
        __ move_freg_if_needed(dst.first()->as_FloatRegister(), T_DOUBLE,
                               src.first()->as_FloatRegister(), T_DOUBLE);
      }
    }
  }
}

//----------------------------------------------------------------------
// A long arg.
//----------------------------------------------------------------------
static void long_move(MacroAssembler *masm,
                      VMRegPair src,
                      VMRegPair dst,
                      int framesize_in_slots) {
  int frame_offset = framesize_in_slots*VMRegImpl::stack_slot_size;

  if (src.first()->is_stack()) {
    if (dst.first()->is_stack()) {
      // stack -> stack. The easiest of the bunch.
      __ z_mvc(Address(Z_SP, reg2offset(dst.first())),
               Address(Z_SP, reg2offset(src.first()) + frame_offset), sizeof(long));
    } else {
      // stack to reg
      assert(dst.first()->is_Register(), "long dst value must be in GPR");
      __ mem2reg_opt(dst.first()->as_Register(),
                      Address(Z_SP, reg2offset(src.first()) + frame_offset));
    }
  } else {
    // reg to reg
    assert(src.first()->is_Register(), "long src value must be in GPR");
    if (dst.first()->is_stack()) {
      // reg -> stack
      __ reg2mem_opt(src.first()->as_Register(),
                     Address(Z_SP, reg2offset(dst.first())));
    } else {
      // reg -> reg
      assert(dst.first()->is_Register(), "long dst value must be in GPR");
      __ move_reg_if_needed(dst.first()->as_Register(),
                            T_LONG, src.first()->as_Register(), T_LONG);
    }
  }
}


//----------------------------------------------------------------------
// A int-like arg.
//----------------------------------------------------------------------
// On z/Architecture we will store integer like items to the stack as 64 bit
// items, according to the z/Architecture ABI, even though Java would only store
// 32 bits for a parameter.
// We do sign extension for all base types. That is ok since the only
// unsigned base type is T_CHAR, and T_CHAR uses only 16 bits of an int.
// Sign extension 32->64 bit will thus not affect the value.
//----------------------------------------------------------------------
static void move32_64(MacroAssembler *masm,
                      VMRegPair src,
                      VMRegPair dst,
                      int framesize_in_slots) {
  int frame_offset = framesize_in_slots * VMRegImpl::stack_slot_size;

  if (src.first()->is_stack()) {
    Address memaddr(Z_SP, reg2offset(src.first()) + frame_offset);
    if (dst.first()->is_stack()) {
      // stack -> stack. MVC not possible due to sign extension.
      Address firstaddr(Z_SP, reg2offset(dst.first()));
      __ mem2reg_signed_opt(Z_R0_scratch, memaddr);
      __ reg2mem_opt(Z_R0_scratch, firstaddr);
    } else {
      // stack -> reg, sign extended
      __ mem2reg_signed_opt(dst.first()->as_Register(), memaddr);
    }
  } else {
    if (dst.first()->is_stack()) {
      // reg -> stack, sign extended
      Address firstaddr(Z_SP, reg2offset(dst.first()));
      __ z_lgfr(src.first()->as_Register(), src.first()->as_Register());
      __ reg2mem_opt(src.first()->as_Register(), firstaddr);
    } else {
      // reg -> reg, sign extended
      __ z_lgfr(dst.first()->as_Register(), src.first()->as_Register());
    }
  }
}

//----------------------------------------------------------------------
// Wrap a JNI call.
//----------------------------------------------------------------------
#undef USE_RESIZE_FRAME
nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
                                                const methodHandle& method,
                                                int compile_id,
                                                BasicType *in_sig_bt,
                                                VMRegPair *in_regs,
                                                BasicType ret_type) {
  int total_in_args = method->size_of_parameters();
  if (method->is_method_handle_intrinsic()) {
    vmIntrinsics::ID iid = method->intrinsic_id();
    intptr_t start = (intptr_t) __ pc();
    int vep_offset = ((intptr_t) __ pc()) - start;

    gen_special_dispatch(masm, total_in_args,
                         method->intrinsic_id(), in_sig_bt, in_regs);

    int frame_complete = ((intptr_t)__ pc()) - start; // Not complete, period.

    __ flush();

    int stack_slots = SharedRuntime::out_preserve_stack_slots();  // No out slots at all, actually.

    return nmethod::new_native_nmethod(method,
                                       compile_id,
                                       masm->code(),
                                       vep_offset,
                                       frame_complete,
                                       stack_slots / VMRegImpl::slots_per_word,
                                       in_ByteSize(-1),
                                       in_ByteSize(-1),
                                       (OopMapSet *) nullptr);
  }


  ///////////////////////////////////////////////////////////////////////
  //
  //  Precalculations before generating any code
  //
  ///////////////////////////////////////////////////////////////////////

  address native_func = method->native_function();
  assert(native_func != nullptr, "must have function");

  //---------------------------------------------------------------------
  // We have received a description of where all the java args are located
  // on entry to the wrapper. We need to convert these args to where
  // the jni function will expect them. To figure out where they go
  // we convert the java signature to a C signature by inserting
  // the hidden arguments as arg[0] and possibly arg[1] (static method).
  //
  // The first hidden argument arg[0] is a pointer to the JNI environment.
  // It is generated for every call.
  // The second argument arg[1] to the JNI call, which is hidden for static
  // methods, is the boxed lock object. For static calls, the lock object
  // is the static method itself. The oop is constructed here. for instance
  // calls, the lock is performed on the object itself, the pointer of
  // which is passed as the first visible argument.
  //---------------------------------------------------------------------

  // Additionally, on z/Architecture we must convert integers
  // to longs in the C signature. We do this in advance in order to have
  // no trouble with indexes into the bt-arrays.
  // So convert the signature and registers now, and adjust the total number
  // of in-arguments accordingly.
  bool method_is_static = method->is_static();
  int  total_c_args     = total_in_args + (method_is_static ? 2 : 1);

  BasicType *out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
  VMRegPair *out_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);

  // Create the signature for the C call:
  //   1) add the JNIEnv*
  //   2) add the class if the method is static
  //   3) copy the rest of the incoming signature (shifted by the number of
  //      hidden arguments)

  int argc = 0;
  out_sig_bt[argc++] = T_ADDRESS;
  if (method->is_static()) {
    out_sig_bt[argc++] = T_OBJECT;
  }

  for (int i = 0; i < total_in_args; i++) {
    out_sig_bt[argc++] = in_sig_bt[i];
  }

  ///////////////////////////////////////////////////////////////////////
  // Now figure out where the args must be stored and how much stack space
  // they require (neglecting out_preserve_stack_slots but providing space
  // for storing the first five register arguments).
  // It's weird, see int_stk_helper.
  ///////////////////////////////////////////////////////////////////////

  //---------------------------------------------------------------------
  // Compute framesize for the wrapper.
  //
  // - We need to handlize all oops passed in registers.
  // - We must create space for them here that is disjoint from the save area.
  // - We always just allocate 5 words for storing down these object.
  //   This allows us to simply record the base and use the Ireg number to
  //   decide which slot to use.
  // - Note that the reg number used to index the stack slot is the inbound
  //   number, not the outbound number.
  // - We must shuffle args to match the native convention,
  //   and to include var-args space.
  //---------------------------------------------------------------------

  //---------------------------------------------------------------------
  // Calculate the total number of stack slots we will need:
  // - 1) abi requirements
  // - 2) outgoing args
  // - 3) space for inbound oop handle area
  // - 4) space for handlizing a klass if static method
  // - 5) space for a lock if synchronized method
  // - 6) workspace (save rtn value, int<->float reg moves, ...)
  // - 7) filler slots for alignment
  //---------------------------------------------------------------------
  // Here is how the space we have allocated will look like.
  // Since we use resize_frame, we do not create a new stack frame,
  // but just extend the one we got with our own data area.
  //
  // If an offset or pointer name points to a separator line, it is
  // assumed that addressing with offset 0 selects storage starting
  // at the first byte above the separator line.
  //
  //
  //     ...                   ...
  //      | caller's frame      |
  // FP-> |---------------------|
  //      | filler slots, if any|
  //     7| #slots == mult of 2 |
  //      |---------------------|
  //      | work space          |
  //     6| 2 slots = 8 bytes   |
  //      |---------------------|
  //     5| lock box (if sync)  |
  //      |---------------------| <- lock_slot_offset
  //     4| klass (if static)   |
  //      |---------------------| <- klass_slot_offset
  //     3| oopHandle area      |
  //      |                     |
  //      |                     |
  //      |---------------------| <- oop_handle_offset
  //     2| outbound memory     |
  //     ...                   ...
  //      | based arguments     |
  //      |---------------------|
  //      | vararg              |
  //     ...                   ...
  //      | area                |
  //      |---------------------| <- out_arg_slot_offset
  //     1| out_preserved_slots |
  //     ...                   ...
  //      | (z_abi spec)        |
  // SP-> |---------------------| <- FP_slot_offset (back chain)
  //     ...                   ...
  //
  //---------------------------------------------------------------------

  // *_slot_offset indicates offset from SP in #stack slots
  // *_offset      indicates offset from SP in #bytes

  int stack_slots = c_calling_convention(out_sig_bt, out_regs, total_c_args) + // 1+2
                    SharedRuntime::out_preserve_stack_slots(); // see c_calling_convention

  // Now the space for the inbound oop handle area.
  int total_save_slots = Register::number_of_arg_registers * VMRegImpl::slots_per_word;

  int oop_handle_slot_offset = stack_slots;
  stack_slots += total_save_slots;                                        // 3)

  int klass_slot_offset = 0;
  int klass_offset      = -1;
  if (method_is_static) {                                                 // 4)
    klass_slot_offset  = stack_slots;
    klass_offset       = klass_slot_offset * VMRegImpl::stack_slot_size;
    stack_slots       += VMRegImpl::slots_per_word;
  }

  int lock_slot_offset = 0;
  int lock_offset      = -1;
  if (method->is_synchronized()) {                                        // 5)
    lock_slot_offset   = stack_slots;
    lock_offset        = lock_slot_offset * VMRegImpl::stack_slot_size;
    stack_slots       += VMRegImpl::slots_per_word;
  }

  int workspace_slot_offset= stack_slots;                                 // 6)
  stack_slots         += 2;

  // Now compute actual number of stack words we need.
  // Round to align stack properly.
  stack_slots = align_up(stack_slots,                                     // 7)
                         frame::alignment_in_bytes / VMRegImpl::stack_slot_size);
  int frame_size_in_bytes = stack_slots * VMRegImpl::stack_slot_size;


  ///////////////////////////////////////////////////////////////////////
  // Now we can start generating code
  ///////////////////////////////////////////////////////////////////////

  unsigned int wrapper_CodeStart  = __ offset();
  unsigned int wrapper_UEPStart;
  unsigned int wrapper_VEPStart;
  unsigned int wrapper_FrameDone;
  unsigned int wrapper_CRegsSet;
  Label     handle_pending_exception;

  //---------------------------------------------------------------------
  // Unverified entry point (UEP)
  //---------------------------------------------------------------------

  // check ic: object class <-> cached class
  if (!method_is_static) {
    wrapper_UEPStart = __ ic_check(CodeEntryAlignment /* end_alignment */);
  }

  //---------------------------------------------------------------------
  // Verified entry point (VEP)
  //---------------------------------------------------------------------
  wrapper_VEPStart = __ offset();

  if (method->needs_clinit_barrier()) {
    assert(VM_Version::supports_fast_class_init_checks(), "sanity");
    Label L_skip_barrier;
    Register klass = Z_R1_scratch;
    // Notify OOP recorder (don't need the relocation)
    AddressLiteral md = __ constant_metadata_address(method->method_holder());
    __ load_const_optimized(klass, md.value());
    __ clinit_barrier(klass, Z_thread, &L_skip_barrier /*L_fast_path*/);

    __ load_const_optimized(klass, SharedRuntime::get_handle_wrong_method_stub());
    __ z_br(klass);

    __ bind(L_skip_barrier);
  }

  __ save_return_pc();
  __ generate_stack_overflow_check(frame_size_in_bytes);  // Check before creating frame.
#ifndef USE_RESIZE_FRAME
  __ push_frame(frame_size_in_bytes);                     // Create a new frame for the wrapper.
#else
  __ resize_frame(-frame_size_in_bytes, Z_R0_scratch);    // No new frame for the wrapper.
                                                          // Just resize the existing one.
#endif

  BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
  bs->nmethod_entry_barrier(masm);

  wrapper_FrameDone = __ offset();

  // Native nmethod wrappers never take possession of the oop arguments.
  // So the caller will gc the arguments.
  // The only thing we need an oopMap for is if the call is static.
  //
  // An OopMap for lock (and class if static), and one for the VM call itself
  OopMapSet  *oop_maps        = new OopMapSet();
  OopMap     *map             = new OopMap(stack_slots * 2, 0 /* arg_slots*/);

  //////////////////////////////////////////////////////////////////////
  //
  // The Grand Shuffle
  //
  //////////////////////////////////////////////////////////////////////
  //
  // We immediately shuffle the arguments so that for any vm call we have
  // to make from here on out (sync slow path, jvmti, etc.) we will have
  // captured the oops from our caller and have a valid oopMap for them.
  //
  //--------------------------------------------------------------------
  // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv*
  // (derived from JavaThread* which is in Z_thread) and, if static,
  // the class mirror instead of a receiver. This pretty much guarantees that
  // register layout will not match. We ignore these extra arguments during
  // the shuffle. The shuffle is described by the two calling convention
  // vectors we have in our possession. We simply walk the java vector to
  // get the source locations and the c vector to get the destinations.
  //
  // This is a trick. We double the stack slots so we can claim
  // the oops in the caller's frame. Since we are sure to have
  // more args than the caller doubling is enough to make
  // sure we can capture all the incoming oop args from the caller.
  //--------------------------------------------------------------------

  // Record sp-based slot for receiver on stack for non-static methods.
  int receiver_offset = -1;

  //--------------------------------------------------------------------
  // We move the arguments backwards because the floating point registers
  // destination will always be to a register with a greater or equal
  // register number or the stack.
  //   jix is the index of the incoming Java arguments.
  //   cix is the index of the outgoing C arguments.
  //--------------------------------------------------------------------

#ifdef ASSERT
  bool reg_destroyed[Register::number_of_registers];
  bool freg_destroyed[FloatRegister::number_of_registers];
  for (int r = 0; r < Register::number_of_registers; r++) {
    reg_destroyed[r] = false;
  }
  for (int f = 0; f < FloatRegister::number_of_registers; f++) {
    freg_destroyed[f] = false;
  }
#endif // ASSERT

  for (int jix = total_in_args - 1, cix = total_c_args - 1; jix >= 0; jix--, cix--) {
#ifdef ASSERT
    if (in_regs[jix].first()->is_Register()) {
      assert(!reg_destroyed[in_regs[jix].first()->as_Register()->encoding()], "ack!");
    } else {
      if (in_regs[jix].first()->is_FloatRegister()) {
        assert(!freg_destroyed[in_regs[jix].first()->as_FloatRegister()->encoding()], "ack!");
      }
    }
    if (out_regs[cix].first()->is_Register()) {
      reg_destroyed[out_regs[cix].first()->as_Register()->encoding()] = true;
    } else {
      if (out_regs[cix].first()->is_FloatRegister()) {
        freg_destroyed[out_regs[cix].first()->as_FloatRegister()->encoding()] = true;
      }
    }
#endif // ASSERT

    switch (in_sig_bt[jix]) {
      // Due to casting, small integers should only occur in pairs with type T_LONG.
      case T_BOOLEAN:
      case T_CHAR:
      case T_BYTE:
      case T_SHORT:
      case T_INT:
        // Move int and do sign extension.
        move32_64(masm, in_regs[jix], out_regs[cix], stack_slots);
        break;

      case T_LONG :
        long_move(masm, in_regs[jix], out_regs[cix], stack_slots);
        break;

      case T_ARRAY:
      case T_OBJECT:
        object_move(masm, map, oop_handle_slot_offset, stack_slots, in_regs[jix], out_regs[cix],
                    ((jix == 0) && (!method_is_static)),
                    &receiver_offset);
        break;
      case T_VOID:
        break;

      case T_FLOAT:
        float_move(masm, in_regs[jix], out_regs[cix], stack_slots, workspace_slot_offset);
        break;

      case T_DOUBLE:
        assert(jix+1 <  total_in_args && in_sig_bt[jix+1]  == T_VOID && out_sig_bt[cix+1] == T_VOID, "bad arg list");
        double_move(masm, in_regs[jix], out_regs[cix], stack_slots, workspace_slot_offset);
        break;

      case T_ADDRESS:
        assert(false, "found T_ADDRESS in java args");
        break;

      default:
        ShouldNotReachHere();
    }
  }

  //--------------------------------------------------------------------
  // Pre-load a static method's oop into ARG2.
  // Used both by locking code and the normal JNI call code.
  //--------------------------------------------------------------------
  if (method_is_static) {
    __ set_oop_constant(JNIHandles::make_local(method->method_holder()->java_mirror()), Z_ARG2);

    // Now handlize the static class mirror in ARG2. It's known not-null.
    __ z_stg(Z_ARG2, klass_offset, Z_SP);
    map->set_oop(VMRegImpl::stack2reg(klass_slot_offset));
    __ add2reg(Z_ARG2, klass_offset, Z_SP);
  }

  // Get JNIEnv* which is first argument to native.
  __ add2reg(Z_ARG1, in_bytes(JavaThread::jni_environment_offset()), Z_thread);

  //////////////////////////////////////////////////////////////////////
  // We have all of the arguments setup at this point.
  // We MUST NOT touch any outgoing regs from this point on.
  // So if we must call out we must push a new frame.
  //////////////////////////////////////////////////////////////////////


  // Calc the current pc into Z_R10 and into wrapper_CRegsSet.
  // Both values represent the same position.
  __ get_PC(Z_R10);                // PC into register
  wrapper_CRegsSet = __ offset();  // and into into variable.

  // Z_R10 now has the pc loaded that we will use when we finally call to native.

  // We use the same pc/oopMap repeatedly when we call out.
  oop_maps->add_gc_map((int)(wrapper_CRegsSet-wrapper_CodeStart), map);

  // Lock a synchronized method.

  if (method->is_synchronized()) {

    // ATTENTION: args and Z_R10 must be preserved.
    Register r_oop  = Z_R11;
    Register r_box  = Z_R12;
    Register r_tmp1 = Z_R13;
    Register r_tmp2 = Z_R7;
    Label done;

    // Load the oop for the object or class. R_carg2_classorobject contains
    // either the handlized oop from the incoming arguments or the handlized
    // class mirror (if the method is static).
    __ z_lg(r_oop, 0, Z_ARG2);

    lock_offset = (lock_slot_offset * VMRegImpl::stack_slot_size);
    // Get the lock box slot's address.
    __ add2reg(r_box, lock_offset, Z_SP);

    // Try fastpath for locking.
    // Fast_lock kills r_temp_1, r_temp_2.
    __ compiler_fast_lock_object(r_oop, r_box, r_tmp1, r_tmp2);
    __ z_bre(done);

    //-------------------------------------------------------------------------
    // None of the above fast optimizations worked so we have to get into the
    // slow case of monitor enter. Inline a special case of call_VM that
    // disallows any pending_exception.
    //-------------------------------------------------------------------------

    Register oldSP = Z_R11;

    __ z_lgr(oldSP, Z_SP);

    RegisterSaver::save_live_registers(masm, RegisterSaver::arg_registers);

    // Prepare arguments for call.
    __ z_lg(Z_ARG1, 0, Z_ARG2); // Ynboxed class mirror or unboxed object.
    __ add2reg(Z_ARG2, lock_offset, oldSP);
    __ z_lgr(Z_ARG3, Z_thread);

    __ set_last_Java_frame(oldSP, Z_R10 /* gc map pc */);

    // Do the call.
    __ load_const_optimized(Z_R1_scratch, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C));
    __ call(Z_R1_scratch);

    __ reset_last_Java_frame();

    RegisterSaver::restore_live_registers(masm, RegisterSaver::arg_registers);
#ifdef ASSERT
    { Label L;
      __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset()));
      __ z_bre(L);
      __ stop("no pending exception allowed on exit from IR::monitorenter");
      __ bind(L);
    }
#endif
    __ bind(done);
  } // lock for synchronized methods


  //////////////////////////////////////////////////////////////////////
  // Finally just about ready to make the JNI call.
  //////////////////////////////////////////////////////////////////////

  // Use that pc we placed in Z_R10 a while back as the current frame anchor.
  __ set_last_Java_frame(Z_SP, Z_R10);

  // Transition from _thread_in_Java to _thread_in_native.
  __ set_thread_state(_thread_in_native);

  //////////////////////////////////////////////////////////////////////
  // This is the JNI call.
  //////////////////////////////////////////////////////////////////////

  __ call_c(native_func);


  //////////////////////////////////////////////////////////////////////
  // We have survived the call once we reach here.
  //////////////////////////////////////////////////////////////////////


  //--------------------------------------------------------------------
  // Unpack native results.
  //--------------------------------------------------------------------
  // For int-types, we do any needed sign-extension required.
  // Care must be taken that the return value (in Z_ARG1 = Z_RET = Z_R2
  // or in Z_FARG0 = Z_FRET = Z_F0) will survive any VM calls for
  // blocking or unlocking.
  // An OOP result (handle) is done specially in the slow-path code.
  //--------------------------------------------------------------------
  switch (ret_type) {
    case T_VOID:    break;         // Nothing to do!
    case T_FLOAT:   break;         // Got it where we want it (unless slow-path)
    case T_DOUBLE:  break;         // Got it where we want it (unless slow-path)
    case T_LONG:    break;         // Got it where we want it (unless slow-path)
    case T_OBJECT:  break;         // Really a handle.
                                   // Cannot de-handlize until after reclaiming jvm_lock.
    case T_ARRAY:   break;

    case T_BOOLEAN:                // 0 -> false(0); !0 -> true(1)
      __ z_lngfr(Z_RET, Z_RET);    // Force sign bit on except for zero.
      __ z_srlg(Z_RET, Z_RET, 63); // Shift sign bit into least significant pos.
      break;
    case T_BYTE:    __ z_lgbr(Z_RET, Z_RET);  break; // sign extension
    case T_CHAR:    __ z_llghr(Z_RET, Z_RET); break; // unsigned result
    case T_SHORT:   __ z_lghr(Z_RET, Z_RET);  break; // sign extension
    case T_INT:     __ z_lgfr(Z_RET, Z_RET);  break; // sign-extend for beauty.

    default:
      ShouldNotReachHere();
      break;
  }

  // Switch thread to "native transition" state before reading the synchronization state.
  // This additional state is necessary because reading and testing the synchronization
  // state is not atomic w.r.t. GC, as this scenario demonstrates:
  //   - Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted.
  //   - VM thread changes sync state to synchronizing and suspends threads for GC.
  //   - Thread A is resumed to finish this native method, but doesn't block here since it
  //     didn't see any synchronization in progress, and escapes.

  // Transition from _thread_in_native to _thread_in_native_trans.
  __ set_thread_state(_thread_in_native_trans);

  // Safepoint synchronization
  //--------------------------------------------------------------------
  // Must we block?
  //--------------------------------------------------------------------
  // Block, if necessary, before resuming in _thread_in_Java state.
  // In order for GC to work, don't clear the last_Java_sp until after blocking.
  //--------------------------------------------------------------------
  {
    Label no_block, sync;

    save_native_result(masm, ret_type, workspace_slot_offset); // Make Z_R2 available as work reg.

    // Force this write out before the read below.
    if (!UseSystemMemoryBarrier) {
      __ z_fence();
    }

    __ safepoint_poll(sync, Z_R1);

    __ load_and_test_int(Z_R0, Address(Z_thread, JavaThread::suspend_flags_offset()));
    __ z_bre(no_block);

    // Block. Save any potential method result value before the operation and
    // use a leaf call to leave the last_Java_frame setup undisturbed. Doing this
    // lets us share the oopMap we used when we went native rather than create
    // a distinct one for this pc.
    //
    __ bind(sync);
    __ z_acquire();

    address entry_point = CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans);

    __ call_VM_leaf(entry_point, Z_thread);

    __ bind(no_block);
    restore_native_result(masm, ret_type, workspace_slot_offset);
  }

  //--------------------------------------------------------------------
  // Thread state is thread_in_native_trans. Any safepoint blocking has
  // already happened so we can now change state to _thread_in_Java.
  //--------------------------------------------------------------------
  // Transition from _thread_in_native_trans to _thread_in_Java.
  __ set_thread_state(_thread_in_Java);

  //--------------------------------------------------------------------
  // Reguard any pages if necessary.
  // Protect native result from being destroyed.
  //--------------------------------------------------------------------

  Label no_reguard;

  __ z_cli(Address(Z_thread, JavaThread::stack_guard_state_offset() + in_ByteSize(sizeof(StackOverflow::StackGuardState) - 1)),
           StackOverflow::stack_guard_yellow_reserved_disabled);

  __ z_bre(no_reguard);

  save_native_result(masm, ret_type, workspace_slot_offset);
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), Z_method);
  restore_native_result(masm, ret_type, workspace_slot_offset);

  __ bind(no_reguard);


  // Synchronized methods (slow path only)
  // No pending exceptions for now.
  //--------------------------------------------------------------------
  // Handle possibly pending exception (will unlock if necessary).
  // Native result is, if any is live, in Z_FRES or Z_RES.
  //--------------------------------------------------------------------
  // Unlock
  //--------------------------------------------------------------------
  if (method->is_synchronized()) {
    const Register r_oop        = Z_R11;
    const Register r_box        = Z_R12;
    const Register r_tmp1       = Z_R13;
    const Register r_tmp2       = Z_R7;
    Label done;

    // Get unboxed oop of class mirror or object ...
    int   offset = method_is_static ? klass_offset : receiver_offset;

    assert(offset != -1, "");
    __ z_lg(r_oop, offset, Z_SP);

    // ... and address of lock object box.
    __ add2reg(r_box, lock_offset, Z_SP);

    // Try fastpath for unlocking.
    // Fast_unlock kills r_tmp1, r_tmp2.
    __ compiler_fast_unlock_object(r_oop, r_box, r_tmp1, r_tmp2);
    __ z_bre(done);

    // Slow path for unlocking.
    // Save and restore any potential method result value around the unlocking operation.
    const Register R_exc = Z_R11;

    save_native_result(masm, ret_type, workspace_slot_offset);

    // Must save pending exception around the slow-path VM call. Since it's a
    // leaf call, the pending exception (if any) can be kept in a register.
    __ z_lg(R_exc, Address(Z_thread, Thread::pending_exception_offset()));
    assert(R_exc->is_nonvolatile(), "exception register must be non-volatile");

    // Must clear pending-exception before re-entering the VM. Since this is
    // a leaf call, pending-exception-oop can be safely kept in a register.
    __ clear_mem(Address(Z_thread, Thread::pending_exception_offset()), sizeof(intptr_t));

    // Inline a special case of call_VM that disallows any pending_exception.

    // Get locked oop from the handle we passed to jni.
    __ z_lg(Z_ARG1, offset, Z_SP);
    __ add2reg(Z_ARG2, lock_offset, Z_SP);
    __ z_lgr(Z_ARG3, Z_thread);

    __ load_const_optimized(Z_R1_scratch, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C));

    __ call(Z_R1_scratch);

#ifdef ASSERT
    {
      Label L;
      __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset()));
      __ z_bre(L);
      __ stop("no pending exception allowed on exit from IR::monitorexit");
      __ bind(L);
    }
#endif

    // Check_forward_pending_exception jump to forward_exception if any pending
    // exception is set. The forward_exception routine expects to see the
    // exception in pending_exception and not in a register. Kind of clumsy,
    // since all folks who branch to forward_exception must have tested
    // pending_exception first and hence have it in a register already.
    __ z_stg(R_exc, Address(Z_thread, Thread::pending_exception_offset()));
    restore_native_result(masm, ret_type, workspace_slot_offset);
    __ z_bru(done);
    __ z_illtrap(0x66);

    __ bind(done);
  }


  //--------------------------------------------------------------------
  // Clear "last Java frame" SP and PC.
  //--------------------------------------------------------------------

  __ reset_last_Java_frame();

  // Unpack oop result, e.g. JNIHandles::resolve result.
  if (is_reference_type(ret_type)) {
    __ resolve_jobject(Z_RET, /* tmp1 */ Z_R13, /* tmp2 */ Z_R7);
  }

  if (CheckJNICalls) {
    // clear_pending_jni_exception_check
    __ clear_mem(Address(Z_thread, JavaThread::pending_jni_exception_check_fn_offset()), sizeof(oop));
  }

  // Reset handle block.
  __ z_lg(Z_R1_scratch, Address(Z_thread, JavaThread::active_handles_offset()));
  __ clear_mem(Address(Z_R1_scratch, JNIHandleBlock::top_offset()), 4);

  // Check for pending exceptions.
  __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset()));
  __ z_brne(handle_pending_exception);


  //////////////////////////////////////////////////////////////////////
  // Return
  //////////////////////////////////////////////////////////////////////


#ifndef USE_RESIZE_FRAME
  __ pop_frame();                     // Pop wrapper frame.
#else
  __ resize_frame(frame_size_in_bytes, Z_R0_scratch);  // Revert stack extension.
#endif
  __ restore_return_pc();             // This is the way back to the caller.
  __ z_br(Z_R14);


  //////////////////////////////////////////////////////////////////////
  // Out-of-line calls to the runtime.
  //////////////////////////////////////////////////////////////////////


  //---------------------------------------------------------------------
  // Handler for pending exceptions (out-of-line).
  //---------------------------------------------------------------------
  // Since this is a native call, we know the proper exception handler
  // is the empty function. We just pop this frame and then jump to
  // forward_exception_entry. Z_R14 will contain the native caller's
  // return PC.
  __ bind(handle_pending_exception);
  __ pop_frame();
  __ load_const_optimized(Z_R1_scratch, StubRoutines::forward_exception_entry());
  __ restore_return_pc();
  __ z_br(Z_R1_scratch);

  __ flush();
  //////////////////////////////////////////////////////////////////////
  // end of code generation
  //////////////////////////////////////////////////////////////////////


  nmethod *nm = nmethod::new_native_nmethod(method,
                                            compile_id,
                                            masm->code(),
                                            (int)(wrapper_VEPStart-wrapper_CodeStart),
                                            (int)(wrapper_FrameDone-wrapper_CodeStart),
                                            stack_slots / VMRegImpl::slots_per_word,
                                            (method_is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)),
                                            in_ByteSize(lock_offset),
                                            oop_maps);

  return nm;
}

static address gen_c2i_adapter(MacroAssembler  *masm,
                               int comp_args_on_stack,
                               const GrowableArray<SigEntry>* sig,
                               const VMRegPair *regs,
                               Label &skip_fixup) {
  // Before we get into the guts of the C2I adapter, see if we should be here
  // at all. We've come from compiled code and are attempting to jump to the
  // interpreter, which means the caller made a static call to get here
  // (vcalls always get a compiled target if there is one). Check for a
  // compiled target. If there is one, we need to patch the caller's call.

  // These two defs MUST MATCH code in gen_i2c2i_adapter!
  const Register ientry = Z_R11;
  const Register code   = Z_R11;

  address c2i_entrypoint;
  Label   patch_callsite;

  // Regular (verified) c2i entry point.
  c2i_entrypoint = __ pc();

  // Call patching needed?
  __ load_and_test_long(Z_R0_scratch, method_(code));
  __ z_lg(ientry, method_(interpreter_entry));  // Preload interpreter entry (also if patching).
  __ z_brne(patch_callsite);                    // Patch required if code isn't null (compiled target exists).

  __ bind(skip_fixup);  // Return point from patch_callsite.

  // Since all args are passed on the stack, total_args_passed*wordSize is the
  // space we need. We need ABI scratch area but we use the caller's since
  // it has already been allocated.
  int       total_args_passed = sig->length();
  const int abi_scratch = frame::z_top_ijava_frame_abi_size;
  int       extraspace  = align_up(total_args_passed, 2)*wordSize + abi_scratch;
  Register  sender_SP   = Z_R10;
  Register  value       = Z_R12;

  // Remember the senderSP so we can pop the interpreter arguments off of the stack.
  // In addition, template interpreter expects initial_caller_sp in Z_R10.
  __ z_lgr(sender_SP, Z_SP);

  // This should always fit in 14 bit immediate.
  __ resize_frame(-extraspace, Z_R0_scratch);

  // We use the caller's ABI scratch area (out_preserved_stack_slots) for the initial
  // args. This essentially moves the callers ABI scratch area from the top to the
  // bottom of the arg area.

  int st_off =  extraspace - wordSize;

  // Now write the args into the outgoing interpreter space.
  for (int i = 0; i < total_args_passed; i++) {
    BasicType bt = sig->at(i)._bt;

    VMReg r_1 = regs[i].first();
    VMReg r_2 = regs[i].second();
    if (!r_1->is_valid()) {
      assert(!r_2->is_valid(), "");
      continue;
    }
    if (r_1->is_stack()) {
      // The calling convention produces OptoRegs that ignore the preserve area (abi scratch).
      // We must account for it here.
      int ld_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;

      if (!r_2->is_valid()) {
        __ z_mvc(Address(Z_SP, st_off), Address(sender_SP, ld_off), sizeof(void*));
      } else {
        // longs are given 2 64-bit slots in the interpreter,
        // but the data is passed in only 1 slot.
        if (bt == T_LONG || bt == T_DOUBLE) {
#ifdef ASSERT
          __ clear_mem(Address(Z_SP, st_off), sizeof(void *));
#endif
          st_off -= wordSize;
        }
        __ z_mvc(Address(Z_SP, st_off), Address(sender_SP, ld_off), sizeof(void*));
      }
    } else {
      if (r_1->is_Register()) {
        if (!r_2->is_valid()) {
          __ z_st(r_1->as_Register(), st_off, Z_SP);
        } else {
          // longs are given 2 64-bit slots in the interpreter, but the
          // data is passed in only 1 slot.
          if (bt == T_LONG || bt == T_DOUBLE) {
#ifdef ASSERT
            __ clear_mem(Address(Z_SP, st_off), sizeof(void *));
#endif
            st_off -= wordSize;
          }
          __ z_stg(r_1->as_Register(), st_off, Z_SP);
        }
      } else {
        assert(r_1->is_FloatRegister(), "");
        if (!r_2->is_valid()) {
          __ z_ste(r_1->as_FloatRegister(), st_off, Z_SP);
        } else {
          // In 64bit, doubles are given 2 64-bit slots in the interpreter, but the
          // data is passed in only 1 slot.
          // One of these should get known junk...
#ifdef ASSERT
          __ z_lzdr(Z_F1);
          __ z_std(Z_F1, st_off, Z_SP);
#endif
          st_off-=wordSize;
          __ z_std(r_1->as_FloatRegister(), st_off, Z_SP);
        }
      }
    }
    st_off -= wordSize;
  }


  // Jump to the interpreter just as if interpreter was doing it.
  __ add2reg(Z_esp, st_off, Z_SP);

  // Frame_manager expects initial_caller_sp (= SP without resize by c2i) in Z_R10.
  __ z_br(ientry);


  // Prevent illegal entry to out-of-line code.
  __ z_illtrap(0x22);

  // Generate out-of-line runtime call to patch caller,
  // then continue as interpreted.

  // IF you lose the race you go interpreted.
  // We don't see any possible endless c2i -> i2c -> c2i ...
  // transitions no matter how rare.
  __ bind(patch_callsite);

  RegisterSaver::save_live_registers(masm, RegisterSaver::arg_registers);
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), Z_method, Z_R14);
  RegisterSaver::restore_live_registers(masm, RegisterSaver::arg_registers);
  __ z_bru(skip_fixup);

  // end of out-of-line code

  return c2i_entrypoint;
}

// On entry, the following registers are set
//
//    Z_thread  r8  - JavaThread*
//    Z_method  r9  - callee's method (method to be invoked)
//    Z_esp     r7  - operand (or expression) stack pointer of caller. one slot above last arg.
//    Z_SP      r15 - SP prepared by call stub such that caller's outgoing args are near top
//
void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
                                    int comp_args_on_stack,
                                    const GrowableArray<SigEntry>* sig,
                                    const VMRegPair *regs) {
  const Register value = Z_R12;
  const Register ld_ptr= Z_esp;
  int total_args_passed = sig->length();

  int ld_offset = total_args_passed * wordSize;

  // Cut-out for having no stack args.
  if (comp_args_on_stack) {
    // Sig words on the stack are greater than VMRegImpl::stack0. Those in
    // registers are below. By subtracting stack0, we either get a negative
    // number (all values in registers) or the maximum stack slot accessed.
    // Convert VMRegImpl (4 byte) stack slots to words.
    int comp_words_on_stack = align_up(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord;
    // Round up to miminum stack alignment, in wordSize
    comp_words_on_stack = align_up(comp_words_on_stack, 2);

    __ resize_frame(-comp_words_on_stack*wordSize, Z_R0_scratch);
  }

  // Now generate the shuffle code. Pick up all register args and move the
  // rest through register value=Z_R12.
  for (int i = 0; i < total_args_passed; i++) {
    BasicType bt = sig->at(i)._bt;
    if (bt == T_VOID) {
      assert(i > 0 && (sig->at(i - 1)._bt == T_LONG || sig->at(i - 1)._bt == T_DOUBLE), "missing half");
      continue;
    }

    // Pick up 0, 1 or 2 words from ld_ptr.
    assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
           "scrambled load targets?");
    VMReg r_1 = regs[i].first();
    VMReg r_2 = regs[i].second();
    if (!r_1->is_valid()) {
      assert(!r_2->is_valid(), "");
      continue;
    }
    if (r_1->is_FloatRegister()) {
      if (!r_2->is_valid()) {
        __ z_le(r_1->as_FloatRegister(), ld_offset, ld_ptr);
        ld_offset-=wordSize;
      } else {
        // Skip the unused interpreter slot.
        __ z_ld(r_1->as_FloatRegister(), ld_offset - wordSize, ld_ptr);
        ld_offset -= 2 * wordSize;
      }
    } else {
      if (r_1->is_stack()) {
        // Must do a memory to memory move.
        int st_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;

        if (!r_2->is_valid()) {
          __ z_mvc(Address(Z_SP, st_off), Address(ld_ptr, ld_offset), sizeof(void*));
        } else {
          // In 64bit, longs are given 2 64-bit slots in the interpreter, but the
          // data is passed in only 1 slot.
          if (bt == T_LONG || bt == T_DOUBLE) {
            ld_offset -= wordSize;
          }
          __ z_mvc(Address(Z_SP, st_off), Address(ld_ptr, ld_offset), sizeof(void*));
        }
      } else {
        if (!r_2->is_valid()) {
          // Not sure we need to do this but it shouldn't hurt.
          if (is_reference_type(bt) || bt == T_ADDRESS) {
            __ z_lg(r_1->as_Register(), ld_offset, ld_ptr);
          } else {
            __ z_l(r_1->as_Register(), ld_offset, ld_ptr);
          }
        } else {
          // In 64bit, longs are given 2 64-bit slots in the interpreter, but the
          // data is passed in only 1 slot.
          if (bt == T_LONG || bt == T_DOUBLE) {
            ld_offset -= wordSize;
          }
          __ z_lg(r_1->as_Register(), ld_offset, ld_ptr);
        }
      }
      ld_offset -= wordSize;
    }
  }

  // Jump to the compiled code just as if compiled code was doing it.
  // load target address from method:
  __ z_lg(Z_R1_scratch, Address(Z_method, Method::from_compiled_offset()));

  // Store method into thread->callee_target.
  // 6243940: We might end up in handle_wrong_method if
  // the callee is deoptimized as we race thru here. If that
  // happens we don't want to take a safepoint because the
  // caller frame will look interpreted and arguments are now
  // "compiled" so it is much better to make this transition
  // invisible to the stack walking code. Unfortunately, if
  // we try and find the callee by normal means a safepoint
  // is possible. So we stash the desired callee in the thread
  // and the vm will find it there should this case occur.
  __ z_stg(Z_method, thread_(callee_target));

  __ z_br(Z_R1_scratch);
}

void SharedRuntime::generate_i2c2i_adapters(MacroAssembler* masm,
                                            int comp_args_on_stack,
                                            const GrowableArray<SigEntry>* sig,
                                            const VMRegPair* regs,
                                            const GrowableArray<SigEntry>* sig_cc,
                                            const VMRegPair* regs_cc,
                                            const GrowableArray<SigEntry>* sig_cc_ro,
                                            const VMRegPair* regs_cc_ro,
                                            address entry_address[AdapterBlob::ENTRY_COUNT],
                                            AdapterBlob*& new_adapter,
                                            bool allocate_code_blob) {
  __ align(CodeEntryAlignment);
  entry_address[AdapterBlob::I2C] = __ pc();
  gen_i2c_adapter(masm, comp_args_on_stack, sig, regs);

  Label skip_fixup;
  {
    Label ic_miss;

    // Out-of-line call to ic_miss handler.
    __ call_ic_miss_handler(ic_miss, 0x11, 0, Z_R1_scratch);

    // Unverified Entry Point UEP
    __ align(CodeEntryAlignment);
    entry_address[AdapterBlob::C2I_Unverified] = __ pc();

    __ ic_check(2);
    __ z_lg(Z_method, Address(Z_inline_cache, CompiledICData::speculated_method_offset()));
    // This def MUST MATCH code in gen_c2i_adapter!
    const Register code = Z_R11;

    __ load_and_test_long(Z_R0, method_(code));
    __ z_brne(ic_miss);  // Cache miss: call runtime to handle this.

    // Fallthru to VEP. Duplicate LTG, but saved taken branch.
  }

  entry_address[AdapterBlob::C2I] = __ pc();

  // Class initialization barrier for static methods
  entry_address[AdapterBlob::C2I_No_Clinit_Check] = nullptr;
  assert(VM_Version::supports_fast_class_init_checks(), "sanity");
  Label L_skip_barrier;

  // Bypass the barrier for non-static methods
  __ testbit_ushort(Address(Z_method, Method::access_flags_offset()), JVM_ACC_STATIC_BIT);
  __ z_bfalse(L_skip_barrier); // non-static

  Register klass = Z_R11;
  __ load_method_holder(klass, Z_method);
  __ clinit_barrier(klass, Z_thread, &L_skip_barrier /*L_fast_path*/);

  __ load_const_optimized(klass, SharedRuntime::get_handle_wrong_method_stub());
  __ z_br(klass);

  __ bind(L_skip_barrier);
  entry_address[AdapterBlob::C2I_No_Clinit_Check] = __ pc();

  gen_c2i_adapter(masm, comp_args_on_stack, sig, regs, skip_fixup);
  return;
}

// This function returns the adjust size (in number of words) to a c2i adapter
// activation for use during deoptimization.
//
// Actually only compiled frames need to be adjusted, but it
// doesn't harm to adjust entry and interpreter frames, too.
//
int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
  assert(callee_locals >= callee_parameters,
          "test and remove; got more parms than locals");
  // Handle the abi adjustment here instead of doing it in push_skeleton_frames.
  return (callee_locals - callee_parameters) * Interpreter::stackElementWords +
         frame::z_parent_ijava_frame_abi_size / BytesPerWord;
}

uint SharedRuntime::in_preserve_stack_slots() {
  return frame::jit_in_preserve_size_in_4_byte_units;
}

uint SharedRuntime::out_preserve_stack_slots() {
  return frame::z_jit_out_preserve_size/VMRegImpl::stack_slot_size;
}

VMReg SharedRuntime::thread_register() {
  Unimplemented();
  return nullptr;
}

//
// Frame generation for deopt and uncommon trap blobs.
//
static void push_skeleton_frame(MacroAssembler* masm,
                          /* Unchanged */
                          Register frame_sizes_reg,
                          Register pcs_reg,
                          /* Invalidate */
                          Register frame_size_reg,
                          Register pc_reg) {
  BLOCK_COMMENT("  push_skeleton_frame {");
   __ z_lg(pc_reg, 0, pcs_reg);
   __ z_lg(frame_size_reg, 0, frame_sizes_reg);
   __ z_stg(pc_reg, _z_abi(return_pc), Z_SP);
   Register fp = pc_reg;
   __ push_frame(frame_size_reg, fp);
#ifdef ASSERT
   // The magic is required for successful walking skeletal frames.
   __ load_const_optimized(frame_size_reg/*tmp*/, frame::z_istate_magic_number);
   __ z_stg(frame_size_reg, _z_ijava_state_neg(magic), fp);
   // Fill other slots that are supposedly not necessary with eye catchers.
   __ load_const_optimized(frame_size_reg/*use as tmp*/, 0xdeadbad1);
   __ z_stg(frame_size_reg, _z_ijava_state_neg(top_frame_sp), fp);
   // The sender_sp of the bottom frame is set before pushing it.
   // The sender_sp of non bottom frames is their caller's top_frame_sp, which
   // is unknown here. Luckily it is not needed before filling the frame in
   // layout_activation(), we assert this by setting an eye catcher (see
   // comments on sender_sp in frame_s390.hpp).
   __ z_stg(frame_size_reg, _z_ijava_state_neg(sender_sp), Z_SP);
#endif // ASSERT
  BLOCK_COMMENT("  } push_skeleton_frame");
}

// Loop through the UnrollBlock info and create new frames.
static void push_skeleton_frames(MacroAssembler* masm, bool deopt,
                            /* read */
                            Register unroll_block_reg,
                            /* invalidate */
                            Register frame_sizes_reg,
                            Register number_of_frames_reg,
                            Register pcs_reg,
                            Register tmp1,
                            Register tmp2) {
  BLOCK_COMMENT("push_skeleton_frames {");
  // _number_of_frames is of type int (deoptimization.hpp).
  __ z_lgf(number_of_frames_reg,
           Address(unroll_block_reg, Deoptimization::UnrollBlock::number_of_frames_offset()));
  __ z_lg(pcs_reg,
          Address(unroll_block_reg, Deoptimization::UnrollBlock::frame_pcs_offset()));
  __ z_lg(frame_sizes_reg,
          Address(unroll_block_reg, Deoptimization::UnrollBlock::frame_sizes_offset()));

  // stack: (caller_of_deoptee, ...).

  // If caller_of_deoptee is a compiled frame, then we extend it to make
  // room for the callee's locals and the frame::z_parent_ijava_frame_abi.
  // See also Deoptimization::last_frame_adjust() above.
  // Note: entry and interpreted frames are adjusted, too. But this doesn't harm.

  __ z_lgf(Z_R1_scratch,
           Address(unroll_block_reg, Deoptimization::UnrollBlock::caller_adjustment_offset()));
  __ z_lgr(tmp1, Z_SP);  // Save the sender sp before extending the frame.
  __ resize_frame_sub(Z_R1_scratch, tmp2/*tmp*/);
  // The oldest skeletal frame requires a valid sender_sp to make it walkable
  // (it is required to find the original pc of caller_of_deoptee if it is marked
  // for deoptimization - see nmethod::orig_pc_addr()).
  __ z_stg(tmp1, _z_ijava_state_neg(sender_sp), Z_SP);

  // Now push the new interpreter frames.
  Label loop, loop_entry;

  // Make sure that there is at least one entry in the array.
  DEBUG_ONLY(__ z_ltgr(number_of_frames_reg, number_of_frames_reg));
  __ asm_assert(Assembler::bcondNotZero, "array_size must be > 0", 0x205);

  __ z_bru(loop_entry);

  __ bind(loop);

  __ add2reg(frame_sizes_reg, wordSize);
  __ add2reg(pcs_reg, wordSize);

  __ bind(loop_entry);

  // Allocate a new frame, fill in the pc.
  push_skeleton_frame(masm, frame_sizes_reg, pcs_reg, tmp1, tmp2);

  __ z_aghi(number_of_frames_reg, -1);  // Emit AGHI, because it sets the condition code
  __ z_brne(loop);

  // Set the top frame's return pc.
  __ add2reg(pcs_reg, wordSize);
  __ z_lg(Z_R0_scratch, 0, pcs_reg);
  __ z_stg(Z_R0_scratch, _z_abi(return_pc), Z_SP);
  BLOCK_COMMENT("} push_skeleton_frames");
}

//------------------------------generate_deopt_blob----------------------------
void SharedRuntime::generate_deopt_blob() {
  // Allocate space for the code.
  ResourceMark rm;
  // Setup code generation tools.
  const char* name = SharedRuntime::stub_name(StubId::shared_deopt_id);
  CodeBuffer buffer(name, 2048, 1024);
  InterpreterMacroAssembler* masm = new InterpreterMacroAssembler(&buffer);
  Label exec_mode_initialized;
  OopMap* map = nullptr;
  OopMapSet *oop_maps = new OopMapSet();

  unsigned int start_off = __ offset();
  Label cont;

  // --------------------------------------------------------------------------
  // Normal entry (non-exception case)
  //
  // We have been called from the deopt handler of the deoptee.
  // Z_R14 points to the entry point of the deopt handler.
  // The return_pc has been stored in the frame of the deoptee and
  // will replace the address of the deopt_handler in the call
  // to Deoptimization::fetch_unroll_info below.

  const Register   exec_mode_reg = Z_tmp_1;

  // stack: (deoptee, caller of deoptee, ...)

  // pushes an "unpack" frame
  // R14 contains the return address pointing into the deoptimized
  // nmethod that was valid just before the nmethod was deoptimized.
  // save R14 into the deoptee frame.  the `fetch_unroll_info'
  // procedure called below will read it from there.
  map = RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers);

  // note the entry point.
  __ load_const_optimized(exec_mode_reg, Deoptimization::Unpack_deopt);
  __ z_bru(exec_mode_initialized);

#ifndef COMPILER1
  int reexecute_offset = 1; // odd offset will produce odd pc, which triggers an hardware trap
#else
  // --------------------------------------------------------------------------
  // Reexecute entry
  // - Z_R14 = Deopt Handler in nmethod

  int reexecute_offset = __ offset() - start_off;

  // No need to update map as each call to save_live_registers will produce identical oopmap
  (void) RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers);

  __ load_const_optimized(exec_mode_reg, Deoptimization::Unpack_reexecute);
  __ z_bru(exec_mode_initialized);
#endif


  // --------------------------------------------------------------------------
  // Exception entry. We reached here via a branch. Registers on entry:
  // - Z_EXC_OOP (Z_ARG1) = exception oop
  // - Z_EXC_PC  (Z_ARG2) = the exception pc.

  int exception_offset = __ offset() - start_off;

  // all registers are dead at this entry point, except for Z_EXC_OOP, and
  // Z_EXC_PC which contain the exception oop and exception pc
  // respectively.  Set them in TLS and fall thru to the
  // unpack_with_exception_in_tls entry point.

  // Store exception oop and pc in thread (location known to GC).
  // Need this since the call to "fetch_unroll_info()" may safepoint.
  __ z_stg(Z_EXC_OOP, Address(Z_thread, JavaThread::exception_oop_offset()));
  __ z_stg(Z_EXC_PC,  Address(Z_thread, JavaThread::exception_pc_offset()));

  // fall through

  int exception_in_tls_offset = __ offset() - start_off;

  // new implementation because exception oop is now passed in JavaThread

  // Prolog for exception case
  // All registers must be preserved because they might be used by LinearScan
  // Exceptiop oop and throwing PC are passed in JavaThread

  // load throwing pc from JavaThread and us it as the return address of the current frame.
  __ z_lg(Z_R1_scratch, Address(Z_thread, JavaThread::exception_pc_offset()));

  // Save everything in sight.
  (void) RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers, Z_R1_scratch);

  // Now it is safe to overwrite any register

  // Clear the exception pc field in JavaThread
  __ clear_mem(Address(Z_thread, JavaThread::exception_pc_offset()), 8);

  // Deopt during an exception.  Save exec mode for unpack_frames.
  __ load_const_optimized(exec_mode_reg, Deoptimization::Unpack_exception);


#ifdef ASSERT
  // verify that there is really an exception oop in JavaThread
  __ z_lg(Z_ARG1, Address(Z_thread, JavaThread::exception_oop_offset()));
  __ MacroAssembler::verify_oop(Z_ARG1, FILE_AND_LINE);

  // verify that there is no pending exception
  __ asm_assert_mem8_is_zero(in_bytes(Thread::pending_exception_offset()), Z_thread,
                             "must not have pending exception here", __LINE__);
#endif

  // --------------------------------------------------------------------------
  // At this point, the live registers are saved and
  // the exec_mode_reg has been set up correctly.
  __ bind(exec_mode_initialized);

  // stack: ("unpack" frame, deoptee, caller_of_deoptee, ...).

  const Register unroll_block_reg  = Z_tmp_2;

  // we need to set `last_Java_frame' because `fetch_unroll_info' will
  // call `last_Java_frame()'.  however we can't block and no gc will
  // occur so we don't need an oopmap. the value of the pc in the
  // frame is not particularly important.  it just needs to identify the blob.

  // Don't set last_Java_pc anymore here (is implicitly null then).
  // the correct PC is retrieved in pd_last_frame() in that case.
  __ set_last_Java_frame(/*sp*/Z_SP, noreg);
  // With EscapeAnalysis turned on, this call may safepoint
  // despite it's marked as "leaf call"!
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), Z_thread, exec_mode_reg);
  // Set an oopmap for the call site this describes all our saved volatile registers
  int oop_map_offs = __ offset();
  oop_maps->add_gc_map(oop_map_offs, map);

  __ reset_last_Java_frame();
  // save the return value.
  __ z_lgr(unroll_block_reg, Z_RET);
  // restore the return registers that have been saved
  // (among other registers) by save_live_registers(...).
  RegisterSaver::restore_result_registers(masm);

  // reload the exec mode from the UnrollBlock (it might have changed)
  __ z_llgf(exec_mode_reg, Address(unroll_block_reg, Deoptimization::UnrollBlock::unpack_kind_offset()));

  // In excp_deopt_mode, restore and clear exception oop which we
  // stored in the thread during exception entry above. The exception
  // oop will be the return value of this stub.
  NearLabel skip_restore_excp;
  __ compare64_and_branch(exec_mode_reg, Deoptimization::Unpack_exception, Assembler::bcondNotEqual, skip_restore_excp);
  __ z_lg(Z_RET, thread_(exception_oop));
  __ clear_mem(thread_(exception_oop), 8);
  __ bind(skip_restore_excp);

  // remove the "unpack" frame
  __ pop_frame();

  // stack: (deoptee, caller of deoptee, ...).

  // pop the deoptee's frame
  __ pop_frame();

  // stack: (caller_of_deoptee, ...).

  // loop through the `UnrollBlock' info and create interpreter frames.
  push_skeleton_frames(masm, true/*deopt*/,
                  unroll_block_reg,
                  Z_tmp_3,
                  Z_tmp_4,
                  Z_ARG5,
                  Z_ARG4,
                  Z_ARG3);

  // stack: (skeletal interpreter frame, ..., optional skeletal
  // interpreter frame, caller of deoptee, ...).

  // push an "unpack" frame taking care of float / int return values.
  __ push_frame(RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers));

  // stack: (unpack frame, skeletal interpreter frame, ..., optional
  // skeletal interpreter frame, caller of deoptee, ...).

  // spill live volatile registers since we'll do a call.
  __ z_stg(Z_RET, offset_of(frame::z_abi_160_spill, spill[0]), Z_SP);
  __ z_std(Z_FRET, offset_of(frame::z_abi_160_spill, spill[1]), Z_SP);

  // let the unpacker layout information in the skeletal frames just allocated.
  __ get_PC(Z_RET, oop_map_offs - __ offset());
  __ set_last_Java_frame(/*sp*/Z_SP, /*pc*/Z_RET);
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames),
                  Z_thread/*thread*/, exec_mode_reg/*exec_mode*/);

  __ reset_last_Java_frame();

  // restore the volatiles saved above.
  __ z_lg(Z_RET, offset_of(frame::z_abi_160_spill, spill[0]), Z_SP);
  __ z_ld(Z_FRET, offset_of(frame::z_abi_160_spill, spill[1]), Z_SP);

  // pop the "unpack" frame.
  __ pop_frame();
  __ restore_return_pc();

  // stack: (top interpreter frame, ..., optional interpreter frame,
  // caller of deoptee, ...).

  __ z_lg(Z_fp, _z_abi(callers_sp), Z_SP); // restore frame pointer
  __ restore_bcp();
  __ restore_locals();
  __ restore_esp();

  // return to the interpreter entry point.
  __ z_br(Z_R14);

  // Make sure all code is generated
  masm->flush();

  _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers)/wordSize);
  _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
}


#ifdef COMPILER2
//------------------------------generate_uncommon_trap_blob--------------------
UncommonTrapBlob* OptoRuntime::generate_uncommon_trap_blob() {
  // Allocate space for the code
  ResourceMark rm;
  // Setup code generation tools
  const char* name = OptoRuntime::stub_name(StubId::c2_uncommon_trap_id);
  CodeBuffer buffer(name, 2048, 1024);
  if (buffer.blob() == nullptr) {
    return nullptr;
  }
  InterpreterMacroAssembler* masm = new InterpreterMacroAssembler(&buffer);

  Register unroll_block_reg = Z_tmp_1;
  Register klass_index_reg  = Z_ARG2;
  Register unc_trap_reg     = Z_ARG2;

  // stack: (deoptee, caller_of_deoptee, ...).

  // push a dummy "unpack" frame and call
  // `Deoptimization::uncommon_trap' to pack the compiled frame into a
  // vframe array and return the `UnrollBlock' information.

  // save R14 to compiled frame.
  __ save_return_pc();
  // push the "unpack_frame".
  __ push_frame_abi160(0);

  // stack: (unpack frame, deoptee, caller_of_deoptee, ...).

  // set the "unpack" frame as last_Java_frame.
  // `Deoptimization::uncommon_trap' expects it and considers its
  // sender frame as the deoptee frame.
  __ get_PC(Z_R1_scratch);
  __ set_last_Java_frame(/*sp*/Z_SP, /*pc*/Z_R1_scratch);

  __ z_lgr(klass_index_reg, Z_ARG1);  // passed implicitly as ARG2
  __ z_lghi(Z_ARG3, Deoptimization::Unpack_uncommon_trap);  // passed implicitly as ARG3
  BLOCK_COMMENT("call Deoptimization::uncommon_trap()");
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap), Z_thread);

  __ reset_last_Java_frame();

  // pop the "unpack" frame
  __ pop_frame();

  // stack: (deoptee, caller_of_deoptee, ...).

  // save the return value.
  __ z_lgr(unroll_block_reg, Z_RET);

  // pop the deoptee frame.
  __ pop_frame();

  // stack: (caller_of_deoptee, ...).

#ifdef ASSERT
  assert(Immediate::is_uimm8(Deoptimization::Unpack_LIMIT), "Code not fit for larger immediates");
  assert(Immediate::is_uimm8(Deoptimization::Unpack_uncommon_trap), "Code not fit for larger immediates");
  const int unpack_kind_byte_offset = in_bytes(Deoptimization::UnrollBlock::unpack_kind_offset())
#ifndef VM_LITTLE_ENDIAN
  + 3
#endif
  ;
  if (Displacement::is_shortDisp(unpack_kind_byte_offset)) {
    __ z_cli(unpack_kind_byte_offset, unroll_block_reg, Deoptimization::Unpack_uncommon_trap);
  } else {
    __ z_cliy(unpack_kind_byte_offset, unroll_block_reg, Deoptimization::Unpack_uncommon_trap);
  }
  __ asm_assert(Assembler::bcondEqual, "OptoRuntime::generate_deopt_blob: expected Unpack_uncommon_trap", 0);
#endif

  __ zap_from_to(Z_SP, Z_SP, Z_R0_scratch, Z_R1, 500, -1);

  // allocate new interpreter frame(s) and possibly resize the caller's frame
  // (no more adapters !)
  push_skeleton_frames(masm, false/*deopt*/,
                  unroll_block_reg,
                  Z_tmp_2,
                  Z_tmp_3,
                  Z_tmp_4,
                  Z_ARG5,
                  Z_ARG4);

  // stack: (skeletal interpreter frame, ..., optional skeletal
  // interpreter frame, (resized) caller of deoptee, ...).

  // push a dummy "unpack" frame taking care of float return values.
  // call `Deoptimization::unpack_frames' to layout information in the
  // interpreter frames just created

  // push the "unpack" frame
   const unsigned int framesize_in_bytes = __ push_frame_abi160(0);

  // stack: (unpack frame, skeletal interpreter frame, ..., optional
  // skeletal interpreter frame, (resized) caller of deoptee, ...).

  // set the "unpack" frame as last_Java_frame
  __ get_PC(Z_R1_scratch);
  __ set_last_Java_frame(/*sp*/Z_SP, /*pc*/Z_R1_scratch);

  // indicate it is the uncommon trap case
  BLOCK_COMMENT("call Deoptimization::Unpack_uncommon_trap()");
  __ load_const_optimized(unc_trap_reg, Deoptimization::Unpack_uncommon_trap);
  // let the unpacker layout information in the skeletal frames just allocated.
  __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), Z_thread);

  __ reset_last_Java_frame();
  // pop the "unpack" frame
  __ pop_frame();
  // restore LR from top interpreter frame
  __ restore_return_pc();

  // stack: (top interpreter frame, ..., optional interpreter frame,
  // (resized) caller of deoptee, ...).

  __ z_lg(Z_fp, _z_abi(callers_sp), Z_SP); // restore frame pointer
  __ restore_bcp();
  __ restore_locals();
  __ restore_esp();

  // return to the interpreter entry point
  __ z_br(Z_R14);

  masm->flush();
  return UncommonTrapBlob::create(&buffer, nullptr, framesize_in_bytes/wordSize);
}
#endif // COMPILER2


//------------------------------generate_handler_blob------
//
// Generate a special Compile2Runtime blob that saves all registers,
// and setup oopmap.
SafepointBlob* SharedRuntime::generate_handler_blob(StubId id, address call_ptr) {
  assert(StubRoutines::forward_exception_entry() != nullptr,
         "must be generated before");
  assert(is_polling_page_id(id), "expected a polling page stub id");

  ResourceMark rm;
  OopMapSet *oop_maps = new OopMapSet();
  OopMap* map;

  // Allocate space for the code. Setup code generation tools.
  const char* name = SharedRuntime::stub_name(id);
  CodeBuffer buffer(name, 2048, 1024);
  MacroAssembler* masm = new MacroAssembler(&buffer);

  unsigned int start_off = __ offset();
  address call_pc = nullptr;
  int frame_size_in_bytes;

  bool cause_return = (id == StubId::shared_polling_page_return_handler_id);
  // Make room for return address (or push it again)
  if (!cause_return) {
    __ z_lg(Z_R14, Address(Z_thread, JavaThread::saved_exception_pc_offset()));
  }

  bool save_vectors = (id == StubId::shared_polling_page_vectors_safepoint_handler_id);
  // Save registers, fpu state, and flags
  map = RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers, Z_R14, save_vectors);

  if (!cause_return) {
    // Keep a copy of the return pc to detect if it gets modified.
    __ z_lgr(Z_R6, Z_R14);
  }

  // The following is basically a call_VM. However, we need the precise
  // address of the call in order to generate an oopmap. Hence, we do all the
  // work ourselves.
  __ set_last_Java_frame(Z_SP, noreg);

  // call into the runtime to handle the safepoint poll
  __ call_VM_leaf(call_ptr, Z_thread);


  // Set an oopmap for the call site. This oopmap will map all
  // oop-registers and debug-info registers as callee-saved. This
  // will allow deoptimization at this safepoint to find all possible
  // debug-info recordings, as well as let GC find all oops.

  oop_maps->add_gc_map((int)(__ offset()-start_off), map);

  Label noException;

  __ reset_last_Java_frame();

  __ load_and_test_long(Z_R1, thread_(pending_exception));
  __ z_bre(noException);

  // Pending exception case, used (sporadically) by
  // api/java_lang/Thread.State/index#ThreadState et al.
  RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers, save_vectors);

  // Jump to forward_exception_entry, with the issuing PC in Z_R14
  // so it looks like the original nmethod called forward_exception_entry.
  __ load_const_optimized(Z_R1_scratch, StubRoutines::forward_exception_entry());
  __ z_br(Z_R1_scratch);

  // No exception case
  __ bind(noException);

  if (!cause_return) {
    Label no_adjust;
     // If our stashed return pc was modified by the runtime we avoid touching it
    const int offset_of_return_pc = _z_common_abi(return_pc) + RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers, save_vectors);
    __ z_cg(Z_R6, offset_of_return_pc, Z_SP);
    __ z_brne(no_adjust);

    // Adjust return pc forward to step over the safepoint poll instruction
    __ instr_size(Z_R1_scratch, Z_R6);
    __ z_agr(Z_R6, Z_R1_scratch);
    __ z_stg(Z_R6, offset_of_return_pc, Z_SP);

    __ bind(no_adjust);
  }

  // Normal exit, restore registers and exit.
  RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers, save_vectors);

  __ z_br(Z_R14);

  // Make sure all code is generated
  masm->flush();

  // Fill-out other meta info
  return SafepointBlob::create(&buffer, oop_maps, RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers, save_vectors)/wordSize);
}


//
// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss
//
// Generate a stub that calls into vm to find out the proper destination
// of a Java call. All the argument registers are live at this point
// but since this is generic code we don't know what they are and the caller
// must do any gc of the args.
//
RuntimeStub* SharedRuntime::generate_resolve_blob(StubId id, address destination) {
  assert (StubRoutines::forward_exception_entry() != nullptr, "must be generated before");
  assert(is_resolve_id(id), "expected a resolve stub id");

  // allocate space for the code
  ResourceMark rm;

  const char* name = SharedRuntime::stub_name(id);
  CodeBuffer buffer(name, 1000, 512);
  MacroAssembler* masm                = new MacroAssembler(&buffer);

  OopMapSet *oop_maps = new OopMapSet();
  OopMap* map = nullptr;

  unsigned int start_off = __ offset();

  map = RegisterSaver::save_live_registers(masm, RegisterSaver::all_registers);

  // We must save a PC from within the stub as return PC
  // C code doesn't store the LR where we expect the PC,
  // so we would run into trouble upon stack walking.
  __ get_PC(Z_R1_scratch);

  unsigned int frame_complete = __ offset();

  __ set_last_Java_frame(/*sp*/Z_SP, Z_R1_scratch);

  __ call_VM_leaf(destination, Z_thread, Z_method);


  // Set an oopmap for the call site.
  // We need this not only for callee-saved registers, but also for volatile
  // registers that the compiler might be keeping live across a safepoint.

  oop_maps->add_gc_map((int)(frame_complete-start_off), map);

  // clear last_Java_sp
  __ reset_last_Java_frame();

  // check for pending exceptions
  Label pending;
  __ load_and_test_long(Z_R0, Address(Z_thread, Thread::pending_exception_offset()));
  __ z_brne(pending);

  __ z_lgr(Z_R1_scratch, Z_R2); // r1 is neither saved nor restored, r2 contains the continuation.
  RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers);

  // get the returned method
  __ get_vm_result_metadata(Z_method);

  // We are back to the original state on entry and ready to go.
  __ z_br(Z_R1_scratch);

  // Pending exception after the safepoint

  __ bind(pending);

  RegisterSaver::restore_live_registers(masm, RegisterSaver::all_registers);

  // exception pending => remove activation and forward to exception handler

  __ z_lgr(Z_R2, Z_R0); // pending_exception
  __ clear_mem(Address(Z_thread, JavaThread::vm_result_oop_offset()), sizeof(jlong));
  __ load_const_optimized(Z_R1_scratch, StubRoutines::forward_exception_entry());
  __ z_br(Z_R1_scratch);

  // -------------
  // make sure all code is generated
  masm->flush();

  // return the blob
  // frame_size_words or bytes??
  return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, RegisterSaver::live_reg_frame_size(RegisterSaver::all_registers)/wordSize,
                                       oop_maps, true);

}

// Continuation point for throwing of implicit exceptions that are
// not handled in the current activation. Fabricates an exception
// oop and initiates normal exception dispatching in this
// frame. Only callee-saved registers are preserved (through the
// normal RegisterMap handling). If the compiler
// needs all registers to be preserved between the fault point and
// the exception handler then it must assume responsibility for that
// in AbstractCompiler::continuation_for_implicit_null_exception or
// continuation_for_implicit_division_by_zero_exception. All other
// implicit exceptions (e.g., NullPointerException or
// AbstractMethodError on entry) are either at call sites or
// otherwise assume that stack unwinding will be initiated, so
// caller saved registers were assumed volatile in the compiler.

// Note that we generate only this stub into a RuntimeStub, because
// it needs to be properly traversed and ignored during GC, so we
// change the meaning of the "__" macro within this method.

// Note: the routine set_pc_not_at_call_for_caller in
// SharedRuntime.cpp requires that this code be generated into a
// RuntimeStub.

RuntimeStub* SharedRuntime::generate_throw_exception(StubId id, address runtime_entry) {
  assert(is_throw_id(id), "expected a throw stub id");

  const char* name = SharedRuntime::stub_name(id);

  int insts_size = 256;
  int locs_size  = 0;

  ResourceMark rm;
  const char* timer_msg = "SharedRuntime generate_throw_exception";
  TraceTime timer(timer_msg, TRACETIME_LOG(Info, startuptime));

  CodeBuffer      code(name, insts_size, locs_size);
  MacroAssembler* masm = new MacroAssembler(&code);
  int framesize_in_bytes;
  address start = __ pc();

  __ save_return_pc();
  framesize_in_bytes = __ push_frame_abi160(0);

  address frame_complete_pc = __ pc();

  // Note that we always have a runtime stub frame on the top of stack at this point.
  __ get_PC(Z_R1);
  __ set_last_Java_frame(/*sp*/Z_SP, /*pc*/Z_R1);

  // Do the call.
  BLOCK_COMMENT("call runtime_entry");
  __ call_VM_leaf(runtime_entry, Z_thread);

  __ reset_last_Java_frame();

#ifdef ASSERT
  // Make sure that this code is only executed if there is a pending exception.
  { Label L;
    __ z_lg(Z_R0,
            in_bytes(Thread::pending_exception_offset()),
            Z_thread);
    __ z_ltgr(Z_R0, Z_R0);
    __ z_brne(L);
    __ stop("SharedRuntime::throw_exception: no pending exception");
    __ bind(L);
  }
#endif

  __ pop_frame();
  __ restore_return_pc();

  __ load_const_optimized(Z_R1, StubRoutines::forward_exception_entry());
  __ z_br(Z_R1);

  RuntimeStub* stub =
    RuntimeStub::new_runtime_stub(name, &code,
                                  frame_complete_pc - start,
                                  framesize_in_bytes/wordSize,
                                  nullptr /*oop_maps*/, false);

  return stub;
}

//------------------------------Montgomery multiplication------------------------
//

// Subtract 0:b from carry:a. Return carry.
static unsigned long
sub(unsigned long a[], unsigned long b[], unsigned long carry, long len) {
  unsigned long i, c = 8 * (unsigned long)(len - 1);
  __asm__ __volatile__ (
    "SLGR   %[i], %[i]         \n" // initialize to 0 and pre-set carry
    "LGHI   0, 8               \n" // index increment (for BRXLG)
    "LGR    1, %[c]            \n" // index limit (for BRXLG)
    "0:                        \n"
    "LG     %[c], 0(%[i],%[a]) \n"
    "SLBG   %[c], 0(%[i],%[b]) \n" // subtract with borrow
    "STG    %[c], 0(%[i],%[a]) \n"
    "BRXLG  %[i], 0, 0b        \n" // while ((i+=8)<limit);
    "SLBGR  %[c], %[c]         \n" // save carry - 1
    : [i]"=&a"(i), [c]"+r"(c)
    : [a]"a"(a), [b]"a"(b)
    : "cc", "memory", "r0", "r1"
 );
  return carry + c;
}

// Multiply (unsigned) Long A by Long B, accumulating the double-
// length result into the accumulator formed of T0, T1, and T2.
inline void MACC(unsigned long A[], long A_ind,
                 unsigned long B[], long B_ind,
                 unsigned long &T0, unsigned long &T1, unsigned long &T2) {
  long A_si = 8 * A_ind,
       B_si = 8 * B_ind;
  __asm__ __volatile__ (
    "LG     1, 0(%[A_si],%[A]) \n"
    "MLG    0, 0(%[B_si],%[B]) \n" // r0r1 = A * B
    "ALGR   %[T0], 1           \n"
    "LGHI   1, 0               \n" // r1 = 0
    "ALCGR  %[T1], 0           \n"
    "ALCGR  %[T2], 1           \n"
    : [T0]"+r"(T0), [T1]"+r"(T1), [T2]"+r"(T2)
    : [A]"r"(A), [A_si]"r"(A_si), [B]"r"(B), [B_si]"r"(B_si)
    : "cc", "r0", "r1"
 );
}

// As above, but add twice the double-length result into the
// accumulator.
inline void MACC2(unsigned long A[], long A_ind,
                  unsigned long B[], long B_ind,
                  unsigned long &T0, unsigned long &T1, unsigned long &T2) {
  const unsigned long zero = 0;
  long A_si = 8 * A_ind,
       B_si = 8 * B_ind;
  __asm__ __volatile__ (
    "LG     1, 0(%[A_si],%[A]) \n"
    "MLG    0, 0(%[B_si],%[B]) \n" // r0r1 = A * B
    "ALGR   %[T0], 1           \n"
    "ALCGR  %[T1], 0           \n"
    "ALCGR  %[T2], %[zero]     \n"
    "ALGR   %[T0], 1           \n"
    "ALCGR  %[T1], 0           \n"
    "ALCGR  %[T2], %[zero]     \n"
    : [T0]"+r"(T0), [T1]"+r"(T1), [T2]"+r"(T2)
    : [A]"r"(A), [A_si]"r"(A_si), [B]"r"(B), [B_si]"r"(B_si), [zero]"r"(zero)
    : "cc", "r0", "r1"
 );
}

// Fast Montgomery multiplication. The derivation of the algorithm is
// in "A Cryptographic Library for the Motorola DSP56000,
// Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237".
static void
montgomery_multiply(unsigned long a[], unsigned long b[], unsigned long n[],
                    unsigned long m[], unsigned long inv, int len) {
  unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
  int i;

  assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");

  for (i = 0; i < len; i++) {
    int j;
    for (j = 0; j < i; j++) {
      MACC(a, j, b, i-j, t0, t1, t2);
      MACC(m, j, n, i-j, t0, t1, t2);
    }
    MACC(a, i, b, 0, t0, t1, t2);
    m[i] = t0 * inv;
    MACC(m, i, n, 0, t0, t1, t2);

    assert(t0 == 0, "broken Montgomery multiply");

    t0 = t1; t1 = t2; t2 = 0;
  }

  for (i = len; i < 2 * len; i++) {
    int j;
    for (j = i - len + 1; j < len; j++) {
      MACC(a, j, b, i-j, t0, t1, t2);
      MACC(m, j, n, i-j, t0, t1, t2);
    }
    m[i-len] = t0;
    t0 = t1; t1 = t2; t2 = 0;
  }

  while (t0) {
    t0 = sub(m, n, t0, len);
  }
}

// Fast Montgomery squaring. This uses asymptotically 25% fewer
// multiplies so it should be up to 25% faster than Montgomery
// multiplication. However, its loop control is more complex and it
// may actually run slower on some machines.
static void
montgomery_square(unsigned long a[], unsigned long n[],
                  unsigned long m[], unsigned long inv, int len) {
  unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
  int i;

  assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");

  for (i = 0; i < len; i++) {
    int j;
    int end = (i+1)/2;
    for (j = 0; j < end; j++) {
      MACC2(a, j, a, i-j, t0, t1, t2);
      MACC(m, j, n, i-j, t0, t1, t2);
    }
    if ((i & 1) == 0) {
      MACC(a, j, a, j, t0, t1, t2);
    }
    for (; j < i; j++) {
      MACC(m, j, n, i-j, t0, t1, t2);
    }
    m[i] = t0 * inv;
    MACC(m, i, n, 0, t0, t1, t2);

    assert(t0 == 0, "broken Montgomery square");

    t0 = t1; t1 = t2; t2 = 0;
  }

  for (i = len; i < 2*len; i++) {
    int start = i-len+1;
    int end = start + (len - start)/2;
    int j;
    for (j = start; j < end; j++) {
      MACC2(a, j, a, i-j, t0, t1, t2);
      MACC(m, j, n, i-j, t0, t1, t2);
    }
    if ((i & 1) == 0) {
      MACC(a, j, a, j, t0, t1, t2);
    }
    for (; j < len; j++) {
      MACC(m, j, n, i-j, t0, t1, t2);
    }
    m[i-len] = t0;
    t0 = t1; t1 = t2; t2 = 0;
  }

  while (t0) {
    t0 = sub(m, n, t0, len);
  }
}

// The threshold at which squaring is advantageous was determined
// experimentally on an i7-3930K (Ivy Bridge) CPU @ 3.5GHz.
// Value seems to be ok for other platforms, too.
#define MONTGOMERY_SQUARING_THRESHOLD 64

// Copy len longwords from s to d, word-swapping as we go. The
// destination array is reversed.
static void reverse_words(unsigned long *s, unsigned long *d, int len) {
  d += len;
  while(len-- > 0) {
    d--;
    unsigned long s_val = *s;
    // Swap words in a longword on little endian machines.
#ifdef VM_LITTLE_ENDIAN
     Unimplemented();
#endif
    *d = s_val;
    s++;
  }
}

void SharedRuntime::montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints,
                                        jint len, jlong inv,
                                        jint *m_ints) {
  len = len & 0x7fffFFFF; // C2 does not respect int to long conversion for stub calls.
  assert(len % 2 == 0, "array length in montgomery_multiply must be even");
  int longwords = len/2;

  // Make very sure we don't use so much space that the stack might
  // overflow. 512 jints corresponds to an 16384-bit integer and
  // will use here a total of 8k bytes of stack space.
  int divisor = sizeof(unsigned long) * 4;
  guarantee(longwords <= 8192 / divisor, "must be");
  int total_allocation = longwords * sizeof (unsigned long) * 4;
  unsigned long *scratch = (unsigned long *)alloca(total_allocation);

  // Local scratch arrays
  unsigned long
    *a = scratch + 0 * longwords,
    *b = scratch + 1 * longwords,
    *n = scratch + 2 * longwords,
    *m = scratch + 3 * longwords;

  reverse_words((unsigned long *)a_ints, a, longwords);
  reverse_words((unsigned long *)b_ints, b, longwords);
  reverse_words((unsigned long *)n_ints, n, longwords);

  ::montgomery_multiply(a, b, n, m, (unsigned long)inv, longwords);

  reverse_words(m, (unsigned long *)m_ints, longwords);
}

void SharedRuntime::montgomery_square(jint *a_ints, jint *n_ints,
                                      jint len, jlong inv,
                                      jint *m_ints) {
  len = len & 0x7fffFFFF; // C2 does not respect int to long conversion for stub calls.
  assert(len % 2 == 0, "array length in montgomery_square must be even");
  int longwords = len/2;

  // Make very sure we don't use so much space that the stack might
  // overflow. 512 jints corresponds to an 16384-bit integer and
  // will use here a total of 6k bytes of stack space.
  int divisor = sizeof(unsigned long) * 3;
  guarantee(longwords <= (8192 / divisor), "must be");
  int total_allocation = longwords * sizeof (unsigned long) * 3;
  unsigned long *scratch = (unsigned long *)alloca(total_allocation);

  // Local scratch arrays
  unsigned long
    *a = scratch + 0 * longwords,
    *n = scratch + 1 * longwords,
    *m = scratch + 2 * longwords;

  reverse_words((unsigned long *)a_ints, a, longwords);
  reverse_words((unsigned long *)n_ints, n, longwords);

  if (len >= MONTGOMERY_SQUARING_THRESHOLD) {
    ::montgomery_square(a, n, m, (unsigned long)inv, longwords);
  } else {
    ::montgomery_multiply(a, a, n, m, (unsigned long)inv, longwords);
  }

  reverse_words(m, (unsigned long *)m_ints, longwords);
}

extern "C"
int SpinPause() {
  return 0;
}

#if INCLUDE_JFR
RuntimeStub* SharedRuntime::generate_jfr_write_checkpoint() {
  if (!Continuations::enabled()) return nullptr;
  Unimplemented();
  return nullptr;
}

RuntimeStub* SharedRuntime::generate_jfr_return_lease() {
  if (!Continuations::enabled()) return nullptr;
  Unimplemented();
  return nullptr;
}

#endif // INCLUDE_JFR

const uint SharedRuntime::java_return_convention_max_int = Argument::n_int_register_parameters_j;
const uint SharedRuntime::java_return_convention_max_float = Argument::n_float_register_parameters_j;

int SharedRuntime::java_return_convention(const BasicType *sig_bt, VMRegPair *regs, int total_args_passed) {
  Unimplemented();
  return 0;
}

BufferedInlineTypeBlob* SharedRuntime::generate_buffered_inline_type_adapter(const InlineKlass* vk) {
  Unimplemented();
  return nullptr;
}
