1 /*
   2  * Copyright (c) 2008, 2023, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "asm/assembler.inline.hpp"
  27 #include "code/debugInfoRec.hpp"
  28 #include "code/icBuffer.hpp"
  29 #include "code/vtableStubs.hpp"
  30 #include "compiler/oopMap.hpp"
  31 #include "gc/shared/barrierSetAssembler.hpp"
  32 #include "interpreter/interpreter.hpp"
  33 #include "logging/log.hpp"
  34 #include "memory/resourceArea.hpp"
  35 #include "oops/compiledICHolder.hpp"
  36 #include "oops/klass.inline.hpp"
  37 #include "prims/methodHandles.hpp"
  38 #include "runtime/jniHandles.hpp"
  39 #include "runtime/sharedRuntime.hpp"
  40 #include "runtime/safepointMechanism.hpp"
  41 #include "runtime/stubRoutines.hpp"
  42 #include "runtime/vframeArray.hpp"
  43 #include "utilities/align.hpp"
  44 #include "utilities/powerOfTwo.hpp"
  45 #include "vmreg_arm.inline.hpp"
  46 #ifdef COMPILER1
  47 #include "c1/c1_Runtime1.hpp"
  48 #endif
  49 #ifdef COMPILER2
  50 #include "opto/runtime.hpp"
  51 #endif
  52 
  53 #define __ masm->
  54 
  55 class RegisterSaver {
  56 public:
  57 
  58   // Special registers:
  59   //              32-bit ARM     64-bit ARM
  60   //  Rthread:       R10            R28
  61   //  LR:            R14            R30
  62 
  63   // Rthread is callee saved in the C ABI and never changed by compiled code:
  64   // no need to save it.
  65 
  66   // 2 slots for LR: the one at LR_offset and another one at R14/R30_offset.
  67   // The one at LR_offset is a return address that is needed by stack walking.
  68   // A c2 method uses LR as a standard register so it may be live when we
  69   // branch to the runtime. The slot at R14/R30_offset is for the value of LR
  70   // in case it's live in the method we are coming from.
  71 
  72 
  73   enum RegisterLayout {
  74     fpu_save_size = FloatRegisterImpl::number_of_registers,
  75 #ifndef __SOFTFP__
  76     D0_offset = 0,
  77 #endif
  78     R0_offset = fpu_save_size,
  79     R1_offset,
  80     R2_offset,
  81     R3_offset,
  82     R4_offset,
  83     R5_offset,
  84     R6_offset,
  85 #if (FP_REG_NUM != 7)
  86     // if not saved as FP
  87     R7_offset,
  88 #endif
  89     R8_offset,
  90     R9_offset,
  91 #if (FP_REG_NUM != 11)
  92     // if not saved as FP
  93     R11_offset,
  94 #endif
  95     R12_offset,
  96     R14_offset,
  97     FP_offset,
  98     LR_offset,
  99     reg_save_size,
 100 
 101     Rmethod_offset = R9_offset,
 102     Rtemp_offset = R12_offset,
 103   };
 104 
 105   // all regs but Rthread (R10), FP (R7 or R11), SP and PC
 106   // (altFP_7_11 is the one among R7 and R11 which is not FP)
 107 #define SAVED_BASE_REGS (RegisterSet(R0, R6) | RegisterSet(R8, R9) | RegisterSet(R12) | R14 | altFP_7_11)
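       // Rough sketch (illustration only, derived from the push order in
       // save_live_registers and the RegisterLayout enum above) of the save
       // area, from the lowest address (SP after saving) upwards:
       //
       //   D0 .. D15            (D16 .. D31 sit above them when has_vfp3_32())
       //   R0 .. R6, R8, R9
       //   R7 or R11            altFP_7_11, whichever is not used as FP
       //   R12, R14
       //   FP
       //   LR                   return address used by stack walking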
 108 
 109 
 110   //  If LR may be live in the nmethod from which we are coming, then
 111   //  lr_saved is true: the caller has already saved the return address
 112   //  before calling save_live_registers, and LR still contains the
 113   //  live value.
 114 
 115   static OopMap* save_live_registers(MacroAssembler* masm,
 116                                      int* total_frame_words,
 117                                      bool lr_saved = false);
 118   static void restore_live_registers(MacroAssembler* masm, bool restore_lr = true);
 119 
 120 };
 121 
 122 
 123 
 124 
 125 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm,
 126                                            int* total_frame_words,
 127                                            bool lr_saved) {
 128   *total_frame_words = reg_save_size;
 129 
 130   OopMapSet *oop_maps = new OopMapSet();
 131   OopMap* map = new OopMap(VMRegImpl::slots_per_word * (*total_frame_words), 0);
 132 
 133   if (lr_saved) {
 134     __ push(RegisterSet(FP));
 135   } else {
 136     __ push(RegisterSet(FP) | RegisterSet(LR));
 137   }
 138   __ push(SAVED_BASE_REGS);
 139   if (HaveVFP) {
 140     if (VM_Version::has_vfp3_32()) {
 141       __ fpush(FloatRegisterSet(D16, 16));
 142     } else {
 143       if (FloatRegisterImpl::number_of_registers > 32) {
 144         assert(FloatRegisterImpl::number_of_registers == 64, "nb fp registers should be 64");
 145         __ sub(SP, SP, 32 * wordSize);
 146       }
 147     }
 148     __ fpush(FloatRegisterSet(D0, 16));
 149   } else {
 150     __ sub(SP, SP, fpu_save_size * wordSize);
 151   }
 152 
 153   int i;
 154   int j=0;
 155   for (i = R0_offset; i <= R9_offset; i++) {
 156     if (j == FP_REG_NUM) {
 157       // skip the FP register, managed below.
 158       j++;
 159     }
 160     map->set_callee_saved(VMRegImpl::stack2reg(i), as_Register(j)->as_VMReg());
 161     j++;
 162   }
 163   assert(j == R10->encoding(), "must be");
 164 #if (FP_REG_NUM != 11)
 165   // add R11, if not managed as FP
 166   map->set_callee_saved(VMRegImpl::stack2reg(R11_offset), R11->as_VMReg());
 167 #endif
 168   map->set_callee_saved(VMRegImpl::stack2reg(R12_offset), R12->as_VMReg());
 169   map->set_callee_saved(VMRegImpl::stack2reg(R14_offset), R14->as_VMReg());
 170   if (HaveVFP) {
 171     for (i = 0; i < (VM_Version::has_vfp3_32() ? 64 : 32); i+=2) {
 172       map->set_callee_saved(VMRegImpl::stack2reg(i), as_FloatRegister(i)->as_VMReg());
 173       map->set_callee_saved(VMRegImpl::stack2reg(i + 1), as_FloatRegister(i)->as_VMReg()->next());
 174     }
 175   }
 176 
 177   return map;
 178 }
 179 
 180 void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_lr) {
 181   if (HaveVFP) {
 182     __ fpop(FloatRegisterSet(D0, 16));
 183     if (VM_Version::has_vfp3_32()) {
 184       __ fpop(FloatRegisterSet(D16, 16));
 185     } else {
 186       if (FloatRegisterImpl::number_of_registers > 32) {
 187         assert(FloatRegisterImpl::number_of_registers == 64, "nb fp registers should be 64");
 188         __ add(SP, SP, 32 * wordSize);
 189       }
 190     }
 191   } else {
 192     __ add(SP, SP, fpu_save_size * wordSize);
 193   }
 194   __ pop(SAVED_BASE_REGS);
 195   if (restore_lr) {
 196     __ pop(RegisterSet(FP) | RegisterSet(LR));
 197   } else {
 198     __ pop(RegisterSet(FP));
 199   }
 200 }
 201 
 202 
 203 static void push_result_registers(MacroAssembler* masm, BasicType ret_type) {
 204 #ifdef __ABI_HARD__
 205   if (ret_type == T_DOUBLE || ret_type == T_FLOAT) {
 206     __ sub(SP, SP, 8);
 207     __ fstd(D0, Address(SP));
 208     return;
 209   }
 210 #endif // __ABI_HARD__
 211   __ raw_push(R0, R1);
 212 }
 213 
 214 static void pop_result_registers(MacroAssembler* masm, BasicType ret_type) {
 215 #ifdef __ABI_HARD__
 216   if (ret_type == T_DOUBLE || ret_type == T_FLOAT) {
 217     __ fldd(D0, Address(SP));
 218     __ add(SP, SP, 8);
 219     return;
 220   }
 221 #endif // __ABI_HARD__
 222   __ raw_pop(R0, R1);
 223 }
 224 
 225 static void push_param_registers(MacroAssembler* masm, int fp_regs_in_arguments) {
 226   // R1-R3 arguments need to be saved, but we push 4 registers for 8-byte alignment
 227   __ push(RegisterSet(R0, R3));
 228 
 229   // preserve arguments
 230   // Likely not needed, as the locking code probably won't modify volatile FP registers,
 231   // but there is no way to guarantee that
 232   if (fp_regs_in_arguments) {
 233     // convert fp_regs_in_arguments to a number of double registers
 234     int double_regs_num = (fp_regs_in_arguments + 1) >> 1;
 235     __ fpush_hardfp(FloatRegisterSet(D0, double_regs_num));
 236   }
 237 }
 238 
 239 static void pop_param_registers(MacroAssembler* masm, int fp_regs_in_arguments) {
 240   if (fp_regs_in_arguments) {
 241     int double_regs_num = (fp_regs_in_arguments + 1) >> 1;
 242     __ fpop_hardfp(FloatRegisterSet(D0, double_regs_num));
 243   }
 244   __ pop(RegisterSet(R0, R3));
 245 }
 246 
 247 
 248 
 249 // Is the vector's size (in bytes) bigger than the size saved by default?
 250 // All vector registers are saved by default on ARM.
 251 bool SharedRuntime::is_wide_vector(int size) {
 252   return false;
 253 }
 254 
 255 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
 256                                         VMRegPair *regs,
 257                                         VMRegPair *regs2,
 258                                         int total_args_passed) {
 259   assert(regs2 == nullptr, "not needed on arm");
 260 
 261   int slot = 0;
 262   int ireg = 0;
 263 #ifdef __ABI_HARD__
 264   int fp_slot = 0;
 265   int single_fpr_slot = 0;
 266 #endif // __ABI_HARD__
 267   for (int i = 0; i < total_args_passed; i++) {
 268     switch (sig_bt[i]) {
 269     case T_SHORT:
 270     case T_CHAR:
 271     case T_BYTE:
 272     case T_BOOLEAN:
 273     case T_INT:
 274     case T_ARRAY:
 275     case T_OBJECT:
 276     case T_ADDRESS:
 277     case T_METADATA:
 278 #ifndef __ABI_HARD__
 279     case T_FLOAT:
 280 #endif // !__ABI_HARD__
 281       if (ireg < 4) {
 282         Register r = as_Register(ireg);
 283         regs[i].set1(r->as_VMReg());
 284         ireg++;
 285       } else {
 286         regs[i].set1(VMRegImpl::stack2reg(slot));
 287         slot++;
 288       }
 289       break;
 290     case T_LONG:
 291 #ifndef __ABI_HARD__
 292     case T_DOUBLE:
 293 #endif // !__ABI_HARD__
 294       assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "missing Half" );
 295       if (ireg <= 2) {
 296 #if (ALIGN_WIDE_ARGUMENTS == 1)
 297         if(ireg & 1) ireg++;  // Aligned location required
 298 #endif
 299         Register r1 = as_Register(ireg);
 300         Register r2 = as_Register(ireg + 1);
 301         regs[i].set_pair(r2->as_VMReg(), r1->as_VMReg());
 302         ireg += 2;
 303 #if (ALIGN_WIDE_ARGUMENTS == 0)
 304       } else if (ireg == 3) {
 305         // uses R3 + one stack slot
 306         Register r = as_Register(ireg);
 307         regs[i].set_pair(VMRegImpl::stack2reg(slot), r->as_VMReg());
 308         ireg += 1;
 309         slot += 1;
 310 #endif
 311       } else {
 312         if (slot & 1) slot++; // Aligned location required
 313         regs[i].set_pair(VMRegImpl::stack2reg(slot+1), VMRegImpl::stack2reg(slot));
 314         slot += 2;
 315         ireg = 4;
 316       }
 317       break;
 318     case T_VOID:
 319       regs[i].set_bad();
 320       break;
 321 #ifdef __ABI_HARD__
 322     case T_FLOAT:
 323       if ((fp_slot < 16)||(single_fpr_slot & 1)) {
 324         if ((single_fpr_slot & 1) == 0) {
 325           single_fpr_slot = fp_slot;
 326           fp_slot += 2;
 327         }
 328         FloatRegister r = as_FloatRegister(single_fpr_slot);
 329         single_fpr_slot++;
 330         regs[i].set1(r->as_VMReg());
 331       } else {
 332         regs[i].set1(VMRegImpl::stack2reg(slot));
 333         slot++;
 334       }
 335       break;
 336     case T_DOUBLE:
 337       assert(ALIGN_WIDE_ARGUMENTS == 1, "ABI_HARD not supported with unaligned wide arguments");
 338       if (fp_slot <= 14) {
 339         FloatRegister r1 = as_FloatRegister(fp_slot);
 340         FloatRegister r2 = as_FloatRegister(fp_slot+1);
 341         regs[i].set_pair(r2->as_VMReg(), r1->as_VMReg());
 342         fp_slot += 2;
 343       } else {
 344         if(slot & 1) slot++;
 345         regs[i].set_pair(VMRegImpl::stack2reg(slot+1), VMRegImpl::stack2reg(slot));
 346         slot += 2;
 347         single_fpr_slot = 16;
 348       }
 349       break;
 350 #endif // __ABI_HARD__
 351     default:
 352       ShouldNotReachHere();
 353     }
 354   }
 355   return slot;
 356 }
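     // Worked example (illustration only) of the assignment performed above,
     // assuming a hard-float build (__ABI_HARD__) with ALIGN_WIDE_ARGUMENTS == 1
     // and the signature (jint, jlong, jfloat, jfloat, jdouble):
     //   jint    -> R0
     //   jlong   -> R2:R3   (ireg bumped from 1 to 2 for alignment)
     //   jfloat  -> S0      (fp_slot advances to 2, single_fpr_slot to 1)
     //   jfloat  -> S1      (back-filled into the second half of D0)
     //   jdouble -> D1      (S2:S3)
     // Further arguments would spill to the stack slots counted by 'slot'.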
 357 
 358 int SharedRuntime::vector_calling_convention(VMRegPair *regs,
 359                                              uint num_bits,
 360                                              uint total_args_passed) {
 361   Unimplemented();
 362   return 0;
 363 }
 364 
 365 int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
 366                                            VMRegPair *regs,
 367                                            int total_args_passed) {
 368 #ifdef __SOFTFP__
 369   // soft float is the same as the C calling convention.
 370   return c_calling_convention(sig_bt, regs, nullptr, total_args_passed);
 371 #endif // __SOFTFP__
 372   int slot = 0;
 373   int ireg = 0;
 374   int freg = 0;
 375   int single_fpr = 0;
 376 
 377   for (int i = 0; i < total_args_passed; i++) {
 378     switch (sig_bt[i]) {
 379     case T_SHORT:
 380     case T_CHAR:
 381     case T_BYTE:
 382     case T_BOOLEAN:
 383     case T_INT:
 384     case T_ARRAY:
 385     case T_OBJECT:
 386     case T_ADDRESS:
 387       if (ireg < 4) {
 388         Register r = as_Register(ireg++);
 389         regs[i].set1(r->as_VMReg());
 390       } else {
 391         regs[i].set1(VMRegImpl::stack2reg(slot++));
 392       }
 393       break;
 394     case T_FLOAT:
 395       // C2 utilizes S14/S15 for mem-mem moves
 396       if ((freg < 16 COMPILER2_PRESENT(-2)) || (single_fpr & 1)) {
 397         if ((single_fpr & 1) == 0) {
 398           single_fpr = freg;
 399           freg += 2;
 400         }
 401         FloatRegister r = as_FloatRegister(single_fpr++);
 402         regs[i].set1(r->as_VMReg());
 403       } else {
 404         regs[i].set1(VMRegImpl::stack2reg(slot++));
 405       }
 406       break;
 407     case T_DOUBLE:
 408       // C2 utilizes S14/S15 for mem-mem moves
 409       if (freg <= 14 COMPILER2_PRESENT(-2)) {
 410         FloatRegister r1 = as_FloatRegister(freg);
 411         FloatRegister r2 = as_FloatRegister(freg + 1);
 412         regs[i].set_pair(r2->as_VMReg(), r1->as_VMReg());
 413         freg += 2;
 414       } else {
 415         // Internally, keep the aligned calling convention,
 416         // ignoring ALIGN_WIDE_ARGUMENTS
 417         if (slot & 1) slot++;
 418         regs[i].set_pair(VMRegImpl::stack2reg(slot + 1), VMRegImpl::stack2reg(slot));
 419         slot += 2;
 420         single_fpr = 16;
 421       }
 422       break;
 423     case T_LONG:
 424       // Internally, keep the aligned calling convention,
 425       // ignoring ALIGN_WIDE_ARGUMENTS
 426       if (ireg <= 2) {
 427         if (ireg & 1) ireg++;
 428         Register r1 = as_Register(ireg);
 429         Register r2 = as_Register(ireg + 1);
 430         regs[i].set_pair(r2->as_VMReg(), r1->as_VMReg());
 431         ireg += 2;
 432       } else {
 433         if (slot & 1) slot++;
 434         regs[i].set_pair(VMRegImpl::stack2reg(slot + 1), VMRegImpl::stack2reg(slot));
 435         slot += 2;
 436         ireg = 4;
 437       }
 438       break;
 439     case T_VOID:
 440       regs[i].set_bad();
 441       break;
 442     default:
 443       ShouldNotReachHere();
 444     }
 445   }
 446 
 447   if (slot & 1) slot++;
 448   return slot;
 449 }
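     // Illustrative note on the checks above: with COMPILER2 present, single
     // floats are only assigned to S0 .. S13 and doubles to D0 .. D6, because
     // C2 reserves S14/S15 for mem-mem moves; anything beyond that goes to
     // (aligned) stack slots.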
 450 
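     // If the callee already has compiled code (Method::code() is non-null),
     // call into the runtime to patch the caller's call site so that future
     // calls go straight to the compiled entry. The argument registers (and
     // D0-D7 on hard-float builds) are preserved around the runtime call.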
 451 static void patch_callers_callsite(MacroAssembler *masm) {
 452   Label skip;
 453 
 454   __ ldr(Rtemp, Address(Rmethod, Method::code_offset()));
 455   __ cbz(Rtemp, skip);
 456 
 457   // Pushing an even number of registers for stack alignment.
 458   // Selecting R9, which had to be saved anyway for some platforms.
 459   __ push(RegisterSet(R0, R3) | R9 | LR);
 460   __ fpush_hardfp(FloatRegisterSet(D0, 8));
 461 
 462   __ mov(R0, Rmethod);
 463   __ mov(R1, LR);
 464   __ call(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite));
 465 
 466   __ fpop_hardfp(FloatRegisterSet(D0, 8));
 467   __ pop(RegisterSet(R0, R3) | R9 | LR);
 468 
 469   __ bind(skip);
 470 }
 471 
 472 void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
 473                                     int total_args_passed, int comp_args_on_stack,
 474                                     const BasicType *sig_bt, const VMRegPair *regs) {
 475   // TODO: ARM - Maybe we can use ldm to load the arguments
 476   const Register tmp = Rtemp; // avoid erasing R5_mh
 477 
 478   // The next assert may not be needed, but it is safer. Extra analysis is required
 479   // if there are not enough free registers and we need to use R5 here.
 480   assert_different_registers(tmp, R5_mh);
 481 
 482   // 6243940 We might end up in handle_wrong_method if
 483   // the callee is deoptimized as we race thru here. If that
 484   // happens we don't want to take a safepoint because the
 485   // caller frame will look interpreted and arguments are now
 486   // "compiled" so it is much better to make this transition
 487   // invisible to the stack walking code. Unfortunately if
 488   // we try to find the callee by normal means, a safepoint
 489   // is possible. So we stash the desired callee in the thread
 490   // and the VM will find it there should this case occur.
 491   Address callee_target_addr(Rthread, JavaThread::callee_target_offset());
 492   __ str(Rmethod, callee_target_addr);
 493 
 494 
 495   assert_different_registers(tmp, R0, R1, R2, R3, Rsender_sp, Rmethod);
 496 
 497   const Register initial_sp = Rmethod; // temporarily scratched
 498 
 499   // Old code was modifying R4 but this looks unsafe (particularly with JSR292)
 500   assert_different_registers(tmp, R0, R1, R2, R3, Rsender_sp, initial_sp);
 501 
 502   __ mov(initial_sp, SP);
 503 
 504   if (comp_args_on_stack) {
 505     __ sub_slow(SP, SP, comp_args_on_stack * VMRegImpl::stack_slot_size);
 506   }
 507   __ bic(SP, SP, StackAlignmentInBytes - 1);
 508 
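       // Copy each incoming interpreter argument into the location assigned by
       // the compiled calling convention. Interpreter arguments live above
       // initial_sp, indexed from the top of the caller's expression stack
       // (the last argument is at the smallest offset); the destination is
       // either a register or an outgoing stack slot relative to the freshly
       // aligned SP.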
 509   for (int i = 0; i < total_args_passed; i++) {
 510     if (sig_bt[i] == T_VOID) {
 511       assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
 512       continue;
 513     }
 514     assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "must be ordered");
 515     int arg_offset = Interpreter::expr_offset_in_bytes(total_args_passed - 1 - i);
 516 
 517     VMReg r_1 = regs[i].first();
 518     VMReg r_2 = regs[i].second();
 519     if (r_1->is_stack()) {
 520       int stack_offset = r_1->reg2stack() * VMRegImpl::stack_slot_size;
 521       if (!r_2->is_valid()) {
 522         __ ldr(tmp, Address(initial_sp, arg_offset));
 523         __ str(tmp, Address(SP, stack_offset));
 524       } else {
 525         __ ldr(tmp, Address(initial_sp, arg_offset - Interpreter::stackElementSize));
 526         __ str(tmp, Address(SP, stack_offset));
 527         __ ldr(tmp, Address(initial_sp, arg_offset));
 528         __ str(tmp, Address(SP, stack_offset + wordSize));
 529       }
 530     } else if (r_1->is_Register()) {
 531       if (!r_2->is_valid()) {
 532         __ ldr(r_1->as_Register(), Address(initial_sp, arg_offset));
 533       } else {
 534         __ ldr(r_1->as_Register(), Address(initial_sp, arg_offset - Interpreter::stackElementSize));
 535         __ ldr(r_2->as_Register(), Address(initial_sp, arg_offset));
 536       }
 537     } else if (r_1->is_FloatRegister()) {
 538 #ifdef __SOFTFP__
 539       ShouldNotReachHere();
 540 #endif // __SOFTFP__
 541       if (!r_2->is_valid()) {
 542         __ flds(r_1->as_FloatRegister(), Address(initial_sp, arg_offset));
 543       } else {
 544         __ fldd(r_1->as_FloatRegister(), Address(initial_sp, arg_offset - Interpreter::stackElementSize));
 545       }
 546     } else {
 547       assert(!r_1->is_valid() && !r_2->is_valid(), "must be");
 548     }
 549   }
 550 
 551   // restore Rmethod (scratched for initial_sp)
 552   __ ldr(Rmethod, callee_target_addr);
 553   __ ldr(PC, Address(Rmethod, Method::from_compiled_offset()));
 554 
 555 }
 556 
 557 static void gen_c2i_adapter(MacroAssembler *masm,
 558                             int total_args_passed,  int comp_args_on_stack,
 559                             const BasicType *sig_bt, const VMRegPair *regs,
 560                             Label& skip_fixup) {
 561   // TODO: ARM - Maybe we can use stm to store the arguments
 562   const Register tmp = Rtemp;
 563 
 564   patch_callers_callsite(masm);
 565   __ bind(skip_fixup);
 566 
 567   __ mov(Rsender_sp, SP); // not yet saved
 568 
 569 
 570   int extraspace = total_args_passed * Interpreter::stackElementSize;
 571   if (extraspace) {
 572     __ sub_slow(SP, SP, extraspace);
 573   }
 574 
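       // Copy each argument from its compiled-convention location (register or
       // caller stack slot at SP + extraspace) into the interpreter argument
       // area just allocated below SP: one stackElementSize slot per argument,
       // with the last argument at the lowest offset.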
 575   for (int i = 0; i < total_args_passed; i++) {
 576     if (sig_bt[i] == T_VOID) {
 577       assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
 578       continue;
 579     }
 580     int stack_offset = (total_args_passed - 1 - i) * Interpreter::stackElementSize;
 581 
 582     VMReg r_1 = regs[i].first();
 583     VMReg r_2 = regs[i].second();
 584     if (r_1->is_stack()) {
 585       int arg_offset = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
 586       if (!r_2->is_valid()) {
 587         __ ldr(tmp, Address(SP, arg_offset));
 588         __ str(tmp, Address(SP, stack_offset));
 589       } else {
 590         __ ldr(tmp, Address(SP, arg_offset));
 591         __ str(tmp, Address(SP, stack_offset - Interpreter::stackElementSize));
 592         __ ldr(tmp, Address(SP, arg_offset + wordSize));
 593         __ str(tmp, Address(SP, stack_offset));
 594       }
 595     } else if (r_1->is_Register()) {
 596       if (!r_2->is_valid()) {
 597         __ str(r_1->as_Register(), Address(SP, stack_offset));
 598       } else {
 599         __ str(r_1->as_Register(), Address(SP, stack_offset - Interpreter::stackElementSize));
 600         __ str(r_2->as_Register(), Address(SP, stack_offset));
 601       }
 602     } else if (r_1->is_FloatRegister()) {
 603 #ifdef __SOFTFP__
 604       ShouldNotReachHere();
 605 #endif // __SOFTFP__
 606       if (!r_2->is_valid()) {
 607         __ fsts(r_1->as_FloatRegister(), Address(SP, stack_offset));
 608       } else {
 609         __ fstd(r_1->as_FloatRegister(), Address(SP, stack_offset - Interpreter::stackElementSize));
 610       }
 611     } else {
 612       assert(!r_1->is_valid() && !r_2->is_valid(), "must be");
 613     }
 614   }
 615 
 616   __ ldr(PC, Address(Rmethod, Method::interpreter_entry_offset()));
 617 
 618 }
 619 
 620 AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
 621                                                             int total_args_passed,
 622                                                             int comp_args_on_stack,
 623                                                             const BasicType *sig_bt,
 624                                                             const VMRegPair *regs,
 625                                                             AdapterFingerPrint* fingerprint) {
 626   address i2c_entry = __ pc();
 627   gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);
 628 
 629   address c2i_unverified_entry = __ pc();
 630   Label skip_fixup;
 631   const Register receiver       = R0;
 632   const Register holder_klass   = Rtemp; // XXX should be OK for C2 but not 100% sure
 633   const Register receiver_klass = R4;
 634 
 635   __ load_klass(receiver_klass, receiver);
 636   __ ldr(holder_klass, Address(Ricklass, CompiledICHolder::holder_klass_offset()));
 637   __ ldr(Rmethod, Address(Ricklass, CompiledICHolder::holder_metadata_offset()));
 638   __ cmp(receiver_klass, holder_klass);
 639 
 640   __ ldr(Rtemp, Address(Rmethod, Method::code_offset()), eq);
 641   __ cmp(Rtemp, 0, eq);
 642   __ b(skip_fixup, eq);
 643   __ jump(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type, noreg, ne);
 644 
 645   address c2i_entry = __ pc();
 646   gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);
 647 
 648   return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);
 649 }
 650 
 651 
 652 static int reg2offset_in(VMReg r) {
 653   // Account for saved FP and LR
 654   return r->reg2stack() * VMRegImpl::stack_slot_size + 2*wordSize;
 655 }
 656 
 657 static int reg2offset_out(VMReg r) {
 658   return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
 659 }
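     // For illustration (32-bit ARM, 4-byte stack slots and words): incoming
     // stack slot 0 maps to FP + 8, just past the saved FP and LR, while
     // outgoing slot 0 maps to SP + 0 since out_preserve_stack_slots() is 0.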
 660 
 661 
 662 static void verify_oop_args(MacroAssembler* masm,
 663                             const methodHandle& method,
 664                             const BasicType* sig_bt,
 665                             const VMRegPair* regs) {
 666   Register temp_reg = Rmethod;  // not part of any compiled calling seq
 667   if (VerifyOops) {
 668     for (int i = 0; i < method->size_of_parameters(); i++) {
 669       if (sig_bt[i] == T_OBJECT || sig_bt[i] == T_ARRAY) {
 670         VMReg r = regs[i].first();
 671         assert(r->is_valid(), "bad oop arg");
 672         if (r->is_stack()) {
 673           __ ldr(temp_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size));
 674           __ verify_oop(temp_reg);
 675         } else {
 676           __ verify_oop(r->as_Register());
 677         }
 678       }
 679     }
 680   }
 681 }
 682 
 683 static void gen_special_dispatch(MacroAssembler* masm,
 684                                  const methodHandle& method,
 685                                  const BasicType* sig_bt,
 686                                  const VMRegPair* regs) {
 687   verify_oop_args(masm, method, sig_bt, regs);
 688   vmIntrinsics::ID iid = method->intrinsic_id();
 689 
 690   // Now write the args into the outgoing interpreter space
 691   bool     has_receiver   = false;
 692   Register receiver_reg   = noreg;
 693   int      member_arg_pos = -1;
 694   Register member_reg     = noreg;
 695   int      ref_kind       = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid);
 696   if (ref_kind != 0) {
 697     member_arg_pos = method->size_of_parameters() - 1;  // trailing MemberName argument
 698     member_reg = Rmethod;  // known to be free at this point
 699     has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind);
 700   } else if (iid == vmIntrinsics::_invokeBasic) {
 701     has_receiver = true;
 702   } else {
 703     fatal("unexpected intrinsic id %d", vmIntrinsics::as_int(iid));
 704   }
 705 
 706   if (member_reg != noreg) {
 707     // Load the member_arg into register, if necessary.
 708     SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs);
 709     VMReg r = regs[member_arg_pos].first();
 710     if (r->is_stack()) {
 711       __ ldr(member_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size));
 712     } else {
 713       // no data motion is needed
 714       member_reg = r->as_Register();
 715     }
 716   }
 717 
 718   if (has_receiver) {
 719     // Make sure the receiver is loaded into a register.
 720     assert(method->size_of_parameters() > 0, "oob");
 721     assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object");
 722     VMReg r = regs[0].first();
 723     assert(r->is_valid(), "bad receiver arg");
 724     if (r->is_stack()) {
 725       // Porting note:  This assumes that compiled calling conventions always
 726       // pass the receiver oop in a register.  If this is not true on some
 727       // platform, pick a temp and load the receiver from stack.
 728       assert(false, "receiver always in a register");
 729       receiver_reg = j_rarg0;  // known to be free at this point
 730       __ ldr(receiver_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size));
 731     } else {
 732       // no data motion is needed
 733       receiver_reg = r->as_Register();
 734     }
 735   }
 736 
 737   // Figure out which address we are really jumping to:
 738   MethodHandles::generate_method_handle_dispatch(masm, iid,
 739                                                  receiver_reg, member_reg, /*for_compiler_entry:*/ true);
 740 }
 741 
 742 // ---------------------------------------------------------------------------
 743 // Generate a native wrapper for a given method.  The method takes arguments
 744 // in the Java compiled code convention, marshals them to the native
 745 // convention (handlizes oops, etc), transitions to native, makes the call,
 746 // returns to Java state (possibly blocking), unhandlizes any result and
 747 // returns.
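     // Sketch (illustration only) of the wrapper frame laid out below, from SP
     // upwards: outgoing C argument slots, the oop-handle area (up to GPR_PARAMS
     // object arguments plus one slot for the Klass mirror of a static method),
     // an optional BasicLock slot for synchronized methods, and finally the
     // saved FP and LR.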
 748 nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
 749                                                 const methodHandle& method,
 750                                                 int compile_id,
 751                                                 BasicType* in_sig_bt,
 752                                                 VMRegPair* in_regs,
 753                                                 BasicType ret_type) {
 754   if (method->is_method_handle_intrinsic()) {
 755     vmIntrinsics::ID iid = method->intrinsic_id();
 756     intptr_t start = (intptr_t)__ pc();
 757     int vep_offset = ((intptr_t)__ pc()) - start;
 758     gen_special_dispatch(masm,
 759                          method,
 760                          in_sig_bt,
 761                          in_regs);
 762     int frame_complete = ((intptr_t)__ pc()) - start;  // not complete, period
 763     __ flush();
 764     int stack_slots = SharedRuntime::out_preserve_stack_slots();  // no out slots at all, actually
 765     return nmethod::new_native_nmethod(method,
 766                                        compile_id,
 767                                        masm->code(),
 768                                        vep_offset,
 769                                        frame_complete,
 770                                        stack_slots / VMRegImpl::slots_per_word,
 771                                        in_ByteSize(-1),
 772                                        in_ByteSize(-1),
 773                                        (OopMapSet*)nullptr);
 774   }
 775   // Arguments for JNI method include JNIEnv and Class if static
 776 
 777   // Usage of Rtemp should be OK since scratched by native call
 778 
 779   bool method_is_static = method->is_static();
 780 
 781   const int total_in_args = method->size_of_parameters();
 782   int total_c_args = total_in_args + (method_is_static ? 2 : 1);
 783 
 784   BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
 785   VMRegPair* out_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);
 786 
 787   int argc = 0;
 788   out_sig_bt[argc++] = T_ADDRESS;
 789   if (method_is_static) {
 790     out_sig_bt[argc++] = T_OBJECT;
 791   }
 792 
 793   int i;
 794   for (i = 0; i < total_in_args; i++) {
 795     out_sig_bt[argc++] = in_sig_bt[i];
 796   }
 797 
 798   int out_arg_slots = c_calling_convention(out_sig_bt, out_regs, nullptr, total_c_args);
 799   int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;
 800   // Since object arguments need to be wrapped, we must preserve space
 801   // for those object arguments which come in registers (GPR_PARAMS maximum)
 802   // plus one more slot for Klass handle (for static methods)
 803   int oop_handle_offset = stack_slots;
 804   stack_slots += (GPR_PARAMS + 1) * VMRegImpl::slots_per_word;
 805 
 806   // Plus a lock if needed
 807   int lock_slot_offset = 0;
 808   if (method->is_synchronized()) {
 809     lock_slot_offset = stack_slots;
 810     assert(sizeof(BasicLock) == wordSize, "adjust this code");
 811     stack_slots += VMRegImpl::slots_per_word;
 812   }
 813 
 814   // Space to save return address and FP
 815   stack_slots += 2 * VMRegImpl::slots_per_word;
 816 
 817   // Calculate the final stack size taking account of alignment
 818   stack_slots = align_up(stack_slots, StackAlignmentInBytes / VMRegImpl::stack_slot_size);
 819   int stack_size = stack_slots * VMRegImpl::stack_slot_size;
 820   int lock_slot_fp_offset = stack_size - 2 * wordSize -
 821     lock_slot_offset * VMRegImpl::stack_slot_size;
 822 
 823   // Unverified entry point
 824   address start = __ pc();
 825 
 826   // Inline cache check, same as in C1_MacroAssembler::inline_cache_check()
 827   const Register receiver = R0; // see receiverOpr()
 828   __ load_klass(Rtemp, receiver);
 829   __ cmp(Rtemp, Ricklass);
 830   Label verified;
 831 
 832   __ b(verified, eq); // jump over alignment no-ops too
 833   __ jump(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type, Rtemp);
 834   __ align(CodeEntryAlignment);
 835 
 836   // Verified entry point
 837   __ bind(verified);
 838   int vep_offset = __ pc() - start;
 839 
 840 
 841   if ((InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) || (method->intrinsic_id() == vmIntrinsics::_identityHashCode)) {
 842     // Object.hashCode, System.identityHashCode can pull the hashCode from the header word
 843     // instead of doing a full VM transition once it's been computed.
 844     Label slow_case;
 845     const Register obj_reg = R0;
 846 
 847     // Unlike Object.hashCode, System.identityHashCode is a static method and
 848     // gets the object as an argument instead of the receiver.
 849     if (method->intrinsic_id() == vmIntrinsics::_identityHashCode) {
 850       assert(method->is_static(), "method should be static");
 851       // return 0 for null reference input, return val = R0 = obj_reg = 0
 852       __ cmp(obj_reg, 0);
 853       __ bx(LR, eq);
 854     }
 855 
 856     __ ldr(Rtemp, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
 857 
 858     assert(markWord::unlocked_value == 1, "adjust this code");
 859     __ tbz(Rtemp, exact_log2(markWord::unlocked_value), slow_case);
 860 
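         // Extract the hash bits from the mark word. bics sets the flags: if no
         // hash has been installed yet the result is zero (eq) and we fall
         // through to the slow case; otherwise (ne) shift it into place and return.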
 861     __ bics(Rtemp, Rtemp, ~markWord::hash_mask_in_place);
 862     __ mov(R0, AsmOperand(Rtemp, lsr, markWord::hash_shift), ne);
 863     __ bx(LR, ne);
 864 
 865     __ bind(slow_case);
 866   }
 867 
 868   // Bang stack pages
 869   __ arm_stack_overflow_check(stack_size, Rtemp);
 870 
 871   // Setup frame linkage
 872   __ raw_push(FP, LR);
 873   __ mov(FP, SP);
 874   __ sub_slow(SP, SP, stack_size - 2*wordSize);
 875 
 876   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
 877   assert(bs != nullptr, "Sanity");
 878   bs->nmethod_entry_barrier(masm);
 879 
 880   int frame_complete = __ pc() - start;
 881 
 882   OopMapSet* oop_maps = new OopMapSet();
 883   OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
 884   const int extra_args = method_is_static ? 2 : 1;
 885   int receiver_offset = -1;
 886   int fp_regs_in_arguments = 0;
 887 
 888   for (i = total_in_args; --i >= 0; ) {
 889     switch (in_sig_bt[i]) {
 890     case T_ARRAY:
 891     case T_OBJECT: {
 892       VMReg src = in_regs[i].first();
 893       VMReg dst = out_regs[i + extra_args].first();
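           // Handlize the oop: the native code receives a jobject, i.e. the
           // address of a stack slot holding the oop, or a null pointer when the
           // oop itself is null (hence the cmp/conditional-add idiom below).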
 894       if (src->is_stack()) {
 895         assert(dst->is_stack(), "must be");
 896         assert(i != 0, "Incoming receiver is always in a register");
 897         __ ldr(Rtemp, Address(FP, reg2offset_in(src)));
 898         __ cmp(Rtemp, 0);
 899         __ add(Rtemp, FP, reg2offset_in(src), ne);
 900         __ str(Rtemp, Address(SP, reg2offset_out(dst)));
 901         int offset_in_older_frame = src->reg2stack() + SharedRuntime::out_preserve_stack_slots();
 902         map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + stack_slots));
 903       } else {
 904         int offset = oop_handle_offset * VMRegImpl::stack_slot_size;
 905         __ str(src->as_Register(), Address(SP, offset));
 906         map->set_oop(VMRegImpl::stack2reg(oop_handle_offset));
 907         if ((i == 0) && (!method_is_static)) {
 908           receiver_offset = offset;
 909         }
 910         oop_handle_offset += VMRegImpl::slots_per_word;
 911 
 912         if (dst->is_stack()) {
 913           __ movs(Rtemp, src->as_Register());
 914           __ add(Rtemp, SP, offset, ne);
 915           __ str(Rtemp, Address(SP, reg2offset_out(dst)));
 916         } else {
 917           __ movs(dst->as_Register(), src->as_Register());
 918           __ add(dst->as_Register(), SP, offset, ne);
 919         }
 920       }
 921     }
 922 
 923     case T_VOID:
 924       break;
 925 
 926 
 927 #ifdef __SOFTFP__
 928     case T_DOUBLE:
 929 #endif
 930     case T_LONG: {
 931       VMReg src_1 = in_regs[i].first();
 932       VMReg src_2 = in_regs[i].second();
 933       VMReg dst_1 = out_regs[i + extra_args].first();
 934       VMReg dst_2 = out_regs[i + extra_args].second();
 935 #if (ALIGN_WIDE_ARGUMENTS == 0)
 936       // C convention can mix a register and a stack slot for a
 937       // 64-bit native argument.
 938 
 939       // Note: the following code should work independently of whether
 940       // the Java calling convention follows C convention or whether
 941       // it aligns 64-bit values.
 942       if (dst_2->is_Register()) {
 943         if (src_1->as_Register() != dst_1->as_Register()) {
 944           assert(src_1->as_Register() != dst_2->as_Register() &&
 945                  src_2->as_Register() != dst_2->as_Register(), "must be");
 946           __ mov(dst_2->as_Register(), src_2->as_Register());
 947           __ mov(dst_1->as_Register(), src_1->as_Register());
 948         } else {
 949           assert(src_2->as_Register() == dst_2->as_Register(), "must be");
 950         }
 951       } else if (src_2->is_Register()) {
 952         if (dst_1->is_Register()) {
 953           // dst mixes a register and a stack slot
 954           assert(dst_2->is_stack() && src_1->is_Register() && src_2->is_Register(), "must be");
 955           assert(src_1->as_Register() != dst_1->as_Register(), "must be");
 956           __ str(src_2->as_Register(), Address(SP, reg2offset_out(dst_2)));
 957           __ mov(dst_1->as_Register(), src_1->as_Register());
 958         } else {
 959           // registers to stack slots
 960           assert(dst_2->is_stack() && src_1->is_Register() && src_2->is_Register(), "must be");
 961           __ str(src_1->as_Register(), Address(SP, reg2offset_out(dst_1)));
 962           __ str(src_2->as_Register(), Address(SP, reg2offset_out(dst_2)));
 963         }
 964       } else if (src_1->is_Register()) {
 965         if (dst_1->is_Register()) {
 966           // src and dst must be R3 + stack slot
 967           assert(dst_1->as_Register() == src_1->as_Register(), "must be");
 968           __ ldr(Rtemp,    Address(FP, reg2offset_in(src_2)));
 969           __ str(Rtemp,    Address(SP, reg2offset_out(dst_2)));
 970         } else {
 971           // <R3,stack> -> <stack,stack>
 972           assert(dst_2->is_stack() && src_2->is_stack(), "must be");
 973           __ ldr(LR, Address(FP, reg2offset_in(src_2)));
 974           __ str(src_1->as_Register(), Address(SP, reg2offset_out(dst_1)));
 975           __ str(LR, Address(SP, reg2offset_out(dst_2)));
 976         }
 977       } else {
 978         assert(src_2->is_stack() && dst_1->is_stack() && dst_2->is_stack(), "must be");
 979         __ ldr(Rtemp, Address(FP, reg2offset_in(src_1)));
 980         __ ldr(LR,    Address(FP, reg2offset_in(src_2)));
 981         __ str(Rtemp, Address(SP, reg2offset_out(dst_1)));
 982         __ str(LR,    Address(SP, reg2offset_out(dst_2)));
 983       }
 984 #else // ALIGN_WIDE_ARGUMENTS
 985       if (src_1->is_stack()) {
 986         assert(src_2->is_stack() && dst_1->is_stack() && dst_2->is_stack(), "must be");
 987         __ ldr(Rtemp, Address(FP, reg2offset_in(src_1)));
 988         __ ldr(LR,    Address(FP, reg2offset_in(src_2)));
 989         __ str(Rtemp, Address(SP, reg2offset_out(dst_1)));
 990         __ str(LR,    Address(SP, reg2offset_out(dst_2)));
 991       } else if (dst_1->is_stack()) {
 992         assert(dst_2->is_stack() && src_1->is_Register() && src_2->is_Register(), "must be");
 993         __ str(src_1->as_Register(), Address(SP, reg2offset_out(dst_1)));
 994         __ str(src_2->as_Register(), Address(SP, reg2offset_out(dst_2)));
 995       } else if (src_1->as_Register() == dst_1->as_Register()) {
 996         assert(src_2->as_Register() == dst_2->as_Register(), "must be");
 997       } else {
 998         assert(src_1->as_Register() != dst_2->as_Register() &&
 999                src_2->as_Register() != dst_2->as_Register(), "must be");
1000         __ mov(dst_2->as_Register(), src_2->as_Register());
1001         __ mov(dst_1->as_Register(), src_1->as_Register());
1002       }
1003 #endif // ALIGN_WIDE_ARGUMENTS
1004       break;
1005     }
1006 
1007 #if (!defined __SOFTFP__ && !defined __ABI_HARD__)
1008     case T_FLOAT: {
1009       VMReg src = in_regs[i].first();
1010       VMReg dst = out_regs[i + extra_args].first();
1011       if (src->is_stack()) {
1012         assert(dst->is_stack(), "must be");
1013         __ ldr(Rtemp, Address(FP, reg2offset_in(src)));
1014         __ str(Rtemp, Address(SP, reg2offset_out(dst)));
1015       } else if (dst->is_stack()) {
1016         __ fsts(src->as_FloatRegister(), Address(SP, reg2offset_out(dst)));
1017       } else {
1018         assert(src->is_FloatRegister() && dst->is_Register(), "must be");
1019         __ fmrs(dst->as_Register(), src->as_FloatRegister());
1020       }
1021       break;
1022     }
1023 
1024     case T_DOUBLE: {
1025       VMReg src_1 = in_regs[i].first();
1026       VMReg src_2 = in_regs[i].second();
1027       VMReg dst_1 = out_regs[i + extra_args].first();
1028       VMReg dst_2 = out_regs[i + extra_args].second();
1029       if (src_1->is_stack()) {
1030         assert(src_2->is_stack() && dst_1->is_stack() && dst_2->is_stack(), "must be");
1031         __ ldr(Rtemp, Address(FP, reg2offset_in(src_1)));
1032         __ ldr(LR,    Address(FP, reg2offset_in(src_2)));
1033         __ str(Rtemp, Address(SP, reg2offset_out(dst_1)));
1034         __ str(LR,    Address(SP, reg2offset_out(dst_2)));
1035       } else if (dst_1->is_stack()) {
1036         assert(dst_2->is_stack() && src_1->is_FloatRegister(), "must be");
1037         __ fstd(src_1->as_FloatRegister(), Address(SP, reg2offset_out(dst_1)));
1038 #if (ALIGN_WIDE_ARGUMENTS == 0)
1039       } else if (dst_2->is_stack()) {
1040         assert(! src_2->is_stack(), "must be"); // assuming internal java convention is aligned
1041         // double register must go into R3 + one stack slot
1042         __ fmrrd(dst_1->as_Register(), Rtemp, src_1->as_FloatRegister());
1043         __ str(Rtemp, Address(SP, reg2offset_out(dst_2)));
1044 #endif
1045       } else {
1046         assert(src_1->is_FloatRegister() && dst_1->is_Register() && dst_2->is_Register(), "must be");
1047         __ fmrrd(dst_1->as_Register(), dst_2->as_Register(), src_1->as_FloatRegister());
1048       }
1049       break;
1050     }
1051 #endif // __SOFTFP__
1052 
1053 #ifdef __ABI_HARD__
1054     case T_FLOAT: {
1055       VMReg src = in_regs[i].first();
1056       VMReg dst = out_regs[i + extra_args].first();
1057       if (src->is_stack()) {
1058         if (dst->is_stack()) {
1059           __ ldr(Rtemp, Address(FP, reg2offset_in(src)));
1060           __ str(Rtemp, Address(SP, reg2offset_out(dst)));
1061         } else {
1062           // C2 Java calling convention does not populate S14 and S15, therefore
1063           // those need to be loaded from stack here
1064           __ flds(dst->as_FloatRegister(), Address(FP, reg2offset_in(src)));
1065           fp_regs_in_arguments++;
1066         }
1067       } else {
1068         assert(src->is_FloatRegister(), "must be");
1069         fp_regs_in_arguments++;
1070       }
1071       break;
1072     }
1073     case T_DOUBLE: {
1074       VMReg src_1 = in_regs[i].first();
1075       VMReg src_2 = in_regs[i].second();
1076       VMReg dst_1 = out_regs[i + extra_args].first();
1077       VMReg dst_2 = out_regs[i + extra_args].second();
1078       if (src_1->is_stack()) {
1079         if (dst_1->is_stack()) {
1080           assert(dst_2->is_stack(), "must be");
1081           __ ldr(Rtemp, Address(FP, reg2offset_in(src_1)));
1082           __ ldr(LR,    Address(FP, reg2offset_in(src_2)));
1083           __ str(Rtemp, Address(SP, reg2offset_out(dst_1)));
1084           __ str(LR,    Address(SP, reg2offset_out(dst_2)));
1085         } else {
1086           // C2 Java calling convention does not populate S14 and S15, therefore
1087           // those need to be loaded from stack here
1088           __ fldd(dst_1->as_FloatRegister(), Address(FP, reg2offset_in(src_1)));
1089           fp_regs_in_arguments += 2;
1090         }
1091       } else {
1092         assert(src_1->is_FloatRegister() && src_2->is_FloatRegister(), "must be");
1093         fp_regs_in_arguments += 2;
1094       }
1095       break;
1096     }
1097 #endif // __ABI_HARD__
1098 
1099     default: {
1100       assert(in_sig_bt[i] != T_ADDRESS, "found T_ADDRESS in java args");
1101       VMReg src = in_regs[i].first();
1102       VMReg dst = out_regs[i + extra_args].first();
1103       if (src->is_stack()) {
1104         assert(dst->is_stack(), "must be");
1105         __ ldr(Rtemp, Address(FP, reg2offset_in(src)));
1106         __ str(Rtemp, Address(SP, reg2offset_out(dst)));
1107       } else if (dst->is_stack()) {
1108         __ str(src->as_Register(), Address(SP, reg2offset_out(dst)));
1109       } else {
1110         assert(src->is_Register() && dst->is_Register(), "must be");
1111         __ mov(dst->as_Register(), src->as_Register());
1112       }
1113     }
1114     }
1115   }
1116 
1117   // Get Klass mirror
1118   int klass_offset = -1;
1119   if (method_is_static) {
1120     klass_offset = oop_handle_offset * VMRegImpl::stack_slot_size;
1121     __ mov_oop(Rtemp, JNIHandles::make_local(method->method_holder()->java_mirror()));
1122     __ add(c_rarg1, SP, klass_offset);
1123     __ str(Rtemp, Address(SP, klass_offset));
1124     map->set_oop(VMRegImpl::stack2reg(oop_handle_offset));
1125   }
1126 
1127   // the PC offset given to add_gc_map must match the PC saved in set_last_Java_frame
1128   int pc_offset = __ set_last_Java_frame(SP, FP, true, Rtemp);
1129   assert(((__ pc()) - start) == __ offset(), "warning: start differs from code_begin");
1130   oop_maps->add_gc_map(pc_offset, map);
1131 
1132   // Order last_Java_pc store with the thread state transition (to _thread_in_native)
1133   __ membar(MacroAssembler::StoreStore, Rtemp);
1134 
1135   // RedefineClasses() tracing support for obsolete method entry
1136   if (log_is_enabled(Trace, redefine, class, obsolete)) {
1137     __ save_caller_save_registers();
1138     __ mov(R0, Rthread);
1139     __ mov_metadata(R1, method());
1140     __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), R0, R1);
1141     __ restore_caller_save_registers();
1142   }
1143 
1144   const Register sync_handle = R5;
1145   const Register sync_obj    = R6;
1146   const Register disp_hdr    = altFP_7_11;
1147   const Register tmp         = R8;
1148 
1149   Label slow_lock, lock_done, fast_lock;
1150   if (method->is_synchronized()) {
1151     // The first argument is a handle to sync object (a class or an instance)
1152     __ ldr(sync_obj, Address(R1));
1153     // Remember the handle for the unlocking code
1154     __ mov(sync_handle, R1);
1155 
1156     if (LockingMode == LM_LIGHTWEIGHT) {
1157       log_trace(fastlock)("SharedRuntime lock fast");
1158       __ lightweight_lock(sync_obj /* object */, disp_hdr /* t1 */, tmp /* t2 */, Rtemp /* t3 */,
1159                           0x7 /* savemask */, slow_lock);
1160       // Fall through to lock_done
1161     } else if (LockingMode == LM_LEGACY) {
1162       const Register mark = tmp;
1163       // On MP platforms the next load could return a 'stale' value if the memory location has been modified by another thread.
1164       // That would be acceptable, as either the CAS or the slow-case path is taken in that case.
1165 
1166       __ ldr(mark, Address(sync_obj, oopDesc::mark_offset_in_bytes()));
1167       __ sub(disp_hdr, FP, lock_slot_fp_offset);
1168       __ tst(mark, markWord::unlocked_value);
1169       __ b(fast_lock, ne);
1170 
1171       // Check for recursive lock
1172       // See comments in InterpreterMacroAssembler::lock_object for
1173       // explanations on the fast recursive locking check.
1174       // Check independently the low bits and the distance to SP
1175       // -1- test low 2 bits
1176       __ movs(Rtemp, AsmOperand(mark, lsl, 30));
1177       // -2- test (hdr - SP) if the low two bits are 0
1178       __ sub(Rtemp, mark, SP, eq);
1179       __ movs(Rtemp, AsmOperand(Rtemp, lsr, exact_log2(os::vm_page_size())), eq);
1180       // If still 'eq' then recursive locking OK
1181       // set to zero if recursive lock, set to non-zero otherwise (see discussion in JDK-8267042)
1182       __ str(Rtemp, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes()));
1183       __ b(lock_done, eq);
1184       __ b(slow_lock);
1185 
1186       __ bind(fast_lock);
1187       __ str(mark, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes()));
1188 
1189       __ cas_for_lock_acquire(mark, disp_hdr, sync_obj, Rtemp, slow_lock);
1190     }
1191     __ bind(lock_done);
1192   }
1193 
1194   // Get JNIEnv*
1195   __ add(c_rarg0, Rthread, in_bytes(JavaThread::jni_environment_offset()));
1196 
1197   // Perform thread state transition
1198   __ mov(Rtemp, _thread_in_native);
1199   __ str(Rtemp, Address(Rthread, JavaThread::thread_state_offset()));
1200 
1201   // Finally, call the native method
1202   __ call(method->native_function());
1203 
1204   // Set FPSCR/FPCR to a known state
1205   if (AlwaysRestoreFPU) {
1206     __ restore_default_fp_mode();
1207   }
1208 
1209   // Ensure a Boolean result is mapped to 0..1
1210   if (ret_type == T_BOOLEAN) {
1211     __ c2bool(R0);
1212   }
1213 
1214   // Do a safepoint check while thread is in transition state
1215   Label call_safepoint_runtime, return_to_java;
1216   __ mov(Rtemp, _thread_in_native_trans);
1217   __ str_32(Rtemp, Address(Rthread, JavaThread::thread_state_offset()));
1218 
1219   // make sure the store is observed before reading the SafepointSynchronize state and further mem refs
1220   if (!UseSystemMemoryBarrier) {
1221     __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreLoad | MacroAssembler::StoreStore), Rtemp);
1222   }
1223 
1224   __ safepoint_poll(R2, call_safepoint_runtime);
1225   __ ldr_u32(R3, Address(Rthread, JavaThread::suspend_flags_offset()));
1226   __ cmp(R3, 0);
1227   __ b(call_safepoint_runtime, ne);
1228 
1229   __ bind(return_to_java);
1230 
1231   // Perform thread state transition and reguard stack yellow pages if needed
1232   Label reguard, reguard_done;
1233   __ mov(Rtemp, _thread_in_Java);
1234   __ ldr_s32(R2, Address(Rthread, JavaThread::stack_guard_state_offset()));
1235   __ str_32(Rtemp, Address(Rthread, JavaThread::thread_state_offset()));
1236 
1237   __ cmp(R2, StackOverflow::stack_guard_yellow_reserved_disabled);
1238   __ b(reguard, eq);
1239   __ bind(reguard_done);
1240 
1241   Label slow_unlock, unlock_done;
1242   if (method->is_synchronized()) {
1243     if (LockingMode == LM_LIGHTWEIGHT) {
1244       log_trace(fastlock)("SharedRuntime unlock fast");
1245       __ lightweight_unlock(sync_obj, R2 /* t1 */, tmp /* t2 */, Rtemp /* t3 */,
1246                             7 /* savemask */, slow_unlock);
1247       // Fall through
1248     } else if (LockingMode == LM_LEGACY) {
1249       // See C1_MacroAssembler::unlock_object() for more comments
1250       __ ldr(sync_obj, Address(sync_handle));
1251 
1252       // A null displaced header means a recursive enter; nothing to undo in that case
1253       __ ldr(R2, Address(disp_hdr, BasicLock::displaced_header_offset_in_bytes()));
1254       __ cbz(R2, unlock_done);
1255 
1256       __ cas_for_lock_release(disp_hdr, R2, sync_obj, Rtemp, slow_unlock);
1257     }
1258     __ bind(unlock_done);
1259   }
1260 
1261   // Set last java frame and handle block to zero
1262   __ ldr(LR, Address(Rthread, JavaThread::active_handles_offset()));
1263   __ reset_last_Java_frame(Rtemp); // sets Rtemp to 0 on 32-bit ARM
1264 
1265   __ str_32(Rtemp, Address(LR, JNIHandleBlock::top_offset()));
1266   if (CheckJNICalls) {
1267     __ str(__ zero_register(Rtemp), Address(Rthread, JavaThread::pending_jni_exception_check_fn_offset()));
1268   }
1269 
1270   // Unbox oop result, e.g. JNIHandles::resolve value in R0.
1271   if (ret_type == T_OBJECT || ret_type == T_ARRAY) {
1272     __ resolve_jobject(R0,      // value
1273                        Rtemp,   // tmp1
1274                        R1_tmp); // tmp2
1275   }
1276 
1277   // Any exception pending?
1278   __ ldr(Rtemp, Address(Rthread, Thread::pending_exception_offset()));
1279   __ mov(SP, FP);
1280 
1281   __ cmp(Rtemp, 0);
1282   // Pop the frame and return if no exception pending
1283   __ pop(RegisterSet(FP) | RegisterSet(PC), eq);
1284   // Pop the frame and forward the exception. Rexception_pc contains return address.
1285   __ ldr(FP, Address(SP, wordSize, post_indexed), ne);
1286   __ ldr(Rexception_pc, Address(SP, wordSize, post_indexed), ne);
1287   __ jump(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type, Rtemp);
1288 
1289   // Safepoint operation and/or pending suspend request is in progress.
1290   // Save the return values and call the runtime function by hand.
1291   __ bind(call_safepoint_runtime);
1292   push_result_registers(masm, ret_type);
1293   __ mov(R0, Rthread);
1294   __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans));
1295   pop_result_registers(masm, ret_type);
1296   __ b(return_to_java);
1297 
1298   // Reguard stack pages. Save native results around a call to C runtime.
1299   __ bind(reguard);
1300   push_result_registers(masm, ret_type);
1301   __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages));
1302   pop_result_registers(masm, ret_type);
1303   __ b(reguard_done);
1304 
1305   if (method->is_synchronized()) {
1306     // Locking slow case
1307     __ bind(slow_lock);
1308 
1309     push_param_registers(masm, fp_regs_in_arguments);
1310 
1311     // last_Java_frame is already set, so do call_VM manually; no exception can occur
1312     __ mov(R0, sync_obj);
1313     __ mov(R1, disp_hdr);
1314     __ mov(R2, Rthread);
1315     __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C));
1316 
1317     pop_param_registers(masm, fp_regs_in_arguments);
1318 
1319     __ b(lock_done);
1320 
1321     // Unlocking slow case
1322     __ bind(slow_unlock);
1323 
1324     push_result_registers(masm, ret_type);
1325 
1326     // Clear pending exception before reentering VM.
1327     // Can store the oop in register since it is a leaf call.
1328     assert_different_registers(Rtmp_save1, sync_obj, disp_hdr);
1329     __ ldr(Rtmp_save1, Address(Rthread, Thread::pending_exception_offset()));
1330     Register zero = __ zero_register(Rtemp);
1331     __ str(zero, Address(Rthread, Thread::pending_exception_offset()));
1332     __ mov(R0, sync_obj);
1333     __ mov(R1, disp_hdr);
1334     __ mov(R2, Rthread);
1335     __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C));
1336     __ str(Rtmp_save1, Address(Rthread, Thread::pending_exception_offset()));
1337 
1338     pop_result_registers(masm, ret_type);
1339 
1340     __ b(unlock_done);
1341   }
1342 
1343   __ flush();
1344   return nmethod::new_native_nmethod(method,
1345                                      compile_id,
1346                                      masm->code(),
1347                                      vep_offset,
1348                                      frame_complete,
1349                                      stack_slots / VMRegImpl::slots_per_word,
1350                                      in_ByteSize(method_is_static ? klass_offset : receiver_offset),
1351                                      in_ByteSize(lock_slot_offset * VMRegImpl::stack_slot_size),
1352                                      oop_maps);
1353 }
1354 
1355 // This function returns the adjustment size (in number of words) to a c2i adapter
1356 // activation for use during deoptimization
1357 int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
1358   int extra_locals_size = (callee_locals - callee_parameters) * Interpreter::stackElementWords;
1359   return extra_locals_size;
1360 }
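     // Illustrative example only (the concrete values are assumptions, not taken
     // from any build): with callee_parameters == 2, callee_locals == 5 and
     // Interpreter::stackElementWords == 1 (one word per stack element, as on
     // 32-bit ARM), the adjustment is (5 - 2) * 1 == 3 extra words for the
     // callee's non-parameter locals.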
1361 
1362 
1363 // Number of stack slots between incoming argument block and the start of
1364 // a new frame.  The PROLOG must add this many slots to the stack.  The
1365 // EPILOG must remove this many slots.
1366 // FP + LR
1367 uint SharedRuntime::in_preserve_stack_slots() {
1368   return 2 * VMRegImpl::slots_per_word;
1369 }
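     // Example (illustrative): on 32-bit ARM a stack slot is 4 bytes and so is a
     // word, so VMRegImpl::slots_per_word is 1 and the two preserved words
     // (FP and LR) occupy 2 slots, i.e. 8 bytes at the top of the frame.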
1370 
1371 uint SharedRuntime::out_preserve_stack_slots() {
1372   return 0;
1373 }
1374 
1375 //------------------------------generate_deopt_blob----------------------------
1376 void SharedRuntime::generate_deopt_blob() {
1377   ResourceMark rm;
1378   CodeBuffer buffer("deopt_blob", 1024, 1024);
1379   int frame_size_in_words;
1380   OopMapSet* oop_maps;
1381   int reexecute_offset;
1382   int exception_in_tls_offset;
1383   int exception_offset;
1384 
1385   MacroAssembler* masm = new MacroAssembler(&buffer);
1386   Label cont;
1387   const Register Rkind   = R9; // caller-saved
1388   const Register Rublock = R6;
1389   const Register Rsender = altFP_7_11;
1390   assert_different_registers(Rkind, Rublock, Rsender, Rexception_obj, Rexception_pc, R0, R1, R2, R3, R8, Rtemp);
1391 
1392   address start = __ pc();
1393 
1394   oop_maps = new OopMapSet();
1395   // LR saved by caller (can be live in c2 method)
1396 
1397   // A deopt is a case where LR may be live in the c2 nmethod, so it is
1398   // not possible to call the deopt blob from the nmethod and pass the
1399   // address of the deopt handler of the nmethod in LR. What happens
1400   // instead is that the caller of the deopt blob pushes the current
1401   // address so the deopt blob doesn't have to do it. This way LR is
1402   // preserved: it still contains the live value from the nmethod and is
1403   // saved at R14/R30_offset here.
1404   OopMap* map = RegisterSaver::save_live_registers(masm, &frame_size_in_words, true);
1405   __ mov(Rkind, Deoptimization::Unpack_deopt);
1406   __ b(cont);
1407 
1408   exception_offset = __ pc() - start;
1409 
1410   // Transfer Rexception_obj & Rexception_pc into TLS and fall through to
1411   // the exception_in_tls_offset entry point.
1412   __ str(Rexception_obj, Address(Rthread, JavaThread::exception_oop_offset()));
1413   __ str(Rexception_pc, Address(Rthread, JavaThread::exception_pc_offset()));
1414   // Force return value to null to avoid confusing the escape analysis
1415   // logic. Everything is dead here anyway.
1416   __ mov(R0, 0);
1417 
1418   exception_in_tls_offset = __ pc() - start;
1419 
1420   // Exception data is in JavaThread structure
1421   // Patch the return address of the current frame
1422   __ ldr(LR, Address(Rthread, JavaThread::exception_pc_offset()));
1423   (void) RegisterSaver::save_live_registers(masm, &frame_size_in_words);
1424   {
1425     const Register Rzero = __ zero_register(Rtemp); // XXX should be OK for C2 but not 100% sure
1426     __ str(Rzero, Address(Rthread, JavaThread::exception_pc_offset()));
1427   }
1428   __ mov(Rkind, Deoptimization::Unpack_exception);
1429   __ b(cont);
1430 
1431   reexecute_offset = __ pc() - start;
1432 
1433   (void) RegisterSaver::save_live_registers(masm, &frame_size_in_words);
1434   __ mov(Rkind, Deoptimization::Unpack_reexecute);
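
     // Summary of the entry points above (descriptive only, no code emitted here):
     //   blob start               - save live registers (including LR),
     //                              Rkind = Unpack_deopt, branch to cont
     //   exception_offset         - stash Rexception_obj/Rexception_pc in TLS,
     //                              null the return value (R0), fall through
     //   exception_in_tls_offset  - patch LR from the TLS exception pc, save live
     //                              registers, Rkind = Unpack_exception, branch to cont
     //   reexecute_offset         - save live registers, Rkind = Unpack_reexecute,
     //                              fall through to cont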
1435 
1436   // Calculate UnrollBlock and save the result in Rublock
1437   __ bind(cont);
1438   __ mov(R0, Rthread);
1439   __ mov(R1, Rkind);
1440 
1441   int pc_offset = __ set_last_Java_frame(SP, FP, false, Rtemp); // note: FP may not need to be saved (not on x86)
1442   assert(((__ pc()) - start) == __ offset(), "warning: start differs from code_begin");
1443   __ call(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info));
1444   if (pc_offset == -1) {
1445     pc_offset = __ offset();
1446   }
1447   oop_maps->add_gc_map(pc_offset, map);
1448   __ reset_last_Java_frame(Rtemp); // Rtemp free since scratched by far call
1449 
1450   __ mov(Rublock, R0);
1451 
1452   // Reload Rkind from the UnrollBlock (might have changed)
1453   __ ldr_s32(Rkind, Address(Rublock, Deoptimization::UnrollBlock::unpack_kind_offset()));
1454   Label noException;
1455   __ cmp_32(Rkind, Deoptimization::Unpack_exception);   // Was exception pending?
1456   __ b(noException, ne);
1457   // handle exception case
1458 #ifdef ASSERT
1459   // assert that exception_pc is zero in tls
1460   { Label L;
1461     __ ldr(Rexception_pc, Address(Rthread, JavaThread::exception_pc_offset()));
1462     __ cbz(Rexception_pc, L);
1463     __ stop("exception pc should be null");
1464     __ bind(L);
1465   }
1466 #endif
1467   __ ldr(Rexception_obj, Address(Rthread, JavaThread::exception_oop_offset()));
1468   __ verify_oop(Rexception_obj);
1469   {
1470     const Register Rzero = __ zero_register(Rtemp);
1471     __ str(Rzero, Address(Rthread, JavaThread::exception_oop_offset()));
1472   }
1473 
1474   __ bind(noException);
1475 
1476   // This frame is going away.  Fetch return value, so we can move it to
1477   // a new frame.
1478   __ ldr(R0, Address(SP, RegisterSaver::R0_offset * wordSize));
1479   __ ldr(R1, Address(SP, RegisterSaver::R1_offset * wordSize));
1480 #ifndef __SOFTFP__
1481   __ ldr_double(D0, Address(SP, RegisterSaver::D0_offset * wordSize));
1482 #endif
1483   // pop frame
1484   __ add(SP, SP, RegisterSaver::reg_save_size * wordSize);
1485 
1486   // Set initial stack state before pushing interpreter frames
1487   __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset()));
1488   __ ldr(R2, Address(Rublock, Deoptimization::UnrollBlock::frame_pcs_offset()));
1489   __ ldr(R3, Address(Rublock, Deoptimization::UnrollBlock::frame_sizes_offset()));
1490 
1491   __ add(SP, SP, Rtemp);
1492 
1493 #ifdef ASSERT
1494   // Compilers generate code that bangs the stack by as much as the
1495   // interpreter would need, so this stack banging should never
1496   // trigger a fault. Verify that it does not on non-product builds.
1497   // See if there is enough stack to push deoptimized frames.
1498   //
1499   // The compiled method that we are deoptimizing was popped from the stack.
1500   // If the stack bang results in a stack overflow, we don't return to the
1501   // method that is being deoptimized. The stack overflow exception is
1502   // propagated to the caller of the deoptimized method. Need to get the pc
1503   // from the caller in LR and restore FP.
1504   __ ldr(LR, Address(R2, 0));
1505   __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset()));
1506   __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::total_frame_sizes_offset()));
1507   __ arm_stack_overflow_check(R8, Rtemp);
1508 #endif
1509   __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::number_of_frames_offset()));
1510 
1511   // Pick up the initial fp we should save
1512   // XXX Note: was ldr(FP, Address(FP));
1513 
1514   // The compiler no longer uses FP as a frame pointer for the
1515   // compiled code. It may instead be used by the C2 register allocator
1516   // or to record the original SP for JSR292 call sites.
1517 
1518   // Hence, ldr(FP, Address(FP)) is probably not correct. For x86,
1519   // Deoptimization::fetch_unroll_info computes the right FP value and
1520   // stores it in Rublock.initial_info. The same mechanism is used here on ARM.
1521   __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset()));
1522 
1523   __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::caller_adjustment_offset()));
1524   __ mov(Rsender, SP);
1525   __ sub(SP, SP, Rtemp);
1526 
1527   // Push interpreter frames in a loop
1528   Label loop;
1529   __ bind(loop);
1530   __ ldr(LR, Address(R2, wordSize, post_indexed));         // load frame pc
1531   __ ldr(Rtemp, Address(R3, wordSize, post_indexed));      // load frame size
1532 
1533   __ raw_push(FP, LR);                                     // create new frame
1534   __ mov(FP, SP);
1535   __ sub(Rtemp, Rtemp, 2*wordSize);
1536 
1537   __ sub(SP, SP, Rtemp);
1538 
1539   __ str(Rsender, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize));
1540   __ mov(LR, 0);
1541   __ str(LR, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize));
1542 
1543   __ subs(R8, R8, 1);                               // decrement counter
1544   __ mov(Rsender, SP);
1545   __ b(loop, ne);
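     // Rough C-style sketch of the loop above (illustrative only; frame_pcs and
     // frame_sizes stand for the UnrollBlock arrays held in R2/R3, sender stands
     // for Rsender, and frame sizes are in bytes):
     //   while (number_of_frames-- > 0) {
     //     address  pc   = *frame_pcs++;               // into LR
     //     intptr_t size = *frame_sizes++;             // into Rtemp
     //     push(pc); push(fp);                         // raw_push(FP, LR)
     //     fp  = sp;
     //     sp -= size - 2 * wordSize;                  // body of the new frame
     //     store sender at FP + interpreter_frame_sender_sp_offset * wordSize;
     //     store 0      at FP + interpreter_frame_last_sp_offset * wordSize;
     //     sender = sp;                                // next frame's sender SP
     //   }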
1546 
1547   // Re-push self-frame
1548   __ ldr(LR, Address(R2));
1549   __ raw_push(FP, LR);
1550   __ mov(FP, SP);
1551   __ sub(SP, SP, (frame_size_in_words - 2) * wordSize);
1552 
1553   // Restore frame locals after moving the frame
1554   __ str(R0, Address(SP, RegisterSaver::R0_offset * wordSize));
1555   __ str(R1, Address(SP, RegisterSaver::R1_offset * wordSize));
1556 
1557 #ifndef __SOFTFP__
1558   __ str_double(D0, Address(SP, RegisterSaver::D0_offset * wordSize));
1559 #endif // !__SOFTFP__
1560 
1561 #ifdef ASSERT
1562   // Reload Rkind from the UnrollBlock and check that it was not overwritten (Rkind is not callee-saved)
1563   { Label L;
1564     __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::unpack_kind_offset()));
1565     __ cmp_32(Rkind, Rtemp);
1566     __ b(L, eq);
1567     __ stop("Rkind was overwritten");
1568     __ bind(L);
1569   }
1570 #endif
1571 
1572   // Call unpack_frames with proper arguments
1573   __ mov(R0, Rthread);
1574   __ mov(R1, Rkind);
1575 
1576   pc_offset = __ set_last_Java_frame(SP, FP, true, Rtemp);
1577   assert(((__ pc()) - start) == __ offset(), "warning: start differs from code_begin");
1578   __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames));
1579   if (pc_offset == -1) {
1580     pc_offset = __ offset();
1581   }
1582   oop_maps->add_gc_map(pc_offset, new OopMap(frame_size_in_words * VMRegImpl::slots_per_word, 0));
1583   __ reset_last_Java_frame(Rtemp); // Rtemp free since scratched by far call
1584 
1585   // Collect return values, pop self-frame and jump to interpreter
1586   __ ldr(R0, Address(SP, RegisterSaver::R0_offset * wordSize));
1587   __ ldr(R1, Address(SP, RegisterSaver::R1_offset * wordSize));
1588   // Interpreter floats are controlled by __SOFTFP__, but the compiler's
1589   // float return value registers are controlled by __ABI_HARD__.
1590   // This matters for vfp-sflt builds.
1591 #ifndef __SOFTFP__
1592   // Interpreter hard float
1593 #ifdef __ABI_HARD__
1594   // Compiler float return value in FP registers
1595   __ ldr_double(D0, Address(SP, RegisterSaver::D0_offset * wordSize));
1596 #else
1597   // Compiler float return value in integer registers,
1598   // copy to D0 for the interpreter (D0 <-- R1:R0)
1599   __ fmdrr(D0_tos, R0, R1);
1600 #endif
1601 #endif // !__SOFTFP__
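     // To summarize the cases above (descriptive only): a soft-float build leaves
     // the value in R0/R1; a hard-float (__ABI_HARD__) build reloads D0 from the
     // save area because both compiler and interpreter use D0; a vfp-sflt build
     // returns compiled floats in R0/R1 while the interpreter expects D0, hence
     // the fmdrr copy.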
1602   __ mov(SP, FP);
1603 
1604   __ pop(RegisterSet(FP) | RegisterSet(PC));
1605 
1606   __ flush();
1607 
1608   _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset,
1609                                            reexecute_offset, frame_size_in_words);
1610   _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
1611 }
1612 
1613 #ifdef COMPILER2
1614 
1615 //------------------------------generate_uncommon_trap_blob--------------------
1616 // Ought to generate an ideal graph & compile, but here's some ASM
1617 // instead.
1618 void SharedRuntime::generate_uncommon_trap_blob() {
1619   // allocate space for the code
1620   ResourceMark rm;
1621 
1622   // setup code generation tools
1623 #ifdef _LP64
1624   CodeBuffer buffer("uncommon_trap_blob", 2700, 512);
1625 #else
1626   // Measured 8/7/03 at 660 in 32bit debug build
1627   CodeBuffer buffer("uncommon_trap_blob", 2000, 512);
1628 #endif
1629   // bypassed when code generation is useless
1630   MacroAssembler* masm               = new MacroAssembler(&buffer);
1631   const Register Rublock = R6;
1632   const Register Rsender = altFP_7_11;
1633   assert_different_registers(Rublock, Rsender, Rexception_obj, R0, R1, R2, R3, R8, Rtemp);
1634 
1635   //
1636   // This is the entry point for all traps the compiler takes when it thinks
1637   // it cannot handle further execution of compiled code. The frame is
1638   // deoptimized in these cases and converted into interpreter frames for
1639   // execution.
1640   // The steps taken by this frame are as follows:
1641   //   - push a fake "unpack_frame"
1642   //   - call the C routine Deoptimization::uncommon_trap (this function
1643   //     packs the current compiled frame into vframe arrays and returns
1644   //     information about the number and size of interpreter frames which
1645   //     are equivalent to the frame which is being deoptimized)
1646   //   - deallocate the "unpack_frame"
1647   //   - deallocate the deoptimization frame
1648   //   - in a loop using the information returned in the previous step
1649   //     push interpreter frames;
1650   //   - create a dummy "unpack_frame"
1651   //   - call the C routine: Deoptimization::unpack_frames (this function
1652   //     lays out values on the interpreter frame which was just created)
1653   //   - deallocate the dummy unpack_frame
1654   //   - return to the interpreter entry point
1655   //
1656   //  Refer to the following methods for more information:
1657   //   - Deoptimization::uncommon_trap
1658   //   - Deoptimization::unpack_frames
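     //
     //  Rough sketch of the control flow implemented below (illustrative only;
     //  pop_frame and push_interpreter_frame are made-up helpers standing for
     //  the SP adjustment and the frame-push loop, and argument names follow
     //  the comments above):
     //    UnrollBlock* info = Deoptimization::uncommon_trap(thread, unloaded_class_index,
     //                                                      Unpack_uncommon_trap);
     //    pop_frame(info->size_of_deoptimized_frame());
     //    for (int i = 0; i < info->number_of_frames(); i++)
     //      push_interpreter_frame(info->frame_pcs()[i], info->frame_sizes()[i]);
     //    Deoptimization::unpack_frames(thread, Unpack_uncommon_trap);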
1659 
1660   // the unloaded class index is in R0 (first parameter to this blob)
1661 
1662   __ raw_push(FP, LR);
1663   __ set_last_Java_frame(SP, FP, false, Rtemp);
1664   __ mov(R2, Deoptimization::Unpack_uncommon_trap);
1665   __ mov(R1, R0);
1666   __ mov(R0, Rthread);
1667   __ call(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap));
1668   __ mov(Rublock, R0);
1669   __ reset_last_Java_frame(Rtemp);
1670   __ raw_pop(FP, LR);
1671 
1672 #ifdef ASSERT
1673   { Label L;
1674     __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::unpack_kind_offset()));
1675     __ cmp_32(Rtemp, Deoptimization::Unpack_uncommon_trap);
1676     __ b(L, eq);
1677     __ stop("SharedRuntime::generate_uncommon_trap_blob: expected Unpack_uncommon_trap");
1678     __ bind(L);
1679   }
1680 #endif
1681 
1682 
1683   // Set initial stack state before pushing interpreter frames
1684   __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset()));
1685   __ ldr(R2, Address(Rublock, Deoptimization::UnrollBlock::frame_pcs_offset()));
1686   __ ldr(R3, Address(Rublock, Deoptimization::UnrollBlock::frame_sizes_offset()));
1687 
1688   __ add(SP, SP, Rtemp);
1689 
1690   // See if there is enough stack to push deoptimized frames.
1691 #ifdef ASSERT
1692   // Compilers generate code that bangs the stack by as much as the
1693   // interpreter would need, so this stack banging should never
1694   // trigger a fault. Verify that it does not on non-product builds.
1695   //
1696   // The compiled method that we are deoptimizing was popped from the stack.
1697   // If the stack bang results in a stack overflow, we don't return to the
1698   // method that is being deoptimized. The stack overflow exception is
1699   // propagated to the caller of the deoptimized method. Need to get the pc
1700   // from the caller in LR and restore FP.
1701   __ ldr(LR, Address(R2, 0));
1702   __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset()));
1703   __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::total_frame_sizes_offset()));
1704   __ arm_stack_overflow_check(R8, Rtemp);
1705 #endif
1706   __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::number_of_frames_offset()));
1707   __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::caller_adjustment_offset()));
1708   __ mov(Rsender, SP);
1709   __ sub(SP, SP, Rtemp);
1710   //  __ ldr(FP, Address(FP));
1711   __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset()));
1712 
1713   // Push interpreter frames in a loop
1714   Label loop;
1715   __ bind(loop);
1716   __ ldr(LR, Address(R2, wordSize, post_indexed));         // load frame pc
1717   __ ldr(Rtemp, Address(R3, wordSize, post_indexed));      // load frame size
1718 
1719   __ raw_push(FP, LR);                                     // create new frame
1720   __ mov(FP, SP);
1721   __ sub(Rtemp, Rtemp, 2*wordSize);
1722 
1723   __ sub(SP, SP, Rtemp);
1724 
1725   __ str(Rsender, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize));
1726   __ mov(LR, 0);
1727   __ str(LR, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize));
1728   __ subs(R8, R8, 1);                               // decrement counter
1729   __ mov(Rsender, SP);
1730   __ b(loop, ne);
1731 
1732   // Re-push self-frame
1733   __ ldr(LR, Address(R2));
1734   __ raw_push(FP, LR);
1735   __ mov(FP, SP);
1736 
1737   // Call unpack_frames with proper arguments
1738   __ mov(R0, Rthread);
1739   __ mov(R1, Deoptimization::Unpack_uncommon_trap);
1740   __ set_last_Java_frame(SP, FP, true, Rtemp);
1741   __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames));
1742   //  oop_maps->add_gc_map(__ pc() - start, new OopMap(frame_size_in_words, 0));
1743   __ reset_last_Java_frame(Rtemp);
1744 
1745   __ mov(SP, FP);
1746   __ pop(RegisterSet(FP) | RegisterSet(PC));
1747 
1748   masm->flush();
1749   _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, nullptr, 2 /* LR+FP */);
1750 }
1751 
1752 #endif // COMPILER2
1753 
1754 //------------------------------generate_handler_blob------
1755 //
1756 // Generate a special Compile2Runtime blob that saves all registers,
1757 // sets up an oopmap, and calls the safepoint code to stop the compiled
1758 // code at a safepoint.
1759 //
1760 SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) {
1761   assert(StubRoutines::forward_exception_entry() != nullptr, "must be generated before");
1762 
1763   ResourceMark rm;
1764   CodeBuffer buffer("handler_blob", 256, 256);
1765   int frame_size_words;
1766   OopMapSet* oop_maps;
1767 
1768   bool cause_return = (poll_type == POLL_AT_RETURN);
1769 
1770   MacroAssembler* masm = new MacroAssembler(&buffer);
1771   address start = __ pc();
1772   oop_maps = new OopMapSet();
1773 
1774   if (!cause_return) {
1775     __ sub(SP, SP, 4); // make room for LR which may still be live
1776                        // here if we are coming from a c2 method
1777   }
1778 
1779   OopMap* map = RegisterSaver::save_live_registers(masm, &frame_size_words, !cause_return);
1780   if (!cause_return) {
1781     // update saved PC with correct value
1782     // need 2 steps because LR can be live in c2 method
1783     __ ldr(LR, Address(Rthread, JavaThread::saved_exception_pc_offset()));
1784     __ str(LR, Address(SP, RegisterSaver::LR_offset * wordSize));
1785   }
1786 
1787   __ mov(R0, Rthread);
1788   int pc_offset = __ set_last_Java_frame(SP, FP, false, Rtemp); // note: FP may not need to be saved (not on x86)
1789   assert(((__ pc()) - start) == __ offset(), "warning: start differs from code_begin");
1790   __ call(call_ptr);
1791   if (pc_offset == -1) {
1792     pc_offset = __ offset();
1793   }
1794   oop_maps->add_gc_map(pc_offset, map);
1795   __ reset_last_Java_frame(Rtemp); // Rtemp free since scratched by far call
1796 
1797   if (!cause_return) {
1798     // If our stashed return pc was modified by the runtime we avoid touching it
1799     __ ldr(R3_tmp, Address(Rthread, JavaThread::saved_exception_pc_offset()));
1800     __ ldr(R2_tmp, Address(SP, RegisterSaver::LR_offset * wordSize));
1801     __ cmp(R2_tmp, R3_tmp);
1802     // Adjust return pc forward to step over the safepoint poll instruction
1803     __ add(R2_tmp, R2_tmp, 4, eq);
1804     __ str(R2_tmp, Address(SP, RegisterSaver::LR_offset * wordSize), eq);
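       // Descriptive note: the saved pc is the address of the 4-byte poll
       // instruction itself, so when the runtime has not installed a new return
       // address (the eq case above) it is bumped past the poll before returning.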
1805 
1806     // Check for pending exception
1807     __ ldr(Rtemp, Address(Rthread, Thread::pending_exception_offset()));
1808     __ cmp(Rtemp, 0);
1809 
1810     RegisterSaver::restore_live_registers(masm, false);
1811     __ pop(PC, eq);
1812     __ pop(Rexception_pc);
1813   } else {
1814     // Check for pending exception
1815     __ ldr(Rtemp, Address(Rthread, Thread::pending_exception_offset()));
1816     __ cmp(Rtemp, 0);
1817 
1818     RegisterSaver::restore_live_registers(masm);
1819     __ bx(LR, eq);
1820     __ mov(Rexception_pc, LR);
1821   }
1822 
1823   __ jump(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type, Rtemp);
1824 
1825   __ flush();
1826 
1827   return SafepointBlob::create(&buffer, oop_maps, frame_size_words);
1828 }
1829 
1830 RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) {
1831   assert(StubRoutines::forward_exception_entry() != nullptr, "must be generated before");
1832 
1833   ResourceMark rm;
1834   CodeBuffer buffer(name, 1000, 512);
1835   int frame_size_words;
1836   OopMapSet *oop_maps;
1837   int frame_complete;
1838 
1839   MacroAssembler* masm = new MacroAssembler(&buffer);
1840   Label pending_exception;
1841 
1842   int start = __ offset();
1843 
1844   oop_maps = new OopMapSet();
1845   OopMap* map = RegisterSaver::save_live_registers(masm, &frame_size_words);
1846 
1847   frame_complete = __ offset();
1848 
1849   __ mov(R0, Rthread);
1850 
1851   int pc_offset = __ set_last_Java_frame(SP, FP, false, Rtemp);
1852   assert(start == 0, "warning: start differs from code_begin");
1853   __ call(destination);
1854   if (pc_offset == -1) {
1855     pc_offset = __ offset();
1856   }
1857   oop_maps->add_gc_map(pc_offset, map);
1858   __ reset_last_Java_frame(Rtemp); // Rtemp free since scratched by far call
1859 
1860   __ ldr(R1, Address(Rthread, Thread::pending_exception_offset()));
1861   __ cbnz(R1, pending_exception);
1862 
1863   // Overwrite saved register values
1864 
1865   // Place metadata result of VM call into Rmethod
1866   __ get_vm_result_2(R1, Rtemp);
1867   __ str(R1, Address(SP, RegisterSaver::Rmethod_offset * wordSize));
1868 
1869   // Place target address (VM call result) into Rtemp
1870   __ str(R0, Address(SP, RegisterSaver::Rtemp_offset * wordSize));
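     // Descriptive note: both results are written into the register save area
     // (rather than directly into Rmethod/Rtemp) so that restore_live_registers()
     // below, which reloads every register from that area, leaves them in place.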
1871 
1872   RegisterSaver::restore_live_registers(masm);
1873   __ jump(Rtemp);
1874 
1875   __ bind(pending_exception);
1876 
1877   RegisterSaver::restore_live_registers(masm);
1878   const Register Rzero = __ zero_register(Rtemp);
1879   __ str(Rzero, Address(Rthread, JavaThread::vm_result_2_offset()));
1880   __ mov(Rexception_pc, LR);
1881   __ jump(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type, Rtemp);
1882 
1883   __ flush();
1884 
1885   return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true);
1886 }