1 /*
   2  * Copyright (c) 2008, 2022, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "asm/assembler.inline.hpp"
  27 #include "code/debugInfoRec.hpp"
  28 #include "code/icBuffer.hpp"
  29 #include "code/vtableStubs.hpp"
  30 #include "compiler/oopMap.hpp"
  31 #include "interpreter/interpreter.hpp"
  32 #include "logging/log.hpp"
  33 #include "memory/resourceArea.hpp"
  34 #include "oops/compiledICHolder.hpp"
  35 #include "oops/klass.inline.hpp"
  36 #include "prims/methodHandles.hpp"
  37 #include "runtime/jniHandles.hpp"
  38 #include "runtime/sharedRuntime.hpp"
  39 #include "runtime/safepointMechanism.hpp"
  40 #include "runtime/stubRoutines.hpp"
  41 #include "runtime/vframeArray.hpp"
  42 #include "utilities/align.hpp"
  43 #include "utilities/powerOfTwo.hpp"
  44 #include "vmreg_arm.inline.hpp"
  45 #ifdef COMPILER1
  46 #include "c1/c1_Runtime1.hpp"
  47 #endif
  48 #ifdef COMPILER2
  49 #include "opto/runtime.hpp"
  50 #endif
  51 
  52 #define __ masm->
  53 
// Describes the register save area used by the runtime stubs in this file and
// declares the two code generators that build it (save_live_registers) and
// tear it down (restore_live_registers).
class RegisterSaver {
public:

  // Special registers:
  //              32-bit ARM     64-bit ARM
  //  Rthread:       R10            R28
  //  LR:            R14            R30

  // Rthread is callee saved in the C ABI and never changed by compiled code:
  // no need to save it.

  // 2 slots for LR: the one at LR_offset and another one at R14/R30_offset.
  // The one at LR_offset is a return address that is needed by stack walking.
  // A c2 method uses LR as a standard register so it may be live when we
  // branch to the runtime. The slot at R14/R30_offset is for the value of LR
  // in case it's live in the method we are coming from.


  // Position of every saved register inside the save area.
  // save_live_registers() uses these values as stack slot indices when it
  // fills in the OopMap, and reports reg_save_size as the frame size in words.
  enum RegisterLayout {
    // The floating point save area comes first and is always reserved,
    // whether or not VFP is available at run time.
    fpu_save_size = FloatRegisterImpl::number_of_registers,
#ifndef __SOFTFP__
    D0_offset = 0,
#endif
    R0_offset = fpu_save_size,
    R1_offset,
    R2_offset,
    R3_offset,
    R4_offset,
    R5_offset,
    R6_offset,
#if (FP_REG_NUM != 7)
    // if not saved as FP
    R7_offset,
#endif
    R8_offset,
    R9_offset,
#if (FP_REG_NUM != 11)
    // if not saved as FP
    R11_offset,
#endif
    R12_offset,
    R14_offset,
    FP_offset,
    LR_offset,
    reg_save_size,

    // Aliases for registers that are known under a role name.
    Rmethod_offset = R9_offset,
    Rtemp_offset = R12_offset,
  };

  // all regs but Rthread (R10), FP (R7 or R11), SP and PC
  // (altFP_7_11 is the one among R7 and R11 which is not FP)
#define SAVED_BASE_REGS (RegisterSet(R0, R6) | RegisterSet(R8, R9) | RegisterSet(R12) | R14 | altFP_7_11)


  //  When LR may be live in the nmethod from which we are coming
  //  then lr_saved is true, the return address is saved before the
  //  call to save_live_register by the caller and LR contains the
  //  live value.

  // Emits the save sequence, stores reg_save_size into *total_frame_words and
  // returns the OopMap describing the saved registers.
  static OopMap* save_live_registers(MacroAssembler* masm,
                                     int* total_frame_words,
                                     bool lr_saved = false);
  // Emits the matching restore sequence. When restore_lr is false only FP is
  // popped at the end, leaving the LR slot on the stack.
  static void restore_live_registers(MacroAssembler* masm, bool restore_lr = true);

};
 120 
 121 
 122 
 123 
 124 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm,
 125                                            int* total_frame_words,
 126                                            bool lr_saved) {
 127   *total_frame_words = reg_save_size;
 128 
 129   OopMapSet *oop_maps = new OopMapSet();
 130   OopMap* map = new OopMap(VMRegImpl::slots_per_word * (*total_frame_words), 0);
 131 
 132   if (lr_saved) {
 133     __ push(RegisterSet(FP));
 134   } else {
 135     __ push(RegisterSet(FP) | RegisterSet(LR));
 136   }
 137   __ push(SAVED_BASE_REGS);
 138   if (HaveVFP) {
 139     if (VM_Version::has_vfp3_32()) {
 140       __ fpush(FloatRegisterSet(D16, 16));
 141     } else {
 142       if (FloatRegisterImpl::number_of_registers > 32) {
 143         assert(FloatRegisterImpl::number_of_registers == 64, "nb fp registers should be 64");
 144         __ sub(SP, SP, 32 * wordSize);
 145       }
 146     }
 147     __ fpush(FloatRegisterSet(D0, 16));
 148   } else {
 149     __ sub(SP, SP, fpu_save_size * wordSize);
 150   }
 151 
 152   int i;
 153   int j=0;
 154   for (i = R0_offset; i <= R9_offset; i++) {
 155     if (j == FP_REG_NUM) {
 156       // skip the FP register, managed below.
 157       j++;
 158     }
 159     map->set_callee_saved(VMRegImpl::stack2reg(i), as_Register(j)->as_VMReg());
 160     j++;
 161   }
 162   assert(j == R10->encoding(), "must be");
 163 #if (FP_REG_NUM != 11)
 164   // add R11, if not managed as FP
 165   map->set_callee_saved(VMRegImpl::stack2reg(R11_offset), R11->as_VMReg());
 166 #endif
 167   map->set_callee_saved(VMRegImpl::stack2reg(R12_offset), R12->as_VMReg());
 168   map->set_callee_saved(VMRegImpl::stack2reg(R14_offset), R14->as_VMReg());
 169   if (HaveVFP) {
 170     for (i = 0; i < (VM_Version::has_vfp3_32() ? 64 : 32); i+=2) {
 171       map->set_callee_saved(VMRegImpl::stack2reg(i), as_FloatRegister(i)->as_VMReg());
 172       map->set_callee_saved(VMRegImpl::stack2reg(i + 1), as_FloatRegister(i)->as_VMReg()->next());
 173     }
 174   }
 175 
 176   return map;
 177 }
 178 
 179 void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_lr) {
 180   if (HaveVFP) {
 181     __ fpop(FloatRegisterSet(D0, 16));
 182     if (VM_Version::has_vfp3_32()) {
 183       __ fpop(FloatRegisterSet(D16, 16));
 184     } else {
 185       if (FloatRegisterImpl::number_of_registers > 32) {
 186         assert(FloatRegisterImpl::number_of_registers == 64, "nb fp registers should be 64");
 187         __ add(SP, SP, 32 * wordSize);
 188       }
 189     }
 190   } else {
 191     __ add(SP, SP, fpu_save_size * wordSize);
 192   }
 193   __ pop(SAVED_BASE_REGS);
 194   if (restore_lr) {
 195     __ pop(RegisterSet(FP) | RegisterSet(LR));
 196   } else {
 197     __ pop(RegisterSet(FP));
 198   }
 199 }
 200 
 201 
 202 static void push_result_registers(MacroAssembler* masm, BasicType ret_type) {
 203 #ifdef __ABI_HARD__
 204   if (ret_type == T_DOUBLE || ret_type == T_FLOAT) {
 205     __ sub(SP, SP, 8);
 206     __ fstd(D0, Address(SP));
 207     return;
 208   }
 209 #endif // __ABI_HARD__
 210   __ raw_push(R0, R1);
 211 }
 212 
 213 static void pop_result_registers(MacroAssembler* masm, BasicType ret_type) {
 214 #ifdef __ABI_HARD__
 215   if (ret_type == T_DOUBLE || ret_type == T_FLOAT) {
 216     __ fldd(D0, Address(SP));
 217     __ add(SP, SP, 8);
 218     return;
 219   }
 220 #endif // __ABI_HARD__
 221   __ raw_pop(R0, R1);
 222 }
 223 
 224 static void push_param_registers(MacroAssembler* masm, int fp_regs_in_arguments) {
 225   // R1-R3 arguments need to be saved, but we push 4 registers for 8-byte alignment
 226   __ push(RegisterSet(R0, R3));
 227 
 228   // preserve arguments
 229   // Likely not needed as the locking code won't probably modify volatile FP registers,
 230   // but there is no way to guarantee that
 231   if (fp_regs_in_arguments) {
 232     // convert fp_regs_in_arguments to a number of double registers
 233     int double_regs_num = (fp_regs_in_arguments + 1) >> 1;
 234     __ fpush_hardfp(FloatRegisterSet(D0, double_regs_num));
 235   }
 236 }
 237 
 238 static void pop_param_registers(MacroAssembler* masm, int fp_regs_in_arguments) {
 239   if (fp_regs_in_arguments) {
 240     int double_regs_num = (fp_regs_in_arguments + 1) >> 1;
 241     __ fpop_hardfp(FloatRegisterSet(D0, double_regs_num));
 242   }
 243   __ pop(RegisterSet(R0, R3));
 244 }
 245 
 246 
 247 
// Is vector's size (in bytes) bigger than a size saved by default?
// All vector registers are saved by default on ARM, so the answer is always
// false and the size argument is not inspected.
bool SharedRuntime::is_wide_vector(int size) {
  return false;
}
 253 
// Computes the location of every argument of a native (C) call.
//
//   sig_bt             basic type of each argument; a T_LONG/T_DOUBLE entry
//                      must be followed by a T_VOID entry for its second half
//   regs               out: location assigned to each argument (T_VOID
//                      entries are marked bad)
//   regs2              must be NULL, not used on ARM
//   total_args_passed  number of entries in sig_bt and regs
//
// Returns the number of stack slots used by the arguments that did not fit
// in registers.
int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
                                        VMRegPair *regs,
                                        VMRegPair *regs2,
                                        int total_args_passed) {
  assert(regs2 == NULL, "not needed on arm");

  int slot = 0;   // next free outgoing stack slot
  int ireg = 0;   // next free core argument register (R0..R3)
#ifdef __ABI_HARD__
  int fp_slot = 0;          // next free pair of single-precision registers
  int single_fpr_slot = 0;  // when odd: free second half of a pair already
                            // opened by a previous float argument
#endif // __ABI_HARD__
  for (int i = 0; i < total_args_passed; i++) {
    switch (sig_bt[i]) {
    case T_SHORT:
    case T_CHAR:
    case T_BYTE:
    case T_BOOLEAN:
    case T_INT:
    case T_ARRAY:
    case T_OBJECT:
    case T_ADDRESS:
    case T_METADATA:
#ifndef __ABI_HARD__
    // without the hard-float ABI a float travels like an int
    case T_FLOAT:
#endif // !__ABI_HARD__
      if (ireg < 4) {
        Register r = as_Register(ireg);
        regs[i].set1(r->as_VMReg());
        ireg++;
      } else {
        regs[i].set1(VMRegImpl::stack2reg(slot));
        slot++;
      }
      break;
    case T_LONG:
#ifndef __ABI_HARD__
    // without the hard-float ABI a double travels like a long
    case T_DOUBLE:
#endif // !__ABI_HARD__
      assert((i + 1) < total_args_passed && sig_bt[i+1] == T_VOID, "missing Half" );
      if (ireg <= 2) {
#if (ALIGN_WIDE_ARGUMENTS == 1)
        if(ireg & 1) ireg++;  // Aligned location required
#endif
        Register r1 = as_Register(ireg);
        Register r2 = as_Register(ireg + 1);
        regs[i].set_pair(r2->as_VMReg(), r1->as_VMReg());
        ireg += 2;
#if (ALIGN_WIDE_ARGUMENTS == 0)
      } else if (ireg == 3) {
        // uses R3 + one stack slot
        Register r = as_Register(ireg);
        regs[i].set_pair(VMRegImpl::stack2reg(slot), r->as_VMReg());
        ireg += 1;
        slot += 1;
#endif
      } else {
        if (slot & 1) slot++; // Aligned location required
        regs[i].set_pair(VMRegImpl::stack2reg(slot+1), VMRegImpl::stack2reg(slot));
        slot += 2;
        // once a wide argument went to the stack no core register is used anymore
        ireg = 4;
      }
      break;
    case T_VOID:
      // second half of the preceding long/double, no location of its own
      regs[i].set_bad();
      break;
#ifdef __ABI_HARD__
    case T_FLOAT:
      // A register is used while pairs are left (fp_slot < 16) or while the
      // second half of an already opened pair is still free.
      if ((fp_slot < 16)||(single_fpr_slot & 1)) {
        if ((single_fpr_slot & 1) == 0) {
          // no half-used pair: open a new one
          single_fpr_slot = fp_slot;
          fp_slot += 2;
        }
        FloatRegister r = as_FloatRegister(single_fpr_slot);
        single_fpr_slot++;
        regs[i].set1(r->as_VMReg());
      } else {
        regs[i].set1(VMRegImpl::stack2reg(slot));
        slot++;
      }
      break;
    case T_DOUBLE:
      assert(ALIGN_WIDE_ARGUMENTS == 1, "ABI_HARD not supported with unaligned wide arguments");
      if (fp_slot <= 14) {
        FloatRegister r1 = as_FloatRegister(fp_slot);
        FloatRegister r2 = as_FloatRegister(fp_slot+1);
        regs[i].set_pair(r2->as_VMReg(), r1->as_VMReg());
        fp_slot += 2;
      } else {
        if(slot & 1) slot++;
        regs[i].set_pair(VMRegImpl::stack2reg(slot+1), VMRegImpl::stack2reg(slot));
        slot += 2;
        // an even value >= 16 prevents later floats from reusing a half pair
        single_fpr_slot = 16;
      }
      break;
#endif // __ABI_HARD__
    default:
      ShouldNotReachHere();
    }
  }
  return slot;
}
 356 
// Vector calling convention: not implemented in this file. The call ends in
// Unimplemented(); the return statement only satisfies the signature.
int SharedRuntime::vector_calling_convention(VMRegPair *regs,
                                             uint num_bits,
                                             uint total_args_passed) {
  Unimplemented();
  return 0;
}
 363 
// Computes the location of every argument of a compiled Java call.
//
//   sig_bt             basic type of each argument; T_VOID marks the second
//                      half of a long/double
//   regs               out: location assigned to each argument (T_VOID
//                      entries are marked bad)
//   total_args_passed  number of entries in sig_bt and regs
//
// Returns the number of stack slots used by the arguments, rounded up to an
// even number (the soft-float path returns the C convention result as is).
int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
                                           VMRegPair *regs,
                                           int total_args_passed) {
#ifdef __SOFTFP__
  // soft float is the same as the C calling convention.
  return c_calling_convention(sig_bt, regs, NULL, total_args_passed);
#endif // __SOFTFP__
  int slot = 0;        // next free outgoing stack slot
  int ireg = 0;        // next free core argument register (R0..R3)
  int freg = 0;        // next free pair of single-precision registers
  int single_fpr = 0;  // when odd: free second half of a pair already opened
                       // by a previous float argument

  for (int i = 0; i < total_args_passed; i++) {
    switch (sig_bt[i]) {
    case T_SHORT:
    case T_CHAR:
    case T_BYTE:
    case T_BOOLEAN:
    case T_INT:
    case T_ARRAY:
    case T_OBJECT:
    case T_ADDRESS:
      if (ireg < 4) {
        Register r = as_Register(ireg++);
        regs[i].set1(r->as_VMReg());
      } else {
        regs[i].set1(VMRegImpl::stack2reg(slot++));
      }
      break;
    case T_FLOAT:
      // C2 utilizes S14/S15 for mem-mem moves
      // (with COMPILER2 the limit below becomes 16-2)
      if ((freg < 16 COMPILER2_PRESENT(-2)) || (single_fpr & 1)) {
        if ((single_fpr & 1) == 0) {
          // no half-used pair: open a new one
          single_fpr = freg;
          freg += 2;
        }
        FloatRegister r = as_FloatRegister(single_fpr++);
        regs[i].set1(r->as_VMReg());
      } else {
        regs[i].set1(VMRegImpl::stack2reg(slot++));
      }
      break;
    case T_DOUBLE:
      // C2 utilizes S14/S15 for mem-mem moves
      // (with COMPILER2 the limit below becomes 14-2)
      if (freg <= 14 COMPILER2_PRESENT(-2)) {
        FloatRegister r1 = as_FloatRegister(freg);
        FloatRegister r2 = as_FloatRegister(freg + 1);
        regs[i].set_pair(r2->as_VMReg(), r1->as_VMReg());
        freg += 2;
      } else {
        // Keep internally the aligned calling convention,
        // ignoring ALIGN_WIDE_ARGUMENTS
        if (slot & 1) slot++;
        regs[i].set_pair(VMRegImpl::stack2reg(slot + 1), VMRegImpl::stack2reg(slot));
        slot += 2;
        // an even value >= 16 prevents later floats from reusing a half pair
        single_fpr = 16;
      }
      break;
    case T_LONG:
      // Keep internally the aligned calling convention,
      // ignoring ALIGN_WIDE_ARGUMENTS
      if (ireg <= 2) {
        if (ireg & 1) ireg++;
        Register r1 = as_Register(ireg);
        Register r2 = as_Register(ireg + 1);
        regs[i].set_pair(r2->as_VMReg(), r1->as_VMReg());
        ireg += 2;
      } else {
        if (slot & 1) slot++;
        regs[i].set_pair(VMRegImpl::stack2reg(slot + 1), VMRegImpl::stack2reg(slot));
        slot += 2;
        // once a long went to the stack no core register is used anymore
        ireg = 4;
      }
      break;
    case T_VOID:
      // second half of the preceding long/double, no location of its own
      regs[i].set_bad();
      break;
    default:
      ShouldNotReachHere();
    }
  }

  // keep the outgoing argument area an even number of slots
  if (slot & 1) slot++;
  return slot;
}
 449 
 450 static void patch_callers_callsite(MacroAssembler *masm) {
 451   Label skip;
 452 
 453   __ ldr(Rtemp, Address(Rmethod, Method::code_offset()));
 454   __ cbz(Rtemp, skip);
 455 
 456   // Pushing an even number of registers for stack alignment.
 457   // Selecting R9, which had to be saved anyway for some platforms.
 458   __ push(RegisterSet(R0, R3) | R9 | LR);
 459   __ fpush_hardfp(FloatRegisterSet(D0, 8));
 460 
 461   __ mov(R0, Rmethod);
 462   __ mov(R1, LR);
 463   __ call(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite));
 464 
 465   __ fpop_hardfp(FloatRegisterSet(D0, 8));
 466   __ pop(RegisterSet(R0, R3) | R9 | LR);
 467 
 468   __ bind(skip);
 469 }
 470 
// Emits the interpreter-to-compiled adapter: copies the arguments from the
// interpreter expression stack into the locations given by regs (compiled
// calling convention) and jumps to the method's from_compiled entry.
//
//   total_args_passed   number of entries in sig_bt and regs
//   comp_args_on_stack  number of stack slots needed for the compiled arguments
//   sig_bt              basic type of each argument (T_VOID for second halves)
//   regs                compiled location of each argument
void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
                                    int total_args_passed, int comp_args_on_stack,
                                    const BasicType *sig_bt, const VMRegPair *regs) {
  // TODO: ARM - ldm could maybe be used to load the arguments
  const Register tmp = Rtemp; // avoid erasing R5_mh

  // Next assert may not be needed but safer. Extra analysis required
  // if there are not enough free registers and we need to use R5 here.
  assert_different_registers(tmp, R5_mh);

  // 6243940 We might end up in handle_wrong_method if
  // the callee is deoptimized as we race thru here. If that
  // happens we don't want to take a safepoint because the
  // caller frame will look interpreted and arguments are now
  // "compiled" so it is much better to make this transition
  // invisible to the stack walking code. Unfortunately if
  // we try and find the callee by normal means a safepoint
  // is possible. So we stash the desired callee in the thread
  // and the vm will find there should this case occur.
  Address callee_target_addr(Rthread, JavaThread::callee_target_offset());
  __ str(Rmethod, callee_target_addr);


  assert_different_registers(tmp, R0, R1, R2, R3, Rsender_sp, Rmethod);

  // Rmethod was just stored in the thread, so it can hold the incoming SP
  // until it is reloaded at the end.
  const Register initial_sp = Rmethod; // temporarily scratched

  // Old code was modifying R4 but this looks unsafe (particularly with JSR292)
  assert_different_registers(tmp, R0, R1, R2, R3, Rsender_sp, initial_sp);

  __ mov(initial_sp, SP);

  // Make room for the compiled stack arguments and align SP.
  if (comp_args_on_stack) {
    __ sub_slow(SP, SP, comp_args_on_stack * VMRegImpl::stack_slot_size);
  }
  __ bic(SP, SP, StackAlignmentInBytes - 1);

  for (int i = 0; i < total_args_passed; i++) {
    if (sig_bt[i] == T_VOID) {
      // second half of a long/double, handled with the first half
      assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
      continue;
    }
    assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "must be ordered");
    // Offset of argument i from the incoming SP (kept in initial_sp).
    int arg_offset = Interpreter::expr_offset_in_bytes(total_args_passed - 1 - i);

    VMReg r_1 = regs[i].first();
    VMReg r_2 = regs[i].second();
    if (r_1->is_stack()) {
      // Destination is a compiled stack slot: copy through tmp.
      int stack_offset = r_1->reg2stack() * VMRegImpl::stack_slot_size;
      if (!r_2->is_valid()) {
        __ ldr(tmp, Address(initial_sp, arg_offset));
        __ str(tmp, Address(SP, stack_offset));
      } else {
        // Two-word value: the first word is read one stack element below arg_offset.
        __ ldr(tmp, Address(initial_sp, arg_offset - Interpreter::stackElementSize));
        __ str(tmp, Address(SP, stack_offset));
        __ ldr(tmp, Address(initial_sp, arg_offset));
        __ str(tmp, Address(SP, stack_offset + wordSize));
      }
    } else if (r_1->is_Register()) {
      if (!r_2->is_valid()) {
        __ ldr(r_1->as_Register(), Address(initial_sp, arg_offset));
      } else {
        __ ldr(r_1->as_Register(), Address(initial_sp, arg_offset - Interpreter::stackElementSize));
        __ ldr(r_2->as_Register(), Address(initial_sp, arg_offset));
      }
    } else if (r_1->is_FloatRegister()) {
#ifdef __SOFTFP__
      ShouldNotReachHere();
#endif // __SOFTFP__
      if (!r_2->is_valid()) {
        __ flds(r_1->as_FloatRegister(), Address(initial_sp, arg_offset));
      } else {
        // A double is loaded with a single 64-bit load.
        __ fldd(r_1->as_FloatRegister(), Address(initial_sp, arg_offset - Interpreter::stackElementSize));
      }
    } else {
      assert(!r_1->is_valid() && !r_2->is_valid(), "must be");
    }
  }

  // restore Rmethod (scratched for initial_sp)
  __ ldr(Rmethod, callee_target_addr);
  // Jump to the compiled entry by loading it into PC.
  __ ldr(PC, Address(Rmethod, Method::from_compiled_offset()));

}
 555 
// Emits the compiled-to-interpreter adapter: optionally patches the caller's
// call site, then copies the arguments from their compiled locations (regs)
// into a newly reserved area below SP laid out in interpreter stack elements,
// and jumps to the method's interpreter entry.
//
//   total_args_passed   number of entries in sig_bt and regs
//   comp_args_on_stack  not read by this function
//   sig_bt              basic type of each argument (T_VOID for second halves)
//   regs                compiled location of each argument
//   skip_fixup          bound right after the call site patching code, so that
//                       callers can enter the adapter without patching
static void gen_c2i_adapter(MacroAssembler *masm,
                            int total_args_passed,  int comp_args_on_stack,
                            const BasicType *sig_bt, const VMRegPair *regs,
                            Label& skip_fixup) {
  // TODO: ARM - stm could maybe be used to store the arguments
  const Register tmp = Rtemp;

  patch_callers_callsite(masm);
  __ bind(skip_fixup);

  __ mov(Rsender_sp, SP); // not yet saved


  // One interpreter stack element per argument entry.
  int extraspace = total_args_passed * Interpreter::stackElementSize;
  if (extraspace) {
    __ sub_slow(SP, SP, extraspace);
  }

  for (int i = 0; i < total_args_passed; i++) {
    if (sig_bt[i] == T_VOID) {
      // second half of a long/double, handled with the first half
      assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half");
      continue;
    }
    // Destination of argument i: the first argument gets the highest offset.
    int stack_offset = (total_args_passed - 1 - i) * Interpreter::stackElementSize;

    VMReg r_1 = regs[i].first();
    VMReg r_2 = regs[i].second();
    if (r_1->is_stack()) {
      // The compiled stack arguments now sit above the area just reserved.
      int arg_offset = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace;
      if (!r_2->is_valid()) {
        __ ldr(tmp, Address(SP, arg_offset));
        __ str(tmp, Address(SP, stack_offset));
      } else {
        // Two-word value: the first word goes one stack element below stack_offset.
        __ ldr(tmp, Address(SP, arg_offset));
        __ str(tmp, Address(SP, stack_offset - Interpreter::stackElementSize));
        __ ldr(tmp, Address(SP, arg_offset + wordSize));
        __ str(tmp, Address(SP, stack_offset));
      }
    } else if (r_1->is_Register()) {
      if (!r_2->is_valid()) {
        __ str(r_1->as_Register(), Address(SP, stack_offset));
      } else {
        __ str(r_1->as_Register(), Address(SP, stack_offset - Interpreter::stackElementSize));
        __ str(r_2->as_Register(), Address(SP, stack_offset));
      }
    } else if (r_1->is_FloatRegister()) {
#ifdef __SOFTFP__
      ShouldNotReachHere();
#endif // __SOFTFP__
      if (!r_2->is_valid()) {
        __ fsts(r_1->as_FloatRegister(), Address(SP, stack_offset));
      } else {
        // A double is stored with a single 64-bit store.
        __ fstd(r_1->as_FloatRegister(), Address(SP, stack_offset - Interpreter::stackElementSize));
      }
    } else {
      assert(!r_1->is_valid() && !r_2->is_valid(), "must be");
    }
  }

  // Jump to the interpreter entry by loading it into PC.
  __ ldr(PC, Address(Rmethod, Method::interpreter_entry_offset()));

}
 618 
// Generates the three adapter entry points for one signature, back to back in
// masm's code buffer, and registers them through AdapterHandlerLibrary::new_entry:
//   - i2c entry:            interpreter to compiled adapter
//   - c2i unverified entry: receiver klass check, then the c2i adapter
//   - c2i entry:            compiled to interpreter adapter
AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
                                                            int total_args_passed,
                                                            int comp_args_on_stack,
                                                            const BasicType *sig_bt,
                                                            const VMRegPair *regs,
                                                            AdapterFingerPrint* fingerprint) {
  address i2c_entry = __ pc();
  gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);

  address c2i_unverified_entry = __ pc();
  Label skip_fixup;
  const Register receiver       = R0;
  const Register holder_klass   = Rtemp; // XXX should be OK for C2 but not 100% sure
  const Register receiver_klass = R4;

  // Compare the receiver's klass with the klass recorded in the
  // CompiledICHolder (Ricklass) and fetch the target Method into Rmethod.
  __ load_klass(receiver_klass, receiver);
  __ ldr(holder_klass, Address(Ricklass, CompiledICHolder::holder_klass_offset()));
  __ ldr(Rmethod, Address(Ricklass, CompiledICHolder::holder_metadata_offset()));
  __ cmp(receiver_klass, holder_klass);

  // The next two instructions only execute when the klasses matched. After
  // them "eq" means: klass matched and Method::code is 0. In that case enter
  // the adapter at skip_fixup; in every other case go to the IC miss stub.
  // Note that Rtemp is reused here although it is also holder_klass.
  __ ldr(Rtemp, Address(Rmethod, Method::code_offset()), eq);
  __ cmp(Rtemp, 0, eq);
  __ b(skip_fixup, eq);
  __ jump(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type, noreg, ne);

  address c2i_entry = __ pc();
  gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup);

  __ flush();
  return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry);
}
 650 
 651 
 652 static int reg2offset_in(VMReg r) {
 653   // Account for saved FP and LR
 654   return r->reg2stack() * VMRegImpl::stack_slot_size + 2*wordSize;
 655 }
 656 
 657 static int reg2offset_out(VMReg r) {
 658   return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
 659 }
 660 
 661 
 662 static void verify_oop_args(MacroAssembler* masm,
 663                             const methodHandle& method,
 664                             const BasicType* sig_bt,
 665                             const VMRegPair* regs) {
 666   Register temp_reg = Rmethod;  // not part of any compiled calling seq
 667   if (VerifyOops) {
 668     for (int i = 0; i < method->size_of_parameters(); i++) {
 669       if (sig_bt[i] == T_OBJECT || sig_bt[i] == T_ARRAY) {
 670         VMReg r = regs[i].first();
 671         assert(r->is_valid(), "bad oop arg");
 672         if (r->is_stack()) {
 673           __ ldr(temp_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size));
 674           __ verify_oop(temp_reg);
 675         } else {
 676           __ verify_oop(r->as_Register());
 677         }
 678       }
 679     }
 680   }
 681 }
 682 
 683 static void gen_special_dispatch(MacroAssembler* masm,
 684                                  const methodHandle& method,
 685                                  const BasicType* sig_bt,
 686                                  const VMRegPair* regs) {
 687   verify_oop_args(masm, method, sig_bt, regs);
 688   vmIntrinsics::ID iid = method->intrinsic_id();
 689 
 690   // Now write the args into the outgoing interpreter space
 691   bool     has_receiver   = false;
 692   Register receiver_reg   = noreg;
 693   int      member_arg_pos = -1;
 694   Register member_reg     = noreg;
 695   int      ref_kind       = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid);
 696   if (ref_kind != 0) {
 697     member_arg_pos = method->size_of_parameters() - 1;  // trailing MemberName argument
 698     member_reg = Rmethod;  // known to be free at this point
 699     has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind);
 700   } else if (iid == vmIntrinsics::_invokeBasic) {
 701     has_receiver = true;
 702   } else {
 703     fatal("unexpected intrinsic id %d", vmIntrinsics::as_int(iid));
 704   }
 705 
 706   if (member_reg != noreg) {
 707     // Load the member_arg into register, if necessary.
 708     SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs);
 709     VMReg r = regs[member_arg_pos].first();
 710     if (r->is_stack()) {
 711       __ ldr(member_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size));
 712     } else {
 713       // no data motion is needed
 714       member_reg = r->as_Register();
 715     }
 716   }
 717 
 718   if (has_receiver) {
 719     // Make sure the receiver is loaded into a register.
 720     assert(method->size_of_parameters() > 0, "oob");
 721     assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object");
 722     VMReg r = regs[0].first();
 723     assert(r->is_valid(), "bad receiver arg");
 724     if (r->is_stack()) {
 725       // Porting note:  This assumes that compiled calling conventions always
 726       // pass the receiver oop in a register.  If this is not true on some
 727       // platform, pick a temp and load the receiver from stack.
 728       assert(false, "receiver always in a register");
 729       receiver_reg = j_rarg0;  // known to be free at this point
 730       __ ldr(receiver_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size));
 731     } else {
 732       // no data motion is needed
 733       receiver_reg = r->as_Register();
 734     }
 735   }
 736 
 737   // Figure out which address we are really jumping to:
 738   MethodHandles::generate_method_handle_dispatch(masm, iid,
 739                                                  receiver_reg, member_reg, /*for_compiler_entry:*/ true);
 740 }
 741 
 742 // ---------------------------------------------------------------------------
 743 // Generate a native wrapper for a given method.  The method takes arguments
 744 // in the Java compiled code convention, marshals them to the native
 745 // convention (handlizes oops, etc), transitions to native, makes the call,
 746 // returns to java state (possibly blocking), unhandlizes any result and
 747 // returns.
 748 nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
 749                                                 const methodHandle& method,
 750                                                 int compile_id,
 751                                                 BasicType* in_sig_bt,
 752                                                 VMRegPair* in_regs,
 753                                                 BasicType ret_type) {
 754   if (method->is_method_handle_intrinsic()) {
 755     vmIntrinsics::ID iid = method->intrinsic_id();
 756     intptr_t start = (intptr_t)__ pc();
 757     int vep_offset = ((intptr_t)__ pc()) - start;
 758     gen_special_dispatch(masm,
 759                          method,
 760                          in_sig_bt,
 761                          in_regs);
 762     int frame_complete = ((intptr_t)__ pc()) - start;  // not complete, period
 763     __ flush();
 764     int stack_slots = SharedRuntime::out_preserve_stack_slots();  // no out slots at all, actually
 765     return nmethod::new_native_nmethod(method,
 766                                        compile_id,
 767                                        masm->code(),
 768                                        vep_offset,
 769                                        frame_complete,
 770                                        stack_slots / VMRegImpl::slots_per_word,
 771                                        in_ByteSize(-1),
 772                                        (OopMapSet*)NULL);
 773   }
 774   // Arguments for JNI method include JNIEnv and Class if static
 775 
 776   // Usage of Rtemp should be OK since scratched by native call
 777 
 778   bool method_is_static = method->is_static();
 779 
 780   const int total_in_args = method->size_of_parameters();
 781   int total_c_args = total_in_args + (method_is_static ? 2 : 1);
 782 
 783   BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
 784   VMRegPair* out_regs   = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args);
 785 
 786   int argc = 0;
 787   out_sig_bt[argc++] = T_ADDRESS;
 788   if (method_is_static) {
 789     out_sig_bt[argc++] = T_OBJECT;
 790   }
 791 
 792   int i;
 793   for (i = 0; i < total_in_args; i++) {
 794     out_sig_bt[argc++] = in_sig_bt[i];
 795   }
 796 
 797   int out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args);
 798   int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots;
 799   // Since object arguments need to be wrapped, we must preserve space
 800   // for those object arguments which come in registers (GPR_PARAMS maximum)
 801   // plus one more slot for Klass handle (for static methods)
 802   int oop_handle_offset = stack_slots;
 803   stack_slots += (GPR_PARAMS + 1) * VMRegImpl::slots_per_word;
 804 
 805   // Space to save return address and FP
 806   stack_slots += 2 * VMRegImpl::slots_per_word;
 807 
 808   // Calculate the final stack size taking account of alignment
 809   stack_slots = align_up(stack_slots, StackAlignmentInBytes / VMRegImpl::stack_slot_size);
 810   int stack_size = stack_slots * VMRegImpl::stack_slot_size;
 811 
 812   // Unverified entry point
 813   address start = __ pc();
 814 
 815   // Inline cache check, same as in C1_MacroAssembler::inline_cache_check()
 816   const Register receiver = R0; // see receiverOpr()
 817   __ load_klass(Rtemp, receiver);
 818   __ cmp(Rtemp, Ricklass);
 819   Label verified;
 820 
 821   __ b(verified, eq); // jump over alignment no-ops too
 822   __ jump(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type, Rtemp);
 823   __ align(CodeEntryAlignment);
 824 
 825   // Verified entry point
 826   __ bind(verified);
 827   int vep_offset = __ pc() - start;
 828 
 829 
 830   if ((InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) || (method->intrinsic_id() == vmIntrinsics::_identityHashCode)) {
 831     // Object.hashCode, System.identityHashCode can pull the hashCode from the header word
 832     // instead of doing a full VM transition once it's been computed.
 833     Label slow_case;
 834     const Register obj_reg = R0;
 835 
 836     // Unlike for Object.hashCode, System.identityHashCode is static method and
 837     // gets object as argument instead of the receiver.
 838     if (method->intrinsic_id() == vmIntrinsics::_identityHashCode) {
 839       assert(method->is_static(), "method should be static");
 840       // return 0 for null reference input, return val = R0 = obj_reg = 0
 841       __ cmp(obj_reg, 0);
 842       __ bx(LR, eq);
 843     }
 844 
 845     __ ldr(Rtemp, Address(obj_reg, oopDesc::mark_offset_in_bytes()));
 846 
 847     assert(markWord::unlocked_value == 1, "adjust this code");
 848     __ tbz(Rtemp, exact_log2(markWord::unlocked_value), slow_case);
 849 
 850     __ bics(Rtemp, Rtemp, ~markWord::hash_mask_in_place);
 851     __ mov(R0, AsmOperand(Rtemp, lsr, markWord::hash_shift), ne);
 852     __ bx(LR, ne);
 853 
 854     __ bind(slow_case);
 855   }
 856 
 857   // Bang stack pages
 858   __ arm_stack_overflow_check(stack_size, Rtemp);
 859 
 860   // Setup frame linkage
 861   __ raw_push(FP, LR);
 862   __ mov(FP, SP);
 863   __ sub_slow(SP, SP, stack_size - 2*wordSize);
 864 
 865   int frame_complete = __ pc() - start;
 866 
 867   OopMapSet* oop_maps = new OopMapSet();
 868   OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/);
 869   const int extra_args = method_is_static ? 2 : 1;
 870   int receiver_offset = -1;
 871   int fp_regs_in_arguments = 0;
 872 
 873   for (i = total_in_args; --i >= 0; ) {
 874     switch (in_sig_bt[i]) {
 875     case T_ARRAY:
 876     case T_OBJECT: {
 877       VMReg src = in_regs[i].first();
 878       VMReg dst = out_regs[i + extra_args].first();
 879       if (src->is_stack()) {
 880         assert(dst->is_stack(), "must be");
 881         assert(i != 0, "Incoming receiver is always in a register");
 882         __ ldr(Rtemp, Address(FP, reg2offset_in(src)));
 883         __ cmp(Rtemp, 0);
 884         __ add(Rtemp, FP, reg2offset_in(src), ne);
 885         __ str(Rtemp, Address(SP, reg2offset_out(dst)));
 886         int offset_in_older_frame = src->reg2stack() + SharedRuntime::out_preserve_stack_slots();
 887         map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + stack_slots));
 888       } else {
 889         int offset = oop_handle_offset * VMRegImpl::stack_slot_size;
 890         __ str(src->as_Register(), Address(SP, offset));
 891         map->set_oop(VMRegImpl::stack2reg(oop_handle_offset));
 892         if ((i == 0) && (!method_is_static)) {
 893           receiver_offset = offset;
 894         }
 895         oop_handle_offset += VMRegImpl::slots_per_word;
 896 
 897         if (dst->is_stack()) {
 898           __ movs(Rtemp, src->as_Register());
 899           __ add(Rtemp, SP, offset, ne);
 900           __ str(Rtemp, Address(SP, reg2offset_out(dst)));
 901         } else {
 902           __ movs(dst->as_Register(), src->as_Register());
 903           __ add(dst->as_Register(), SP, offset, ne);
 904         }
 905       }
 906     }
 907 
 908     case T_VOID:
 909       break;
 910 
 911 
 912 #ifdef __SOFTFP__
 913     case T_DOUBLE:
 914 #endif
 915     case T_LONG: {
 916       VMReg src_1 = in_regs[i].first();
 917       VMReg src_2 = in_regs[i].second();
 918       VMReg dst_1 = out_regs[i + extra_args].first();
 919       VMReg dst_2 = out_regs[i + extra_args].second();
 920 #if (ALIGN_WIDE_ARGUMENTS == 0)
 921       // C convention can mix a register and a stack slot for a
 922       // 64-bits native argument.
 923 
 924       // Note: following code should work independently of whether
 925       // the Java calling convention follows C convention or whether
 926       // it aligns 64-bit values.
 927       if (dst_2->is_Register()) {
 928         if (src_1->as_Register() != dst_1->as_Register()) {
 929           assert(src_1->as_Register() != dst_2->as_Register() &&
 930                  src_2->as_Register() != dst_2->as_Register(), "must be");
 931           __ mov(dst_2->as_Register(), src_2->as_Register());
 932           __ mov(dst_1->as_Register(), src_1->as_Register());
 933         } else {
 934           assert(src_2->as_Register() == dst_2->as_Register(), "must be");
 935         }
 936       } else if (src_2->is_Register()) {
 937         if (dst_1->is_Register()) {
 938           // dst mixes a register and a stack slot
 939           assert(dst_2->is_stack() && src_1->is_Register() && src_2->is_Register(), "must be");
 940           assert(src_1->as_Register() != dst_1->as_Register(), "must be");
 941           __ str(src_2->as_Register(), Address(SP, reg2offset_out(dst_2)));
 942           __ mov(dst_1->as_Register(), src_1->as_Register());
 943         } else {
 944           // registers to stack slots
 945           assert(dst_2->is_stack() && src_1->is_Register() && src_2->is_Register(), "must be");
 946           __ str(src_1->as_Register(), Address(SP, reg2offset_out(dst_1)));
 947           __ str(src_2->as_Register(), Address(SP, reg2offset_out(dst_2)));
 948         }
 949       } else if (src_1->is_Register()) {
 950         if (dst_1->is_Register()) {
 951           // src and dst must be R3 + stack slot
 952           assert(dst_1->as_Register() == src_1->as_Register(), "must be");
 953           __ ldr(Rtemp,    Address(FP, reg2offset_in(src_2)));
 954           __ str(Rtemp,    Address(SP, reg2offset_out(dst_2)));
 955         } else {
 956           // <R3,stack> -> <stack,stack>
 957           assert(dst_2->is_stack() && src_2->is_stack(), "must be");
 958           __ ldr(LR, Address(FP, reg2offset_in(src_2)));
 959           __ str(src_1->as_Register(), Address(SP, reg2offset_out(dst_1)));
 960           __ str(LR, Address(SP, reg2offset_out(dst_2)));
 961         }
 962       } else {
 963         assert(src_2->is_stack() && dst_1->is_stack() && dst_2->is_stack(), "must be");
 964         __ ldr(Rtemp, Address(FP, reg2offset_in(src_1)));
 965         __ ldr(LR,    Address(FP, reg2offset_in(src_2)));
 966         __ str(Rtemp, Address(SP, reg2offset_out(dst_1)));
 967         __ str(LR,    Address(SP, reg2offset_out(dst_2)));
 968       }
 969 #else // ALIGN_WIDE_ARGUMENTS
 970       if (src_1->is_stack()) {
 971         assert(src_2->is_stack() && dst_1->is_stack() && dst_2->is_stack(), "must be");
 972         __ ldr(Rtemp, Address(FP, reg2offset_in(src_1)));
 973         __ ldr(LR,    Address(FP, reg2offset_in(src_2)));
 974         __ str(Rtemp, Address(SP, reg2offset_out(dst_1)));
 975         __ str(LR,    Address(SP, reg2offset_out(dst_2)));
 976       } else if (dst_1->is_stack()) {
 977         assert(dst_2->is_stack() && src_1->is_Register() && src_2->is_Register(), "must be");
 978         __ str(src_1->as_Register(), Address(SP, reg2offset_out(dst_1)));
 979         __ str(src_2->as_Register(), Address(SP, reg2offset_out(dst_2)));
 980       } else if (src_1->as_Register() == dst_1->as_Register()) {
 981         assert(src_2->as_Register() == dst_2->as_Register(), "must be");
 982       } else {
 983         assert(src_1->as_Register() != dst_2->as_Register() &&
 984                src_2->as_Register() != dst_2->as_Register(), "must be");
 985         __ mov(dst_2->as_Register(), src_2->as_Register());
 986         __ mov(dst_1->as_Register(), src_1->as_Register());
 987       }
 988 #endif // ALIGN_WIDE_ARGUMENTS
 989       break;
 990     }
 991 
 992 #if (!defined __SOFTFP__ && !defined __ABI_HARD__)
 993     case T_FLOAT: {
 994       VMReg src = in_regs[i].first();
 995       VMReg dst = out_regs[i + extra_args].first();
 996       if (src->is_stack()) {
 997         assert(dst->is_stack(), "must be");
 998         __ ldr(Rtemp, Address(FP, reg2offset_in(src)));
 999         __ str(Rtemp, Address(SP, reg2offset_out(dst)));
1000       } else if (dst->is_stack()) {
1001         __ fsts(src->as_FloatRegister(), Address(SP, reg2offset_out(dst)));
1002       } else {
1003         assert(src->is_FloatRegister() && dst->is_Register(), "must be");
1004         __ fmrs(dst->as_Register(), src->as_FloatRegister());
1005       }
1006       break;
1007     }
1008 
1009     case T_DOUBLE: {
1010       VMReg src_1 = in_regs[i].first();
1011       VMReg src_2 = in_regs[i].second();
1012       VMReg dst_1 = out_regs[i + extra_args].first();
1013       VMReg dst_2 = out_regs[i + extra_args].second();
1014       if (src_1->is_stack()) {
1015         assert(src_2->is_stack() && dst_1->is_stack() && dst_2->is_stack(), "must be");
1016         __ ldr(Rtemp, Address(FP, reg2offset_in(src_1)));
1017         __ ldr(LR,    Address(FP, reg2offset_in(src_2)));
1018         __ str(Rtemp, Address(SP, reg2offset_out(dst_1)));
1019         __ str(LR,    Address(SP, reg2offset_out(dst_2)));
1020       } else if (dst_1->is_stack()) {
1021         assert(dst_2->is_stack() && src_1->is_FloatRegister(), "must be");
1022         __ fstd(src_1->as_FloatRegister(), Address(SP, reg2offset_out(dst_1)));
1023 #if (ALIGN_WIDE_ARGUMENTS == 0)
1024       } else if (dst_2->is_stack()) {
1025         assert(! src_2->is_stack(), "must be"); // assuming internal java convention is aligned
1026         // double register must go into R3 + one stack slot
1027         __ fmrrd(dst_1->as_Register(), Rtemp, src_1->as_FloatRegister());
1028         __ str(Rtemp, Address(SP, reg2offset_out(dst_2)));
1029 #endif
1030       } else {
1031         assert(src_1->is_FloatRegister() && dst_1->is_Register() && dst_2->is_Register(), "must be");
1032         __ fmrrd(dst_1->as_Register(), dst_2->as_Register(), src_1->as_FloatRegister());
1033       }
1034       break;
1035     }
1036 #endif // __SOFTFP__
1037 
1038 #ifdef __ABI_HARD__
1039     case T_FLOAT: {
1040       VMReg src = in_regs[i].first();
1041       VMReg dst = out_regs[i + extra_args].first();
1042       if (src->is_stack()) {
1043         if (dst->is_stack()) {
1044           __ ldr(Rtemp, Address(FP, reg2offset_in(src)));
1045           __ str(Rtemp, Address(SP, reg2offset_out(dst)));
1046         } else {
1047           // C2 Java calling convention does not populate S14 and S15, therefore
1048           // those need to be loaded from stack here
1049           __ flds(dst->as_FloatRegister(), Address(FP, reg2offset_in(src)));
1050           fp_regs_in_arguments++;
1051         }
1052       } else {
1053         assert(src->is_FloatRegister(), "must be");
1054         fp_regs_in_arguments++;
1055       }
1056       break;
1057     }
1058     case T_DOUBLE: {
1059       VMReg src_1 = in_regs[i].first();
1060       VMReg src_2 = in_regs[i].second();
1061       VMReg dst_1 = out_regs[i + extra_args].first();
1062       VMReg dst_2 = out_regs[i + extra_args].second();
1063       if (src_1->is_stack()) {
1064         if (dst_1->is_stack()) {
1065           assert(dst_2->is_stack(), "must be");
1066           __ ldr(Rtemp, Address(FP, reg2offset_in(src_1)));
1067           __ ldr(LR,    Address(FP, reg2offset_in(src_2)));
1068           __ str(Rtemp, Address(SP, reg2offset_out(dst_1)));
1069           __ str(LR,    Address(SP, reg2offset_out(dst_2)));
1070         } else {
1071           // C2 Java calling convention does not populate S14 and S15, therefore
1072           // those need to be loaded from stack here
1073           __ fldd(dst_1->as_FloatRegister(), Address(FP, reg2offset_in(src_1)));
1074           fp_regs_in_arguments += 2;
1075         }
1076       } else {
1077         assert(src_1->is_FloatRegister() && src_2->is_FloatRegister(), "must be");
1078         fp_regs_in_arguments += 2;
1079       }
1080       break;
1081     }
1082 #endif // __ABI_HARD__
1083 
1084     default: {
1085       assert(in_sig_bt[i] != T_ADDRESS, "found T_ADDRESS in java args");
1086       VMReg src = in_regs[i].first();
1087       VMReg dst = out_regs[i + extra_args].first();
1088       if (src->is_stack()) {
1089         assert(dst->is_stack(), "must be");
1090         __ ldr(Rtemp, Address(FP, reg2offset_in(src)));
1091         __ str(Rtemp, Address(SP, reg2offset_out(dst)));
1092       } else if (dst->is_stack()) {
1093         __ str(src->as_Register(), Address(SP, reg2offset_out(dst)));
1094       } else {
1095         assert(src->is_Register() && dst->is_Register(), "must be");
1096         __ mov(dst->as_Register(), src->as_Register());
1097       }
1098     }
1099     }
1100   }
1101 
1102   // Get Klass mirror
1103   int klass_offset = -1;
1104   if (method_is_static) {
1105     klass_offset = oop_handle_offset * VMRegImpl::stack_slot_size;
1106     __ mov_oop(Rtemp, JNIHandles::make_local(method->method_holder()->java_mirror()));
1107     __ add(c_rarg1, SP, klass_offset);
1108     __ str(Rtemp, Address(SP, klass_offset));
1109     map->set_oop(VMRegImpl::stack2reg(oop_handle_offset));
1110   }
1111 
1112   // the PC offset given to add_gc_map must match the PC saved in set_last_Java_frame
1113   int pc_offset = __ set_last_Java_frame(SP, FP, true, Rtemp);
1114   assert(((__ pc()) - start) == __ offset(), "warning: start differs from code_begin");
1115   oop_maps->add_gc_map(pc_offset, map);
1116 
1117   // Order last_Java_pc store with the thread state transition (to _thread_in_native)
1118   __ membar(MacroAssembler::StoreStore, Rtemp);
1119 
1120   // RedefineClasses() tracing support for obsolete method entry
1121   if (log_is_enabled(Trace, redefine, class, obsolete)) {
1122     __ save_caller_save_registers();
1123     __ mov(R0, Rthread);
1124     __ mov_metadata(R1, method());
1125     __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), R0, R1);
1126     __ restore_caller_save_registers();
1127   }
1128 
1129   const Register sync_handle = R5;
1130   const Register sync_obj    = R6;
1131   const Register disp_hdr    = altFP_7_11;
1132   const Register tmp         = R8;
1133 
1134   Label slow_lock, lock_done, fast_lock;
1135   if (method->is_synchronized()) {
1136     // The first argument is a handle to sync object (a class or an instance)
1137     __ ldr(sync_obj, Address(R1));
1138     // Remember the handle for the unlocking code
1139     __ mov(sync_handle, R1);
1140 
1141     // TODO: Implement fast-locking.
1142     __ b(slow_lock);
1143     __ bind(lock_done);
1144   }
1145 
1146   // Get JNIEnv*
1147   __ add(c_rarg0, Rthread, in_bytes(JavaThread::jni_environment_offset()));
1148 
1149   // Perform thread state transition
1150   __ mov(Rtemp, _thread_in_native);
1151   __ str(Rtemp, Address(Rthread, JavaThread::thread_state_offset()));
1152 
1153   // Finally, call the native method
1154   __ call(method->native_function());
1155 
1156   // Set FPSCR/FPCR to a known state
1157   if (AlwaysRestoreFPU) {
1158     __ restore_default_fp_mode();
1159   }
1160 
1161   // Ensure a Boolean result is mapped to 0..1
1162   if (ret_type == T_BOOLEAN) {
1163     __ c2bool(R0);
1164   }
1165 
1166   // Do a safepoint check while thread is in transition state
1167   Label call_safepoint_runtime, return_to_java;
1168   __ mov(Rtemp, _thread_in_native_trans);
1169   __ str_32(Rtemp, Address(Rthread, JavaThread::thread_state_offset()));
1170 
1171   // make sure the store is observed before reading the SafepointSynchronize state and further mem refs
1172   __ membar(MacroAssembler::Membar_mask_bits(MacroAssembler::StoreLoad | MacroAssembler::StoreStore), Rtemp);
1173 
1174   __ safepoint_poll(R2, call_safepoint_runtime);
1175   __ ldr_u32(R3, Address(Rthread, JavaThread::suspend_flags_offset()));
1176   __ cmp(R3, 0);
1177   __ b(call_safepoint_runtime, ne);
1178 
1179   __ bind(return_to_java);
1180 
1181   // Perform thread state transition and reguard stack yellow pages if needed
1182   Label reguard, reguard_done;
1183   __ mov(Rtemp, _thread_in_Java);
1184   __ ldr_s32(R2, Address(Rthread, JavaThread::stack_guard_state_offset()));
1185   __ str_32(Rtemp, Address(Rthread, JavaThread::thread_state_offset()));
1186 
1187   __ cmp(R2, StackOverflow::stack_guard_yellow_reserved_disabled);
1188   __ b(reguard, eq);
1189   __ bind(reguard_done);
1190 
1191   Label slow_unlock, unlock_done;
1192   if (method->is_synchronized()) {
1193     __ ldr(sync_obj, Address(sync_handle));
1194     // TODO: Implement fast-unlocking.
1195     __ b(slow_unlock);
1196     __ bind(unlock_done);
1197   }
1198 
1199   // Set last java frame and handle block to zero
1200   __ ldr(LR, Address(Rthread, JavaThread::active_handles_offset()));
1201   __ reset_last_Java_frame(Rtemp); // sets Rtemp to 0 on 32-bit ARM
1202 
1203   __ str_32(Rtemp, Address(LR, JNIHandleBlock::top_offset_in_bytes()));
1204   if (CheckJNICalls) {
1205     __ str(__ zero_register(Rtemp), Address(Rthread, JavaThread::pending_jni_exception_check_fn_offset()));
1206   }
1207 
1208   // Unbox oop result, e.g. JNIHandles::resolve value in R0.
1209   if (ret_type == T_OBJECT || ret_type == T_ARRAY) {
1210     __ resolve_jobject(R0,      // value
1211                        Rtemp,   // tmp1
1212                        R1_tmp); // tmp2
1213   }
1214 
1215   // Any exception pending?
1216   __ ldr(Rtemp, Address(Rthread, Thread::pending_exception_offset()));
1217   __ mov(SP, FP);
1218 
1219   __ cmp(Rtemp, 0);
1220   // Pop the frame and return if no exception pending
1221   __ pop(RegisterSet(FP) | RegisterSet(PC), eq);
1222   // Pop the frame and forward the exception. Rexception_pc contains return address.
1223   __ ldr(FP, Address(SP, wordSize, post_indexed), ne);
1224   __ ldr(Rexception_pc, Address(SP, wordSize, post_indexed), ne);
1225   __ jump(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type, Rtemp);
1226 
1227   // Safepoint operation and/or pending suspend request is in progress.
1228   // Save the return values and call the runtime function by hand.
1229   __ bind(call_safepoint_runtime);
1230   push_result_registers(masm, ret_type);
1231   __ mov(R0, Rthread);
1232   __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans));
1233   pop_result_registers(masm, ret_type);
1234   __ b(return_to_java);
1235 
1236   // Reguard stack pages. Save native results around a call to C runtime.
1237   __ bind(reguard);
1238   push_result_registers(masm, ret_type);
1239   __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages));
1240   pop_result_registers(masm, ret_type);
1241   __ b(reguard_done);
1242 
1243   if (method->is_synchronized()) {
1244     // Locking slow case
1245     __ bind(slow_lock);
1246 
1247     push_param_registers(masm, fp_regs_in_arguments);
1248 
1249     // last_Java_frame is already set, so do call_VM manually; no exception can occur
1250     __ mov(R0, sync_obj);
1251     __ mov(R1, Rthread);
1252     __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C));
1253 
1254     pop_param_registers(masm, fp_regs_in_arguments);
1255 
1256     __ b(lock_done);
1257 
1258     // Unlocking slow case
1259     __ bind(slow_unlock);
1260 
1261     push_result_registers(masm, ret_type);
1262 
1263     // Clear pending exception before reentering VM.
1264     // Can store the oop in register since it is a leaf call.
1265     assert_different_registers(Rtmp_save1, sync_obj, disp_hdr);
1266     __ ldr(Rtmp_save1, Address(Rthread, Thread::pending_exception_offset()));
1267     Register zero = __ zero_register(Rtemp);
1268     __ str(zero, Address(Rthread, Thread::pending_exception_offset()));
1269     __ mov(R0, sync_obj);
1270     __ mov(R1, Rthread);
1271     __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C));
1272     __ str(Rtmp_save1, Address(Rthread, Thread::pending_exception_offset()));
1273 
1274     pop_result_registers(masm, ret_type);
1275 
1276     __ b(unlock_done);
1277   }
1278 
1279   __ flush();
1280   return nmethod::new_native_nmethod(method,
1281                                      compile_id,
1282                                      masm->code(),
1283                                      vep_offset,
1284                                      frame_complete,
1285                                      stack_slots / VMRegImpl::slots_per_word,
1286                                      in_ByteSize(method_is_static ? klass_offset : receiver_offset),
1287                                      oop_maps);
1288 }
1289 
1290 // Returns the size adjustment, in words, applied to a c2i adapter activation
1291 // during deoptimization: one stack element per callee local that is not a parameter.
1292 int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) {
1293   const int non_parameter_locals = callee_locals - callee_parameters;
1294   return non_parameter_locals * Interpreter::stackElementWords;
1295 }
1296 
1297 
1298 // Stack slots lying between the incoming argument block and the start of a
1299 // new frame.  The prolog must add this many slots to the stack and the
1300 // epilog must remove them again.  They account for FP + LR.
1301 uint SharedRuntime::in_preserve_stack_slots() {
1302   const int saved_words = 2;  // FP + LR
1303   return saved_words * VMRegImpl::slots_per_word;
1304 }
1305 
1306 uint SharedRuntime::out_preserve_stack_slots() {
1307   return 0u;  // this port reports zero out-preserve stack slots
1308 }
1309 
1310 //------------------------------generate_deopt_blob----------------------------
     // Emits the "deopt_blob" stub and installs it in _deopt_blob.
     //
     // The generated code has three entry points.  Each one saves the live
     // registers, records an unpack kind in Rkind and joins the shared path at 'cont':
     //   - 'start'                  : Rkind = Unpack_deopt
     //   - exception_offset         : stores Rexception_obj / Rexception_pc into the
     //                                thread, then falls through to
     //   - exception_in_tls_offset  : Rkind = Unpack_exception
     //   - reexecute_offset         : Rkind = Unpack_reexecute
     //
     // The shared path then
     //   1. calls Deoptimization::fetch_unroll_info and keeps the result in Rublock,
     //   2. pops the register-save frame and the deoptimized frame,
     //   3. pushes number_of_frames interpreter frames (only FP/LR, the sender SP
     //      and a cleared last SP are written here),
     //   4. re-pushes a self-frame and calls Deoptimization::unpack_frames,
     //   5. reloads the return values and returns by popping FP and PC.
1311 void SharedRuntime::generate_deopt_blob() {
1312   ResourceMark rm;
1313   CodeBuffer buffer("deopt_blob", 1024, 1024);
1314   int frame_size_in_words;      // written by RegisterSaver::save_live_registers
1315   OopMapSet* oop_maps;
1316   int reexecute_offset;         // the three *_offset values are entry points,
1317   int exception_in_tls_offset;  // measured in bytes from 'start'
1318   int exception_offset;
1319
1320   MacroAssembler* masm = new MacroAssembler(&buffer);
1321   Label cont;                   // common continuation of all three entry points
1322   const Register Rkind   = R9; // caller-saved
1323   const Register Rublock = R6;
1324   const Register Rsender = altFP_7_11;
1325   assert_different_registers(Rkind, Rublock, Rsender, Rexception_obj, Rexception_pc, R0, R1, R2, R3, R8, Rtemp);
1326
1327   address start = __ pc();
1328
1329   oop_maps = new OopMapSet();
1330   // LR saved by caller (can be live in c2 method)
1331
1332   // A deopt is a case where LR may be live in the c2 nmethod. So it's
1333   // not possible to call the deopt blob from the nmethod and pass the
1334   // address of the deopt handler of the nmethod in LR. What happens
1335   // now is that the caller of the deopt blob pushes the current
1336   // address so the deopt blob doesn't have to do it. This way LR can
1337   // be preserved, contains the live value from the nmethod and is
1338   // saved at R14/R30_offset here.
       //
       // Entry point at 'start': plain deoptimization.
       // NOTE(review): the extra 'true' argument is only passed on this entry;
       // presumably it tells the saver that the caller already pushed the return
       // address -- confirm against RegisterSaver::save_live_registers.
1339   OopMap* map = RegisterSaver::save_live_registers(masm, &frame_size_in_words, true);
1340   __ mov(Rkind, Deoptimization::Unpack_deopt);
1341   __ b(cont);
1342
       // Entry point: exception still held in Rexception_obj / Rexception_pc.
1343   exception_offset = __ pc() - start;
1344
1345   // Transfer Rexception_obj & Rexception_pc in TLS and fall thru to the
1346   // exception_in_tls_offset entry point.
1347   __ str(Rexception_obj, Address(Rthread, JavaThread::exception_oop_offset()));
1348   __ str(Rexception_pc, Address(Rthread, JavaThread::exception_pc_offset()));
1349   // Force return value to NULL to avoid confusing the escape analysis
1350   // logic. Everything is dead here anyway.
1351   __ mov(R0, 0);
1352
       // Entry point: exception oop and pc already stored in the thread.
1353   exception_in_tls_offset = __ pc() - start;
1354
1355   // Exception data is in JavaThread structure
1356   // Patch the return address of the current frame
       // (LR is loaded with the exception pc before the registers are saved.)
1357   __ ldr(LR, Address(Rthread, JavaThread::exception_pc_offset()));
       // The OopMap returned here is discarded; only 'map' from the first entry
       // point is registered with oop_maps below.
1358   (void) RegisterSaver::save_live_registers(masm, &frame_size_in_words);
1359   {
         // Clear the exception pc in the thread now that it has been consumed.
1360     const Register Rzero = __ zero_register(Rtemp); // XXX should be OK for C2 but not 100% sure
1361     __ str(Rzero, Address(Rthread, JavaThread::exception_pc_offset()));
1362   }
1363   __ mov(Rkind, Deoptimization::Unpack_exception);
1364   __ b(cont);
1365
       // Entry point for Unpack_reexecute requests; falls straight into 'cont'.
1366   reexecute_offset = __ pc() - start;
1367
1368   (void) RegisterSaver::save_live_registers(masm, &frame_size_in_words);
1369   __ mov(Rkind, Deoptimization::Unpack_reexecute);
1370
1371   // Calculate UnrollBlock and save the result in Rublock
1372   __ bind(cont);
       // Arguments for fetch_unroll_info: R0 = current thread, R1 = unpack kind.
1373   __ mov(R0, Rthread);
1374   __ mov(R1, Rkind);
1375
1376   int pc_offset = __ set_last_Java_frame(SP, FP, false, Rtemp); // note: FP may not need to be saved (not on x86)
1377   assert(((__ pc()) - start) == __ offset(), "warning: start differs from code_begin");
1378   __ call(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info));
       // When set_last_Java_frame returned -1, the oop map is registered at the
       // offset right after the call instead.
       // NOTE(review): presumably -1 means no pc was recorded by
       // set_last_Java_frame -- confirm against its definition.
1379   if (pc_offset == -1) {
1380     pc_offset = __ offset();
1381   }
1382   oop_maps->add_gc_map(pc_offset, map);
1383   __ reset_last_Java_frame(Rtemp); // Rtemp free since scratched by far call
1384
       // R0 holds the result of fetch_unroll_info (the UnrollBlock).
1385   __ mov(Rublock, R0);
1386
1387   // Reload Rkind from the UnrollBlock (might have changed)
1388   __ ldr_s32(Rkind, Address(Rublock, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));
1389   Label noException;
1390   __ cmp_32(Rkind, Deoptimization::Unpack_exception);   // Was exception pending?
1391   __ b(noException, ne);
1392   // handle exception case
1393 #ifdef ASSERT
1394   // assert that exception_pc is zero in tls
1395   { Label L;
1396     __ ldr(Rexception_pc, Address(Rthread, JavaThread::exception_pc_offset()));
1397     __ cbz(Rexception_pc, L);
1398     __ stop("exception pc should be null");
1399     __ bind(L);
1400   }
1401 #endif
       // Move the exception oop from the thread into Rexception_obj and clear
       // the thread's copy.
1402   __ ldr(Rexception_obj, Address(Rthread, JavaThread::exception_oop_offset()));
1403   __ verify_oop(Rexception_obj);
1404   {
1405     const Register Rzero = __ zero_register(Rtemp);
1406     __ str(Rzero, Address(Rthread, JavaThread::exception_oop_offset()));
1407   }
1408
1409   __ bind(noException);
1410
1411   // This frame is going away.  Fetch return value, so we can move it to
1412   // a new frame.
1413   __ ldr(R0, Address(SP, RegisterSaver::R0_offset * wordSize));
1414   __ ldr(R1, Address(SP, RegisterSaver::R1_offset * wordSize));
1415 #ifndef __SOFTFP__
1416   __ ldr_double(D0, Address(SP, RegisterSaver::D0_offset * wordSize));
1417 #endif
1418   // pop frame
       // (drops the RegisterSaver save area of reg_save_size words)
1419   __ add(SP, SP, RegisterSaver::reg_save_size * wordSize);
1420
1421   // Set initial stack state before pushing interpreter frames
       // Rtemp = size of the deoptimized frame, R2 = cursor into frame_pcs,
       // R3 = cursor into frame_sizes.
1422   __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()));
1423   __ ldr(R2, Address(Rublock, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
1424   __ ldr(R3, Address(Rublock, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()));
1425
       // Pop the deoptimized frame itself.
1426   __ add(SP, SP, Rtemp);
1427
1428 #ifdef ASSERT
1429   // Compilers generate code that bang the stack by as much as the
1430   // interpreter would need. So this stack banging should never
1431   // trigger a fault. Verify that it does not on non product builds.
1432   // See if it is enough stack to push deoptimized frames.
1433   //
1434   // The compiled method that we are deoptimizing was popped from the stack.
1435   // If the stack bang results in a stack overflow, we don't return to the
1436   // method that is being deoptimized. The stack overflow exception is
1437   // propagated to the caller of the deoptimized method. Need to get the pc
1438   // from the caller in LR and restore FP.
1439   __ ldr(LR, Address(R2, 0));
1440   __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
1441   __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()));
1442   __ arm_stack_overflow_check(R8, Rtemp);
1443 #endif
       // R8 = number of frames to push; it is the counter of the loop below.
1444   __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()));
1445
1446   // Pick up the initial fp we should save
1447   // XXX Note: was ldr(FP, Address(FP));
1448
1449   // The compiler no longer uses FP as a frame pointer for the
1450   // compiled code. It can be used by the allocator in C2 or to
1451   // memorize the original SP for JSR292 call sites.
1452
1453   // Hence, ldr(FP, Address(FP)) is probably not correct. For x86,
1454   // Deoptimization::fetch_unroll_info computes the right FP value and
1455   // stores it in Rublock.initial_info. This has been activated for ARM.
1456   __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
1457
       // Remember the current SP as the first sender SP, then lower SP by the
       // UnrollBlock's caller_adjustment.
1458   __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()));
1459   __ mov(Rsender, SP);
1460   __ sub(SP, SP, Rtemp);
1461
1462   // Push interpreter frames in a loop
       // The counter is tested after the decrement, so the body runs at least once.
1463   Label loop;
1464   __ bind(loop);
1465   __ ldr(LR, Address(R2, wordSize, post_indexed));         // load frame pc
1466   __ ldr(Rtemp, Address(R3, wordSize, post_indexed));      // load frame size
1467
1468   __ raw_push(FP, LR);                                     // create new frame
1469   __ mov(FP, SP);
1470   __ sub(Rtemp, Rtemp, 2*wordSize);                        // FP and LR are already pushed
1471
1472   __ sub(SP, SP, Rtemp);                                   // allocate the rest of the frame
1473
       // Record the sender SP and store zero into the last_sp slot of the new frame.
1474   __ str(Rsender, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize));
1475   __ mov(LR, 0);
1476   __ str(LR, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize));
1477
1478   __ subs(R8, R8, 1);                               // decrement counter
1479   __ mov(Rsender, SP);                              // sender SP for the next frame
1480   __ b(loop, ne);
1481
1482   // Re-push self-frame
       // R2 was advanced past the interpreter frame pcs by the loop; the next
       // frame_pcs entry becomes the return address of the self-frame.
1483   __ ldr(LR, Address(R2));
1484   __ raw_push(FP, LR);
1485   __ mov(FP, SP);
1486   __ sub(SP, SP, (frame_size_in_words - 2) * wordSize);
1487
1488   // Restore frame locals after moving the frame
       // (the return values fetched before the old frame was popped)
1489   __ str(R0, Address(SP, RegisterSaver::R0_offset * wordSize));
1490   __ str(R1, Address(SP, RegisterSaver::R1_offset * wordSize));
1491
1492 #ifndef __SOFTFP__
1493   __ str_double(D0, Address(SP, RegisterSaver::D0_offset * wordSize));
1494 #endif // !__SOFTFP__
1495
1496 #ifdef ASSERT
1497   // Reload Rkind from the UnrollBlock and check that it was not overwritten (Rkind is not callee-saved)
1498   { Label L;
1499     __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));
1500     __ cmp_32(Rkind, Rtemp);
1501     __ b(L, eq);
1502     __ stop("Rkind was overwritten");
1503     __ bind(L);
1504   }
1505 #endif
1506
1507   // Call unpack_frames with proper arguments
       // R0 = current thread, R1 = unpack kind.
1508   __ mov(R0, Rthread);
1509   __ mov(R1, Rkind);
1510
1511   pc_offset = __ set_last_Java_frame(SP, FP, true, Rtemp);
1512   assert(((__ pc()) - start) == __ offset(), "warning: start differs from code_begin");
1513   __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames));
       // Same fallback as above: use the offset after the call if none was returned.
1514   if (pc_offset == -1) {
1515     pc_offset = __ offset();
1516   }
       // A fresh OopMap with no oop entries is registered for this call site.
1517   oop_maps->add_gc_map(pc_offset, new OopMap(frame_size_in_words * VMRegImpl::slots_per_word, 0));
1518   __ reset_last_Java_frame(Rtemp); // Rtemp free since scratched by far call
1519
1520   // Collect return values, pop self-frame and jump to interpreter
1521   __ ldr(R0, Address(SP, RegisterSaver::R0_offset * wordSize));
1522   __ ldr(R1, Address(SP, RegisterSaver::R1_offset * wordSize));
1523   // Interpreter floats controlled by __SOFTFP__, but compiler
1524   // float return value registers controlled by __ABI_HARD__
1525   // This matters for vfp-sflt builds.
1526 #ifndef __SOFTFP__
1527   // Interpreter hard float
1528 #ifdef __ABI_HARD__
1529   // Compiler float return value in FP registers
1530   __ ldr_double(D0, Address(SP, RegisterSaver::D0_offset * wordSize));
1531 #else
1532   // Compiler float return value in integer registers,
1533   // copy to D0 for interpreter (S0 <-- R0)
1534   __ fmdrr(D0_tos, R0, R1);
1535 #endif
1536 #endif // !__SOFTFP__
       // Discard the self-frame body, then pop the saved FP and load PC from the
       // saved return address.
1537   __ mov(SP, FP);
1538
1539   __ pop(RegisterSet(FP) | RegisterSet(PC));
1540
1541   __ flush();
1542
       // Create the blob from the emitted code; the literal 0 is passed ahead of
       // the exception and reexecute entry offsets.
       // NOTE(review): presumably that 0 is the offset of the plain-deopt entry
       // at 'start' -- confirm against DeoptimizationBlob::create.
1543   _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset,
1544                                            reexecute_offset, frame_size_in_words);
1545   _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
1546 }
1547 
1548 #ifdef COMPILER2
1549 
1550 //------------------------------generate_uncommon_trap_blob--------------------
1551 // Ought to generate an ideal graph & compile, but here's some ASM
1552 // instead.
     //
     // Emits the uncommon trap blob into a CodeBuffer named "uncommon_trap_blob"
     // and publishes it in _uncommon_trap_blob. Takes no parameters and returns
     // nothing; the generated code expects the unloaded class index in R0.
1553 void SharedRuntime::generate_uncommon_trap_blob() {
1554   // allocate space for the code
1555   ResourceMark rm;
1556 
1557   // setup code generation tools
       // Extra room is reserved when VerifyThread is enabled.
1558   int pad = VerifyThread ? 512 : 0;
1559 #ifdef _LP64
1560   CodeBuffer buffer("uncommon_trap_blob", 2700+pad, 512);
1561 #else
1562   // Measured 8/7/03 at 660 in 32bit debug build (no VerifyThread)
1563   // Measured 8/7/03 at 1028 in 32bit debug build (VerifyThread)
1564   CodeBuffer buffer("uncommon_trap_blob", 2000+pad, 512);
1565 #endif
1566   // bypassed when code generation useless
1567   MacroAssembler* masm               = new MacroAssembler(&buffer);
       // Rublock holds the value returned by Deoptimization::uncommon_trap (used
       // below as the base address for all UnrollBlock field loads).
       // Rsender holds the SP value stored into each new frame's sender_sp slot.
1568   const Register Rublock = R6;
1569   const Register Rsender = altFP_7_11;
1570   assert_different_registers(Rublock, Rsender, Rexception_obj, R0, R1, R2, R3, R8, Rtemp);
1571 
1572   //
1573   // This is the entry point for all traps the compiler takes when it thinks
1574   // it cannot handle further execution of compilation code. The frame is
1575   // deoptimized in these cases and converted into interpreter frames for
1576   // execution
1577   // The steps taken by this frame are as follows:
1578   //   - push a fake "unpack_frame"
1579   //   - call the C routine Deoptimization::uncommon_trap (this function
1580   //     packs the current compiled frame into vframe arrays and returns
1581   //     information about the number and size of interpreter frames which
1582   //     are equivalent to the frame which is being deoptimized)
1583   //   - deallocate the "unpack_frame"
1584   //   - deallocate the deoptimization frame
1585   //   - in a loop using the information returned in the previous step
1586   //     push interpreter frames;
1587   //   - create a dummy "unpack_frame"
1588   //   - call the C routine: Deoptimization::unpack_frames (this function
1589   //     lays out values on the interpreter frame which was just created)
1590   //   - deallocate the dummy unpack_frame
1591   //   - return to the interpreter entry point
1592   //
1593   //  Refer to the following methods for more information:
1594   //   - Deoptimization::uncommon_trap
1595   //   - Deoptimization::unpack_frames
1596 
1597   // the unloaded class index is in R0 (first parameter to this blob)
1598 
       // Call Deoptimization::uncommon_trap(thread, unloaded class index,
       // Unpack_uncommon_trap). The incoming R0 is moved to R1 before R0 is
       // overwritten with Rthread. FP/LR are pushed around the call and popped
       // again afterwards; the call's result (R0) is kept in Rublock.
1599   __ raw_push(FP, LR);
1600   __ set_last_Java_frame(SP, FP, false, Rtemp);
1601   __ mov(R2, Deoptimization::Unpack_uncommon_trap);
1602   __ mov(R1, R0);
1603   __ mov(R0, Rthread);
1604   __ call(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap));
1605   __ mov(Rublock, R0);
1606   __ reset_last_Java_frame(Rtemp);
1607   __ raw_pop(FP, LR);
1608 
1609 #ifdef ASSERT
       // Debug builds only: the unpack kind recorded in the UnrollBlock must be
       // the one passed to uncommon_trap above.
1610   { Label L;
1611     __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));
1612     __ cmp_32(Rtemp, Deoptimization::Unpack_uncommon_trap);
1613     __ b(L, eq);
1614     __ stop("SharedRuntime::generate_uncommon_trap_blob: expected Unpack_uncommon_trap");
1615     __ bind(L);
1616   }
1617 #endif
1618 
1619 
1620   // Set initial stack state before pushing interpreter frames
       //   Rtemp <- size of the deoptimized frame
       //   R2    <- frame pcs array (walked with post-increment in the loop below)
       //   R3    <- frame sizes array (walked with post-increment in the loop below)
1621   __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()));
1622   __ ldr(R2, Address(Rublock, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()));
1623   __ ldr(R3, Address(Rublock, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()));
1624 
       // Pop the deoptimized frame by advancing SP over it.
1625   __ add(SP, SP, Rtemp);
1626 
1627   // Check that there is enough stack to push the deoptimized frames.
1628 #ifdef ASSERT
1629   // Compilers generate code that bang the stack by as much as the
1630   // interpreter would need. So this stack banging should never
1631   // trigger a fault. Verify that it does not on non product builds.
1632   //
1633   // The compiled method that we are deoptimizing was popped from the stack.
1634   // If the stack bang results in a stack overflow, we don't return to the
1635   // method that is being deoptimized. The stack overflow exception is
1636   // propagated to the caller of the deoptimized method. Need to get the pc
1637   // from the caller in LR and restore FP.
1638   __ ldr(LR, Address(R2, 0));
1639   __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
1640   __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes()));
1641   __ arm_stack_overflow_check(R8, Rtemp);
1642 #endif
       // R8 <- number of frames (loop counter), Rtemp <- caller adjustment.
       // Rsender captures SP before the caller adjustment is subtracted, and FP
       // is (re)loaded from the UnrollBlock's initial_info field.
1643   __ ldr_s32(R8, Address(Rublock, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()));
1644   __ ldr_s32(Rtemp, Address(Rublock, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()));
1645   __ mov(Rsender, SP);
1646   __ sub(SP, SP, Rtemp);
1647   //  __ ldr(FP, Address(FP));
1648   __ ldr(FP, Address(Rublock, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()));
1649 
1650   // Push interpreter frames in a loop
       // Each iteration consumes one entry from the pcs (R2) and sizes (R3)
       // arrays, advancing both pointers by wordSize.
1651   Label loop;
1652   __ bind(loop);
1653   __ ldr(LR, Address(R2, wordSize, post_indexed));         // load frame pc
1654   __ ldr(Rtemp, Address(R3, wordSize, post_indexed));      // load frame size
1655 
1656   __ raw_push(FP, LR);                                     // create new frame
1657   __ mov(FP, SP);
       // FP and LR (two words) were just pushed, so only the remainder of the
       // frame size still has to be allocated.
1658   __ sub(Rtemp, Rtemp, 2*wordSize);
1659 
1660   __ sub(SP, SP, Rtemp);
1661 
       // Record the sender SP in the new frame and clear its last_sp slot.
1662   __ str(Rsender, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize));
1663   __ mov(LR, 0);
1664   __ str(LR, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize));
1665   __ subs(R8, R8, 1);                               // decrement counter
       // The SP of the frame just built becomes the sender SP of the next one.
       // NOTE(review): the branch below consumes the flags set by subs, so this
       // mov is presumably flag-preserving -- confirm against MacroAssembler.
1666   __ mov(Rsender, SP);
1667   __ b(loop, ne);
1668 
1669   // Re-push self-frame
       // R2 has been post-incremented past every pc consumed by the loop, so this
       // loads the entry that follows them.
1670   __ ldr(LR, Address(R2));
1671   __ raw_push(FP, LR);
1672   __ mov(FP, SP);
1673 
1674   // Call unpack_frames with proper arguments
       // R0 = thread, R1 = Unpack_uncommon_trap.
1675   __ mov(R0, Rthread);
1676   __ mov(R1, Deoptimization::Unpack_uncommon_trap);
1677   __ set_last_Java_frame(SP, FP, true, Rtemp);
1678   __ call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames));
1679   //  oop_maps->add_gc_map(__ pc() - start, new OopMap(frame_size_in_words, 0));
1680   __ reset_last_Java_frame(Rtemp);
1681 
       // Drop the self-frame: reset SP to FP, then pop the saved FP and load the
       // saved return address directly into PC.
1682   __ mov(SP, FP);
1683   __ pop(RegisterSet(FP) | RegisterSet(PC));
1684 
1685   masm->flush();
       // NOTE(review): NULL is presumably the oop map set (no gc map is added in
       // this function); the last argument is annotated as the two words LR+FP.
1686   _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, NULL, 2 /* LR+FP */);
1687 }
1688 
1689 #endif // COMPILER2
1690 
1691 //------------------------------generate_handler_blob------
1692 //
1693 // Generate a special Compile2Runtime blob that saves all registers,
1694 // sets up an oopmap, and calls safepoint code to stop the compiled code for
1695 // a safepoint.
1696 //
     // Parameters:
     //   call_ptr  - address of the routine the blob calls, with the current
     //               thread passed in R0.
     //   poll_type - compared against POLL_AT_RETURN; selects between the
     //               "poll at return" layout and the layout that reserves an
     //               extra stack word for the return pc.
     // Returns the SafepointBlob created from the generated code, its oop map
     // set and the frame size reported by RegisterSaver::save_live_registers.
     // Requires StubRoutines::forward_exception_entry() to exist already.
1697 SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) {
1698   assert(StubRoutines::forward_exception_entry() != NULL, "must be generated before");
1699 
1700   ResourceMark rm;
1701   CodeBuffer buffer("handler_blob", 256, 256);
1702   int frame_size_words;
1703   OopMapSet* oop_maps;
1704 
1705   bool cause_return = (poll_type == POLL_AT_RETURN);
1706 
1707   MacroAssembler* masm = new MacroAssembler(&buffer);
1708   address start = __ pc();
1709   oop_maps = new OopMapSet();
1710 
1711   if (!cause_return) {
1712     __ sub(SP, SP, 4); // make room for LR which may still be live
1713                        // here if we are coming from a c2 method
1714   }
1715 
       // NOTE(review): the third argument (!cause_return) presumably tells the
       // saver that the LR slot was already reserved above -- confirm in
       // RegisterSaver::save_live_registers.
1716   OopMap* map = RegisterSaver::save_live_registers(masm, &frame_size_words, !cause_return);
1717   if (!cause_return) {
1718     // update saved PC with correct value
1719     // need 2 steps because LR can be live in c2 method
         // (LR is only used as a scratch register here, after it has been saved.)
1720     __ ldr(LR, Address(Rthread, JavaThread::saved_exception_pc_offset()));
1721     __ str(LR, Address(SP, RegisterSaver::LR_offset * wordSize));
1722   }
1723 
       // Call call_ptr(thread). The gc map is registered at the offset returned
       // by set_last_Java_frame, or at the offset right after the call when that
       // returns -1.
1724   __ mov(R0, Rthread);
1725   int pc_offset = __ set_last_Java_frame(SP, FP, false, Rtemp); // note: FP may not need to be saved (not on x86)
1726   assert(((__ pc()) - start) == __ offset(), "warning: start differs from code_begin");
1727   __ call(call_ptr);
1728   if (pc_offset == -1) {
1729     pc_offset = __ offset();
1730   }
1731   oop_maps->add_gc_map(pc_offset, map);
1732   __ reset_last_Java_frame(Rtemp); // Rtemp free since scratched by far call
1733 
1734   if (!cause_return) {
1735     // If our stashed return pc was modified by the runtime we avoid touching it
         // The add and str below are conditional on eq, i.e. they only execute
         // when the saved LR slot still equals the thread's saved_exception_pc.
1736     __ ldr(R3_tmp, Address(Rthread, JavaThread::saved_exception_pc_offset()));
1737     __ ldr(R2_tmp, Address(SP, RegisterSaver::LR_offset * wordSize));
1738     __ cmp(R2_tmp, R3_tmp);
1739     // Adjust return pc forward to step over the safepoint poll instruction
1740     __ add(R2_tmp, R2_tmp, 4, eq);
1741     __ str(R2_tmp, Address(SP, RegisterSaver::LR_offset * wordSize), eq);
1742 
1743     // Check for pending exception
1744     __ ldr(Rtemp, Address(Rthread, Thread::pending_exception_offset()));
1745     __ cmp(Rtemp, 0);
1746 
         // NOTE(review): the conditional pop below consumes the flags set by the
         // cmp above, so restore_live_registers presumably leaves them intact --
         // confirm. With no pending exception (eq) the return pc is popped
         // straight into PC; otherwise it is popped into Rexception_pc and
         // control falls through to the forward_exception_entry jump.
1747     RegisterSaver::restore_live_registers(masm, false);
1748     __ pop(PC, eq);
1749     __ pop(Rexception_pc);
1750   } else {
1751     // Check for pending exception
1752     __ ldr(Rtemp, Address(Rthread, Thread::pending_exception_offset()));
1753     __ cmp(Rtemp, 0);
1754 
         // Same flag dependency as in the branch above: return through LR when
         // no exception is pending (eq), otherwise pass LR on as the exception pc.
1755     RegisterSaver::restore_live_registers(masm);
1756     __ bx(LR, eq);
1757     __ mov(Rexception_pc, LR);
1758   }
1759 
       // Only reached when a pending exception was found.
1760   __ jump(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type, Rtemp);
1761 
1762   __ flush();
1763 
1764   return SafepointBlob::create(&buffer, oop_maps, frame_size_words);
1765 }
1766 
     //------------------------------generate_resolve_blob------
     //
     // Generate a RuntimeStub that saves the live registers, calls 'destination'
     // with the current thread in R0 and then either
     //   - jumps to the address returned in R0, with the thread's vm_result_2
     //     value stored into the saved Rmethod slot, or
     //   - if the thread has a pending exception, clears vm_result_2 and jumps
     //     to StubRoutines::forward_exception_entry().
     //
     // Parameters:
     //   destination - address of the routine the stub calls.
     //   name        - name used for both the CodeBuffer and the RuntimeStub.
     // Returns the RuntimeStub created from the generated code.
     // Requires StubRoutines::forward_exception_entry() to exist already.
1767 RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) {
1768   assert(StubRoutines::forward_exception_entry() != NULL, "must be generated before");
1769 
1770   ResourceMark rm;
1771   CodeBuffer buffer(name, 1000, 512);
1772   int frame_size_words;
1773   OopMapSet *oop_maps;
1774   int frame_complete;
1775 
1776   MacroAssembler* masm = new MacroAssembler(&buffer);
1777   Label pending_exception;
1778 
       // Only used by the assert below, which expects code emission to begin at
       // offset 0 of the buffer.
1779   int start = __ offset();
1780 
1781   oop_maps = new OopMapSet();
1782   OopMap* map = RegisterSaver::save_live_registers(masm, &frame_size_words);
1783 
       // Offset at which the register save sequence ends; handed to
       // new_runtime_stub as frame_complete.
1784   frame_complete = __ offset();
1785 
1786   __ mov(R0, Rthread);
1787 
       // Call destination(thread). The gc map is registered at the offset
       // returned by set_last_Java_frame, or at the offset right after the call
       // when that returns -1.
1788   int pc_offset = __ set_last_Java_frame(SP, FP, false, Rtemp);
1789   assert(start == 0, "warning: start differs from code_begin");
1790   __ call(destination);
1791   if (pc_offset == -1) {
1792     pc_offset = __ offset();
1793   }
1794   oop_maps->add_gc_map(pc_offset, map);
1795   __ reset_last_Java_frame(Rtemp); // Rtemp free since scratched by far call
1796 
       // Branch to the exception path if the thread has a pending exception.
1797   __ ldr(R1, Address(Rthread, Thread::pending_exception_offset()));
1798   __ cbnz(R1, pending_exception);
1799 
1800   // Overwrite saved register values
       // The results are written into the register save area rather than into
       // the registers themselves.
       // NOTE(review): restore_live_registers presumably reloads Rmethod and
       // Rtemp from these slots -- confirm in RegisterSaver.
1801 
1802   // Place metadata result of VM call into Rmethod
1803   __ get_vm_result_2(R1, Rtemp);
1804   __ str(R1, Address(SP, RegisterSaver::Rmethod_offset * wordSize));
1805 
1806   // Place target address (VM call result) into Rtemp
1807   __ str(R0, Address(SP, RegisterSaver::Rtemp_offset * wordSize));
1808 
1809   RegisterSaver::restore_live_registers(masm);
1810   __ jump(Rtemp);
1811 
1812   __ bind(pending_exception);
1813 
       // Exception path: restore the registers, store zero into the thread's
       // vm_result_2 field and forward the exception with LR as the exception pc.
1814   RegisterSaver::restore_live_registers(masm);
1815   const Register Rzero = __ zero_register(Rtemp);
1816   __ str(Rzero, Address(Rthread, JavaThread::vm_result_2_offset()));
1817   __ mov(Rexception_pc, LR);
1818   __ jump(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type, Rtemp);
1819 
1820   __ flush();
1821 
1822   return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true);
1823 }