/*
 * Copyright (c) 2008, 2025, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2023, Red Hat, Inc.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "asm/macroAssembler.hpp"
#include "ci/ciEnv.hpp"
#include "code/compiledIC.hpp"
#include "code/nativeInst.hpp"
#include "compiler/disassembler.hpp"
#include "gc/shared/barrierSet.hpp"
#include "gc/shared/barrierSetAssembler.hpp"
#include "gc/shared/cardTable.hpp"
#include "gc/shared/cardTableBarrierSet.hpp"
#include "gc/shared/collectedHeap.inline.hpp"
#include "interpreter/bytecodeHistogram.hpp"
#include "interpreter/interpreter.hpp"
#include "memory/resourceArea.hpp"
#include "metaprogramming/primitiveConversions.hpp"
#include "oops/accessDecorators.hpp"
#include "oops/klass.inline.hpp"
#include "prims/methodHandles.hpp"
#include "runtime/interfaceSupport.inline.hpp"
#include "runtime/javaThread.hpp"
#include "runtime/jniHandles.hpp"
#include "runtime/objectMonitor.hpp"
#include "runtime/os.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#include "utilities/macros.hpp"
#include "utilities/powerOfTwo.hpp"

// Implementation of AddressLiteral

void AddressLiteral::set_rspec(relocInfo::relocType rtype) {
  switch (rtype) {
  case relocInfo::oop_type:
    // Oops are a special case. Normally they would be their own section
    // but in cases like icBuffer they are literals in the code stream that
    // we don't have a section for. We use none so that we get a literal address
    // which is always patchable.
    break;
  case relocInfo::external_word_type:
    _rspec = external_word_Relocation::spec(_target);
    break;
  case relocInfo::internal_word_type:
    _rspec = internal_word_Relocation::spec(_target);
    break;
  case relocInfo::opt_virtual_call_type:
    _rspec = opt_virtual_call_Relocation::spec();
    break;
  case relocInfo::static_call_type:
    _rspec = static_call_Relocation::spec();
    break;
  case relocInfo::runtime_call_type:
    _rspec = runtime_call_Relocation::spec();
    break;
  case relocInfo::poll_type:
  case relocInfo::poll_return_type:
    _rspec = Relocation::spec_simple(rtype);
    break;
  case relocInfo::none:
    break;
  default:
    ShouldNotReachHere();
    break;
  }
}


// virtual method calling
void MacroAssembler::lookup_virtual_method(Register recv_klass,
                                           Register vtable_index,
                                           Register method_result) {
  const ByteSize base_offset = Klass::vtable_start_offset() + vtableEntry::method_offset();
  assert(vtableEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
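  // method_result = *(recv_klass + vtable_start_offset + vtable_index * wordSize + method_offset)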
  add(recv_klass, recv_klass, AsmOperand(vtable_index, lsl, LogBytesPerWord));
  ldr(method_result, Address(recv_klass, base_offset));
}


// Simplified, combined version, good for typical uses.
// Falls through on failure.
void MacroAssembler::check_klass_subtype(Register sub_klass,
                                         Register super_klass,
                                         Register temp_reg,
                                         Register temp_reg2,
                                         Register temp_reg3,
                                         Label& L_success) {
  Label L_failure;
  check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, temp_reg2, &L_success, &L_failure, nullptr);
  check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, temp_reg2, temp_reg3, &L_success, nullptr);
  bind(L_failure);
}

void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
                                                   Register super_klass,
                                                   Register temp_reg,
                                                   Register temp_reg2,
                                                   Label* L_success,
                                                   Label* L_failure,
                                                   Label* L_slow_path) {

  assert_different_registers(sub_klass, super_klass, temp_reg, temp_reg2, noreg);
  const Register super_check_offset = temp_reg2;

  Label L_fallthrough;
  int label_nulls = 0;
  if (L_success == nullptr)   { L_success   = &L_fallthrough; label_nulls++; }
  if (L_failure == nullptr)   { L_failure   = &L_fallthrough; label_nulls++; }
  if (L_slow_path == nullptr) { L_slow_path = &L_fallthrough; label_nulls++; }
  assert(label_nulls <= 1, "at most one null in the batch");

  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
  int sco_offset = in_bytes(Klass::super_check_offset_offset());
  Address super_check_offset_addr(super_klass, sco_offset);

  // If the pointers are equal, we are done (e.g., String[] elements).
  // This self-check enables sharing of secondary supertype arrays among
  // non-primary types such as array-of-interface.  Otherwise, each such
  // type would need its own customized SSA.
  // We move this check to the front of the fast path because many
  // type checks are in fact trivially successful in this manner,
  // so we get a nicely predicted branch right at the start of the check.
  cmp(sub_klass, super_klass);
  b(*L_success, eq);

  // Check the supertype display:
  ldr_u32(super_check_offset, super_check_offset_addr);

  Address super_check_addr(sub_klass, super_check_offset);
  ldr(temp_reg, super_check_addr);
  cmp(super_klass, temp_reg); // load displayed supertype

  // This check has worked decisively for primary supers.
  // Secondary supers are sought in the super_cache ('super_cache_addr').
  // (Secondary supers are interfaces and very deeply nested subtypes.)
  // This works in the same check above because of a tricky aliasing
  // between the super_cache and the primary super display elements.
  // (The 'super_check_addr' can address either, as the case requires.)
  // Note that the cache is updated below if it does not help us find
  // what we need immediately.
  // So if it was a primary super, we can just fail immediately.
  // Otherwise, it's the slow path for us (no success at this point).

  b(*L_success, eq);
  cmp_32(super_check_offset, sc_offset);
  if (L_failure == &L_fallthrough) {
    b(*L_slow_path, eq);
  } else {
    b(*L_failure, ne);
    if (L_slow_path != &L_fallthrough) {
      b(*L_slow_path);
    }
  }

  bind(L_fallthrough);
}


void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
                                                   Register super_klass,
                                                   Register temp_reg,
                                                   Register temp2_reg,
                                                   Register temp3_reg,
                                                   Label* L_success,
                                                   Label* L_failure,
                                                   bool set_cond_codes) {
  // Note: if used by code that expects a register to be 0 on success,
  // this register must be temp_reg and set_cond_codes must be true

  Register saved_reg = noreg;

  // get additional tmp registers
  if (temp3_reg == noreg) {
    saved_reg = temp3_reg = LR;
    push(saved_reg);
  }

  assert(temp2_reg != noreg, "need all the temporary registers");
  assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg, temp3_reg);

  Register cmp_temp = temp_reg;
  Register scan_temp = temp3_reg;
  Register count_temp = temp2_reg;

  Label L_fallthrough;
  int label_nulls = 0;
  if (L_success == nullptr)   { L_success   = &L_fallthrough; label_nulls++; }
  if (L_failure == nullptr)   { L_failure   = &L_fallthrough; label_nulls++; }
  assert(label_nulls <= 1, "at most one null in the batch");

  // a couple of useful fields in sub_klass:
  int ss_offset = in_bytes(Klass::secondary_supers_offset());
  int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
  Address secondary_supers_addr(sub_klass, ss_offset);
  Address super_cache_addr(     sub_klass, sc_offset);

#ifndef PRODUCT
  inc_counter((address)&SharedRuntime::_partial_subtype_ctr, scan_temp, count_temp);
#endif

  // We will consult the secondary-super array.
  ldr(scan_temp, Address(sub_klass, ss_offset));

  assert(! UseCompressedOops, "search_key must be the compressed super_klass");
  // else search_key is the super_klass itself
  Register search_key = super_klass;

  // Load the array length.
  ldr(count_temp, Address(scan_temp, Array<Klass*>::length_offset_in_bytes()));
  add(scan_temp, scan_temp, Array<Klass*>::base_offset_in_bytes());

  add(count_temp, count_temp, 1);

  Label L_loop, L_fail;

  // Top of search loop
  bind(L_loop);
  // Notes:
  //  scan_temp starts at the array elements
  //  count_temp is 1+size
  subs(count_temp, count_temp, 1);
  if ((L_failure != &L_fallthrough) && (! set_cond_codes) && (saved_reg == noreg)) {
    // direct jump to L_failure if failed and no cleanup needed
    b(*L_failure, eq); // not found and no cleanup needed
  } else {
    b(L_fail, eq); // not found in the array
  }

  // Load next super to check
  // In the array of super classes elements are pointer sized.
  int element_size = wordSize;
  ldr(cmp_temp, Address(scan_temp, element_size, post_indexed));

  // Look for Rsuper_klass on Rsub_klass's secondary super-class-overflow list
  subs(cmp_temp, cmp_temp, search_key);

  // A miss means we are NOT a subtype and need to keep looping
  b(L_loop, ne);

  // Falling out the bottom means we found a hit; we ARE a subtype

  // Note: temp_reg/cmp_temp is already 0 and flag Z is set

  // Success.  Cache the super we found and proceed in triumph.
  str(super_klass, Address(sub_klass, sc_offset));

  if (saved_reg != noreg) {
    // Return success
    pop(saved_reg);
  }

  b(*L_success);

  bind(L_fail);
  // Note1: check "b(*L_failure, eq)" above if adding extra instructions here
  if (set_cond_codes) {
    movs(temp_reg, sub_klass); // clears Z and sets temp_reg to non-0 if needed
  }
  if (saved_reg != noreg) {
    pop(saved_reg);
  }
  if (L_failure != &L_fallthrough) {
    b(*L_failure);
  }

  bind(L_fallthrough);
}

// Returns address of receiver parameter, using tmp as base register. tmp and params_count can be the same.
Address MacroAssembler::receiver_argument_address(Register params_base, Register params_count, Register tmp) {
  assert_different_registers(params_base, params_count);
  add(tmp, params_base, AsmOperand(params_count, lsl, Interpreter::logStackElementSize));
  return Address(tmp, -Interpreter::stackElementSize);
}

void MacroAssembler::align(int modulus, int target) {
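  // Emits nops until `target` plus the number of bytes emitted is a multiple of
  // modulus; align(modulus) passes the current offset, aligning the code position.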
  int delta = target - offset();
  while ((offset() + delta) % modulus != 0) nop();
}

void MacroAssembler::align(int modulus) {
  align(modulus, offset());
}

int MacroAssembler::set_last_Java_frame(Register last_java_sp,
                                        Register last_java_fp,
                                        bool save_last_java_pc,
                                        Register tmp) {
  int pc_offset;
  if (last_java_fp != noreg) {
    // optional
    str(last_java_fp, Address(Rthread, JavaThread::last_Java_fp_offset()));
    _fp_saved = true;
  } else {
    _fp_saved = false;
  }
  if (save_last_java_pc) {
    str(PC, Address(Rthread, JavaThread::last_Java_pc_offset()));
    pc_offset = offset() + VM_Version::stored_pc_adjustment();
    _pc_saved = true;
  } else {
    _pc_saved = false;
    pc_offset = -1;
  }
  // According to the comment in javaFrameAnchor.hpp, SP must be saved last, so that
  // other entries are valid when SP is set.

  // However, this is probably not a strong constraint since for instance PC is
  // sometimes read from the stack at SP... but is pushed later (by the call). Hence,
  // we now write the fields in the expected order but we have not added a StoreStore
  // barrier.

  // XXX: if the ordering is really important, PC should always be saved (without forgetting
  // to update oop_map offsets) and a StoreStore barrier might be needed.

  if (last_java_sp == noreg) {
    last_java_sp = SP; // always saved
  }
  str(last_java_sp, Address(Rthread, JavaThread::last_Java_sp_offset()));

  return pc_offset; // for oopmaps
}

void MacroAssembler::reset_last_Java_frame(Register tmp) {
  const Register Rzero = zero_register(tmp);
  str(Rzero, Address(Rthread, JavaThread::last_Java_sp_offset()));
  if (_fp_saved) {
    str(Rzero, Address(Rthread, JavaThread::last_Java_fp_offset()));
  }
  if (_pc_saved) {
    str(Rzero, Address(Rthread, JavaThread::last_Java_pc_offset()));
  }
}


// Implementation of call_VM versions

void MacroAssembler::call_VM_leaf_helper(address entry_point, int number_of_arguments) {
  assert(number_of_arguments >= 0, "cannot have negative number of arguments");
  assert(number_of_arguments <= 4, "cannot have more than 4 arguments");

  // Safer to save R9 here since callers may have been written
  // assuming R9 survives. This is suboptimal but is not worth
  // optimizing for the few platforms where R9 is scratched.
  push(RegisterSet(R4) | R9ifScratched);
  mov(R4, SP);
  bic(SP, SP, StackAlignmentInBytes - 1);
  call(entry_point, relocInfo::runtime_call_type);
  mov(SP, R4);
  pop(RegisterSet(R4) | R9ifScratched);
}


void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
  assert(number_of_arguments >= 0, "cannot have negative number of arguments");
  assert(number_of_arguments <= 3, "cannot have more than 3 arguments");

  const Register tmp = Rtemp;
  assert_different_registers(oop_result, tmp);

  set_last_Java_frame(SP, FP, true, tmp);

#if R9_IS_SCRATCHED
  // Safer to save R9 here since callers may have been written
  // assuming R9 survives. This is suboptimal but is not worth
  // optimizing for the few platforms where R9 is scratched.

  // Note: cannot save R9 above the saved SP (some calls expect, for
  // instance, the Java stack top at the saved SP)
  // => once saved (with set_last_Java_frame), decrease SP before rounding to
  // ensure the slot at SP will be free for R9.
  sub(SP, SP, 4);
  bic(SP, SP, StackAlignmentInBytes - 1);
  str(R9, Address(SP, 0));
#else
  bic(SP, SP, StackAlignmentInBytes - 1);
#endif // R9_IS_SCRATCHED

  mov(R0, Rthread);
  call(entry_point, relocInfo::runtime_call_type);

#if R9_IS_SCRATCHED
  ldr(R9, Address(SP, 0));
#endif
  ldr(SP, Address(Rthread, JavaThread::last_Java_sp_offset()));

  reset_last_Java_frame(tmp);

  // C++ interp handles this in the interpreter
  check_and_handle_popframe();
  check_and_handle_earlyret();

  if (check_exceptions) {
    // check for pending exceptions
    ldr(tmp, Address(Rthread, Thread::pending_exception_offset()));
    cmp(tmp, 0);
    mov(Rexception_pc, PC, ne);
    b(StubRoutines::forward_exception_entry(), ne);
  }

  // get oop result if there is one and reset the value in the thread
  if (oop_result->is_valid()) {
    get_vm_result_oop(oop_result, tmp);
  }
}

void MacroAssembler::call_VM(Register oop_result, address entry_point, bool check_exceptions) {
  call_VM_helper(oop_result, entry_point, 0, check_exceptions);
}


void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, bool check_exceptions) {
  assert (arg_1 == R1, "fixed register for arg_1");
  call_VM_helper(oop_result, entry_point, 1, check_exceptions);
}


void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) {
  assert (arg_1 == R1, "fixed register for arg_1");
  assert (arg_2 == R2, "fixed register for arg_2");
  call_VM_helper(oop_result, entry_point, 2, check_exceptions);
}


void MacroAssembler::call_VM(Register oop_result, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) {
  assert (arg_1 == R1, "fixed register for arg_1");
  assert (arg_2 == R2, "fixed register for arg_2");
  assert (arg_3 == R3, "fixed register for arg_3");
  call_VM_helper(oop_result, entry_point, 3, check_exceptions);
}


void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments, bool check_exceptions) {
  // Not used on ARM
  Unimplemented();
}


void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions) {
  // Not used on ARM
  Unimplemented();
}


void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions) {
  // Not used on ARM
  Unimplemented();
}


void MacroAssembler::call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions) {
  // Not used on ARM
  Unimplemented();
}

// Raw call, without saving/restoring registers, exception handling, etc.
// Mainly used from various stubs.
void MacroAssembler::call_VM(address entry_point, bool save_R9_if_scratched) {
  const Register tmp = Rtemp; // Rtemp free since scratched by call
  set_last_Java_frame(SP, FP, true, tmp);
#if R9_IS_SCRATCHED
  if (save_R9_if_scratched) {
    // Note: Saving also R10 for alignment.
    push(RegisterSet(R9, R10));
  }
#endif
  mov(R0, Rthread);
  call(entry_point, relocInfo::runtime_call_type);
#if R9_IS_SCRATCHED
  if (save_R9_if_scratched) {
    pop(RegisterSet(R9, R10));
  }
#endif
  reset_last_Java_frame(tmp);
}

void MacroAssembler::call_VM_leaf(address entry_point) {
  call_VM_leaf_helper(entry_point, 0);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1) {
  assert (arg_1 == R0, "fixed register for arg_1");
  call_VM_leaf_helper(entry_point, 1);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2) {
  assert (arg_1 == R0, "fixed register for arg_1");
  assert (arg_2 == R1, "fixed register for arg_2");
  call_VM_leaf_helper(entry_point, 2);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3) {
  assert (arg_1 == R0, "fixed register for arg_1");
  assert (arg_2 == R1, "fixed register for arg_2");
  assert (arg_3 == R2, "fixed register for arg_3");
  call_VM_leaf_helper(entry_point, 3);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4) {
  assert (arg_1 == R0, "fixed register for arg_1");
  assert (arg_2 == R1, "fixed register for arg_2");
  assert (arg_3 == R2, "fixed register for arg_3");
  assert (arg_4 == R3, "fixed register for arg_4");
  call_VM_leaf_helper(entry_point, 4);
}

void MacroAssembler::get_vm_result_oop(Register oop_result, Register tmp) {
  assert_different_registers(oop_result, tmp);
  ldr(oop_result, Address(Rthread, JavaThread::vm_result_oop_offset()));
  str(zero_register(tmp), Address(Rthread, JavaThread::vm_result_oop_offset()));
  verify_oop(oop_result);
}

void MacroAssembler::get_vm_result_metadata(Register metadata_result, Register tmp) {
  assert_different_registers(metadata_result, tmp);
  ldr(metadata_result, Address(Rthread, JavaThread::vm_result_metadata_offset()));
  str(zero_register(tmp), Address(Rthread, JavaThread::vm_result_metadata_offset()));
}

void MacroAssembler::add_rc(Register dst, Register arg1, RegisterOrConstant arg2) {
  if (arg2.is_register()) {
    add(dst, arg1, arg2.as_register());
  } else {
    add(dst, arg1, arg2.as_constant());
  }
}

void MacroAssembler::add_slow(Register rd, Register rn, int c) {
  // This function is used in compiler for handling large frame offsets
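  // Split c into a low part (bits 2..9, always encodable as an immediate) plus
  // a remainder that must itself be an ARM rotated immediate.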
  if ((c < 0) && (((-c) & ~0x3fc) == 0)) {
    return sub(rd, rn, (-c));
  }
  int low = c & 0x3fc;
  if (low != 0) {
    add(rd, rn, low);
    rn = rd;
  }
  if (c & ~0x3fc) {
    assert(AsmOperand::is_rotated_imm(c & ~0x3fc), "unsupported add_slow offset %d", c);
    add(rd, rn, c & ~0x3fc);
  } else if (rd != rn) {
    assert(c == 0, "");
    mov(rd, rn); // need to generate at least one move!
  }
}

void MacroAssembler::sub_slow(Register rd, Register rn, int c) {
  // This function is used in compiler for handling large frame offsets
  if ((c < 0) && (((-c) & ~0x3fc) == 0)) {
    return add(rd, rn, (-c));
  }
  int low = c & 0x3fc;
  if (low != 0) {
    sub(rd, rn, low);
    rn = rd;
  }
  if (c & ~0x3fc) {
    assert(AsmOperand::is_rotated_imm(c & ~0x3fc), "unsupported sub_slow offset %d", c);
    sub(rd, rn, c & ~0x3fc);
  } else if (rd != rn) {
    assert(c == 0, "");
    mov(rd, rn); // need to generate at least one move!
  }
}

void MacroAssembler::mov_slow(Register rd, address addr) {
  // do *not* call the non relocated mov_related_address
  mov_slow(rd, (intptr_t)addr);
}

void MacroAssembler::mov_slow(Register rd, const char *str) {
  mov_slow(rd, (intptr_t)str);
}


void MacroAssembler::mov_slow(Register rd, intptr_t c, AsmCondition cond) {
  if (AsmOperand::is_rotated_imm(c)) {
    mov(rd, c, cond);
  } else if (AsmOperand::is_rotated_imm(~c)) {
    mvn(rd, ~c, cond);
  } else if (VM_Version::supports_movw()) {
    movw(rd, c & 0xffff, cond);
    if ((unsigned int)c >> 16) {
      movt(rd, (unsigned int)c >> 16, cond);
    }
  } else {
    // Find first non-zero bit
    int shift = 0;
    while ((c & (3 << shift)) == 0) {
      shift += 2;
    }
    // Put the least significant part of the constant
    int mask = 0xff << shift;
    mov(rd, c & mask, cond);
    // Add up to 3 other parts of the constant;
    // each of them can be represented as rotated_imm
    if (c & (mask << 8)) {
      orr(rd, rd, c & (mask << 8), cond);
    }
    if (c & (mask << 16)) {
      orr(rd, rd, c & (mask << 16), cond);
    }
    if (c & (mask << 24)) {
      orr(rd, rd, c & (mask << 24), cond);
    }
  }
}


void MacroAssembler::mov_oop(Register rd, jobject o, int oop_index,
                             AsmCondition cond) {
  if (o == nullptr) {
    mov(rd, 0, cond);
    return;
  }

  if (oop_index == 0) {
    oop_index = oop_recorder()->allocate_oop_index(o);
  }
  relocate(oop_Relocation::spec(oop_index));

  if (VM_Version::supports_movw()) {
    movw(rd, 0, cond);
    movt(rd, 0, cond);
  } else {
    ldr(rd, Address(PC), cond);
    // Extra nop to handle case of large offset of oop placeholder (see NativeMovConstReg::set_data).
    nop();
  }
}

void MacroAssembler::mov_metadata(Register rd, Metadata* o, int metadata_index) {
  if (o == nullptr) {
    mov(rd, 0);
    return;
  }

  if (metadata_index == 0) {
    metadata_index = oop_recorder()->allocate_metadata_index(o);
  }
  relocate(metadata_Relocation::spec(metadata_index));

  if (VM_Version::supports_movw()) {
    movw(rd, ((int)o) & 0xffff);
    movt(rd, (unsigned int)o >> 16);
  } else {
    ldr(rd, Address(PC));
    // Extra nop to handle case of large offset of metadata placeholder (see NativeMovConstReg::set_data).
    nop();
  }
}

void MacroAssembler::mov_float(FloatRegister fd, jfloat c, AsmCondition cond) {
  Label skip_constant;
  jint float_bits = PrimitiveConversions::cast<jint>(c);

  flds(fd, Address(PC), cond);
  b(skip_constant);
  emit_int32(float_bits);
  bind(skip_constant);
}

void MacroAssembler::mov_double(FloatRegister fd, jdouble c, AsmCondition cond) {
  Label skip_constant;
  union {
    jdouble d;
    jint i[2];
  } accessor;
  accessor.d = c;

  fldd(fd, Address(PC), cond);
  b(skip_constant);
  emit_int32(accessor.i[0]);
  emit_int32(accessor.i[1]);
  bind(skip_constant);
}

void MacroAssembler::ldr_global_s32(Register reg, address address_of_global) {
  intptr_t addr = (intptr_t) address_of_global;
  mov_slow(reg, addr & ~0xfff);
  ldr(reg, Address(reg, addr & 0xfff));
}

void MacroAssembler::ldr_global_ptr(Register reg, address address_of_global) {
  ldr_global_s32(reg, address_of_global);
}

void MacroAssembler::ldrb_global(Register reg, address address_of_global) {
  intptr_t addr = (intptr_t) address_of_global;
  mov_slow(reg, addr & ~0xfff);
  ldrb(reg, Address(reg, addr & 0xfff));
}

void MacroAssembler::zero_extend(Register rd, Register rn, int bits) {
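  // Use whichever mask is encodable as an immediate: an AND with (1 << bits) - 1
  // for bits <= 8, a BIC of the complement for bits >= 24, or a shift-up/shift-down
  // pair otherwise.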
  if (bits <= 8) {
    andr(rd, rn, (1 << bits) - 1);
  } else if (bits >= 24) {
    bic(rd, rn, -1 << bits);
  } else {
    mov(rd, AsmOperand(rn, lsl, 32 - bits));
    mov(rd, AsmOperand(rd, lsr, 32 - bits));
  }
}

void MacroAssembler::sign_extend(Register rd, Register rn, int bits) {
  mov(rd, AsmOperand(rn, lsl, 32 - bits));
  mov(rd, AsmOperand(rd, asr, 32 - bits));
}


void MacroAssembler::cmpoop(Register obj1, Register obj2) {
  cmp(obj1, obj2);
}

void MacroAssembler::long_move(Register rd_lo, Register rd_hi,
                               Register rn_lo, Register rn_hi,
                               AsmCondition cond) {
  if (rd_lo != rn_hi) {
    if (rd_lo != rn_lo) { mov(rd_lo, rn_lo, cond); }
    if (rd_hi != rn_hi) { mov(rd_hi, rn_hi, cond); }
  } else if (rd_hi != rn_lo) {
    if (rd_hi != rn_hi) { mov(rd_hi, rn_hi, cond); }
    if (rd_lo != rn_lo) { mov(rd_lo, rn_lo, cond); }
  } else {
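    // rd_lo == rn_hi and rd_hi == rn_lo: the registers fully cross, so swap them
    // in place with the classic three-EOR trick (no temp register needed).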
    eor(rd_lo, rd_hi, rd_lo, cond);
    eor(rd_hi, rd_lo, rd_hi, cond);
    eor(rd_lo, rd_hi, rd_lo, cond);
  }
}

void MacroAssembler::long_shift(Register rd_lo, Register rd_hi,
                                Register rn_lo, Register rn_hi,
                                AsmShift shift, Register count) {
  Register tmp;
  if (rd_lo != rn_lo && rd_lo != rn_hi && rd_lo != count) {
    tmp = rd_lo;
  } else {
    tmp = rd_hi;
  }
  assert_different_registers(tmp, count, rn_lo, rn_hi);

  subs(tmp, count, 32);
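  // From here on: tmp = count - 32, so pl <=> count >= 32 and mi <=> count < 32.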
  if (shift == lsl) {
    assert_different_registers(rd_hi, rn_lo);
    assert_different_registers(count, rd_hi);
    mov(rd_hi, AsmOperand(rn_lo, shift, tmp), pl);
    rsb(tmp, count, 32, mi);
    if (rd_hi == rn_hi) {
      mov(rd_hi, AsmOperand(rn_hi, lsl, count), mi);
      orr(rd_hi, rd_hi, AsmOperand(rn_lo, lsr, tmp), mi);
    } else {
      mov(rd_hi, AsmOperand(rn_lo, lsr, tmp), mi);
      orr(rd_hi, rd_hi, AsmOperand(rn_hi, lsl, count), mi);
    }
    mov(rd_lo, AsmOperand(rn_lo, shift, count));
  } else {
    assert_different_registers(rd_lo, rn_hi);
    assert_different_registers(rd_lo, count);
    mov(rd_lo, AsmOperand(rn_hi, shift, tmp), pl);
    rsb(tmp, count, 32, mi);
    if (rd_lo == rn_lo) {
      mov(rd_lo, AsmOperand(rn_lo, lsr, count), mi);
      orr(rd_lo, rd_lo, AsmOperand(rn_hi, lsl, tmp), mi);
    } else {
      mov(rd_lo, AsmOperand(rn_hi, lsl, tmp), mi);
      orr(rd_lo, rd_lo, AsmOperand(rn_lo, lsr, count), mi);
    }
    mov(rd_hi, AsmOperand(rn_hi, shift, count));
  }
}

void MacroAssembler::long_shift(Register rd_lo, Register rd_hi,
                                Register rn_lo, Register rn_hi,
                                AsmShift shift, int count) {
  assert(count != 0 && (count & ~63) == 0, "must be");

  if (shift == lsl) {
    assert_different_registers(rd_hi, rn_lo);
    if (count >= 32) {
      mov(rd_hi, AsmOperand(rn_lo, lsl, count - 32));
      mov(rd_lo, 0);
    } else {
      mov(rd_hi, AsmOperand(rn_hi, lsl, count));
      orr(rd_hi, rd_hi, AsmOperand(rn_lo, lsr, 32 - count));
      mov(rd_lo, AsmOperand(rn_lo, lsl, count));
    }
  } else {
    assert_different_registers(rd_lo, rn_hi);
    if (count >= 32) {
      if (count == 32) {
        mov(rd_lo, rn_hi);
      } else {
        mov(rd_lo, AsmOperand(rn_hi, shift, count - 32));
      }
      if (shift == asr) {
        mov(rd_hi, AsmOperand(rn_hi, asr, 0));
      } else {
        mov(rd_hi, 0);
      }
    } else {
      mov(rd_lo, AsmOperand(rn_lo, lsr, count));
      orr(rd_lo, rd_lo, AsmOperand(rn_hi, lsl, 32 - count));
      mov(rd_hi, AsmOperand(rn_hi, shift, count));
    }
  }
}

void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file, int line) {
  // This code pattern is matched in NativeInstruction::skip_verify_oop.
  // Update it at modifications.
  if (!VerifyOops) return;

  char buffer[64];
#ifdef COMPILER1
  if (CommentedAssembly) {
    os::snprintf_checked(buffer, sizeof(buffer), "verify_oop at %d", offset());
    block_comment(buffer);
  }
#endif
  const char* msg_buffer = nullptr;
  {
    ResourceMark rm;
    stringStream ss;
    ss.print("%s at offset %d (%s:%d)", s, offset(), file, line);
    msg_buffer = code_string(ss.as_string());
  }

  save_all_registers();

  if (reg != R2) {
    mov(R2, reg);                                // oop to verify
  }
  mov(R1, SP);                                   // register save area

  Label done;
  InlinedString Lmsg(msg_buffer);
  ldr_literal(R0, Lmsg);                         // message

  // call indirectly to solve generation ordering problem
  ldr_global_ptr(Rtemp, StubRoutines::verify_oop_subroutine_entry_address());
  call(Rtemp);

  restore_all_registers();

  b(done);
#ifdef COMPILER2
  int off = offset();
#endif
  bind_literal(Lmsg);
#ifdef COMPILER2
  if (offset() - off == 1 * wordSize) {
    // no padding, so insert nop for worst-case sizing
    nop();
  }
#endif
  bind(done);
}

void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* file, int line) {
  if (!VerifyOops) return;

  const char* msg_buffer = nullptr;
  {
    ResourceMark rm;
    stringStream ss;
    if ((addr.base() == SP) && (addr.index() == noreg)) {
      ss.print("verify_oop_addr SP[%d]: %s", (int)addr.disp(), s);
    } else {
      ss.print("verify_oop_addr: %s", s);
    }
    ss.print(" (%s:%d)", file, line);
    msg_buffer = code_string(ss.as_string());
  }

  int push_size = save_all_registers();

  if (addr.base() == SP) {
    // computes an addr that takes into account the push
    if (addr.index() != noreg) {
      Register new_base = addr.index() == R2 ? R1 : R2; // avoid corrupting the index
      add(new_base, SP, push_size);
      addr = addr.rebase(new_base);
    } else {
      addr = addr.plus_disp(push_size);
    }
  }

  ldr(R2, addr);                                 // oop to verify
  mov(R1, SP);                                   // register save area

  Label done;
  InlinedString Lmsg(msg_buffer);
  ldr_literal(R0, Lmsg);                         // message

  // call indirectly to solve generation ordering problem
  ldr_global_ptr(Rtemp, StubRoutines::verify_oop_subroutine_entry_address());
  call(Rtemp);

  restore_all_registers();

  b(done);
  bind_literal(Lmsg);
  bind(done);
}

void MacroAssembler::c2bool(Register x)
{
  tst(x, 0xff);   // Only look at the lowest byte
  mov(x, 1, ne);
}

void MacroAssembler::null_check(Register reg, Register tmp, int offset) {
  if (needs_explicit_null_check(offset)) {
    assert_different_registers(reg, tmp);
    if (tmp == noreg) {
      tmp = Rtemp;
      assert((! Thread::current()->is_Compiler_thread()) ||
             (! (ciEnv::current()->task() == nullptr)) ||
             (! (ciEnv::current()->comp_level() == CompLevel_full_optimization)),
             "Rtemp not available in C2"); // explicit tmp register required
      // XXX: could we mark the code buffer as not compatible with C2 ?
    }
    ldr(tmp, Address(reg));
  }
}

// Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`.
void MacroAssembler::tlab_allocate(Register obj, Register obj_end, Register tmp1,
                                   RegisterOrConstant size_expression, Label& slow_case) {
  BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
  bs->tlab_allocate(this, obj, obj_end, tmp1, size_expression, slow_case);
}

// Fills the memory region [start, end) with zeroes. Clobbers the `start` and `tmp` registers.
void MacroAssembler::zero_memory(Register start, Register end, Register tmp) {
  Label loop;
  const Register ptr = start;

  mov(tmp, 0);
  bind(loop);
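  // Store a zero word and post-increment while ptr < end (unsigned compare).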
  cmp(ptr, end);
  str(tmp, Address(ptr, wordSize, post_indexed), lo);
  b(loop, lo);
}

void MacroAssembler::arm_stack_overflow_check(int frame_size_in_bytes, Register tmp) {
  // Version of AbstractAssembler::generate_stack_overflow_check optimized for ARM
  const int page_size = (int)os::vm_page_size();
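  // A store every 0xff0 bytes is guaranteed to touch each page of the range,
  // since 0xff0 is below the page size and still encodable as an immediate offset.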

  sub_slow(tmp, SP, StackOverflow::stack_shadow_zone_size());
  strb(R0, Address(tmp));
  for (; frame_size_in_bytes >= page_size; frame_size_in_bytes -= 0xff0) {
    strb(R0, Address(tmp, -0xff0, pre_indexed));
  }
}

void MacroAssembler::arm_stack_overflow_check(Register Rsize, Register tmp) {
  Label loop;

  mov(tmp, SP);
  add_slow(Rsize, Rsize, StackOverflow::stack_shadow_zone_size() - os::vm_page_size());
  bind(loop);
  subs(Rsize, Rsize, 0xff0);
  strb(R0, Address(tmp, -0xff0, pre_indexed));
  b(loop, hi);
}

void MacroAssembler::stop(const char* msg) {
  // This code pattern is matched in NativeInstruction::is_stop.
  // Update it at modifications.
#ifdef COMPILER1
  if (CommentedAssembly) {
    block_comment("stop");
  }
#endif

  InlinedAddress Ldebug(CAST_FROM_FN_PTR(address, MacroAssembler::debug));
  InlinedString Lmsg(msg);

  // save all registers for further inspection
  save_all_registers();

  ldr_literal(R0, Lmsg);                     // message
  mov(R1, SP);                               // register save area

  ldr_literal(PC, Ldebug);                   // call MacroAssembler::debug

  bind_literal(Lmsg);
  bind_literal(Ldebug);
}

void MacroAssembler::warn(const char* msg) {
#ifdef COMPILER1
  if (CommentedAssembly) {
    block_comment("warn");
  }
#endif

  InlinedAddress Lwarn(CAST_FROM_FN_PTR(address, warning));
  InlinedString Lmsg(msg);
  Label done;

  int push_size = save_caller_save_registers();

  ldr_literal(R0, Lmsg);                    // message
  ldr_literal(LR, Lwarn);                   // call warning

  call(LR);

  restore_caller_save_registers();

  b(done);
  bind_literal(Lmsg);
  bind_literal(Lwarn);
  bind(done);
}


int MacroAssembler::save_all_registers() {
  // This code pattern is matched in NativeInstruction::is_save_all_registers.
  // Update it at modifications.
  push(RegisterSet(R0, R12) | RegisterSet(LR) | RegisterSet(PC));
  return 15*wordSize;
}

void MacroAssembler::restore_all_registers() {
  pop(RegisterSet(R0, R12) | RegisterSet(LR));   // restore registers
  add(SP, SP, wordSize);                         // discard saved PC
}

int MacroAssembler::save_caller_save_registers() {
#if R9_IS_SCRATCHED
  // Save also R10 to preserve alignment
  push(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR) | RegisterSet(R9, R10));
  return 8*wordSize;
#else
  push(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR));
  return 6*wordSize;
#endif
}

void MacroAssembler::restore_caller_save_registers() {
#if R9_IS_SCRATCHED
  pop(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR) | RegisterSet(R9, R10));
#else
  pop(RegisterSet(R0, R3) | RegisterSet(R12) | RegisterSet(LR));
#endif
}

void MacroAssembler::debug(const char* msg, const intx* registers) {
  // In order to get locks to work, we need to fake an in_VM state
  JavaThread* thread = JavaThread::current();
  thread->set_thread_state(_thread_in_vm);

  if (ShowMessageBoxOnError) {
    ttyLocker ttyl;
    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
      BytecodeCounter::print();
    }
    if (os::message_box(msg, "Execution stopped, print registers?")) {
      // saved registers: R0-R12, LR, PC
      const int nregs = 15;
      const Register regs[nregs] = {R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, PC};

      for (int i = 0; i < nregs; i++) {
        tty->print_cr("%s = " INTPTR_FORMAT, regs[i]->name(), registers[i]);
      }

      // derive original SP value from the address of register save area
      tty->print_cr("%s = " INTPTR_FORMAT, SP->name(), p2i(&registers[nregs]));
    }
    BREAKPOINT;
  } else {
    ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
  }
  assert(false, "DEBUG MESSAGE: %s", msg);
  fatal("%s", msg); // returning from MacroAssembler::debug is not supported
}

void MacroAssembler::unimplemented(const char* what) {
  const char* buf = nullptr;
  {
    ResourceMark rm;
    stringStream ss;
    ss.print("unimplemented: %s", what);
    buf = code_string(ss.as_string());
  }
  stop(buf);
}


// Implementation of FixedSizeCodeBlock

FixedSizeCodeBlock::FixedSizeCodeBlock(MacroAssembler* masm, int size_in_instrs, bool enabled) :
_masm(masm), _start(masm->pc()), _size_in_instrs(size_in_instrs), _enabled(enabled) {
}

FixedSizeCodeBlock::~FixedSizeCodeBlock() {
  if (_enabled) {
    address curr_pc = _masm->pc();

    assert(_start < curr_pc, "invalid current pc");
    guarantee(curr_pc <= _start + _size_in_instrs * Assembler::InstructionSize, "code block is too long");

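    // Pad the emitted code with nops up to the declared fixed size.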
    int nops_count = (_start - curr_pc) / Assembler::InstructionSize + _size_in_instrs;
    for (int i = 0; i < nops_count; i++) {
      _masm->nop();
    }
  }
}


// Serializes memory. Potentially blows flags and reg.
// tmp is a scratch for v6 co-processor write op (could be noreg for other architecture versions)
// preserve_flags takes a longer path in LoadStore case (dmb rather than control dependency) to preserve status flags. Optional.
// load_tgt is an ordered load target in a LoadStore case only, to create dependency between the load operation and conditional branch. Optional.
void MacroAssembler::membar(Membar_mask_bits order_constraint,
                            Register tmp,
                            bool preserve_flags,
                            Register load_tgt) {

  if (order_constraint == StoreStore) {
    dmb(DMB_st, tmp);
  } else if ((order_constraint & StoreLoad)  ||
             (order_constraint & LoadLoad)   ||
             (order_constraint & StoreStore) ||
             (load_tgt == noreg)             ||
             preserve_flags) {
    dmb(DMB_all, tmp);
  } else {
    // LoadStore: speculative store reordering is prohibited

    // By providing an ordered load target register, we avoid an extra memory load reference
    Label not_taken;
    bind(not_taken);
    cmp(load_tgt, load_tgt);
    b(not_taken, ne);
  }
}


// If "allow_fallthrough_on_failure" is false, we always branch to "slow_case"
// on failure, so fall-through can only mean success.
// "one_shot" controls whether we loop and retry to mitigate spurious failures.
// This is only needed for C2, which for some reason does not retry,
// while C1/interpreter does.
// TODO: measure if it makes a difference

void MacroAssembler::cas_for_lock_acquire(Register oldval, Register newval,
  Register base, Register tmp, Label &slow_case,
  bool allow_fallthrough_on_failure, bool one_shot)
{

  bool fallthrough_is_success = false;

  // ARM Litmus Test example does prefetching here.
  // TODO: investigate if it helps performance

  // The last store was to the displaced header, so to prevent
  // reordering we must issue a StoreStore or Release barrier before
  // the CAS store.

  membar(MacroAssembler::StoreStore, noreg);

  if (one_shot) {
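    // Single LL/SC attempt: ldrex loads the mark word; strex, executed only if it
    // still equals oldval, stores newval and writes 0 to tmp on success, so the
    // final cmp leaves EQ set exactly when the store succeeded.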
    ldrex(tmp, Address(base, oopDesc::mark_offset_in_bytes()));
    cmp(tmp, oldval);
    strex(tmp, newval, Address(base, oopDesc::mark_offset_in_bytes()), eq);
    cmp(tmp, 0, eq);
  } else {
    atomic_cas_bool(oldval, newval, base, oopDesc::mark_offset_in_bytes(), tmp);
  }

  // Here, on success, EQ is set, NE otherwise

  // MemBarAcquireLock barrier
  // According to JSR-133 Cookbook, this should be LoadLoad | LoadStore,
  // but that doesn't prevent a load or store from floating up between
  // the load and store in the CAS sequence, so play it safe and
  // do a full fence.
  // Note: we preserve flags here.
  // Todo: Do we really need this also for the CAS fail case?
  membar(Membar_mask_bits(LoadLoad | LoadStore | StoreStore | StoreLoad), noreg);
  if (!fallthrough_is_success && !allow_fallthrough_on_failure) {
    b(slow_case, ne);
  }
}

void MacroAssembler::cas_for_lock_release(Register oldval, Register newval,
  Register base, Register tmp, Label &slow_case,
  bool allow_fallthrough_on_failure, bool one_shot)
{
  bool fallthrough_is_success = false;

  assert_different_registers(oldval, newval, base, tmp);

  // MemBarReleaseLock barrier
  // According to JSR-133 Cookbook, this should be StoreStore | LoadStore,
  // but that doesn't prevent a load or store from floating down between
  // the load and store in the CAS sequence, so play it safe and
  // do a full fence.
  membar(Membar_mask_bits(LoadLoad | LoadStore | StoreStore | StoreLoad), tmp);

  if (one_shot) {
    ldrex(tmp, Address(base, oopDesc::mark_offset_in_bytes()));
    cmp(tmp, oldval);
    strex(tmp, newval, Address(base, oopDesc::mark_offset_in_bytes()), eq);
    cmp(tmp, 0, eq);
  } else {
    atomic_cas_bool(oldval, newval, base, oopDesc::mark_offset_in_bytes(), tmp);
  }
  if (!fallthrough_is_success && !allow_fallthrough_on_failure) {
    b(slow_case, ne);
  }

  // ExitEnter
  // According to JSR-133 Cookbook, this should be StoreLoad, the same
  // barrier that follows volatile store.
  // TODO: Should be able to remove on armv8 if volatile loads
  // use the load-acquire instruction.
  membar(StoreLoad, noreg);
}

#ifndef PRODUCT

// Preserves flags and all registers.
// On SMP the updated value might not be visible to external observers without a synchronization barrier
void MacroAssembler::cond_atomic_inc32(AsmCondition cond, int* counter_addr) {
  if (counter_addr != nullptr) {
    InlinedAddress counter_addr_literal((address)counter_addr);
    Label done, retry;
    if (cond != al) {
      b(done, inverse(cond));
    }

    push(RegisterSet(R0, R3) | RegisterSet(Rtemp));
    ldr_literal(R0, counter_addr_literal);

    mrs(CPSR, Rtemp);

    bind(retry);
    ldr_s32(R1, Address(R0));
    add(R2, R1, 1);
    atomic_cas_bool(R1, R2, R0, 0, R3);
    b(retry, ne);

    msr(CPSR_fsxc, Rtemp);

    pop(RegisterSet(R0, R3) | RegisterSet(Rtemp));

    b(done);
    bind_literal(counter_addr_literal);

    bind(done);
  }
}

#endif // !PRODUCT

void MacroAssembler::resolve_jobject(Register value,
                                     Register tmp1,
                                     Register tmp2) {
  assert_different_registers(value, tmp1, tmp2);
  Label done, tagged, weak_tagged;

  cbz(value, done);           // Use null as-is.
  tst(value, JNIHandles::tag_mask); // Test for tag.
  b(tagged, ne);

  // Resolve local handle
  access_load_at(T_OBJECT, IN_NATIVE | AS_RAW, Address(value, 0), value, tmp1, tmp2, noreg);
  verify_oop(value);
  b(done);

  bind(tagged);
  tst(value, JNIHandles::TypeTag::weak_global); // Test for weak tag.
  b(weak_tagged, ne);

  // Resolve global handle
  access_load_at(T_OBJECT, IN_NATIVE, Address(value, -JNIHandles::TypeTag::global), value, tmp1, tmp2, noreg);
  verify_oop(value);
  b(done);

  bind(weak_tagged);
  // Resolve jweak.
  access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF,
                 Address(value, -JNIHandles::TypeTag::weak_global), value, tmp1, tmp2, noreg);
  verify_oop(value);

  bind(done);
}

void MacroAssembler::resolve_global_jobject(Register value,
                                            Register tmp1,
                                            Register tmp2) {
  assert_different_registers(value, tmp1, tmp2);
  Label done;

  cbz(value, done);           // Use null as-is.

#ifdef ASSERT
  {
    Label valid_global_tag;
    tst(value, JNIHandles::TypeTag::global); // Test for global tag.
    b(valid_global_tag, ne);
    stop("non global jobject using resolve_global_jobject");
    bind(valid_global_tag);
  }
#endif

  // Resolve global handle
  access_load_at(T_OBJECT, IN_NATIVE, Address(value, -JNIHandles::TypeTag::global), value, tmp1, tmp2, noreg);
  verify_oop(value);

  bind(done);
}


//////////////////////////////////////////////////////////////////////////////////


void MacroAssembler::load_sized_value(Register dst, Address src,
                                      size_t size_in_bytes, bool is_signed, AsmCondition cond) {
  switch (size_in_bytes) {
    case  4: ldr(dst, src, cond); break;
    case  2: is_signed ? ldrsh(dst, src, cond) : ldrh(dst, src, cond); break;
    case  1: is_signed ? ldrsb(dst, src, cond) : ldrb(dst, src, cond); break;
    default: ShouldNotReachHere();
  }
}


void MacroAssembler::store_sized_value(Register src, Address dst, size_t size_in_bytes, AsmCondition cond) {
  switch (size_in_bytes) {
    case  4: str(src, dst, cond);  break;
    case  2: strh(src, dst, cond); break;
    case  1: strb(src, dst, cond); break;
    default: ShouldNotReachHere();
  }
}

// Look up the method for a megamorphic invokeinterface call.
// The target method is determined by <Rintf, itable_index>.
// The receiver klass is in Rklass.
// On success, the result will be in method_result, and execution falls through.
// On failure, execution transfers to the given label.
void MacroAssembler::lookup_interface_method(Register Rklass,
                                             Register Rintf,
                                             RegisterOrConstant itable_index,
                                             Register method_result,
                                             Register Rscan,
                                             Register Rtmp,
                                             Label& L_no_such_interface) {

  assert_different_registers(Rklass, Rintf, Rscan, Rtmp);

  const int entry_size = itableOffsetEntry::size() * HeapWordSize;
  assert(itableOffsetEntry::interface_offset() == 0, "not added for convenience");

  // Compute start of first itableOffsetEntry (which is at the end of the vtable)
  const int base = in_bytes(Klass::vtable_start_offset());
  const int scale = exact_log2(vtableEntry::size_in_bytes());
  ldr_s32(Rtmp, Address(Rklass, Klass::vtable_length_offset())); // Get length of vtable
  add(Rscan, Rklass, base);
  add(Rscan, Rscan, AsmOperand(Rtmp, lsl, scale));

  // Search through the itable for an interface equal to incoming Rintf
  // itable looks like [intface][offset][intface][offset][intface][offset]

  Label loop;
  bind(loop);
  ldr(Rtmp, Address(Rscan, entry_size, post_indexed));
  cmp(Rtmp, Rintf);  // set ZF and CF if interface is found
  cmn(Rtmp, 0, ne);  // check if tmp == 0 and clear CF if it is
  b(loop, ne);

  // CF == 0 means we reached the end of itable without finding icklass
  b(L_no_such_interface, cc);

  if (method_result != noreg) {
    // Interface found at previous position of Rscan, now load the method
    ldr_s32(Rtmp, Address(Rscan, in_bytes(itableOffsetEntry::offset_offset()) - entry_size));
    if (itable_index.is_register()) {
      add(Rtmp, Rtmp, Rklass); // Add offset to Klass*
      assert(itableMethodEntry::size() * HeapWordSize == wordSize, "adjust the scaling in the code below");
      assert(itableMethodEntry::method_offset() == 0, "adjust the offset in the code below");
      ldr(method_result, Address::indexed_ptr(Rtmp, itable_index.as_register()));
    } else {
      int method_offset = itableMethodEntry::size() * HeapWordSize * itable_index.as_constant() +
                          in_bytes(itableMethodEntry::method_offset());
      add_slow(method_result, Rklass, method_offset);
      ldr(method_result, Address(method_result, Rtmp));
    }
  }
}


void MacroAssembler::inc_counter(address counter_addr, Register tmpreg1, Register tmpreg2) {
  mov_slow(tmpreg1, counter_addr);
  ldr_s32(tmpreg2, tmpreg1);
  add_32(tmpreg2, tmpreg2, 1);
  str_32(tmpreg2, tmpreg1);
}

void MacroAssembler::floating_cmp(Register dst) {
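  // Branch-free condensation of the FPSCR flags from a preceding float compare
  // into -1/0/+1: with bit 27 forced to 1, the EOR leaves N^V in bit 31 and !Z in
  // bit 30, and the arithmetic shift sign-extends those two bits, yielding -1 for
  // "less" (or unordered), 0 for "equal" and +1 for "greater".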
  vmrs(dst, FPSCR);
  orr(dst, dst, 0x08000000);
  eor(dst, dst, AsmOperand(dst, lsl, 3));
  mov(dst, AsmOperand(dst, asr, 30));
}

void MacroAssembler::restore_default_fp_mode() {
#ifndef __SOFTFP__
  // Round to Near mode, IEEE compatible, masked exceptions
  mov(Rtemp, 0);
  vmsr(FPSCR, Rtemp);
#endif // !__SOFTFP__
}

// 24-bit word range == 26-bit byte range
bool check26(int offset) {
  // this could be simplified, but it mimics encoding and decoding
  // an actual branch instruction
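  // i.e., the offset must be a multiple of 4 that fits in signed 26 bits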
1451   int off1 = offset << 6 >> 8;
1452   int encoded = off1 & ((1<<24)-1);
1453   int decoded = encoded << 8 >> 6;
1454   return offset == decoded;
1455 }
1456 
1457 // Perform some slight adjustments so the default 32MB code cache
1458 // is fully reachable.
1459 static inline address first_cache_address() {
1460   return CodeCache::low_bound() + sizeof(HeapBlock::Header);
1461 }
1462 static inline address last_cache_address() {
1463   return CodeCache::high_bound() - Assembler::InstructionSize;
1464 }
1465 
1466 
1467 // Can we reach target using unconditional branch or call from anywhere
1468 // in the code cache (because code can be relocated)?
1469 bool MacroAssembler::_reachable_from_cache(address target) {
1470 #ifdef __thumb__
1471   if ((1 & (intptr_t)target) != 0) {
1472     // Return false to avoid 'b' if we need switching to THUMB mode.
1473     return false;
1474   }
1475 #endif
1476 
1477   address cl = first_cache_address();
1478   address ch = last_cache_address();
1479 
1480   if (ForceUnreachable) {
1481     // Only addresses from CodeCache can be treated as reachable.
1482     if (target < CodeCache::low_bound() || CodeCache::high_bound() < target) {
1483       return false;
1484     }
1485   }
1486 
1487   intptr_t loffset = (intptr_t)target - (intptr_t)cl;
1488   intptr_t hoffset = (intptr_t)target - (intptr_t)ch;
1489 
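  // The "- 8" accounts for the ARM PC bias: a branch offset is relative to
  // PC, which reads as the address of the branch instruction plus 8.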
1490   return check26(loffset - 8) && check26(hoffset - 8);
1491 }
1492 
1493 bool MacroAssembler::reachable_from_cache(address target) {
1494   assert(CodeCache::contains(pc()), "not supported");
1495   return _reachable_from_cache(target);
1496 }
1497 
1498 // Can we reach the entire code cache from anywhere else in the code cache?
1499 bool MacroAssembler::_cache_fully_reachable() {
1500   address cl = first_cache_address();
1501   address ch = last_cache_address();
1502   return _reachable_from_cache(cl) && _reachable_from_cache(ch);
1503 }
1504 
1505 bool MacroAssembler::cache_fully_reachable() {
1506   assert(CodeCache::contains(pc()), "not supported");
1507   return _cache_fully_reachable();
1508 }
1509 
1510 void MacroAssembler::jump(address target, relocInfo::relocType rtype, Register scratch, AsmCondition cond) {
1511   assert((rtype == relocInfo::runtime_call_type) || (rtype == relocInfo::none), "not supported");
1512   if (reachable_from_cache(target)) {
1513     relocate(rtype);
1514     b(target, cond);
1515     return;
1516   }
1517 
1518   // Note: relocation is not needed for the code below,
1519   // since it encodes the target in absolute format.
1520   if (ignore_non_patchable_relocations()) {
1521     rtype = relocInfo::none;
1522   }
1523 
1524   if (VM_Version::supports_movw() && (scratch != noreg) && (rtype == relocInfo::none)) {
1525     // Note: this version cannot be (atomically) patched
1526     mov_slow(scratch, (intptr_t)target, cond);
1527     bx(scratch, cond);
1528   } else {
1529     Label skip;
1530     InlinedAddress address_literal(target);
1531     if (cond != al) {
1532       b(skip, inverse(cond));
1533     }
1534     relocate(rtype);
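    // Loading PC from an inline literal both performs the jump and keeps a
    // full 32-bit target word in the code stream that can be patched later.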
1535     ldr_literal(PC, address_literal);
1536     bind_literal(address_literal);
1537     bind(skip);
1538   }
1539 }
1540 
1541 // Similar to jump except that:
1542 // - near branches are emitted only if every possible destination in the code cache is near (the target may be patched later)
1543 // - movt/movw is never used (the pair cannot be patched atomically)
1544 void MacroAssembler::patchable_jump(address target, relocInfo::relocType rtype, Register scratch, AsmCondition cond) {
1545   assert((rtype == relocInfo::runtime_call_type) || (rtype == relocInfo::none), "not supported");
1546   if (cache_fully_reachable()) {
1547     // Note: this assumes that all possible targets (the initial one
1548     // and the addresses patched to) are all in the code cache.
1549     assert(CodeCache::contains(target), "target might be too far");
1550     relocate(rtype);
1551     b(target, cond);
1552     return;
1553   }
1554 
1555   // Discard the relocation information if it is not needed for patching,
1556   // since the encodings below store the target in absolute format.
1557   if (ignore_non_patchable_relocations()) {
1558     rtype = relocInfo::none;
1559   }
1560 
1561   {
1562     Label skip;
1563     InlinedAddress address_literal(target);
1564     if (cond != al) {
1565       b(skip, inverse(cond));
1566     }
1567     relocate(rtype);
1568     ldr_literal(PC, address_literal);
1569     bind_literal(address_literal);
1570     bind(skip);
1571   }
1572 }
1573 
1574 void MacroAssembler::call(address target, RelocationHolder rspec, AsmCondition cond) {
1575   Register scratch = LR;
1576   assert(rspec.type() == relocInfo::runtime_call_type || rspec.type() == relocInfo::none, "not supported");
1577   if (reachable_from_cache(target)) {
1578     relocate(rspec);
1579     bl(target, cond);
1580     return;
1581   }
1582 
1583   // Note: relocation is not needed for the code below,
1584   // since it encodes the target in absolute format.
1585   if (ignore_non_patchable_relocations()) {
1586     // This assumes the information was needed only for relocating the code.
1587     rspec = RelocationHolder::none;
1588   }
1589 
1590   if (VM_Version::supports_movw() && (rspec.type() == relocInfo::none)) {
1591     // Note: this version cannot be (atomically) patched
1592     mov_slow(scratch, (intptr_t)target, cond);
1593     blx(scratch, cond);
1594     return;
1595   }
1596 
1597   {
1598     Label ret_addr;
1599     if (cond != al) {
1600       b(ret_addr, inverse(cond));
1601     }
1602 
1604     InlinedAddress address_literal(target);
1605     relocate(rspec);
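    // Emulate a call: materialize the return address in LR with adr, then
    // jump by loading the target into PC from the inline literal.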
1606     adr(LR, ret_addr);
1607     ldr_literal(PC, address_literal);
1608 
1609     bind_literal(address_literal);
1610     bind(ret_addr);
1611   }
1612 }
1613 
1614 
1615 int MacroAssembler::patchable_call(address target, RelocationHolder const& rspec, bool c2) {
1616   assert(rspec.type() == relocInfo::static_call_type ||
1617          rspec.type() == relocInfo::none ||
1618          rspec.type() == relocInfo::opt_virtual_call_type, "not supported");
1619 
1620   // Always generate the relocation information, needed for patching
1621   relocate(rspec); // used by NativeCall::is_call_before()
1622   if (cache_fully_reachable()) {
1623     // Note: this assumes that all possible targets (the initial one
1624     // and the addresses patched to) are all in the code cache.
1625     assert(CodeCache::contains(target), "target might be too far");
1626     bl(target);
1627   } else {
1628     Label ret_addr;
1629     InlinedAddress address_literal(target);
1630     adr(LR, ret_addr);
1631     ldr_literal(PC, address_literal);
1632     bind_literal(address_literal);
1633     bind(ret_addr);
1634   }
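  // The returned offset marks the end of the call sequence; callers can use
  // it to locate the return address when inspecting or patching the call site.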
1635   return offset();
1636 }
1637 
1638 // ((OopHandle)result).resolve();
1639 void MacroAssembler::resolve_oop_handle(Register result) {
1640   // OopHandle::resolve is an indirection.
1641   ldr(result, Address(result, 0));
1642 }
1643 
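// mirror = method->constMethod()->constants()->pool_holder()->java_mirror(),
// with the final OopHandle indirection resolved by resolve_oop_handle.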
1644 void MacroAssembler::load_mirror(Register mirror, Register method, Register tmp) {
1645   const int mirror_offset = in_bytes(Klass::java_mirror_offset());
1646   ldr(tmp, Address(method, Method::const_offset()));
1647   ldr(tmp, Address(tmp,  ConstMethod::constants_offset()));
1648   ldr(tmp, Address(tmp, ConstantPool::pool_holder_offset()));
1649   ldr(mirror, Address(tmp, mirror_offset));
1650   resolve_oop_handle(mirror);
1651 }
1652 
1653 
1654 ///////////////////////////////////////////////////////////////////////////////
1655 
1656 // Compressed pointers
1657 
1658 
1659 void MacroAssembler::load_klass(Register dst_klass, Register src_oop, AsmCondition cond) {
1660   ldr(dst_klass, Address(src_oop, oopDesc::klass_offset_in_bytes()), cond);
1661 }
1662 
1663 // Blows src_klass.
1664 void MacroAssembler::store_klass(Register src_klass, Register dst_oop) {
1665   str(src_klass, Address(dst_oop, oopDesc::klass_offset_in_bytes()));
1666 }
1667 
1668 
1669 
1670 void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1, Register tmp2, Register tmp3, DecoratorSet decorators) {
1671   access_load_at(T_OBJECT, IN_HEAP | decorators, src, dst, tmp1, tmp2, tmp3);
1672 }
1673 
1674 // Blows src and flags.
1675 void MacroAssembler::store_heap_oop(Address obj, Register new_val, Register tmp1, Register tmp2, Register tmp3, DecoratorSet decorators) {
1676   access_store_at(T_OBJECT, IN_HEAP | decorators, obj, new_val, tmp1, tmp2, tmp3, false);
1677 }
1678 
1679 void MacroAssembler::store_heap_oop_null(Address obj, Register new_val, Register tmp1, Register tmp2, Register tmp3, DecoratorSet decorators) {
1680   access_store_at(T_OBJECT, IN_HEAP, obj, new_val, tmp1, tmp2, tmp3, true);
1681 }
1682 
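// Dispatch the access through the active GC's BarrierSetAssembler. AS_RAW
// accesses bypass GC-specific barriers by calling the base implementation
// non-virtually.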
1683 void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators,
1684                                     Address src, Register dst, Register tmp1, Register tmp2, Register tmp3) {
1685   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
1686   decorators = AccessInternal::decorator_fixup(decorators, type);
1687   bool as_raw = (decorators & AS_RAW) != 0;
1688   if (as_raw) {
1689     bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, tmp2, tmp3);
1690   } else {
1691     bs->load_at(this, decorators, type, dst, src, tmp1, tmp2, tmp3);
1692   }
1693 }
1694 
1695 void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators,
1696                                      Address obj, Register new_val, Register tmp1, Register tmp2, Register tmp3, bool is_null) {
1697   BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
1698   decorators = AccessInternal::decorator_fixup(decorators, type);
1699   bool as_raw = (decorators & AS_RAW) != 0;
1700   if (as_raw) {
1701     bs->BarrierSetAssembler::store_at(this, decorators, type, obj, new_val, tmp1, tmp2, tmp3, is_null);
1702   } else {
1703     bs->store_at(this, decorators, type, obj, new_val, tmp1, tmp2, tmp3, is_null);
1704   }
1705 }
1706 
1707 void MacroAssembler::safepoint_poll(Register tmp1, Label& slow_path) {
1708   ldr_u32(tmp1, Address(Rthread, JavaThread::polling_word_offset()));
1709   tst(tmp1, SafepointMechanism::poll_bit()); // Z is set iff the poll bit is clear
1710   b(slow_path, ne);                          // poll bit set: safepoint/handshake pending
1711 }
1712 
1713 void MacroAssembler::get_polling_page(Register dest) {
1714   ldr(dest, Address(Rthread, JavaThread::polling_page_offset()));
1715 }
1716 
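// Reads from the polling page itself. When the page is armed the load faults,
// and the relocation emitted here lets the signal handler recognize the poll.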
1717 void MacroAssembler::read_polling_page(Register dest, relocInfo::relocType rtype) {
1718   get_polling_page(dest);
1719   relocate(rtype);
1720   ldr(dest, Address(dest));
1721 }
1722 
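// Helpers to save, restore, or poison a subset of three registers, selected
// by bits 0..2 of a mask (bit 0 -> first register, etc.).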
1723 #define PUSH_REG(mask, bit, Reg)      \
1724   if (mask & ((unsigned)1 << bit)) {  \
1725     push(Reg);                        \
1726   }
1727 
1728 #define POP_REG(mask, bit, Reg, condition)   \
1729   if (mask & ((unsigned)1 << bit)) {         \
1730     pop(Reg, condition);                     \
1731   }
1732 
1733 #define PUSH_REGS(mask, R1, R2, R3) \
1734   PUSH_REG(mask, 0, R1)             \
1735   PUSH_REG(mask, 1, R2)             \
1736   PUSH_REG(mask, 2, R3)
1737 
1738 #define POP_REGS(mask, R1, R2, R3, condition)   \
1739   POP_REG(mask, 0, R1, condition)               \
1740   POP_REG(mask, 1, R2, condition)               \
1741   POP_REG(mask, 2, R3, condition)
1742 
1743 #define POISON_REG(mask, bit, Reg, poison)      \
1744   if (mask & ((unsigned)1 << bit)) {            \
1745     mov(Reg, poison);                           \
1746   }
1747 
1748 #define POISON_REGS(mask, R1, R2, R3, poison)   \
1749   POISON_REG(mask, 0, R1, poison)               \
1750   POISON_REG(mask, 1, R2, poison)               \
1751   POISON_REG(mask, 2, R3, poison)
1752 
1753 // Attempt to fast-lock an object
1754 // Registers:
1755 //  - obj: the object to be locked
1756 //  - t1, t2, t3: temp registers. If corresponding bit in savemask is set, they get saved, otherwise blown.
1757 // Result:
1758 //  - Success: fallthrough
1759 //  - Error:   break to slow, Z cleared.
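// Lightweight locking: CAS the mark word from unlocked (01) to locked (00)
// and record the object on the thread-local lock-stack; no displaced header
// is written.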
1760 void MacroAssembler::fast_lock(Register obj, Register t1, Register t2, Register t3, unsigned savemask, Label& slow) {
1761   assert_different_registers(obj, t1, t2, t3);
1762 
1763 #ifdef ASSERT
1764   // Poison scratch regs
1765   POISON_REGS((~savemask), t1, t2, t3, 0x10000001);
1766 #endif
1767 
1768   PUSH_REGS(savemask, t1, t2, t3);
1769 
1770   // Check if we would have space on lock-stack for the object.
1771   ldr(t1, Address(Rthread, JavaThread::lock_stack_top_offset()));
1772   // cmp(t1, (unsigned)LockStack::end_offset()); // the constant 1132 (0x46c) is not encodable as an ARM immediate
1773   movw(t2, LockStack::end_offset() - 1);
1774   cmp(t1, t2);
1775   POP_REGS(savemask, t1, t2, t3, gt);
1776   b(slow, gt); // Z is cleared
1777 
1778   // Prepare old, new header
1779   Register old_hdr = t1;
1780   Register new_hdr = t2;
1781   ldr(new_hdr, Address(obj, oopDesc::mark_offset_in_bytes()));
1782   bic(new_hdr, new_hdr, markWord::lock_mask_in_place);  // new header (00)
1783   orr(old_hdr, new_hdr, markWord::unlocked_value);      // old header (01)
1784 
1785   Label dummy;
1786 
1787   cas_for_lock_acquire(old_hdr /* old */, new_hdr /* new */,
1788       obj /* location */, t3 /* scratch */, dummy,
1789       true /* allow_fallthrough_on_failure */, true /* one_shot */);
1790 
1791   POP_REGS(savemask, t1, t2, t3, ne); // Cas failed -> slow
1792   b(slow, ne);                        // Cas failed -> slow
1793 
1794   // After successful lock, push object onto lock-stack
1795   ldr(t1, Address(Rthread, JavaThread::lock_stack_top_offset()));
1796   str(obj, Address(Rthread, t1));
1797   add(t1, t1, oopSize);
1798   str(t1, Address(Rthread, JavaThread::lock_stack_top_offset()));
1799 
1800   POP_REGS(savemask, t1, t2, t3, al);
1801 
1802 #ifdef ASSERT
1803   // Poison scratch regs
1804   POISON_REGS((~savemask), t1, t2, t3, 0x20000002);
1805 #endif
1806 
1807   // Success: fall through
1808 }
1809 
1810 // Attempt to fast-unlock an object
1811 // Registers:
1812 //  - obj: the object to be unlocked
1813 //  - t1, t2, t3: temp registers. If corresponding bit in savemask is set, they get saved, otherwise blown.
1814 // Result:
1815 //  - Success: fallthrough
1816 //  - Error:   break to slow, Z cleared.
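// Lightweight unlocking: CAS the mark word from locked (00) back to
// unlocked (01) and pop the object from the thread-local lock-stack.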
1817 void MacroAssembler::fast_unlock(Register obj, Register t1, Register t2, Register t3, unsigned savemask, Label& slow) {
1818   assert_different_registers(obj, t1, t2, t3);
1819 
1820 #ifdef ASSERT
1821   // Poison scratch regs
1822   POISON_REGS((~savemask), t1, t2, t3, 0x30000003);
1823 #endif
1824 
1825   PUSH_REGS(savemask, t1, t2, t3);
1826 
1827   // Prepare old, new header
1828   Register old_hdr = t1;
1829   Register new_hdr = t2;
1830   ldr(old_hdr, Address(obj, oopDesc::mark_offset_in_bytes()));
1831   bic(old_hdr, old_hdr, markWord::lock_mask_in_place);    // old header (00)
1832   orr(new_hdr, old_hdr, markWord::unlocked_value);        // new header (01)
1833 
1834   // Try to swing header from locked to unlocked
1835   Label dummy;
1836   cas_for_lock_release(old_hdr /* old */, new_hdr /* new */,
1837       obj /* location */, t3 /* scratch */, dummy,
1838       true /* allow_fallthrough_on_failure */, true /* one_shot */);
1839 
1840   POP_REGS(savemask, t1, t2, t3, ne); // Cas failed -> slow
1841   b(slow, ne);                        // Cas failed -> slow
1842 
1843   // After successful unlock, pop object from lock-stack
1844   ldr(t1, Address(Rthread, JavaThread::lock_stack_top_offset()));
1845   sub(t1, t1, oopSize);
1846   str(t1, Address(Rthread, JavaThread::lock_stack_top_offset()));
1847 
1848 #ifdef ASSERT
1849   // zero out popped slot
1850   mov(t2, 0);
1851   str(t2, Address(Rthread, t1));
1852 #endif
1853 
1854   POP_REGS(savemask, t1, t2, t3, al);
1855 
1856 #ifdef ASSERT
1857   // Poison scratch regs
1858   POISON_REGS((~savemask), t1, t2, t3, 0x40000004);
1859 #endif
1860 
1861   // Fallthrough: success
1862 }
1863 
1864 int MacroAssembler::ic_check_size() {
1865   return NativeInstruction::instruction_size * 7;
1866 }
1867 
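// Inline-cache check: compare the receiver's klass against the speculated
// klass recorded in the CompiledICData pointed to by Ricklass, and tail-jump
// to the IC miss stub on a mismatch. Returns the offset of the unverified
// entry point (UEP).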
1868 int MacroAssembler::ic_check(int end_alignment) {
1869   Register receiver = j_rarg0;
1870   Register tmp1 = R4;
1871   Register tmp2 = R5;
1872 
1873   // The UEP of a code blob ensures that the VEP is padded. However, the padding of the UEP is placed
1874   // before the inline cache check, so we don't have to execute any nop instructions when dispatching
1875   // through the UEP, yet we can ensure that the VEP is aligned appropriately. That's why we align
1876   // before the inline cache check here, and not after.
1877   align(end_alignment, offset() + ic_check_size());
1878 
1879   int uep_offset = offset();
1880 
1881   ldr(tmp1, Address(receiver, oopDesc::klass_offset_in_bytes()));
1882   ldr(tmp2, Address(Ricklass, CompiledICData::speculated_klass_offset()));
1883   cmp(tmp1, tmp2);
1884 
1885   Label dont;
1886   b(dont, eq);
1887   jump(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type);
1888   bind(dont);
1889   return uep_offset;
1890 }