/*
 * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
 * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "compiler/disassembler.hpp"
#include "gc/shared/barrierSet.hpp"
#include "gc/shared/barrierSetAssembler.hpp"
#include "gc/shared/cardTable.hpp"
#include "gc/shared/cardTableBarrierSet.hpp"
#include "interpreter/bytecodeHistogram.hpp"
#include "interpreter/interpreter.hpp"
#include "memory/resourceArea.hpp"
#include "memory/universe.hpp"
#include "nativeInst_riscv.hpp"
#include "oops/accessDecorators.hpp"
#include "oops/compressedOops.inline.hpp"
#include "oops/klass.inline.hpp"
#include "oops/oop.hpp"
#include "runtime/interfaceSupport.inline.hpp"
#include "runtime/jniHandles.inline.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#include "runtime/thread.hpp"
#include "utilities/powerOfTwo.hpp"
#ifdef COMPILER2
#include "opto/compile.hpp"
#include "opto/node.hpp"
#include "opto/output.hpp"
#endif

#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#else
#define BLOCK_COMMENT(str) block_comment(str)
#endif
#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")

static void pass_arg0(MacroAssembler* masm, Register arg) {
  if (c_rarg0 != arg) {
    assert_cond(masm != NULL);
    masm->mv(c_rarg0, arg);
  }
}

static void pass_arg1(MacroAssembler* masm, Register arg) {
  if (c_rarg1 != arg) {
    assert_cond(masm != NULL);
    masm->mv(c_rarg1, arg);
  }
}

static void pass_arg2(MacroAssembler* masm, Register arg) {
  if (c_rarg2 != arg) {
    assert_cond(masm != NULL);
    masm->mv(c_rarg2, arg);
  }
}

static void pass_arg3(MacroAssembler* masm, Register arg) {
  if (c_rarg3 != arg) {
    assert_cond(masm != NULL);
    masm->mv(c_rarg3, arg);
  }
}

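// Pad the code stream with nops until the current offset, plus
// extra_offset, is a multiple of the requested modulus. The
// CompressibleRegion allows the nops to be emitted in compressed (2-byte)
// form where that is what the requested alignment needs.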
void MacroAssembler::align(int modulus, int extra_offset) {
  CompressibleRegion cr(this);
  while ((offset() + extra_offset) % modulus != 0) { nop(); }
}

void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
  call_VM_base(oop_result, noreg, noreg, entry_point, number_of_arguments, check_exceptions);
}

// Implementation of call_VM versions

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             bool check_exceptions) {
  call_VM_helper(oop_result, entry_point, 0, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  pass_arg1(this, arg_1);
  call_VM_helper(oop_result, entry_point, 1, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {
  assert(arg_1 != c_rarg2, "smashed arg");
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM_helper(oop_result, entry_point, 2, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  assert(arg_1 != c_rarg3, "smashed arg");
  assert(arg_2 != c_rarg3, "smashed arg");
  pass_arg3(this, arg_3);

  assert(arg_1 != c_rarg2, "smashed arg");
  pass_arg2(this, arg_2);

  pass_arg1(this, arg_1);
  call_VM_helper(oop_result, entry_point, 3, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             int number_of_arguments,
                             bool check_exceptions) {
  call_VM_base(oop_result, xthread, last_java_sp, entry_point, number_of_arguments, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {
  assert(arg_1 != c_rarg2, "smashed arg");
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  assert(arg_1 != c_rarg3, "smashed arg");
  assert(arg_2 != c_rarg3, "smashed arg");
  pass_arg3(this, arg_3);
  assert(arg_1 != c_rarg2, "smashed arg");
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
}

// These are no-ops, overridden by InterpreterMacroAssembler.
void MacroAssembler::check_and_handle_earlyret(Register java_thread) {}
void MacroAssembler::check_and_handle_popframe(Register java_thread) {}

// Calls to C land
//
// When entering C land, the fp & esp of the last Java frame have to be
// recorded in the (thread-local) JavaThread object. When leaving C land,
// the last Java fp has to be reset to 0. This is required to allow proper
// stack traversal.
void MacroAssembler::set_last_Java_frame(Register last_java_sp,
                                         Register last_java_fp,
                                         Register last_java_pc,
                                         Register tmp) {
  if (last_java_pc->is_valid()) {
    sd(last_java_pc, Address(xthread,
                             JavaThread::frame_anchor_offset() +
                             JavaFrameAnchor::last_Java_pc_offset()));
  }

  // determine last_java_sp register
  if (last_java_sp == sp) {
    mv(tmp, sp);
    last_java_sp = tmp;
  } else if (!last_java_sp->is_valid()) {
    last_java_sp = esp;
  }

  sd(last_java_sp, Address(xthread, JavaThread::last_Java_sp_offset()));

  // last_java_fp is optional
  if (last_java_fp->is_valid()) {
    sd(last_java_fp, Address(xthread, JavaThread::last_Java_fp_offset()));
  }
}

void MacroAssembler::set_last_Java_frame(Register last_java_sp,
                                         Register last_java_fp,
                                         address  last_java_pc,
                                         Register tmp) {
  assert(last_java_pc != NULL, "must provide a valid PC");

  la(tmp, last_java_pc);
  sd(tmp, Address(xthread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()));

  set_last_Java_frame(last_java_sp, last_java_fp, noreg, tmp);
}

void MacroAssembler::set_last_Java_frame(Register last_java_sp,
                                         Register last_java_fp,
                                         Label &L,
                                         Register tmp) {
  if (L.is_bound()) {
    set_last_Java_frame(last_java_sp, last_java_fp, target(L), tmp);
  } else {
    InstructionMark im(this);
    L.add_patch_at(code(), locator());
    set_last_Java_frame(last_java_sp, last_java_fp, pc() /* Patched later */, tmp);
  }
}

void MacroAssembler::reset_last_Java_frame(bool clear_fp) {
  // we must set sp to zero to clear frame
  sd(zr, Address(xthread, JavaThread::last_Java_sp_offset()));

  // must clear fp, so that compiled frames are not confused; it is
  // possible that we need it only for debugging
  if (clear_fp) {
    sd(zr, Address(xthread, JavaThread::last_Java_fp_offset()));
  }

  // Always clear the pc because it could have been set by make_walkable()
  sd(zr, Address(xthread, JavaThread::last_Java_pc_offset()));
}

void MacroAssembler::call_VM_base(Register oop_result,
                                  Register java_thread,
                                  Register last_java_sp,
                                  address  entry_point,
                                  int      number_of_arguments,
                                  bool     check_exceptions) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
    java_thread = xthread;
  }
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = esp;
  }

  // debugging support
  assert(number_of_arguments >= 0   , "cannot have negative number of arguments");
  assert(java_thread == xthread, "unexpected register");

  assert(java_thread != oop_result  , "cannot use the same register for java_thread & oop_result");
  assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp");

  // push java thread (becomes first argument of C function)
  mv(c_rarg0, java_thread);

  // set last Java frame before call
  assert(last_java_sp != fp, "can't use fp");

  Label l;
  set_last_Java_frame(last_java_sp, fp, l, t0);

  // do the call, remove parameters
  MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments, &l);

  // reset last Java frame
  // Only interpreter should have to clear fp
  reset_last_Java_frame(true);

  // C++ interp handles this in the interpreter
  check_and_handle_popframe(java_thread);
  check_and_handle_earlyret(java_thread);

  if (check_exceptions) {
    // check for pending exceptions (java_thread is set upon return)
    ld(t0, Address(java_thread, in_bytes(Thread::pending_exception_offset())));
    Label ok;
    beqz(t0, ok);
    int32_t offset = 0;
    la_patchable(t0, RuntimeAddress(StubRoutines::forward_exception_entry()), offset);
    jalr(x0, t0, offset);
    bind(ok);
  }

  // get oop result if there is one and reset the value in the thread
  if (oop_result->is_valid()) {
    get_vm_result(oop_result, java_thread);
  }
}

void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) {
  ld(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
  sd(zr, Address(java_thread, JavaThread::vm_result_offset()));
  verify_oop(oop_result, "broken oop in call_VM_base");
}

void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) {
  ld(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset()));
  sd(zr, Address(java_thread, JavaThread::vm_result_2_offset()));
}

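// Class initialization barrier: branches to L_fast_path if klass is fully
// initialized or is being initialized by the current thread, and to
// L_slow_path otherwise. A NULL label means "just fall through".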
void MacroAssembler::clinit_barrier(Register klass, Register tmp, Label* L_fast_path, Label* L_slow_path) {
  assert(L_fast_path != NULL || L_slow_path != NULL, "at least one is required");
  assert_different_registers(klass, xthread, tmp);

  Label L_fallthrough;
  if (L_fast_path == NULL) {
    L_fast_path = &L_fallthrough;
  } else if (L_slow_path == NULL) {
    L_slow_path = &L_fallthrough;
  }

  // Fast path check: class is fully initialized
  lbu(tmp, Address(klass, InstanceKlass::init_state_offset()));
  sub(tmp, tmp, InstanceKlass::fully_initialized);
  beqz(tmp, *L_fast_path);

  // Fast path check: current thread is initializer thread
  ld(tmp, Address(klass, InstanceKlass::init_thread_offset()));

  if (L_slow_path == &L_fallthrough) {
    beq(xthread, tmp, *L_fast_path);
    bind(*L_slow_path);
  } else if (L_fast_path == &L_fallthrough) {
    bne(xthread, tmp, *L_slow_path);
    bind(*L_fast_path);
  } else {
    Unimplemented();
  }
}

void MacroAssembler::verify_oop(Register reg, const char* s) {
  if (!VerifyOops) { return; }

  // Build a message naming the register and pass it to the verify_oop subroutine
  const char* b = NULL;
  {
    ResourceMark rm;
    stringStream ss;
    ss.print("verify_oop: %s: %s", reg->name(), s);
    b = code_string(ss.as_string());
  }
  BLOCK_COMMENT("verify_oop {");

  push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp);

  mv(c_rarg0, reg); // c_rarg0 : x10
  li(t0, (uintptr_t)(address)b);

  // call indirectly to solve generation ordering problem
  int32_t offset = 0;
  la_patchable(t1, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()), offset);
  ld(t1, Address(t1, offset));
  jalr(t1);

  pop_reg(RegSet::of(ra, t0, t1, c_rarg0), sp);

  BLOCK_COMMENT("} verify_oop");
}

void MacroAssembler::verify_oop_addr(Address addr, const char* s) {
  if (!VerifyOops) {
    return;
  }

  const char* b = NULL;
  {
    ResourceMark rm;
    stringStream ss;
    ss.print("verify_oop_addr: %s", s);
    b = code_string(ss.as_string());
  }
  BLOCK_COMMENT("verify_oop_addr {");

  push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp);

  if (addr.uses(sp)) {
    la(x10, addr);
    ld(x10, Address(x10, 4 * wordSize));
  } else {
    ld(x10, addr);
  }

  li(t0, (uintptr_t)(address)b);

  // call indirectly to solve generation ordering problem
  int32_t offset = 0;
  la_patchable(t1, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()), offset);
  ld(t1, Address(t1, offset));
  jalr(t1);

  pop_reg(RegSet::of(ra, t0, t1, c_rarg0), sp);

  BLOCK_COMMENT("} verify_oop_addr");
}

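// Form the Address of an interpreter expression-stack slot. arg_slot is
// measured in stack elements; when it is a runtime value, the scaled slot
// address is computed into t0 (via shadd), so the returned Address must be
// used before t0 is clobbered.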
Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
                                         int extra_slot_offset) {
  // cf. TemplateTable::prepare_invoke(), if (load_receiver).
  int stackElementSize = Interpreter::stackElementSize;
  int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
#ifdef ASSERT
  int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
  assert(offset1 - offset == stackElementSize, "correct arithmetic");
#endif
  if (arg_slot.is_constant()) {
    return Address(esp, arg_slot.as_constant() * stackElementSize + offset);
  } else {
    assert_different_registers(t0, arg_slot.as_register());
    shadd(t0, arg_slot.as_register(), esp, t0, exact_log2(stackElementSize));
    return Address(t0, offset);
  }
}

#ifndef PRODUCT
extern "C" void findpc(intptr_t x);
#endif

void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[])
{
  // In order to get locks to work, we need to fake an in_VM state
  if (ShowMessageBoxOnError) {
    JavaThread* thread = JavaThread::current();
    JavaThreadState saved_state = thread->thread_state();
    thread->set_thread_state(_thread_in_vm);
#ifndef PRODUCT
    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
      ttyLocker ttyl;
      BytecodeCounter::print();
    }
#endif
    if (os::message_box(msg, "Execution stopped, print registers?")) {
      ttyLocker ttyl;
      tty->print_cr(" pc = 0x%016lx", pc);
#ifndef PRODUCT
      tty->cr();
      findpc(pc);
      tty->cr();
#endif
      tty->print_cr(" x0 = 0x%016lx", regs[0]);
      tty->print_cr(" x1 = 0x%016lx", regs[1]);
      tty->print_cr(" x2 = 0x%016lx", regs[2]);
      tty->print_cr(" x3 = 0x%016lx", regs[3]);
      tty->print_cr(" x4 = 0x%016lx", regs[4]);
      tty->print_cr(" x5 = 0x%016lx", regs[5]);
      tty->print_cr(" x6 = 0x%016lx", regs[6]);
      tty->print_cr(" x7 = 0x%016lx", regs[7]);
      tty->print_cr(" x8 = 0x%016lx", regs[8]);
      tty->print_cr(" x9 = 0x%016lx", regs[9]);
      tty->print_cr("x10 = 0x%016lx", regs[10]);
      tty->print_cr("x11 = 0x%016lx", regs[11]);
      tty->print_cr("x12 = 0x%016lx", regs[12]);
      tty->print_cr("x13 = 0x%016lx", regs[13]);
      tty->print_cr("x14 = 0x%016lx", regs[14]);
      tty->print_cr("x15 = 0x%016lx", regs[15]);
      tty->print_cr("x16 = 0x%016lx", regs[16]);
      tty->print_cr("x17 = 0x%016lx", regs[17]);
      tty->print_cr("x18 = 0x%016lx", regs[18]);
      tty->print_cr("x19 = 0x%016lx", regs[19]);
      tty->print_cr("x20 = 0x%016lx", regs[20]);
      tty->print_cr("x21 = 0x%016lx", regs[21]);
      tty->print_cr("x22 = 0x%016lx", regs[22]);
      tty->print_cr("x23 = 0x%016lx", regs[23]);
      tty->print_cr("x24 = 0x%016lx", regs[24]);
      tty->print_cr("x25 = 0x%016lx", regs[25]);
      tty->print_cr("x26 = 0x%016lx", regs[26]);
      tty->print_cr("x27 = 0x%016lx", regs[27]);
      tty->print_cr("x28 = 0x%016lx", regs[28]);
      tty->print_cr("x29 = 0x%016lx", regs[29]);
      tty->print_cr("x30 = 0x%016lx", regs[30]);
      tty->print_cr("x31 = 0x%016lx", regs[31]);
      BREAKPOINT;
    }
  }
  fatal("DEBUG MESSAGE: %s", msg);
}

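// Unbox a JNI handle in `value`, leaving the referenced oop (or NULL) in
// the same register. Weak handles are distinguished by the low tag bit and
// resolved with an ON_PHANTOM_OOP_REF load so the GC observes the access.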
void MacroAssembler::resolve_jobject(Register value, Register thread, Register tmp) {
  Label done, not_weak;
  beqz(value, done);           // Use NULL as-is.

  // Test for jweak tag.
  andi(t0, value, JNIHandles::weak_tag_mask);
  beqz(t0, not_weak);

  // Resolve jweak.
  access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, value,
                 Address(value, -JNIHandles::weak_tag_value), tmp, thread);
  verify_oop(value);
  j(done);

  bind(not_weak);
  // Resolve (untagged) jobject.
  access_load_at(T_OBJECT, IN_NATIVE, value, Address(value, 0), tmp, thread);
  verify_oop(value);
  bind(done);
}

void MacroAssembler::stop(const char* msg) {
  address ip = pc();
  pusha();
  li(c_rarg0, (uintptr_t)(address)msg);
  li(c_rarg1, (uintptr_t)(address)ip);
  mv(c_rarg2, sp);
  mv(c_rarg3, CAST_FROM_FN_PTR(address, MacroAssembler::debug64));
  jalr(c_rarg3);
  ebreak();
}

void MacroAssembler::unimplemented(const char* what) {
  const char* buf = NULL;
  {
    ResourceMark rm;
    stringStream ss;
    ss.print("unimplemented: %s", what);
    buf = code_string(ss.as_string());
  }
  stop(buf);
}

void MacroAssembler::emit_static_call_stub() {
  // CompiledDirectStaticCall::set_to_interpreted knows the
  // exact layout of this stub.

  ifence();
  mov_metadata(xmethod, (Metadata*)NULL);

  // Jump to the entry point of the i2c stub.
  int32_t offset = 0;
  movptr_with_offset(t0, 0, offset);
  jalr(x0, t0, offset);
}

void MacroAssembler::call_VM_leaf_base(address entry_point,
                                       int number_of_arguments,
                                       Label *retaddr) {
  call_native_base(entry_point, retaddr);
}

void MacroAssembler::call_native(address entry_point, Register arg_0) {
  pass_arg0(this, arg_0);
  call_native_base(entry_point);
}

void MacroAssembler::call_native_base(address entry_point, Label *retaddr) {
  int32_t offset = 0;
  push_reg(0x80000040, sp);   // push << t1 (x6) & xmethod (x31) >> to sp
  movptr_with_offset(t0, entry_point, offset);
  jalr(x1, t0, offset);
  if (retaddr != NULL) {
    bind(*retaddr);
  }
  pop_reg(0x80000040, sp);    // pop << t1 (x6) & xmethod (x31) >> from sp
}

void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
  call_VM_leaf_base(entry_point, number_of_arguments);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) {
  pass_arg0(this, arg_0);
  call_VM_leaf_base(entry_point, 1);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
  pass_arg0(this, arg_0);
  pass_arg1(this, arg_1);
  call_VM_leaf_base(entry_point, 2);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0,
                                  Register arg_1, Register arg_2) {
  pass_arg0(this, arg_0);
  pass_arg1(this, arg_1);
  pass_arg2(this, arg_2);
  call_VM_leaf_base(entry_point, 3);
}

void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) {
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 1);
}

void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
  assert(arg_0 != c_rarg1, "smashed arg");
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 2);
}

void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
  assert(arg_0 != c_rarg2, "smashed arg");
  assert(arg_1 != c_rarg2, "smashed arg");
  pass_arg2(this, arg_2);
  assert(arg_0 != c_rarg1, "smashed arg");
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 3);
}

void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) {
  assert(arg_0 != c_rarg3, "smashed arg");
  assert(arg_1 != c_rarg3, "smashed arg");
  assert(arg_2 != c_rarg3, "smashed arg");
  pass_arg3(this, arg_3);
  assert(arg_0 != c_rarg2, "smashed arg");
  assert(arg_1 != c_rarg2, "smashed arg");
  pass_arg2(this, arg_2);
  assert(arg_0 != c_rarg1, "smashed arg");
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 4);
}

void MacroAssembler::nop() {
  addi(x0, x0, 0);
}

void MacroAssembler::mv(Register Rd, Register Rs) {
  if (Rd != Rs) {
    addi(Rd, Rs, 0);
  }
}

void MacroAssembler::notr(Register Rd, Register Rs) {
  xori(Rd, Rs, -1);
}

void MacroAssembler::neg(Register Rd, Register Rs) {
  sub(Rd, x0, Rs);
}

void MacroAssembler::negw(Register Rd, Register Rs) {
  subw(Rd, x0, Rs);
}

void MacroAssembler::sext_w(Register Rd, Register Rs) {
  addiw(Rd, Rs, 0);
}

void MacroAssembler::zext_b(Register Rd, Register Rs) {
  andi(Rd, Rs, 0xFF);
}

void MacroAssembler::seqz(Register Rd, Register Rs) {
  sltiu(Rd, Rs, 1);
}

void MacroAssembler::snez(Register Rd, Register Rs) {
  sltu(Rd, x0, Rs);
}

void MacroAssembler::sltz(Register Rd, Register Rs) {
  slt(Rd, Rs, x0);
}

void MacroAssembler::sgtz(Register Rd, Register Rs) {
  slt(Rd, x0, Rs);
}

void MacroAssembler::fmv_s(FloatRegister Rd, FloatRegister Rs) {
  if (Rd != Rs) {
    fsgnj_s(Rd, Rs, Rs);
  }
}

void MacroAssembler::fabs_s(FloatRegister Rd, FloatRegister Rs) {
  fsgnjx_s(Rd, Rs, Rs);
}

void MacroAssembler::fneg_s(FloatRegister Rd, FloatRegister Rs) {
  fsgnjn_s(Rd, Rs, Rs);
}

void MacroAssembler::fmv_d(FloatRegister Rd, FloatRegister Rs) {
  if (Rd != Rs) {
    fsgnj_d(Rd, Rs, Rs);
  }
}

void MacroAssembler::fabs_d(FloatRegister Rd, FloatRegister Rs) {
  fsgnjx_d(Rd, Rs, Rs);
}

void MacroAssembler::fneg_d(FloatRegister Rd, FloatRegister Rs) {
  fsgnjn_d(Rd, Rs, Rs);
}

void MacroAssembler::vmnot_m(VectorRegister vd, VectorRegister vs) {
  vmnand_mm(vd, vs, vs);
}

void MacroAssembler::vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm) {
  vnsrl_wx(vd, vs, x0, vm);
}

void MacroAssembler::vfneg_v(VectorRegister vd, VectorRegister vs) {
  vfsgnjn_vv(vd, vs, vs);
}

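// Materialize the address `dest` into Rd. When dest is within the ±2 GiB
// pc-relative range this takes two instructions:
//
//   auipc Rd, hi20       // Rd = pc + sign_extend(hi20 << 12)
//   addi  Rd, Rd, lo12   // lo12 = sign-extended low 12 bits of the offset
//
// Since addi sign-extends lo12, hi20 must be computed from offset + 0x800
// (see the comment in the body). Out-of-range targets fall back to the
// longer movptr sequence.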
void MacroAssembler::la(Register Rd, const address &dest) {
  int64_t offset = dest - pc();
  if (is_offset_in_range(offset, 32)) {
    auipc(Rd, (int32_t)offset + 0x800);  // 0x800 compensates for the sign extension of the low 12 bits in the addi below
    addi(Rd, Rd, ((int64_t)offset << 52) >> 52);
  } else {
    movptr(Rd, dest);
  }
}

void MacroAssembler::la(Register Rd, const Address &adr) {
  InstructionMark im(this);
  code_section()->relocate(inst_mark(), adr.rspec());
  relocInfo::relocType rtype = adr.rspec().reloc()->type();

  switch (adr.getMode()) {
    case Address::literal: {
      if (rtype == relocInfo::none) {
        li(Rd, (intptr_t)(adr.target()));
      } else {
        movptr(Rd, adr.target());
      }
      break;
    }
    case Address::base_plus_offset: {
      int32_t offset = 0;
      baseOffset(Rd, adr, offset);
      addi(Rd, Rd, offset);
      break;
    }
    default:
      ShouldNotReachHere();
  }
}

void MacroAssembler::la(Register Rd, Label &label) {
  la(Rd, target(label));
}

#define INSN(NAME)                                                                \
  void MacroAssembler::NAME##z(Register Rs, const address &dest) {                \
    NAME(Rs, zr, dest);                                                           \
  }                                                                               \
  void MacroAssembler::NAME##z(Register Rs, Label &l, bool is_far) {              \
    NAME(Rs, zr, l, is_far);                                                      \
  }                                                                               \

  INSN(beq);
  INSN(bne);
  INSN(blt);
  INSN(ble);
  INSN(bge);
  INSN(bgt);

#undef INSN

// Float compare branch instructions

#define INSN(NAME, FLOATCMP, BRANCH)                                                                                   \
  void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far, bool is_unordered) {  \
    FLOATCMP##_s(t0, Rs1, Rs2);                                                                                        \
    BRANCH(t0, l, is_far);                                                                                             \
  }                                                                                                                    \
  void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far, bool is_unordered) { \
    FLOATCMP##_d(t0, Rs1, Rs2);                                                                                        \
    BRANCH(t0, l, is_far);                                                                                             \
  }

  INSN(beq, feq, bnez);
  INSN(bne, feq, beqz);

#undef INSN

#define INSN(NAME, FLOATCMP1, FLOATCMP2)                                              \
  void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l,   \
                                    bool is_far, bool is_unordered) {                 \
    if (is_unordered) {                                                               \
      /* jump if either source is NaN or condition is expected */                     \
      FLOATCMP2##_s(t0, Rs2, Rs1);                                                    \
      beqz(t0, l, is_far);                                                            \
    } else {                                                                          \
      /* jump if no NaN in source and condition is expected */                        \
      FLOATCMP1##_s(t0, Rs1, Rs2);                                                    \
      bnez(t0, l, is_far);                                                            \
    }                                                                                 \
  }                                                                                   \
  void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l,  \
                                     bool is_far, bool is_unordered) {                \
    if (is_unordered) {                                                               \
      /* jump if either source is NaN or condition is expected */                     \
      FLOATCMP2##_d(t0, Rs2, Rs1);                                                    \
      beqz(t0, l, is_far);                                                            \
    } else {                                                                          \
      /* jump if no NaN in source and condition is expected */                        \
      FLOATCMP1##_d(t0, Rs1, Rs2);                                                    \
      bnez(t0, l, is_far);                                                            \
    }                                                                                 \
  }

  INSN(ble, fle, flt);
  INSN(blt, flt, fle);

#undef INSN

#define INSN(NAME, CMP)                                                              \
  void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l,  \
                                    bool is_far, bool is_unordered) {                \
    float_##CMP(Rs2, Rs1, l, is_far, is_unordered);                                  \
  }                                                                                  \
  void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \
                                     bool is_far, bool is_unordered) {               \
    double_##CMP(Rs2, Rs1, l, is_far, is_unordered);                                 \
  }

  INSN(bgt, blt);
  INSN(bge, ble);

#undef INSN

#define INSN(NAME, CSR)                       \
  void MacroAssembler::NAME(Register Rd) {    \
    csrr(Rd, CSR);                            \
  }

  INSN(rdinstret,  CSR_INSTERT);
  INSN(rdcycle,    CSR_CYCLE);
  INSN(rdtime,     CSR_TIME);
  INSN(frcsr,      CSR_FCSR);
  INSN(frrm,       CSR_FRM);
  INSN(frflags,    CSR_FFLAGS);

#undef INSN

void MacroAssembler::csrr(Register Rd, unsigned csr) {
  csrrs(Rd, csr, x0);
}

#define INSN(NAME, OPFUN)                                      \
  void MacroAssembler::NAME(unsigned csr, Register Rs) {       \
    OPFUN(x0, csr, Rs);                                        \
  }

  INSN(csrw, csrrw);
  INSN(csrs, csrrs);
  INSN(csrc, csrrc);

#undef INSN

#define INSN(NAME, OPFUN)                                      \
  void MacroAssembler::NAME(unsigned csr, unsigned imm) {      \
    OPFUN(x0, csr, imm);                                       \
  }

  INSN(csrwi, csrrwi);
  INSN(csrsi, csrrsi);
  INSN(csrci, csrrci);

#undef INSN

#define INSN(NAME, CSR)                                      \
  void MacroAssembler::NAME(Register Rd, Register Rs) {      \
    csrrw(Rd, CSR, Rs);                                      \
  }

  INSN(fscsr,   CSR_FCSR);
  INSN(fsrm,    CSR_FRM);
  INSN(fsflags, CSR_FFLAGS);

#undef INSN

#define INSN(NAME)                              \
  void MacroAssembler::NAME(Register Rs) {      \
    NAME(x0, Rs);                               \
  }

  INSN(fscsr);
  INSN(fsrm);
  INSN(fsflags);

#undef INSN

void MacroAssembler::fsrmi(Register Rd, unsigned imm) {
  guarantee(imm < 5, "invalid rounding mode in FRM register");
  csrrwi(Rd, CSR_FRM, imm);
}

void MacroAssembler::fsflagsi(Register Rd, unsigned imm) {
  csrrwi(Rd, CSR_FFLAGS, imm);
}

#define INSN(NAME)                             \
  void MacroAssembler::NAME(unsigned imm) {    \
    NAME(x0, imm);                             \
  }

  INSN(fsrmi);
  INSN(fsflagsi);

#undef INSN

void MacroAssembler::push_reg(Register Rs)
{
  addi(esp, esp, 0 - wordSize);
  sd(Rs, Address(esp, 0));
}

void MacroAssembler::pop_reg(Register Rd)
{
  ld(Rd, esp, 0);
  addi(esp, esp, wordSize);
}

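// Decode a register bitset into an array of register encodings: bit i of
// `bitset` selects register x(i). The array is filled from x31 downwards,
// so regs[0] holds the highest-numbered register in the set.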
int MacroAssembler::bitset_to_regs(unsigned int bitset, unsigned char* regs) {
  int count = 0;
  // Scan the bitset and collect the selected registers
  for (int reg = 31; reg >= 0; reg--) {
    if ((1U << 31) & bitset) {
      regs[count++] = reg;
    }
    bitset <<= 1;
  }
  return count;
}

// Push lots of registers in the bit set supplied.  Don't push sp.
// Return the number of words pushed
int MacroAssembler::push_reg(unsigned int bitset, Register stack) {
  DEBUG_ONLY(int words_pushed = 0;)
  CompressibleRegion cr(this);

  unsigned char regs[32];
  int count = bitset_to_regs(bitset, regs);
  // reserve one slot to align for odd count
  int offset = is_even(count) ? 0 : wordSize;

  if (count) {
    addi(stack, stack, - count * wordSize - offset);
  }
  for (int i = count - 1; i >= 0; i--) {
    sd(as_Register(regs[i]), Address(stack, (count - 1 - i) * wordSize + offset));
    DEBUG_ONLY(words_pushed++;)
  }

  assert(words_pushed == count, "oops, pushed != count");

  return count;
}

int MacroAssembler::pop_reg(unsigned int bitset, Register stack) {
  DEBUG_ONLY(int words_popped = 0;)
  CompressibleRegion cr(this);

  unsigned char regs[32];
  int count = bitset_to_regs(bitset, regs);
  // reserve one slot to align for odd count
  int offset = is_even(count) ? 0 : wordSize;

  for (int i = count - 1; i >= 0; i--) {
    ld(as_Register(regs[i]), Address(stack, (count - 1 - i) * wordSize + offset));
    DEBUG_ONLY(words_popped++;)
  }

  if (count) {
    addi(stack, stack, count * wordSize + offset);
  }
  assert(words_popped == count, "oops, popped != count");

  return count;
}

// Push float registers in the bit set supplied.
// Return the number of words pushed.
int MacroAssembler::push_fp(unsigned int bitset, Register stack) {
  CompressibleRegion cr(this);
  int words_pushed = 0;
  unsigned char regs[32];
  int count = bitset_to_regs(bitset, regs);
  int push_slots = count + (count & 1);

  if (count) {
    addi(stack, stack, -push_slots * wordSize);
  }

  for (int i = count - 1; i >= 0; i--) {
    fsd(as_FloatRegister(regs[i]), Address(stack, (push_slots - 1 - i) * wordSize));
    words_pushed++;
  }

  assert(words_pushed == count, "oops, pushed(%d) != count(%d)", words_pushed, count);
  return count;
}

int MacroAssembler::pop_fp(unsigned int bitset, Register stack) {
  CompressibleRegion cr(this);
  int words_popped = 0;
  unsigned char regs[32];
  int count = bitset_to_regs(bitset, regs);
  int pop_slots = count + (count & 1);

  for (int i = count - 1; i >= 0; i--) {
    fld(as_FloatRegister(regs[i]), Address(stack, (pop_slots - 1 - i) * wordSize));
    words_popped++;
  }

  if (count) {
    addi(stack, stack, pop_slots * wordSize);
  }

  assert(words_popped == count, "oops, popped(%d) != count(%d)", words_popped, count);
  return count;
}

#ifdef COMPILER2
int MacroAssembler::push_vp(unsigned int bitset, Register stack) {
  CompressibleRegion cr(this);
  int vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE);

  // Scan the bitset and collect the selected registers
  unsigned char regs[32];
  int count = 0;
  for (int reg = 31; reg >= 0; reg--) {
    if ((1U << 31) & bitset) {
      regs[count++] = reg;
    }
    bitset <<= 1;
  }

  for (int i = 0; i < count; i++) {
    sub(stack, stack, vector_size_in_bytes);
    vs1r_v(as_VectorRegister(regs[i]), stack);
  }

  return count * vector_size_in_bytes / wordSize;
}

int MacroAssembler::pop_vp(unsigned int bitset, Register stack) {
  CompressibleRegion cr(this);
  int vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE);

  // Scan the bitset and collect the selected registers
  unsigned char regs[32];
  int count = 0;
  for (int reg = 31; reg >= 0; reg--) {
    if ((1U << 31) & bitset) {
      regs[count++] = reg;
    }
    bitset <<= 1;
  }

  for (int i = count - 1; i >= 0; i--) {
    vl1r_v(as_VectorRegister(regs[i]), stack);
    add(stack, stack, vector_size_in_bytes);
  }

  return count * vector_size_in_bytes / wordSize;
}
#endif // COMPILER2

void MacroAssembler::push_call_clobbered_registers_except(RegSet exclude) {
  CompressibleRegion cr(this);
  // Push integer registers x7, x10-x17, x28-x31.
  push_reg(RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31) - exclude, sp);

  // Push float registers f0-f7, f10-f17, f28-f31.
  addi(sp, sp, - wordSize * 20);
  int offset = 0;
  for (int i = 0; i < 32; i++) {
    if (i <= f7->encoding() || i >= f28->encoding() || (i >= f10->encoding() && i <= f17->encoding())) {
      fsd(as_FloatRegister(i), Address(sp, wordSize * (offset++)));
    }
  }
}

void MacroAssembler::pop_call_clobbered_registers_except(RegSet exclude) {
  CompressibleRegion cr(this);
  int offset = 0;
  for (int i = 0; i < 32; i++) {
    if (i <= f7->encoding() || i >= f28->encoding() || (i >= f10->encoding() && i <= f17->encoding())) {
      fld(as_FloatRegister(i), Address(sp, wordSize * (offset++)));
    }
  }
  addi(sp, sp, wordSize * 20);

  pop_reg(RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31) - exclude, sp);
}

// Push all the integer registers, except zr(x0) & sp(x2) & gp(x3) & tp(x4).
void MacroAssembler::pusha() {
  CompressibleRegion cr(this);
  push_reg(0xffffffe2, sp);
}

// Pop all the integer registers, except zr(x0) & sp(x2) & gp(x3) & tp(x4).
void MacroAssembler::popa() {
  CompressibleRegion cr(this);
  pop_reg(0xffffffe2, sp);
}

void MacroAssembler::push_CPU_state(bool save_vectors, int vector_size_in_bytes) {
  CompressibleRegion cr(this);
  // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4)
  push_reg(0xffffffe0, sp);

  // float registers
  addi(sp, sp, - 32 * wordSize);
  for (int i = 0; i < 32; i++) {
    fsd(as_FloatRegister(i), Address(sp, i * wordSize));
  }

  // vector registers
  if (save_vectors) {
    sub(sp, sp, vector_size_in_bytes * VectorRegisterImpl::number_of_registers);
    vsetvli(t0, x0, Assembler::e64, Assembler::m8);
    for (int i = 0; i < VectorRegisterImpl::number_of_registers; i += 8) {
      add(t0, sp, vector_size_in_bytes * i);
      vse64_v(as_VectorRegister(i), t0);
    }
  }
}

void MacroAssembler::pop_CPU_state(bool restore_vectors, int vector_size_in_bytes) {
  CompressibleRegion cr(this);
  // vector registers
  if (restore_vectors) {
    vsetvli(t0, x0, Assembler::e64, Assembler::m8);
    for (int i = 0; i < VectorRegisterImpl::number_of_registers; i += 8) {
      vle64_v(as_VectorRegister(i), sp);
      add(sp, sp, vector_size_in_bytes * 8);
    }
  }

  // float registers
  for (int i = 0; i < 32; i++) {
    fld(as_FloatRegister(i), Address(sp, i * wordSize));
  }
  addi(sp, sp, 32 * wordSize);

  // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4)
  pop_reg(0xffffffe0, sp);
}

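// The patch_* routines below rewrite the immediate fields of instructions
// that have already been emitted. The bit scattering follows the RISC-V
// base ISA encodings:
//
//   J-type (jal):            inst[31]    = imm[20]
//                            inst[30:21] = imm[10:1]
//                            inst[20]    = imm[11]
//                            inst[19:12] = imm[19:12]
//
//   B-type (beq/bne/...):    inst[31]    = imm[12]
//                            inst[30:25] = imm[10:5]
//                            inst[11:8]  = imm[4:1]
//                            inst[7]     = imm[11]
//
// Each routine returns the length in bytes of the patched sequence.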
static int patch_offset_in_jal(address branch, int64_t offset) {
  assert(is_imm_in_range(offset, 20, 1), "offset is too large to be patched in one jal instruction!\n");
  Assembler::patch(branch, 31, 31, (offset >> 20) & 0x1);                       // offset[20]    ==> branch[31]
  Assembler::patch(branch, 30, 21, (offset >> 1)  & 0x3ff);                     // offset[10:1]  ==> branch[30:21]
  Assembler::patch(branch, 20, 20, (offset >> 11) & 0x1);                       // offset[11]    ==> branch[20]
  Assembler::patch(branch, 19, 12, (offset >> 12) & 0xff);                      // offset[19:12] ==> branch[19:12]
  return NativeInstruction::instruction_size;                                   // only one instruction
}

static int patch_offset_in_conditional_branch(address branch, int64_t offset) {
  assert(is_imm_in_range(offset, 12, 1), "offset is too large to be patched in one beq/bge/bgeu/blt/bltu/bne instruction!\n");
  Assembler::patch(branch, 31, 31, (offset >> 12) & 0x1);                       // offset[12]    ==> branch[31]
  Assembler::patch(branch, 30, 25, (offset >> 5)  & 0x3f);                      // offset[10:5]  ==> branch[30:25]
  Assembler::patch(branch, 7,  7,  (offset >> 11) & 0x1);                       // offset[11]    ==> branch[7]
  Assembler::patch(branch, 11, 8,  (offset >> 1)  & 0xf);                       // offset[4:1]   ==> branch[11:8]
  return NativeInstruction::instruction_size;                                   // only one instruction
}

static int patch_offset_in_pc_relative(address branch, int64_t offset) {
  const int PC_RELATIVE_INSTRUCTION_NUM = 2;                                    // auipc, addi/jalr/load
  Assembler::patch(branch, 31, 12, ((offset + 0x800) >> 12) & 0xfffff);         // Auipc.          offset[31:12]  ==> branch[31:12]
  Assembler::patch(branch + 4, 31, 20, offset & 0xfff);                         // Addi/Jalr/Load. offset[11:0]   ==> branch[31:20]
  return PC_RELATIVE_INSTRUCTION_NUM * NativeInstruction::instruction_size;
}

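// A movptr sequence materializes a 48-bit address as
//   lui + addi + slli(11) + addi + slli(5) + addi/jalr/load,
// i.e. target = (target[47:28] << 28) + (target[27:16] << 16) +
// (target[15:5] << 5) + target[4:0]. Only the lui and the three
// addi-class slots carry address bits, so only those words are patched.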
static int patch_addr_in_movptr(address branch, address target) {
  const int MOVPTR_INSTRUCTIONS_NUM = 6;                                        // lui + addi + slli + addi + slli + addi/jalr/load
  int32_t lower = ((intptr_t)target << 36) >> 36;
  int64_t upper = ((intptr_t)target - lower) >> 28;
  Assembler::patch(branch + 0,  31, 12, upper & 0xfffff);                       // Lui.             target[47:28] + target[27] ==> branch[31:12]
  Assembler::patch(branch + 4,  31, 20, (lower >> 16) & 0xfff);                 // Addi.            target[27:16] ==> branch[31:20]
  Assembler::patch(branch + 12, 31, 20, (lower >> 5) & 0x7ff);                  // Addi.            target[15: 5] ==> branch[31:20]
  Assembler::patch(branch + 20, 31, 20, lower & 0x1f);                          // Addi/Jalr/Load.  target[ 4: 0] ==> branch[31:20]
  return MOVPTR_INSTRUCTIONS_NUM * NativeInstruction::instruction_size;
}

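// A li64 sequence materializes a full 64-bit constant as
//   lui + addi + slli(12) + addi + slli(12) + addi + slli(8) + addi,
// i.e. target = (imm20 << 44) + (imm12 << 32) + (imm12 << 20) +
// (imm12 << 8) + imm8. Because each addi immediate is sign-extended, the
// chunks below are pre-compensated so that the sums come out right.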
static int patch_imm_in_li64(address branch, address target) {
  const int LI64_INSTRUCTIONS_NUM = 8;                                          // lui + addi + slli + addi + slli + addi + slli + addi
  int64_t lower = (intptr_t)target & 0xffffffff;
  lower = lower - ((lower << 44) >> 44);
  int64_t tmp_imm = ((uint64_t)((intptr_t)target & 0xffffffff00000000)) + (uint64_t)lower;
  int32_t upper =  (tmp_imm - (int32_t)lower) >> 32;
  int64_t tmp_upper = upper, tmp_lower = upper;
  tmp_lower = (tmp_lower << 52) >> 52;
  tmp_upper -= tmp_lower;
  tmp_upper >>= 12;
  // Load upper 32 bits: upper = target[63:32], compensated by +1 where the
  // sign extension of the lower chunks would otherwise subtract from it.
  Assembler::patch(branch + 0,  31, 12, tmp_upper & 0xfffff);                       // Lui.
  Assembler::patch(branch + 4,  31, 20, tmp_lower & 0xfff);                         // Addi.
  // Load the remaining 32 bits.
  Assembler::patch(branch + 12, 31, 20, ((int32_t)lower >> 20) & 0xfff);            // Addi.
  Assembler::patch(branch + 20, 31, 20, (((intptr_t)target << 44) >> 52) & 0xfff);  // Addi.
  Assembler::patch(branch + 28, 31, 20, (intptr_t)target & 0xff);                   // Addi.
  return LI64_INSTRUCTIONS_NUM * NativeInstruction::instruction_size;
}

static int patch_imm_in_li32(address branch, int32_t target) {
  const int LI32_INSTRUCTIONS_NUM = 2;                                          // lui + addiw
  int64_t upper = (intptr_t)target;
  int32_t lower = (((int32_t)target) << 20) >> 20;
  upper -= lower;
  upper = (int32_t)upper;
  Assembler::patch(branch + 0,  31, 12, (upper >> 12) & 0xfffff);               // Lui.
  Assembler::patch(branch + 4,  31, 20, lower & 0xfff);                         // Addiw.
  return LI32_INSTRUCTIONS_NUM * NativeInstruction::instruction_size;
}

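// The get_* routines below are the inverses of the patch_* routines above:
// they pick the scattered immediate bits back out of the emitted
// instruction words and reassemble the branch offset or target address.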
static long get_offset_of_jal(address insn_addr) {
  assert_cond(insn_addr != NULL);
  long offset = 0;
  unsigned insn = *(unsigned*)insn_addr;
  long val = (long)Assembler::sextract(insn, 31, 12);
  offset |= ((val >> 19) & 0x1) << 20;
  offset |= (val & 0xff) << 12;
  offset |= ((val >> 8) & 0x1) << 11;
  offset |= ((val >> 9) & 0x3ff) << 1;
  offset = (offset << 43) >> 43;
  return offset;
}

static long get_offset_of_conditional_branch(address insn_addr) {
  long offset = 0;
  assert_cond(insn_addr != NULL);
  unsigned insn = *(unsigned*)insn_addr;
  offset = (long)Assembler::sextract(insn, 31, 31);
  offset = (offset << 12) | (((long)(Assembler::sextract(insn, 7, 7) & 0x1)) << 11);
  offset = offset | (((long)(Assembler::sextract(insn, 30, 25) & 0x3f)) << 5);
  offset = offset | (((long)(Assembler::sextract(insn, 11, 8) & 0xf)) << 1);
  offset = (offset << 41) >> 41;
  return offset;
}

static long get_offset_of_pc_relative(address insn_addr) {
  long offset = 0;
  assert_cond(insn_addr != NULL);
  offset = ((long)(Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12))) << 12;                                  // Auipc.
  offset += ((long)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20));                                         // Addi/Jalr/Load.
  offset = (offset << 32) >> 32;
  return offset;
}

static address get_target_of_movptr(address insn_addr) {
  assert_cond(insn_addr != NULL);
  intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 28;    // Lui.
  target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)) << 16;                        // Addi.
  target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[3], 31, 20)) << 5;                         // Addi.
  target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[5], 31, 20));                              // Addi/Jalr/Load.
  return (address) target_address;
}

static address get_target_of_li64(address insn_addr) {
  assert_cond(insn_addr != NULL);
  intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 44;    // Lui.
  target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)) << 32;                        // Addi.
  target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[3], 31, 20)) << 20;                        // Addi.
  target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[5], 31, 20)) << 8;                         // Addi.
  target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[7], 31, 20));                              // Addi.
  return (address)target_address;
}

static address get_target_of_li32(address insn_addr) {
  assert_cond(insn_addr != NULL);
  intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 12;    // Lui.
  target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20));                              // Addiw.
  return (address)target_address;
}

// Patch any kind of instruction; there may be several instructions.
// Return the total length (in bytes) of the instructions.
int MacroAssembler::pd_patch_instruction_size(address branch, address target) {
  assert_cond(branch != NULL);
  int64_t offset = target - branch;
  if (NativeInstruction::is_jal_at(branch)) {                         // jal
    return patch_offset_in_jal(branch, offset);
  } else if (NativeInstruction::is_branch_at(branch)) {               // beq/bge/bgeu/blt/bltu/bne
    return patch_offset_in_conditional_branch(branch, offset);
  } else if (NativeInstruction::is_pc_relative_at(branch)) {          // auipc, addi/jalr/load
    return patch_offset_in_pc_relative(branch, offset);
  } else if (NativeInstruction::is_movptr_at(branch)) {               // movptr
    return patch_addr_in_movptr(branch, target);
  } else if (NativeInstruction::is_li64_at(branch)) {                 // li64
    return patch_imm_in_li64(branch, target);
  } else if (NativeInstruction::is_li32_at(branch)) {                 // li32
    int64_t imm = (intptr_t)target;
    return patch_imm_in_li32(branch, (int32_t)imm);
  } else {
#ifdef ASSERT
    tty->print_cr("pd_patch_instruction_size: instruction 0x%x at " INTPTR_FORMAT " could not be patched!\n",
                  *(unsigned*)branch, p2i(branch));
    Disassembler::decode(branch - 16, branch + 16);
#endif
    ShouldNotReachHere();
    return -1;
  }
}

address MacroAssembler::target_addr_for_insn(address insn_addr) {
  long offset = 0;
  assert_cond(insn_addr != NULL);
  if (NativeInstruction::is_jal_at(insn_addr)) {                     // jal
    offset = get_offset_of_jal(insn_addr);
  } else if (NativeInstruction::is_branch_at(insn_addr)) {           // beq/bge/bgeu/blt/bltu/bne
    offset = get_offset_of_conditional_branch(insn_addr);
  } else if (NativeInstruction::is_pc_relative_at(insn_addr)) {      // auipc, addi/jalr/load
    offset = get_offset_of_pc_relative(insn_addr);
  } else if (NativeInstruction::is_movptr_at(insn_addr)) {           // movptr
    return get_target_of_movptr(insn_addr);
  } else if (NativeInstruction::is_li64_at(insn_addr)) {             // li64
    return get_target_of_li64(insn_addr);
  } else if (NativeInstruction::is_li32_at(insn_addr)) {             // li32
    return get_target_of_li32(insn_addr);
  } else {
    ShouldNotReachHere();
  }
  return address(((uintptr_t)insn_addr + offset));
}

int MacroAssembler::patch_oop(address insn_addr, address o) {
  // OOPs are either narrow (32 bits) or wide (48 bits). Narrow OOPs are
  // encoded with a li32 sequence (lui + addiw), wide OOPs with a movptr
  // sequence.
  if (NativeInstruction::is_li32_at(insn_addr)) {
    // Move narrow OOP
    uint32_t n = CompressedOops::narrow_oop_value(cast_to_oop(o));
    return patch_imm_in_li32(insn_addr, (int32_t)n);
  } else if (NativeInstruction::is_movptr_at(insn_addr)) {
    // Move wide OOP
    return patch_addr_in_movptr(insn_addr, o);
  }
  ShouldNotReachHere();
  return -1;
}

void MacroAssembler::reinit_heapbase() {
  if (UseCompressedOops) {
    if (Universe::is_fully_initialized()) {
      mv(xheapbase, CompressedOops::ptrs_base());
    } else {
      int32_t offset = 0;
      la_patchable(xheapbase, ExternalAddress((address)CompressedOops::ptrs_base_addr()), offset);
      ld(xheapbase, Address(xheapbase, offset));
    }
  }
}

void MacroAssembler::mv(Register Rd, Address dest) {
  assert(dest.getMode() == Address::literal, "Address mode should be Address::literal");
  code_section()->relocate(pc(), dest.rspec());
  movptr(Rd, dest.target());
}

void MacroAssembler::mv(Register Rd, address addr) {
  // Use the fixed-length movptr sequence rather than li, so the result can
  // be patched in place if this address is used with relocation.
  movptr(Rd, addr);
}

void MacroAssembler::mv(Register Rd, RegisterOrConstant src) {
  if (src.is_register()) {
    mv(Rd, src.as_register());
  } else {
    mv(Rd, src.as_constant());
  }
}

1404 void MacroAssembler::andrw(Register Rd, Register Rs1, Register Rs2) {
1405   andr(Rd, Rs1, Rs2);
1406   // addw: The result is truncated to 32 bits, then sign-extended,
1407   // and the result is stored in Rd
1408   addw(Rd, Rd, zr);
1409 }
1410 
1411 void MacroAssembler::orrw(Register Rd, Register Rs1, Register Rs2) {
1412   orr(Rd, Rs1, Rs2);
1413   // addw: The result is truncated to 32 bits, then sign-extended,
1414   // and the result is stored in Rd
1415   addw(Rd, Rd, zr);
1416 }
1417 
1418 void MacroAssembler::xorrw(Register Rd, Register Rs1, Register Rs2) {
1419   xorr(Rd, Rs1, Rs2);
1420   // addw: The result is truncated to 32 bits, then sign-extended,
1421   // and the result is stored in Rd
1422   addw(Rd, Rd, zr);
1423 }
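
     // Example: orrw(Rd, Rs1, Rs2) with Rs1 = 0x80000000 and Rs2 = 1 first
     // computes 0x80000001; the trailing addw with zr then sign-extends bit 31,
     // leaving Rd = 0xFFFFFFFF80000001.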
1424 
1425 // Note: load_unsigned_short used to be called load_unsigned_word.
1426 int MacroAssembler::load_unsigned_short(Register dst, Address src) {
1427   int off = offset();
1428   lhu(dst, src);
1429   return off;
1430 }
1431 
1432 int MacroAssembler::load_unsigned_byte(Register dst, Address src) {
1433   int off = offset();
1434   lbu(dst, src);
1435   return off;
1436 }
1437 
1438 int MacroAssembler::load_signed_short(Register dst, Address src) {
1439   int off = offset();
1440   lh(dst, src);
1441   return off;
1442 }
1443 
1444 int MacroAssembler::load_signed_byte(Register dst, Address src) {
1445   int off = offset();
1446   lb(dst, src);
1447   return off;
1448 }
1449 
1450 void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) {
1451   switch (size_in_bytes) {
1452     case  8:  ld(dst, src); break;
1453     case  4:  is_signed ? lw(dst, src) : lwu(dst, src); break;
1454     case  2:  is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break;
1455     case  1:  is_signed ? load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break;
1456     default:  ShouldNotReachHere();
1457   }
1458 }
1459 
1460 void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) {
1461   switch (size_in_bytes) {
1462     case  8:  sd(src, dst); break;
1463     case  4:  sw(src, dst); break;
1464     case  2:  sh(src, dst); break;
1465     case  1:  sb(src, dst); break;
1466     default:  ShouldNotReachHere();
1467   }
1468 }
1469 
1470 // reverse bytes in halfword in lower 16 bits and sign-extend
1471 // Rd[15:0] = Rs[7:0] Rs[15:8] (sign-extend to 64 bits)
1472 void MacroAssembler::revb_h_h(Register Rd, Register Rs, Register tmp) {
1473   if (UseRVB) {
1474     rev8(Rd, Rs);
1475     srai(Rd, Rd, 48);
1476     return;
1477   }
1478   assert_different_registers(Rs, tmp);
1479   assert_different_registers(Rd, tmp);
1480   srli(tmp, Rs, 8);
1481   andi(tmp, tmp, 0xFF);
1482   slli(Rd, Rs, 56);
1483   srai(Rd, Rd, 48); // sign-extend
1484   orr(Rd, Rd, tmp);
1485 }
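
     // Example: if Rs[15:0] = 0x12B4, the two low bytes are swapped to 0xB412
     // and sign-extended from bit 15, so Rd = 0xFFFFFFFFFFFFB412.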
1486 
1487 // reverse bytes in lower word and sign-extend
1488 // Rd[31:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] (sign-extend to 64 bits)
1489 void MacroAssembler::revb_w_w(Register Rd, Register Rs, Register tmp1, Register tmp2) {
1490   if (UseRVB) {
1491     rev8(Rd, Rs);
1492     srai(Rd, Rd, 32);
1493     return;
1494   }
1495   assert_different_registers(Rs, tmp1, tmp2);
1496   assert_different_registers(Rd, tmp1, tmp2);
1497   revb_h_w_u(Rd, Rs, tmp1, tmp2);
1498   slli(tmp2, Rd, 48);
1499   srai(tmp2, tmp2, 32); // sign-extend
1500   srli(Rd, Rd, 16);
1501   orr(Rd, Rd, tmp2);
1502 }
1503 
1504 // reverse bytes in halfword in lower 16 bits and zero-extend
1505 // Rd[15:0] = Rs[7:0] Rs[15:8] (zero-extend to 64 bits)
1506 void MacroAssembler::revb_h_h_u(Register Rd, Register Rs, Register tmp) {
1507   if (UseRVB) {
1508     rev8(Rd, Rs);
1509     srli(Rd, Rd, 48);
1510     return;
1511   }
1512   assert_different_registers(Rs, tmp);
1513   assert_different_registers(Rd, tmp);
1514   srli(tmp, Rs, 8);
1515   andi(tmp, tmp, 0xFF);
1516   andi(Rd, Rs, 0xFF);
1517   slli(Rd, Rd, 8);
1518   orr(Rd, Rd, tmp);
1519 }
1520 
1521 // reverse bytes in halfwords in lower 32 bits and zero-extend
1522 // Rd[31:0] = Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8] (zero-extend to 64 bits)
1523 void MacroAssembler::revb_h_w_u(Register Rd, Register Rs, Register tmp1, Register tmp2) {
1524   if (UseRVB) {
1525     rev8(Rd, Rs);
1526     rori(Rd, Rd, 32);
1527     roriw(Rd, Rd, 16);
1528     zext_w(Rd, Rd);
1529     return;
1530   }
1531   assert_different_registers(Rs, tmp1, tmp2);
1532   assert_different_registers(Rd, tmp1, tmp2);
1533   srli(tmp2, Rs, 16);
1534   revb_h_h_u(tmp2, tmp2, tmp1);
1535   revb_h_h_u(Rd, Rs, tmp1);
1536   slli(tmp2, tmp2, 16);
1537   orr(Rd, Rd, tmp2);
1538 }
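
     // Example: if Rs[31:0] = 0x11223344, the bytes within each halfword are
     // swapped independently, giving Rd = 0x0000000022114433.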
1539 
1540 // This method is only used for revb_h
1541 // Rd = Rs[47:0] Rs[55:48] Rs[63:56]
1542 void MacroAssembler::revb_h_helper(Register Rd, Register Rs, Register tmp1, Register tmp2) {
1543   assert_different_registers(Rs, tmp1, tmp2);
1544   assert_different_registers(Rd, tmp1);
1545   srli(tmp1, Rs, 48);
1546   andi(tmp2, tmp1, 0xFF);
1547   slli(tmp2, tmp2, 8);
1548   srli(tmp1, tmp1, 8);
1549   orr(tmp1, tmp1, tmp2);
1550   slli(Rd, Rs, 16);
1551   orr(Rd, Rd, tmp1);
1552 }
1553 
1554 // reverse bytes in each halfword
1555 // Rd[63:0] = Rs[55:48] Rs[63:56] Rs[39:32] Rs[47:40] Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8]
1556 void MacroAssembler::revb_h(Register Rd, Register Rs, Register tmp1, Register tmp2) {
1557   if (UseRVB) {
1558     assert_different_registers(Rs, tmp1);
1559     assert_different_registers(Rd, tmp1);
1560     rev8(Rd, Rs);
1561     zext_w(tmp1, Rd);
1562     roriw(tmp1, tmp1, 16);
1563     slli(tmp1, tmp1, 32);
1564     srli(Rd, Rd, 32);
1565     roriw(Rd, Rd, 16);
1566     zext_w(Rd, Rd);
1567     orr(Rd, Rd, tmp1);
1568     return;
1569   }
1570   assert_different_registers(Rs, tmp1, tmp2);
1571   assert_different_registers(Rd, tmp1, tmp2);
1572   revb_h_helper(Rd, Rs, tmp1, tmp2);
1573   for (int i = 0; i < 3; ++i) {
1574     revb_h_helper(Rd, Rd, tmp1, tmp2);
1575   }
1576 }
1577 
1578 // reverse bytes in each word
1579 // Rd[63:0] = Rs[39:32] Rs[47:40] Rs[55:48] Rs[63:56] Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24]
1580 void MacroAssembler::revb_w(Register Rd, Register Rs, Register tmp1, Register tmp2) {
1581   if (UseRVB) {
1582     rev8(Rd, Rs);
1583     rori(Rd, Rd, 32);
1584     return;
1585   }
1586   assert_different_registers(Rs, tmp1, tmp2);
1587   assert_different_registers(Rd, tmp1, tmp2);
1588   revb(Rd, Rs, tmp1, tmp2);
1589   ror_imm(Rd, Rd, 32);
1590 }
1591 
1592 // reverse bytes in doubleword
1593 // Rd[63:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] Rs[39:32] Rs[47:40] Rs[55:48] Rs[63:56]
1594 void MacroAssembler::revb(Register Rd, Register Rs, Register tmp1, Register tmp2) {
1595   if (UseRVB) {
1596     rev8(Rd, Rs);
1597     return;
1598   }
1599   assert_different_registers(Rs, tmp1, tmp2);
1600   assert_different_registers(Rd, tmp1, tmp2);
1601   andi(tmp1, Rs, 0xFF);
1602   slli(tmp1, tmp1, 8);
1603   for (int step = 8; step < 56; step += 8) {
1604     srli(tmp2, Rs, step);
1605     andi(tmp2, tmp2, 0xFF);
1606     orr(tmp1, tmp1, tmp2);
1607     slli(tmp1, tmp1, 8);
1608   }
1609   srli(Rd, Rs, 56);
1610   andi(Rd, Rd, 0xFF);
1611   orr(Rd, tmp1, Rd);
1612 }
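
     // Example: revb turns Rs = 0x0102030405060708 into Rd = 0x0807060504030201,
     // which is what a single rev8 produces when UseRVB is enabled.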
1613 
1614 // rotate right by shift bits
1615 void MacroAssembler::ror_imm(Register dst, Register src, uint32_t shift, Register tmp)
1616 {
1617   if (UseRVB) {
1618     rori(dst, src, shift);
1619     return;
1620   }
1621 
1622   assert_different_registers(dst, tmp);
1623   assert_different_registers(src, tmp);
1624   assert(shift < 64, "shift amount must be < 64");
1625   slli(tmp, src, 64 - shift);
1626   srli(dst, src, shift);
1627   orr(dst, dst, tmp);
1628 }
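
     // Example: without RVB, ror_imm(dst, src, 16) expands to
     //   slli(tmp, src, 48); srli(dst, src, 16); orr(dst, dst, tmp);
     // i.e. the low 16 bits of src are rotated into the top of dst.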
1629 
1630 void MacroAssembler::andi(Register Rd, Register Rn, int64_t imm, Register tmp) {
1631   if (is_imm_in_range(imm, 12, 0)) {
1632     and_imm12(Rd, Rn, imm);
1633   } else {
1634     assert_different_registers(Rn, tmp);
1635     li(tmp, imm);
1636     andr(Rd, Rn, tmp);
1637   }
1638 }
1639 
1640 void MacroAssembler::orptr(Address adr, RegisterOrConstant src, Register tmp1, Register tmp2) {
1641   ld(tmp1, adr);
1642   if (src.is_register()) {
1643     orr(tmp1, tmp1, src.as_register());
1644   } else {
1645     if (is_imm_in_range(src.as_constant(), 12, 0)) {
1646       ori(tmp1, tmp1, src.as_constant());
1647     } else {
1648       assert_different_registers(tmp1, tmp2);
1649       li(tmp2, src.as_constant());
1650       orr(tmp1, tmp1, tmp2);
1651     }
1652   }
1653   sd(tmp1, adr);
1654 }
1655 
1656 void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp, Label &L) {
1657   if (UseCompressedClassPointers) {
1658     lwu(tmp, Address(oop, oopDesc::klass_offset_in_bytes()));
1659     if (CompressedKlassPointers::base() == NULL) {
1660       slli(tmp, tmp, CompressedKlassPointers::shift());
1661       beq(trial_klass, tmp, L);
1662       return;
1663     }
1664     decode_klass_not_null(tmp);
1665   } else {
1666     ld(tmp, Address(oop, oopDesc::klass_offset_in_bytes()));
1667   }
1668   beq(trial_klass, tmp, L);
1669 }
1670 
1671 // Move an oop into a register. immediate is true if we want
1672 // immediate instructions and nmethod entry barriers are not enabled,
1673 // i.e. we are not going to patch this instruction while the code is
1674 // being executed by another thread.
1675 void MacroAssembler::movoop(Register dst, jobject obj, bool immediate) {
1676   int oop_index;
1677   if (obj == NULL) {
1678     oop_index = oop_recorder()->allocate_oop_index(obj);
1679   } else {
1680 #ifdef ASSERT
1681     {
1682       ThreadInVMfromUnknown tiv;
1683       assert(Universe::heap()->is_in(JNIHandles::resolve(obj)), "should be real oop");
1684     }
1685 #endif
1686     oop_index = oop_recorder()->find_index(obj);
1687   }
1688   RelocationHolder rspec = oop_Relocation::spec(oop_index);
1689 
1690   // nmethod entry barriers necessitate using the constant pool. They have to be
1691   // ordered with respect to oop accesses.
1692   // Using immediate literals would necessitate fence.i.
1693   if (BarrierSet::barrier_set()->barrier_set_nmethod() != NULL || !immediate) {
1694     address dummy = address(uintptr_t(pc()) & -wordSize); // A nearby aligned address
1695     ld_constant(dst, Address(dummy, rspec));
1696   } else
1697     mv(dst, Address((address)obj, rspec));
1698 }
1699 
1700 // Move a metadata address into a register.
1701 void MacroAssembler::mov_metadata(Register dst, Metadata* obj) {
1702   int oop_index;
1703   if (obj == NULL) {
1704     oop_index = oop_recorder()->allocate_metadata_index(obj);
1705   } else {
1706     oop_index = oop_recorder()->find_index(obj);
1707   }
1708   RelocationHolder rspec = metadata_Relocation::spec(oop_index);
1709   mv(dst, Address((address)obj, rspec));
1710 }
1711 
1712 // Writes to successive stack pages until the given offset is reached, to
1713 // check for stack overflow + shadow pages.  This clobbers tmp.
1714 void MacroAssembler::bang_stack_size(Register size, Register tmp) {
1715   assert_different_registers(tmp, size, t0);
1716   // Bang stack for total size given plus shadow page size.
1717   // Bang one page at a time because large size can bang beyond yellow and
1718   // red zones.
1719   mv(t0, os::vm_page_size());
1720   Label loop;
1721   bind(loop);
1722   sub(tmp, sp, t0);
1723   subw(size, size, t0);
1724   sd(size, Address(tmp));
1725   bgtz(size, loop);
1726 
1727   // Bang down shadow pages too.
1728   // At this point, (tmp-0) is the last address touched, so don't
1729   // touch it again.  (It was touched as (tmp-pagesize) but then tmp
1730   // was post-decremented.)  Skip this address by starting at i=1, and
1731   // touch a few more pages below.  N.B.  It is important to touch all
1732   // the way down to and including i=StackShadowPages.
1733   for (int i = 0; i < (int)(StackOverflow::stack_shadow_zone_size() / os::vm_page_size()) - 1; i++) {
1734     // This could be any sized move, but since it can serve as a debugging
1735     // crumb the bigger the better.
1736     sub(tmp, tmp, os::vm_page_size());
1737     sd(size, Address(tmp, 0));
1738   }
1739 }
1740 
1741 SkipIfEqual::SkipIfEqual(MacroAssembler* masm, const bool* flag_addr, bool value) {
1742   assert_cond(masm != NULL);
1743   int32_t offset = 0;
1744   _masm = masm;
1745   _masm->la_patchable(t0, ExternalAddress((address)flag_addr), offset);
1746   _masm->lbu(t0, Address(t0, offset));
1747   _masm->beqz(t0, _label);
1748 }
1749 
1750 SkipIfEqual::~SkipIfEqual() {
1751   assert_cond(_masm != NULL);
1752   _masm->bind(_label);
1753   _masm = NULL;
1754 }
1755 
1756 void MacroAssembler::load_mirror(Register dst, Register method, Register tmp) {
1757   const int mirror_offset = in_bytes(Klass::java_mirror_offset());
1758   ld(dst, Address(xmethod, Method::const_offset()));
1759   ld(dst, Address(dst, ConstMethod::constants_offset()));
1760   ld(dst, Address(dst, ConstantPool::pool_holder_offset_in_bytes()));
1761   ld(dst, Address(dst, mirror_offset));
1762   resolve_oop_handle(dst, tmp);
1763 }
1764 
1765 void MacroAssembler::resolve_oop_handle(Register result, Register tmp) {
1766   // OopHandle::resolve is an indirection.
1767   assert_different_registers(result, tmp);
1768   access_load_at(T_OBJECT, IN_NATIVE, result, Address(result, 0), tmp, noreg);
1769 }
1770 
1771 // ((WeakHandle)result).resolve()
1772 void MacroAssembler::resolve_weak_handle(Register result, Register tmp) {
1773   assert_different_registers(result, tmp);
1774   Label resolved;
1775 
1776   // A null weak handle resolves to null.
1777   beqz(result, resolved);
1778 
1779   // Only 64 bit platforms support GCs that require a tmp register
1780   // Only IN_HEAP loads require a thread_tmp register
1781   // WeakHandle::resolve is an indirection like jweak.
1782   access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF,
1783                  result, Address(result), tmp, noreg /* tmp_thread */);
1784   bind(resolved);
1785 }
1786 
1787 void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators,
1788                                     Register dst, Address src,
1789                                     Register tmp1, Register thread_tmp) {
1790   BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
1791   decorators = AccessInternal::decorator_fixup(decorators);
1792   bool as_raw = (decorators & AS_RAW) != 0;
1793   if (as_raw) {
1794     bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, thread_tmp);
1795   } else {
1796     bs->load_at(this, decorators, type, dst, src, tmp1, thread_tmp);
1797   }
1798 }
1799 
1800 void MacroAssembler::null_check(Register reg, int offset) {
1801   if (needs_explicit_null_check(offset)) {
1802     // provoke OS NULL exception if reg = NULL by
1803     // accessing M[reg] w/o changing any registers
1804     // NOTE: this is plenty to provoke a segv
1805     ld(zr, Address(reg, 0));
1806   } else {
1807     // nothing to do, (later) access of M[reg + offset]
1808     // will provoke OS NULL exception if reg = NULL
1809   }
1810 }
1811 
1812 void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators,
1813                                      Address dst, Register src,
1814                                      Register tmp1, Register thread_tmp) {
1815   BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
1816   decorators = AccessInternal::decorator_fixup(decorators);
1817   bool as_raw = (decorators & AS_RAW) != 0;
1818   if (as_raw) {
1819     bs->BarrierSetAssembler::store_at(this, decorators, type, dst, src, tmp1, thread_tmp);
1820   } else {
1821     bs->store_at(this, decorators, type, dst, src, tmp1, thread_tmp);
1822   }
1823 }
1824 
1825 // Algorithm must match CompressedOops::encode.
1826 void MacroAssembler::encode_heap_oop(Register d, Register s) {
1827   verify_oop(s, "broken oop in encode_heap_oop");
1828   if (CompressedOops::base() == NULL) {
1829     if (CompressedOops::shift() != 0) {
1830       assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong");
1831       srli(d, s, LogMinObjAlignmentInBytes);
1832     } else {
1833       mv(d, s);
1834     }
1835   } else {
1836     Label notNull;
1837     sub(d, s, xheapbase);
1838     bgez(d, notNull);
1839     mv(d, zr);
1840     bind(notNull);
1841     if (CompressedOops::shift() != 0) {
1842       assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong");
1843       srli(d, d, CompressedOops::shift());
1844     }
1845   }
1846 }
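
     // Example: with a non-NULL heap base B and shift 3, an oop p encodes as
     // (p - B) >> 3; any value below B (in particular NULL) takes the bgez/mv
     // path above and encodes as 0.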
1847 
1848 void MacroAssembler::load_klass(Register dst, Register src) {
1849   if (UseCompressedClassPointers) {
1850     lwu(dst, Address(src, oopDesc::klass_offset_in_bytes()));
1851     decode_klass_not_null(dst);
1852   } else {
1853     ld(dst, Address(src, oopDesc::klass_offset_in_bytes()));
1854   }
1855 }
1856 
1857 void MacroAssembler::store_klass(Register dst, Register src) {
1858   // FIXME: Should this be a store release? Concurrent GCs assume
1859   // klass length is valid if klass field is not null.
1860   if (UseCompressedClassPointers) {
1861     encode_klass_not_null(src);
1862     sw(src, Address(dst, oopDesc::klass_offset_in_bytes()));
1863   } else {
1864     sd(src, Address(dst, oopDesc::klass_offset_in_bytes()));
1865   }
1866 }
1867 
1868 void MacroAssembler::store_klass_gap(Register dst, Register src) {
1869   if (UseCompressedClassPointers) {
1870     // Store to klass gap in destination
1871     sw(src, Address(dst, oopDesc::klass_gap_offset_in_bytes()));
1872   }
1873 }
1874 
1875 void  MacroAssembler::decode_klass_not_null(Register r) {
1876   decode_klass_not_null(r, r);
1877 }
1878 
1879 void MacroAssembler::decode_klass_not_null(Register dst, Register src, Register tmp) {
1880   assert(UseCompressedClassPointers, "should only be used for compressed headers");
1881 
1882   if (CompressedKlassPointers::base() == NULL) {
1883     if (CompressedKlassPointers::shift() != 0) {
1884       assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong");
1885       slli(dst, src, LogKlassAlignmentInBytes);
1886     } else {
1887       mv(dst, src);
1888     }
1889     return;
1890   }
1891 
1892   Register xbase = dst;
1893   if (dst == src) {
1894     xbase = tmp;
1895   }
1896 
1897   assert_different_registers(src, xbase);
1898   li(xbase, (uintptr_t)CompressedKlassPointers::base());
1899 
1900   if (CompressedKlassPointers::shift() != 0) {
1901     assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong");
1902     assert_different_registers(t0, xbase);
1903     shadd(dst, src, xbase, t0, LogKlassAlignmentInBytes);
1904   } else {
1905     add(dst, xbase, src);
1906   }
1907 
1908   if (xbase == xheapbase) { reinit_heapbase(); }
1909 }
1910 
1911 void MacroAssembler::encode_klass_not_null(Register r) {
1912   encode_klass_not_null(r, r);
1913 }
1914 
1915 void MacroAssembler::encode_klass_not_null(Register dst, Register src, Register tmp) {
1916   assert(UseCompressedClassPointers, "should only be used for compressed headers");
1917 
1918   if (CompressedKlassPointers::base() == NULL) {
1919     if (CompressedKlassPointers::shift() != 0) {
1920       assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong");
1921       srli(dst, src, LogKlassAlignmentInBytes);
1922     } else {
1923       mv(dst, src);
1924     }
1925     return;
1926   }
1927 
1928   if (((uint64_t)(uintptr_t)CompressedKlassPointers::base() & 0xffffffff) == 0 &&
1929       CompressedKlassPointers::shift() == 0) {
1930     zero_extend(dst, src, 32);
1931     return;
1932   }
1933 
1934   Register xbase = dst;
1935   if (dst == src) {
1936     xbase = tmp;
1937   }
1938 
1939   assert_different_registers(src, xbase);
1940   li(xbase, (intptr_t)CompressedKlassPointers::base());
1941   sub(dst, src, xbase);
1942   if (CompressedKlassPointers::shift() != 0) {
1943     assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong");
1944     srli(dst, dst, LogKlassAlignmentInBytes);
1945   }
1946   if (xbase == xheapbase) {
1947     reinit_heapbase();
1948   }
1949 }
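
     // Note on the shortcut above: when the low 32 bits of the klass base are
     // zero and the shift is zero, (src - base) equals the low 32 bits of src
     // for any klass in the encoding range, so zero_extend(dst, src, 32) suffices.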
1950 
1951 void  MacroAssembler::decode_heap_oop_not_null(Register r) {
1952   decode_heap_oop_not_null(r, r);
1953 }
1954 
1955 void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
1956   assert(UseCompressedOops, "should only be used for compressed headers");
1957   assert(Universe::heap() != NULL, "java heap should be initialized");
1958   // Cannot assert, unverified entry point counts instructions (see .ad file)
1959   // vtableStubs also counts instructions in pd_code_size_limit.
1960   // Also do not verify_oop as this is called by verify_oop.
1961   if (CompressedOops::shift() != 0) {
1962     assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong");
1963     slli(dst, src, LogMinObjAlignmentInBytes);
1964     if (CompressedOops::base() != NULL) {
1965       add(dst, xheapbase, dst);
1966     }
1967   } else {
1968     assert(CompressedOops::base() == NULL, "sanity");
1969     mv(dst, src);
1970   }
1971 }
1972 
1973 void  MacroAssembler::decode_heap_oop(Register d, Register s) {
1974   if (CompressedOops::base() == NULL) {
1975     if (CompressedOops::shift() != 0 || d != s) {
1976       slli(d, s, CompressedOops::shift());
1977     }
1978   } else {
1979     Label done;
1980     mv(d, s);
1981     beqz(s, done);
1982     shadd(d, s, xheapbase, d, LogMinObjAlignmentInBytes);
1983     bind(done);
1984   }
1985   verify_oop(d, "broken oop in decode_heap_oop");
1986 }
1987 
1988 void MacroAssembler::store_heap_oop(Address dst, Register src, Register tmp1,
1989                                     Register thread_tmp, DecoratorSet decorators) {
1990   access_store_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp);
1991 }
1992 
1993 void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1,
1994                                    Register thread_tmp, DecoratorSet decorators) {
1995   access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp);
1996 }
1997 
1998 void MacroAssembler::load_heap_oop_not_null(Register dst, Address src, Register tmp1,
1999                                             Register thread_tmp, DecoratorSet decorators) {
2000   access_load_at(T_OBJECT, IN_HEAP | IS_NOT_NULL, dst, src, tmp1, thread_tmp);
2001 }
2002 
2003 // Used for storing NULLs.
2004 void MacroAssembler::store_heap_oop_null(Address dst) {
2005   access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg);
2006 }
2007 
2008 int MacroAssembler::corrected_idivl(Register result, Register rs1, Register rs2,
2009                                     bool want_remainder)
2010 {
2011   // Full implementation of Java idiv and irem.  The function
2012   // returns the (pc) offset of the div instruction - may be needed
2013   // for implicit exceptions.
2014   //
2015   // input : rs1: dividend
2016   //         rs2: divisor
2017   //
2018   // result: either
2019   //         quotient  (= rs1 idiv rs2)
2020   //         remainder (= rs1 irem rs2)
2021 
2022 
2023   int idivl_offset = offset();
2024   if (!want_remainder) {
2025     divw(result, rs1, rs2);
2026   } else {
2027     remw(result, rs1, rs2); // result = rs1 % rs2;
2028   }
2029   return idivl_offset;
2030 }
2031 
2032 int MacroAssembler::corrected_idivq(Register result, Register rs1, Register rs2,
2033                                     bool want_remainder)
2034 {
2035   // Full implementation of Java ldiv and lrem.  The function
2036   // returns the (pc) offset of the div instruction - may be needed
2037   // for implicit exceptions.
2038   //
2039   // input : rs1: dividend
2040   //         rs2: divisor
2041   //
2042   // result: either
2043   //         quotient  (= rs1 ldiv rs2)
2044   //         remainder (= rs1 lrem rs2)
2045 
2046   int idivq_offset = offset();
2047   if (!want_remainder) {
2048     div(result, rs1, rs2);
2049   } else {
2050     rem(result, rs1, rs2); // result = rs1 % rs2;
2051   }
2052   return idivq_offset;
2053 }
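
     // Unlike x86, no fixup code is needed around the division itself: RV64M
     // defines the overflow case (min_jlong / -1) to yield min_jlong with
     // remainder 0, which is exactly what Java requires. Division by zero does
     // not trap on RISC-V, so callers are expected to test the divisor
     // explicitly where Java semantics demand an ArithmeticException.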
2054 
2055 // Look up the method for a megamorphic invokeinterface call.
2056 // The target method is determined by <intf_klass, itable_index>.
2057 // The receiver klass is in recv_klass.
2058 // On success, the result will be in method_result, and execution falls through.
2059 // On failure, execution transfers to the given label.
2060 void MacroAssembler::lookup_interface_method(Register recv_klass,
2061                                              Register intf_klass,
2062                                              RegisterOrConstant itable_index,
2063                                              Register method_result,
2064                                              Register scan_tmp,
2065                                              Label& L_no_such_interface,
2066                                              bool return_method) {
2067   assert_different_registers(recv_klass, intf_klass, scan_tmp);
2068   assert_different_registers(method_result, intf_klass, scan_tmp);
2069   assert(recv_klass != method_result || !return_method,
2070          "recv_klass can be destroyed when method isn't needed");
2071   assert(itable_index.is_constant() || itable_index.as_register() == method_result,
2072          "caller must use same register for non-constant itable index as for method");
2073 
2074   // Compute start of first itableOffsetEntry (which is at the end of the vtable).
2075   int vtable_base = in_bytes(Klass::vtable_start_offset());
2076   int itentry_off = itableMethodEntry::method_offset_in_bytes();
2077   int scan_step   = itableOffsetEntry::size() * wordSize;
2078   int vte_size    = vtableEntry::size_in_bytes();
2079   assert(vte_size == wordSize, "else adjust times_vte_scale");
2080 
2081   lwu(scan_tmp, Address(recv_klass, Klass::vtable_length_offset()));
2082 
2083   // %%% Could store the aligned, prescaled offset in the klassoop.
2084   shadd(scan_tmp, scan_tmp, recv_klass, scan_tmp, 3);
2085   add(scan_tmp, scan_tmp, vtable_base);
2086 
2087   if (return_method) {
2088     // Adjust recv_klass by scaled itable_index, so we can free itable_index.
2089     assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
2090     if (itable_index.is_register()) {
2091       slli(t0, itable_index.as_register(), 3);
2092     } else {
2093       li(t0, itable_index.as_constant() << 3);
2094     }
2095     add(recv_klass, recv_klass, t0);
2096     if (itentry_off) {
2097       add(recv_klass, recv_klass, itentry_off);
2098     }
2099   }
2100 
2101   Label search, found_method;
2102 
2103   ld(method_result, Address(scan_tmp, itableOffsetEntry::interface_offset_in_bytes()));
2104   beq(intf_klass, method_result, found_method);
2105   bind(search);
2106   // Check that the previous entry is non-null. A null entry means that
2107   // the receiver class doesn't implement the interface, and wasn't the
2108   // same as when the caller was compiled.
2109   beqz(method_result, L_no_such_interface, /* is_far */ true);
2110   addi(scan_tmp, scan_tmp, scan_step);
2111   ld(method_result, Address(scan_tmp, itableOffsetEntry::interface_offset_in_bytes()));
2112   bne(intf_klass, method_result, search);
2113 
2114   bind(found_method);
2115 
2116   // Got a hit.
2117   if (return_method) {
2118     lwu(scan_tmp, Address(scan_tmp, itableOffsetEntry::offset_offset_in_bytes()));
2119     add(method_result, recv_klass, scan_tmp);
2120     ld(method_result, Address(method_result));
2121   }
2122 }
2123 
2124 // virtual method calling
2125 void MacroAssembler::lookup_virtual_method(Register recv_klass,
2126                                            RegisterOrConstant vtable_index,
2127                                            Register method_result) {
2128   const int base = in_bytes(Klass::vtable_start_offset());
2129   assert(vtableEntry::size() * wordSize == 8,
2130          "adjust the scaling in the code below");
2131   int vtable_offset_in_bytes = base + vtableEntry::method_offset_in_bytes();
2132 
2133   if (vtable_index.is_register()) {
2134     shadd(method_result, vtable_index.as_register(), recv_klass, method_result, LogBytesPerWord);
2135     ld(method_result, Address(method_result, vtable_offset_in_bytes));
2136   } else {
2137     vtable_offset_in_bytes += vtable_index.as_constant() * wordSize;
2138     ld(method_result, form_address(method_result, recv_klass, vtable_offset_in_bytes));
2139   }
2140 }
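
     // For a register index this computes, in effect,
     //   method_result = *(recv_klass + vtable_start_offset
     //                     + vtable_index * wordSize + method_offset)
     // using one shadd to scale the index plus a single ld.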
2141 
2142 void MacroAssembler::membar(uint32_t order_constraint) {
2143   address prev = pc() - NativeMembar::instruction_size;
2144   address last = code()->last_insn();
2145 
2146   if (last != NULL && nativeInstruction_at(last)->is_membar() && prev == last) {
2147     NativeMembar *bar = NativeMembar_at(prev);
2148     // We are merging two memory barrier instructions.  On RISC-V we
2149     // can do this simply by ORing them together.
2150     bar->set_kind(bar->get_kind() | order_constraint);
2151     BLOCK_COMMENT("merged membar");
2152   } else {
2153     code()->set_last_insn(pc());
2154 
2155     uint32_t predecessor = 0;
2156     uint32_t successor = 0;
2157 
2158     membar_mask_to_pred_succ(order_constraint, predecessor, successor);
2159     fence(predecessor, successor);
2160   }
2161 }
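
     // Example of the merge above: membar(LoadLoad) immediately followed by
     // membar(StoreStore) rewrites the first fence in place, so a single fence
     // carrying both constraints is emitted instead of two back-to-back fences.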
2162 
2163 // Form an address from base + offset in Rd. Rd may or may not
2164 // actually be used: you must use the Address that is returned. It
2165 // is up to you to ensure that the shift provided matches the size
2166 // of your data.
2167 Address MacroAssembler::form_address(Register Rd, Register base, long byte_offset) {
2168   if (is_offset_in_range(byte_offset, 12)) { // 12: imm in range 2^12
2169     return Address(base, byte_offset);
2170   }
2171 
2172   // Do it the hard way
2173   mv(Rd, byte_offset);
2174   add(Rd, base, Rd);
2175   return Address(Rd);
2176 }
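
     // Example: form_address(t1, x10, 8) fits the 12-bit immediate and returns
     // Address(x10, 8) without emitting any code, whereas a byte_offset such as
     // 0x12345 is first materialized into Rd and Address(Rd) is returned.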
2177 
2178 void MacroAssembler::check_klass_subtype(Register sub_klass,
2179                                          Register super_klass,
2180                                          Register tmp_reg,
2181                                          Label& L_success) {
2182   Label L_failure;
2183   check_klass_subtype_fast_path(sub_klass, super_klass, tmp_reg, &L_success, &L_failure, NULL);
2184   check_klass_subtype_slow_path(sub_klass, super_klass, tmp_reg, noreg, &L_success, NULL);
2185   bind(L_failure);
2186 }
2187 
2188 void MacroAssembler::safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod) {
2189   ld(t0, Address(xthread, JavaThread::polling_word_offset()));
2190   if (acquire) {
2191     membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
2192   }
2193   if (at_return) {
2194     bgtu(in_nmethod ? sp : fp, t0, slow_path, true /* is_far */);
2195   } else {
2196     andi(t0, t0, SafepointMechanism::poll_bit());
2197     bnez(t0, slow_path, true /* is_far */);
2198   }
2199 }
2200 
2201 void MacroAssembler::cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp,
2202                                 Label &succeed, Label *fail) {
2203   // oldv holds comparison value
2204   // newv holds value to write in exchange
2205   // addr identifies memory word to compare against/update
2206   Label retry_load, nope;
2207   bind(retry_load);
2208   // Load reserved from the memory location
2209   lr_d(tmp, addr, Assembler::aqrl);
2210   // Fail and exit if it is not what we expect
2211   bne(tmp, oldv, nope);
2212   // If the store conditional succeeds, tmp will be zero
2213   sc_d(tmp, newv, addr, Assembler::rl);
2214   beqz(tmp, succeed);
2215   // Retry only when the store conditional failed
2216   j(retry_load);
2217 
2218   bind(nope);
2219   membar(AnyAny);
2220   mv(oldv, tmp);
2221   if (fail != NULL) {
2222     j(*fail);
2223   }
2224 }
2225 
2226 void MacroAssembler::cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp,
2227                                         Label &succeed, Label *fail) {
2228   assert(oopDesc::mark_offset_in_bytes() == 0, "assumption");
2229   cmpxchgptr(oldv, newv, obj, tmp, succeed, fail);
2230 }
2231 
2232 void MacroAssembler::load_reserved(Register addr,
2233                                    enum operand_size size,
2234                                    Assembler::Aqrl acquire) {
2235   switch (size) {
2236     case int64:
2237       lr_d(t0, addr, acquire);
2238       break;
2239     case int32:
2240       lr_w(t0, addr, acquire);
2241       break;
2242     case uint32:
2243       lr_w(t0, addr, acquire);
2244       zero_extend(t0, t0, 32);
2245       break;
2246     default:
2247       ShouldNotReachHere();
2248   }
2249 }
2250 
2251 void MacroAssembler::store_conditional(Register addr,
2252                                        Register new_val,
2253                                        enum operand_size size,
2254                                        Assembler::Aqrl release) {
2255   switch (size) {
2256     case int64:
2257       sc_d(t0, new_val, addr, release);
2258       break;
2259     case int32:
2260     case uint32:
2261       sc_w(t0, new_val, addr, release);
2262       break;
2263     default:
2264       ShouldNotReachHere();
2265   }
2266 }
2267 
2268 
2269 void MacroAssembler::cmpxchg_narrow_value_helper(Register addr, Register expected,
2270                                                  Register new_val,
2271                                                  enum operand_size size,
2272                                                  Register tmp1, Register tmp2, Register tmp3) {
2273   assert(size == int8 || size == int16, "unsupported operand size");
2274 
2275   Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3;
2276 
2277   andi(shift, addr, 3);
2278   slli(shift, shift, 3);
2279 
2280   andi(aligned_addr, addr, ~3);
2281 
2282   if (size == int8) {
2283     addi(mask, zr, 0xff);
2284   } else {
2285     // size == int16 case
2286     addi(mask, zr, -1);
2287     zero_extend(mask, mask, 16);
2288   }
2289   sll(mask, mask, shift);
2290 
2291   xori(not_mask, mask, -1);
2292 
2293   sll(expected, expected, shift);
2294   andr(expected, expected, mask);
2295 
2296   sll(new_val, new_val, shift);
2297   andr(new_val, new_val, mask);
2298 }
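
     // Example: for a 2-byte CAS at an address with low bits 0b10, shift becomes
     // 16, aligned_addr is the enclosing word, mask ends up as 0xFFFF0000, and
     // expected/new_val are pre-shifted into bits 31:16.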
2299 
2300 // cmpxchg_narrow_value will kill t0, t1, expected, new_val and tmps.
2301 // It's designed to implement compare and swap of byte/boolean/char/short
2302 // by lr.w/sc.w, which are forced to work with 4-byte aligned addresses.
2303 void MacroAssembler::cmpxchg_narrow_value(Register addr, Register expected,
2304                                           Register new_val,
2305                                           enum operand_size size,
2306                                           Assembler::Aqrl acquire, Assembler::Aqrl release,
2307                                           Register result, bool result_as_bool,
2308                                           Register tmp1, Register tmp2, Register tmp3) {
2309   Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3, old = result, tmp = t0;
2310   assert_different_registers(addr, old, mask, not_mask, new_val, expected, shift, tmp);
2311   cmpxchg_narrow_value_helper(addr, expected, new_val, size, tmp1, tmp2, tmp3);
2312 
2313   Label retry, fail, done;
2314 
2315   bind(retry);
2316   lr_w(old, aligned_addr, acquire);
2317   andr(tmp, old, mask);
2318   bne(tmp, expected, fail);
2319 
2320   andr(tmp, old, not_mask);
2321   orr(tmp, tmp, new_val);
2322   sc_w(tmp, tmp, aligned_addr, release);
2323   bnez(tmp, retry);
2324 
2325   if (result_as_bool) {
2326     addi(result, zr, 1);
2327     j(done);
2328 
2329     bind(fail);
2330     mv(result, zr);
2331 
2332     bind(done);
2333   } else {
2334     andr(tmp, old, mask);
2335 
2336     bind(fail);
2337     srl(result, tmp, shift);
2338 
2339     if (size == int8) {
2340       sign_extend(result, result, 8);
2341     } else {
2342       // size == int16 case
2343       sign_extend(result, result, 16);
2344     }
2345   }
2346 }
2347 
2348 // weak_cmpxchg_narrow_value is a weak version of cmpxchg_narrow_value, to implement
2349 // the weak CAS stuff. The major difference is that it simply fails when the
2350 // store conditional fails.
2351 void MacroAssembler::weak_cmpxchg_narrow_value(Register addr, Register expected,
2352                                                Register new_val,
2353                                                enum operand_size size,
2354                                                Assembler::Aqrl acquire, Assembler::Aqrl release,
2355                                                Register result,
2356                                                Register tmp1, Register tmp2, Register tmp3) {
2357   Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3, old = result, tmp = t0;
2358   assert_different_registers(addr, old, mask, not_mask, new_val, expected, shift, tmp);
2359   cmpxchg_narrow_value_helper(addr, expected, new_val, size, tmp1, tmp2, tmp3);
2360 
2361   Label succ, fail, done;
2362 
2363   lr_w(old, aligned_addr, acquire);
2364   andr(tmp, old, mask);
2365   bne(tmp, expected, fail);
2366 
2367   andr(tmp, old, not_mask);
2368   orr(tmp, tmp, new_val);
2369   sc_w(tmp, tmp, aligned_addr, release);
2370   beqz(tmp, succ);
2371 
2372   bind(fail);
2373   addi(result, zr, 1);
2374   j(done);
2375 
2376   bind(succ);
2377   mv(result, zr);
2378 
2379   bind(done);
2380 }
2381 
2382 void MacroAssembler::cmpxchg(Register addr, Register expected,
2383                              Register new_val,
2384                              enum operand_size size,
2385                              Assembler::Aqrl acquire, Assembler::Aqrl release,
2386                              Register result, bool result_as_bool) {
2387   assert(size != int8 && size != int16, "unsupported operand size");
2388 
2389   Label retry_load, done, ne_done;
2390   bind(retry_load);
2391   load_reserved(addr, size, acquire);
2392   bne(t0, expected, ne_done);
2393   store_conditional(addr, new_val, size, release);
2394   bnez(t0, retry_load);
2395 
2396   // equal, succeed
2397   if (result_as_bool) {
2398     li(result, 1);
2399   } else {
2400     mv(result, expected);
2401   }
2402   j(done);
2403 
2404   // not equal, failed
2405   bind(ne_done);
2406   if (result_as_bool) {
2407     mv(result, zr);
2408   } else {
2409     mv(result, t0);
2410   }
2411 
2412   bind(done);
2413 }
2414 
2415 void MacroAssembler::cmpxchg_weak(Register addr, Register expected,
2416                                   Register new_val,
2417                                   enum operand_size size,
2418                                   Assembler::Aqrl acquire, Assembler::Aqrl release,
2419                                   Register result) {
2420   Label fail, done, sc_done;
2421   load_reserved(addr, size, acquire);
2422   bne(t0, expected, fail);
2423   store_conditional(addr, new_val, size, release);
2424   beqz(t0, sc_done);
2425 
2426   // fail
2427   bind(fail);
2428   li(result, 1);
2429   j(done);
2430 
2431   // sc_done
2432   bind(sc_done);
2433   mv(result, 0);
2434   bind(done);
2435 }
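
     // Note: result is 0 on success and 1 on failure -- the opposite of the
     // result_as_bool convention used by cmpxchg above -- so a caller wanting a
     // boolean success flag must invert it.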
2436 
2437 #define ATOMIC_OP(NAME, AOP, ACQUIRE, RELEASE)                                              \
2438 void MacroAssembler::atomic_##NAME(Register prev, RegisterOrConstant incr, Register addr) { \
2439   prev = prev->is_valid() ? prev : zr;                                                      \
2440   if (incr.is_register()) {                                                                 \
2441     AOP(prev, addr, incr.as_register(), (Assembler::Aqrl)(ACQUIRE | RELEASE));              \
2442   } else {                                                                                  \
2443     mv(t0, incr.as_constant());                                                             \
2444     AOP(prev, addr, t0, (Assembler::Aqrl)(ACQUIRE | RELEASE));                              \
2445   }                                                                                         \
2446   return;                                                                                   \
2447 }
2448 
2449 ATOMIC_OP(add, amoadd_d, Assembler::relaxed, Assembler::relaxed)
2450 ATOMIC_OP(addw, amoadd_w, Assembler::relaxed, Assembler::relaxed)
2451 ATOMIC_OP(addal, amoadd_d, Assembler::aq, Assembler::rl)
2452 ATOMIC_OP(addalw, amoadd_w, Assembler::aq, Assembler::rl)
2453 
2454 #undef ATOMIC_OP
2455 
2456 #define ATOMIC_XCHG(OP, AOP, ACQUIRE, RELEASE)                                       \
2457 void MacroAssembler::atomic_##OP(Register prev, Register newv, Register addr) {      \
2458   prev = prev->is_valid() ? prev : zr;                                               \
2459   AOP(prev, addr, newv, (Assembler::Aqrl)(ACQUIRE | RELEASE));                       \
2460   return;                                                                            \
2461 }
2462 
2463 ATOMIC_XCHG(xchg, amoswap_d, Assembler::relaxed, Assembler::relaxed)
2464 ATOMIC_XCHG(xchgw, amoswap_w, Assembler::relaxed, Assembler::relaxed)
2465 ATOMIC_XCHG(xchgal, amoswap_d, Assembler::aq, Assembler::rl)
2466 ATOMIC_XCHG(xchgalw, amoswap_w, Assembler::aq, Assembler::rl)
2467 
2468 #undef ATOMIC_XCHG
2469 
2470 #define ATOMIC_XCHGU(OP1, OP2)                                                       \
2471 void MacroAssembler::atomic_##OP1(Register prev, Register newv, Register addr) {     \
2472   atomic_##OP2(prev, newv, addr);                                                    \
2473   zero_extend(prev, prev, 32);                                                       \
2474   return;                                                                            \
2475 }
2476 
2477 ATOMIC_XCHGU(xchgwu, xchgw)
2478 ATOMIC_XCHGU(xchgalwu, xchgalw)
2479 
2480 #undef ATOMIC_XCHGU
2481 
2482 void MacroAssembler::far_jump(Address entry, CodeBuffer *cbuf, Register tmp) {
2483   assert(ReservedCodeCacheSize < 4*G, "branch out of range");
2484   assert(CodeCache::find_blob(entry.target()) != NULL,
2485          "destination of far call not found in code cache");
2486   int32_t offset = 0;
2487   if (far_branches()) {
2488     // We can use auipc + jalr here because we know that the total size of
2489     // the code cache cannot exceed 2Gb.
2490     la_patchable(tmp, entry, offset);
2491     if (cbuf != NULL) { cbuf->set_insts_mark(); }
2492     jalr(x0, tmp, offset);
2493   } else {
2494     if (cbuf != NULL) { cbuf->set_insts_mark(); }
2495     j(entry);
2496   }
2497 }
2498 
2499 void MacroAssembler::far_call(Address entry, CodeBuffer *cbuf, Register tmp) {
2500   assert(ReservedCodeCacheSize < 4*G, "branch out of range");
2501   assert(CodeCache::find_blob(entry.target()) != NULL,
2502          "destination of far call not found in code cache");
2503   int32_t offset = 0;
2504   if (far_branches()) {
2505     // We can use auipc + jalr here because we know that the total size of
2506     // the code cache cannot exceed 2Gb.
2507     la_patchable(tmp, entry, offset);
2508     if (cbuf != NULL) { cbuf->set_insts_mark(); }
2509     jalr(x1, tmp, offset); // link
2510   } else {
2511     if (cbuf != NULL) { cbuf->set_insts_mark(); }
2512     jal(entry); // link
2513   }
2514 }
2515 
2516 void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
2517                                                    Register super_klass,
2518                                                    Register tmp_reg,
2519                                                    Label* L_success,
2520                                                    Label* L_failure,
2521                                                    Label* L_slow_path,
2522                                                    Register super_check_offset) {
2523   assert_different_registers(sub_klass, super_klass, tmp_reg);
2524   bool must_load_sco = (super_check_offset == noreg);
2525   if (must_load_sco) {
2526     assert(tmp_reg != noreg, "supply either a temp or a register offset");
2527   } else {
2528     assert_different_registers(sub_klass, super_klass, super_check_offset);
2529   }
2530 
2531   Label L_fallthrough;
2532   int label_nulls = 0;
2533   if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
2534   if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
2535   if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
2536   assert(label_nulls <= 1, "at most one NULL in batch");
2537 
2538   int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
2539   int sco_offset = in_bytes(Klass::super_check_offset_offset());
2540   Address super_check_offset_addr(super_klass, sco_offset);
2541 
2542   // Hacked jmp, which may only be used just before L_fallthrough.
2543 #define final_jmp(label)                                                \
2544   if (&(label) == &L_fallthrough) { /*do nothing*/ }                    \
2545   else                            j(label)             /*omit semi*/
2546 
2547   // If the pointers are equal, we are done (e.g., String[] elements).
2548   // This self-check enables sharing of secondary supertype arrays among
2549   // non-primary types such as array-of-interface. Otherwise, each such
2550   // type would need its own customized SSA.
2551   // We move this check to the front of the fast path because many
2552   // type checks are in fact trivially successful in this manner,
2553   // so we get a nicely predicted branch right at the start of the check.
2554   beq(sub_klass, super_klass, *L_success);
2555 
2556   // Check the supertype display:
2557   if (must_load_sco) {
2558     lwu(tmp_reg, super_check_offset_addr);
2559     super_check_offset = tmp_reg;
2560   }
2561   add(t0, sub_klass, super_check_offset);
2562   Address super_check_addr(t0);
2563   ld(t0, super_check_addr); // load displayed supertype
2564 
2565   // This check has worked decisively for primary supers.
2566   // Secondary supers are sought in the super_cache ('super_cache_addr').
2567   // (Secondary supers are interfaces and very deeply nested subtypes.)
2568   // This works in the same check above because of a tricky aliasing
2569   // between the super_cache and the primary super display elements.
2570   // (The 'super_check_addr' can address either, as the case requires.)
2571   // Note that the cache is updated below if it does not help us find
2572   // what we need immediately.
2573   // So if it was a primary super, we can just fail immediately.
2574   // Otherwise, it's the slow path for us (no success at this point).
2575 
2576   beq(super_klass, t0, *L_success);
2577   mv(t1, sc_offset);
2578   if (L_failure == &L_fallthrough) {
2579     beq(super_check_offset, t1, *L_slow_path);
2580   } else {
2581     bne(super_check_offset, t1, *L_failure, /* is_far */ true);
2582     final_jmp(*L_slow_path);
2583   }
2584 
2585   bind(L_fallthrough);
2586 
2587 #undef final_jmp
2588 }
2589 
2590 // Scans count pointer-sized words at [addr] for an occurrence of value,
2591 // generic
2592 void MacroAssembler::repne_scan(Register addr, Register value, Register count,
2593                                 Register tmp) {
2594   Label Lloop, Lexit;
2595   beqz(count, Lexit);
2596   bind(Lloop);
2597   ld(tmp, addr);
2598   beq(value, tmp, Lexit);
2599   add(addr, addr, wordSize);
2600   sub(count, count, 1);
2601   bnez(count, Lloop);
2602   bind(Lexit);
2603 }
2604 
2605 void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
2606                                                    Register super_klass,
2607                                                    Register tmp1_reg,
2608                                                    Register tmp2_reg,
2609                                                    Label* L_success,
2610                                                    Label* L_failure) {
2611   assert_different_registers(sub_klass, super_klass, tmp1_reg);
2612   if (tmp2_reg != noreg) {
2613     assert_different_registers(sub_klass, super_klass, tmp1_reg, tmp2_reg, t0);
2614   }
2615 #define IS_A_TEMP(reg) ((reg) == tmp1_reg || (reg) == tmp2_reg)
2616 
2617   Label L_fallthrough;
2618   int label_nulls = 0;
2619   if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
2620   if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
2621 
2622   assert(label_nulls <= 1, "at most one NULL in the batch");
2623 
2624   // A couple of useful fields in sub_klass:
2625   int ss_offset = in_bytes(Klass::secondary_supers_offset());
2626   int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
2627   Address secondary_supers_addr(sub_klass, ss_offset);
2628   Address super_cache_addr(     sub_klass, sc_offset);
2629 
2630   BLOCK_COMMENT("check_klass_subtype_slow_path");
2631 
2632   // Do a linear scan of the secondary super-klass chain.
2633   // This code is rarely used, so simplicity is a virtue here.
2634   // The repne_scan instruction uses fixed registers, which we must spill.
2635   // Don't worry too much about pre-existing connections with the input regs.
2636 
2637   assert(sub_klass != x10, "killed reg"); // killed by mv(x10, super)
2638   assert(sub_klass != x12, "killed reg"); // killed by la(x12, &pst_counter)
2639 
2640   RegSet pushed_registers;
2641   if (!IS_A_TEMP(x12)) {
2642     pushed_registers += x12;
2643   }
2644   if (!IS_A_TEMP(x15)) {
2645     pushed_registers += x15;
2646   }
2647 
2648   if (super_klass != x10 || UseCompressedOops) {
2649     if (!IS_A_TEMP(x10)) {
2650       pushed_registers += x10;
2651     }
2652   }
2653 
2654   push_reg(pushed_registers, sp);
2655 
2656   // Get super_klass value into x10 (even if it was in x15 or x12)
2657   mv(x10, super_klass);
2658 
2659 #ifndef PRODUCT
2660   mv(t1, (address)&SharedRuntime::_partial_subtype_ctr);
2661   Address pst_counter_addr(t1);
2662   ld(t0, pst_counter_addr);
2663   add(t0, t0, 1);
2664   sd(t0, pst_counter_addr);
2665 #endif // PRODUCT
2666 
2667   // We will consult the secondary-super array.
2668   ld(x15, secondary_supers_addr);
2669   // Load the array length.
2670   lwu(x12, Address(x15, Array<Klass*>::length_offset_in_bytes()));
2671   // Skip to start of data.
2672   add(x15, x15, Array<Klass*>::base_offset_in_bytes());
2673 
2674   // Set t0 to an obvious invalid value, falling through by default
2675   li(t0, -1);
2676   // Scan X12 words at [X15] for an occurrence of X10.
2677   repne_scan(x15, x10, x12, t0);
2678 
2679   // pop will restore x10, so we should use a temp register to keep its value
2680   mv(t1, x10);
2681 
2682   // Unspill the temp registers:
2683   pop_reg(pushed_registers, sp);
2684 
2685   bne(t1, t0, *L_failure);
2686 
2687   // Success. Cache the super we found and proceed in triumph.
2688   sd(super_klass, super_cache_addr);
2689 
2690   if (L_success != &L_fallthrough) {
2691     j(*L_success);
2692   }
2693 
2694 #undef IS_A_TEMP
2695 
2696   bind(L_fallthrough);
2697 }
2698 
2699 // Defines obj, preserves var_size_in_bytes, okay for tmp2 == var_size_in_bytes.
2700 void MacroAssembler::tlab_allocate(Register obj,
2701                                    Register var_size_in_bytes,
2702                                    int con_size_in_bytes,
2703                                    Register tmp1,
2704                                    Register tmp2,
2705                                    Label& slow_case,
2706                                    bool is_far) {
2707   BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
2708   bs->tlab_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, tmp1, tmp2, slow_case, is_far);
2709 }
2710 
2711 // Defines obj, preserves var_size_in_bytes
2712 void MacroAssembler::eden_allocate(Register obj,
2713                                    Register var_size_in_bytes,
2714                                    int con_size_in_bytes,
2715                                    Register tmp,
2716                                    Label& slow_case,
2717                                    bool is_far) {
2718   BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
2719   bs->eden_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, tmp, slow_case, is_far);
2720 }
2721 
2722 
2723 // get_thread() can be called anywhere inside generated code so we
2724 // need to save whatever non-callee save context might get clobbered
2725 // by the call to Thread::current() or, indeed, the call setup code.
2726 void MacroAssembler::get_thread(Register thread) {
2727   // save all call-clobbered regs except thread
2728   RegSet saved_regs = RegSet::range(x5, x7) + RegSet::range(x10, x17) +
2729                       RegSet::range(x28, x31) + ra - thread;
2730   push_reg(saved_regs, sp);
2731 
2732   int32_t offset = 0;
2733   movptr_with_offset(ra, CAST_FROM_FN_PTR(address, Thread::current), offset);
2734   jalr(ra, ra, offset);
2735   if (thread != x10) {
2736     mv(thread, x10);
2737   }
2738 
2739   // restore pushed registers
2740   pop_reg(saved_regs, sp);
2741 }
2742 
2743 void MacroAssembler::load_byte_map_base(Register reg) {
2744   CardTable::CardValue* byte_map_base =
2745     ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base();
2746   li(reg, (uint64_t)byte_map_base);
2747 }
2748 
2749 void MacroAssembler::la_patchable(Register reg1, const Address &dest, int32_t &offset) {
2750   relocInfo::relocType rtype = dest.rspec().reloc()->type();
2751   unsigned long low_address = (uintptr_t)CodeCache::low_bound();
2752   unsigned long high_address = (uintptr_t)CodeCache::high_bound();
2753   unsigned long dest_address = (uintptr_t)dest.target();
2754   long offset_low = dest_address - low_address;
2755   long offset_high = dest_address - high_address;
2756 
2757   assert(is_valid_riscv64_address(dest.target()), "bad address");
2758   assert(dest.getMode() == Address::literal, "la_patchable must be applied to a literal address");
2759 
2760   InstructionMark im(this);
2761   code_section()->relocate(inst_mark(), dest.rspec());
2762   // RISC-V doesn't compute a page-aligned address, in order to partially
2763   // compensate for the use of *signed* offsets in its base+disp12
2764   // addressing mode (RISC-V's PC-relative reach remains asymmetric:
2765   // [-(2G + 2K), 2G - 2K)).
2766   if (offset_high >= -((1L << 31) + (1L << 11)) && offset_low < (1L << 31) - (1L << 11)) {
2767     int64_t distance = dest.target() - pc();
2768     auipc(reg1, (int32_t)distance + 0x800);
2769     offset = ((int32_t)distance << 20) >> 20;
2770   } else {
2771     movptr_with_offset(reg1, dest.target(), offset);
2772   }
2773 }
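
     // Example of the auipc/offset split above: for distance = 0x12345FFF the
     // +0x800 rounding makes auipc add 0x12346000, and the returned offset is
     // -1 (0xFFF sign-extended), so auipc + offset reproduces the exact distance.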
2774 
2775 void MacroAssembler::build_frame(int framesize) {
2776   assert(framesize >= 2, "framesize must include space for FP/RA");
2777   assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment");
2778   sub(sp, sp, framesize);
2779   sd(fp, Address(sp, framesize - 2 * wordSize));
2780   sd(ra, Address(sp, framesize - wordSize));
2781   if (PreserveFramePointer) { add(fp, sp, framesize); }
2782   verify_cross_modify_fence_not_required();
2783 }
2784 
2785 void MacroAssembler::remove_frame(int framesize) {
2786   assert(framesize >= 2, "framesize must include space for FP/RA");
2787   assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment");
2788   ld(fp, Address(sp, framesize - 2 * wordSize));
2789   ld(ra, Address(sp, framesize - wordSize));
2790   add(sp, sp, framesize);
2791 }
2792 
2793 void MacroAssembler::reserved_stack_check() {
2794     // testing if reserved zone needs to be enabled
2795     Label no_reserved_zone_enabling;
2796 
2797     ld(t0, Address(xthread, JavaThread::reserved_stack_activation_offset()));
2798     bltu(sp, t0, no_reserved_zone_enabling);
2799 
2800     enter();   // RA and FP are live.
2801     mv(c_rarg0, xthread);
2802     int32_t offset = 0;
2803     la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone)), offset);
2804     jalr(x1, t0, offset);
2805     leave();
2806 
2807     // We have already removed our own frame.
2808     // throw_delayed_StackOverflowError will think that it's been
2809     // called by our caller.
2810     offset = 0;
2811     la_patchable(t0, RuntimeAddress(StubRoutines::throw_delayed_StackOverflowError_entry()), offset);
2812     jalr(x0, t0, offset);
2813     should_not_reach_here();
2814 
2815     bind(no_reserved_zone_enabling);
2816 }
2817 
2818 // Move the address of the polling page into dest.
2819 void MacroAssembler::get_polling_page(Register dest, relocInfo::relocType rtype) {
2820   ld(dest, Address(xthread, JavaThread::polling_page_offset()));
2821 }
2822 
2823 // Read the polling page.  The address of the polling page must
2824 // already be in r.
2825 address MacroAssembler::read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype) {
2826   address mark;
2827   {
2828     InstructionMark im(this);
2829     code_section()->relocate(inst_mark(), rtype);
2830     lwu(zr, Address(r, offset));
2831     mark = inst_mark();
2832   }
2833   verify_cross_modify_fence_not_required();
2834   return mark;
2835 }
2836 
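// Materialize a narrow (compressed) oop constant into dst. The 0xDEADBEEF
// immediate is only a placeholder recorded with an oop relocation; the real
// 32-bit value is patched in later, which is why the fixed-length li32 is
// used rather than a shorter load.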
2837 void MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
2838 #ifdef ASSERT
2839   {
2840     ThreadInVMfromUnknown tiv;
2841     assert (UseCompressedOops, "should only be used for compressed oops");
2842     assert (Universe::heap() != NULL, "java heap should be initialized");
2843     assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
2844     assert(Universe::heap()->is_in(JNIHandles::resolve(obj)), "should be real oop");
2845   }
2846 #endif
2847   int oop_index = oop_recorder()->find_index(obj);
2848   InstructionMark im(this);
2849   RelocationHolder rspec = oop_Relocation::spec(oop_index);
2850   code_section()->relocate(inst_mark(), rspec);
2851   li32(dst, 0xDEADBEEF);
2852   zero_extend(dst, dst, 32);
2853 }
2854 
2855 void MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
2856   assert (UseCompressedClassPointers, "should only be used for compressed headers");
2857   assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
2858   int index = oop_recorder()->find_index(k);
2859   assert(!Universe::heap()->is_in(k), "should not be an oop");
2860 
2861   InstructionMark im(this);
2862   RelocationHolder rspec = metadata_Relocation::spec(index);
2863   code_section()->relocate(inst_mark(), rspec);
2864   narrowKlass nk = CompressedKlassPointers::encode(k);
2865   li32(dst, nk);
2866   zero_extend(dst, dst, 32);
2867 }
2868 
2869 // Maybe emit a call via a trampoline. If the code cache is small,
2870 // trampolines won't be emitted (far_branches() will be false).
2871 address MacroAssembler::trampoline_call(Address entry, CodeBuffer* cbuf) {
2872   assert(JavaThread::current()->is_Compiler_thread(), "just checking");
2873   assert(entry.rspec().type() == relocInfo::runtime_call_type ||
2874          entry.rspec().type() == relocInfo::opt_virtual_call_type ||
2875          entry.rspec().type() == relocInfo::static_call_type ||
2876          entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type");
2877 
2878   // We need a trampoline if branches are far.
2879   if (far_branches()) {
2880     bool in_scratch_emit_size = false;
2881 #ifdef COMPILER2
2882     // We don't want to emit a trampoline if C2 is generating dummy
2883     // code during its branch shortening phase.
2884     CompileTask* task = ciEnv::current()->task();
2885     in_scratch_emit_size =
2886       (task != NULL && is_c2_compile(task->comp_level()) &&
2887        Compile::current()->output()->in_scratch_emit_size());
2888 #endif
2889     if (!in_scratch_emit_size) {
2890       address stub = emit_trampoline_stub(offset(), entry.target());
2891       if (stub == NULL) {
2892         postcond(pc() == badAddress);
2893         return NULL; // CodeCache is full
2894       }
2895     }
2896   }
2897 
2898   if (cbuf != NULL) { cbuf->set_insts_mark(); }
2899   relocate(entry.rspec());
2900   if (!far_branches()) {
2901     jal(entry.target());
2902   } else {
2903     jal(pc());
2904   }
2905   // just need to return a non-null address
2906   postcond(pc() != badAddress);
2907   return pc();
2908 }
2909 
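// Emit an inline-cache call to `entry`: t1 is preloaded with the IC data
// placeholder (non_oop_word), which inline-cache resolution later patches,
// and the call itself goes through trampoline_call with a virtual_call
// relocation carrying method_index.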
2910 address MacroAssembler::ic_call(address entry, jint method_index) {
2911   RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index);
2912   movptr(t1, (address)Universe::non_oop_word());
2913   assert_cond(entry != NULL);
2914   return trampoline_call(Address(entry, rh));
2915 }
2916 
2917 // Emit a trampoline stub for a call to a target which is too far away.
2918 //
2919 // code sequences:
2920 //
2921 // call-site:
2922 //   branch-and-link to <destination> or <trampoline stub>
2923 //
2924 // Related trampoline stub for this call site in the stub section:
2925 //   load the call target from the constant pool
2926 //   branch (RA still points to the call site above)
2927 
2928 address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset,
2929                                              address dest) {
2930   address stub = start_a_stub(NativeInstruction::instruction_size
2931                             + NativeCallTrampolineStub::instruction_size);
2932   if (stub == NULL) {
2933     return NULL;  // CodeBuffer::expand failed
2934   }
2935 
2936   // Create a trampoline stub relocation which relates this trampoline stub
2937   // with the call instruction at insts_call_instruction_offset in the
2938   // instructions code-section.
2939 
2940   // Align so that the embedded destination address ends up 8-byte aligned
2941   // after the 3 instructions that follow; since we may arrive here at a
2942   // 2-byte boundary, the alignment must be established first.
2943   align(wordSize, NativeCallTrampolineStub::data_offset);
2944 
2945   relocate(trampoline_stub_Relocation::spec(code()->insts()->start() +
2946                                             insts_call_instruction_offset));
2947   const int stub_start_offset = offset();
2948 
2949   // Now, create the trampoline stub's code:
2950   // - load the call target from the embedded word below
2951   // - jump to it (RA still points at the original call site)
2952   Label target;
2953   ld(t0, target);  // auipc + ld
2954   jr(t0);          // jalr
2955   bind(target);
2956   assert(offset() - stub_start_offset == NativeCallTrampolineStub::data_offset,
2957          "should be");
2958   assert(offset() % wordSize == 0, "bad alignment");
2959   emit_int64((intptr_t)dest);
2960 
2961   const address stub_start_addr = addr_at(stub_start_offset);
2962 
2963   assert(is_NativeCallTrampolineStub_at(stub_start_addr), "doesn't look like a trampoline");
2964 
2965   end_a_stub();
2966   return stub_start_addr;
2967 }
2968 
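// Normalize an arbitrary Address into a simple base+offset form that the
// add_memory_* helpers below can use for their load/modify/store sequence,
// materializing any other addressing mode into t1.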
2969 Address MacroAssembler::add_memory_helper(const Address dst) {
2970   switch (dst.getMode()) {
2971     case Address::base_plus_offset:
2972       // This is the expected mode, although we allow all the other
2973       // forms below.
2974       return form_address(t1, dst.base(), dst.offset());
2975     default:
2976       la(t1, dst);
2977       return Address(t1);
2978   }
2979 }
2980 
2981 void MacroAssembler::add_memory_int64(const Address dst, int64_t imm) {
2982   Address adr = add_memory_helper(dst);
2983   assert_different_registers(adr.base(), t0);
2984   ld(t0, adr);
2985   addi(t0, t0, imm);
2986   sd(t0, adr);
2987 }
2988 
2989 void MacroAssembler::add_memory_int32(const Address dst, int32_t imm) {
2990   Address adr = add_memory_helper(dst);
2991   assert_different_registers(adr.base(), t0);
2992   lwu(t0, adr);
2993   addiw(t0, t0, imm);
2994   sw(t0, adr);
2995 }
2996 
2997 void MacroAssembler::cmpptr(Register src1, Address src2, Label& equal) {
2998   assert_different_registers(src1, t0);
2999   int32_t offset;
3000   la_patchable(t0, src2, offset);
3001   ld(t0, Address(t0, offset));
3002   beq(src1, t0, equal);
3003 }
3004 
3005 void MacroAssembler::load_method_holder_cld(Register result, Register method) {
3006   load_method_holder(result, method);
3007   ld(result, Address(result, InstanceKlass::class_loader_data_offset()));
3008 }
3009 
3010 void MacroAssembler::load_method_holder(Register holder, Register method) {
3011   ld(holder, Address(method, Method::const_offset()));                      // ConstMethod*
3012   ld(holder, Address(holder, ConstMethod::constants_offset()));             // ConstantPool*
3013   ld(holder, Address(holder, ConstantPool::pool_holder_offset_in_bytes())); // InstanceKlass*
3014 }
3015 
3016 // String.indexOf helper:
3017 // compute the match index from the count of trailing zeros.
3018 void MacroAssembler::compute_index(Register haystack, Register trailing_zeros,
3019                                    Register match_mask, Register result,
3020                                    Register ch2, Register tmp,
3021                                    bool haystack_isL)
3022 {
3023   int haystack_chr_shift = haystack_isL ? 0 : 1;
3024   srl(match_mask, match_mask, trailing_zeros);
3025   srli(match_mask, match_mask, 1);
3026   srli(tmp, trailing_zeros, LogBitsPerByte);
3027   if (!haystack_isL) andi(tmp, tmp, 0xE);
3028   add(haystack, haystack, tmp);
3029   ld(ch2, Address(haystack));
3030   if (!haystack_isL) srli(tmp, tmp, haystack_chr_shift);
3031   add(result, result, tmp);
3032 }
3033 
3034 // string indexof
3035 // Find pattern element in src, compute match mask,
3036 // only the lowest-order occurrence of 0x80/0x8000 marks the valid match index.
3037 // match mask patterns and corresponding indices would be like:
3038 // - 0x8080808080808080 (Latin1)
3039 // -   7 6 5 4 3 2 1 0  (match index)
3040 // - 0x8000800080008000 (UTF16)
3041 // -   3   2   1   0    (match index)
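// This is the classic SWAR zero-byte test applied to (src ^ pattern):
// match_mask = (x - mask1) & ~x & ~mask2, which assumes the caller passes
// mask1 = 0x0101010101010101 and mask2 = 0x7f7f7f7f7f7f7f7f (or the
// corresponding 0x0001.../0x7fff... halfword patterns for UTF16).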
3042 void MacroAssembler::compute_match_mask(Register src, Register pattern, Register match_mask,
3043                                         Register mask1, Register mask2)
3044 {
3045   xorr(src, pattern, src);
3046   sub(match_mask, src, mask1);
3047   orr(src, src, mask2);
3048   notr(src, src);
3049   andr(match_mask, match_mask, src);
3050 }
3051 
3052 #ifdef COMPILER2
3053 // Code for the BigInteger::mulAdd intrinsic.
3054 // out     = x10
3055 // in      = x11
3056 // offset  = x12  (already out.length-offset)
3057 // len     = x13
3058 // k       = x14
3059 // tmp     = x28
3060 //
3061 // Pseudo code from the Java implementation:
3062 // long kLong = k & LONG_MASK;
3063 // carry = 0;
3064 // offset = out.length-offset - 1;
3065 // for (int j = len - 1; j >= 0; j--) {
3066 //     product = (in[j] & LONG_MASK) * kLong + (out[offset] & LONG_MASK) + carry;
3067 //     out[offset--] = (int)product;
3068 //     carry = product >>> 32;
3069 // }
3070 // return (int)carry;
3071 void MacroAssembler::mul_add(Register out, Register in, Register offset,
3072                              Register len, Register k, Register tmp) {
3073   Label L_tail_loop, L_unroll, L_end;
3074   mv(tmp, out);
3075   mv(out, zr);
3076   blez(len, L_end);
3077   zero_extend(k, k, 32);
3078   slliw(t0, offset, LogBytesPerInt);
3079   add(offset, tmp, t0);
3080   slliw(t0, len, LogBytesPerInt);
3081   add(in, in, t0);
3082 
3083   const int unroll = 8;
3084   li(tmp, unroll);
3085   blt(len, tmp, L_tail_loop);
3086   bind(L_unroll);
3087   for (int i = 0; i < unroll; i++) {
3088     sub(in, in, BytesPerInt);
3089     lwu(t0, Address(in, 0));
3090     mul(t1, t0, k);
3091     add(t0, t1, out);
3092     sub(offset, offset, BytesPerInt);
3093     lwu(t1, Address(offset, 0));
3094     add(t0, t0, t1);
3095     sw(t0, Address(offset, 0));
3096     srli(out, t0, 32);
3097   }
3098   subw(len, len, tmp);
3099   bge(len, tmp, L_unroll);
3100 
3101   bind(L_tail_loop);
3102   blez(len, L_end);
3103   sub(in, in, BytesPerInt);
3104   lwu(t0, Address(in, 0));
3105   mul(t1, t0, k);
3106   add(t0, t1, out);
3107   sub(offset, offset, BytesPerInt);
3108   lwu(t1, Address(offset, 0));
3109   add(t0, t0, t1);
3110   sw(t0, Address(offset, 0));
3111   srli(out, t0, 32);
3112   subw(len, len, 1);
3113   j(L_tail_loop);
3114 
3115   bind(L_end);
3116 }
3117 
3118 // Add two unsigned inputs and produce the carry-out.
3119 void MacroAssembler::cad(Register dst, Register src1, Register src2, Register carry)
3120 {
3121   assert_different_registers(dst, carry);
3122   assert_different_registers(dst, src2);
3123   add(dst, src1, src2);
3124   sltu(carry, dst, src2);
3125 }
3126 
3127 // Add two inputs plus a carry-in.
3128 void MacroAssembler::adc(Register dst, Register src1, Register src2, Register carry)
3129 {
3130   assert_different_registers(dst, carry);
3131   add(dst, src1, src2);
3132   add(dst, dst, carry);
3133 }
3134 
3135 // Add two unsigned inputs plus a carry-in and produce the carry-out.
3136 void MacroAssembler::cadc(Register dst, Register src1, Register src2, Register carry)
3137 {
3138   assert_different_registers(dst, src2);
3139   adc(dst, src1, src2, carry);
3140   sltu(carry, dst, src2);
3141 }
3142 
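// Compute the 128-bit sum dest_hi:dest_lo + src1 + src2, leaving the low
// 64 bits in dest_lo and the high 64 bits in final_dest_hi. `carry` is
// clobbered as a scratch register for the intermediate carry-outs.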
3143 void MacroAssembler::add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo,
3144                                      Register src1, Register src2, Register carry)
3145 {
3146   cad(dest_lo, dest_lo, src1, carry);
3147   add(dest_hi, dest_hi, carry);
3148   cad(dest_lo, dest_lo, src2, carry);
3149   add(final_dest_hi, dest_hi, carry);
3150 }
3151 
3152 /**
3153  * Multiply 32 bit by 32 bit first loop.
3154  */
3155 void MacroAssembler::multiply_32_x_32_loop(Register x, Register xstart, Register x_xstart,
3156                                            Register y, Register y_idx, Register z,
3157                                            Register carry, Register product,
3158                                            Register idx, Register kdx)
3159 {
3160   // jlong carry, x[], y[], z[];
3161   // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) {
3162   //     long product = y[idx] * x[xstart] + carry;
3163   //     z[kdx] = (int)product;
3164   //     carry = product >>> 32;
3165   // }
3166   // z[xstart] = (int)carry;
3167 
3168   Label L_first_loop, L_first_loop_exit;
3169   blez(idx, L_first_loop_exit);
3170 
3171   shadd(t0, xstart, x, t0, LogBytesPerInt);
3172   lwu(x_xstart, Address(t0, 0));
3173 
3174   bind(L_first_loop);
3175   subw(idx, idx, 1);
3176   shadd(t0, idx, y, t0, LogBytesPerInt);
3177   lwu(y_idx, Address(t0, 0));
3178   mul(product, x_xstart, y_idx);
3179   add(product, product, carry);
3180   srli(carry, product, 32);
3181   subw(kdx, kdx, 1);
3182   shadd(t0, kdx, z, t0, LogBytesPerInt);
3183   sw(product, Address(t0, 0));
3184   bgtz(idx, L_first_loop);
3185 
3186   bind(L_first_loop_exit);
3187 }
3188 
3189 /**
3190  * Multiply 64 bit by 64 bit first loop.
3191  */
3192 void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart,
3193                                            Register y, Register y_idx, Register z,
3194                                            Register carry, Register product,
3195                                            Register idx, Register kdx)
3196 {
3197   //
3198   //  jlong carry, x[], y[], z[];
3199   //  for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) {
3200   //    huge_128 product = y[idx] * x[xstart] + carry;
3201   //    z[kdx] = (jlong)product;
3202   //    carry  = (jlong)(product >>> 64);
3203   //  }
3204   //  z[xstart] = carry;
3205   //
3206 
3207   Label L_first_loop, L_first_loop_exit;
3208   Label L_one_x, L_one_y, L_multiply;
3209 
3210   subw(xstart, xstart, 1);
3211   bltz(xstart, L_one_x);
3212 
3213   shadd(t0, xstart, x, t0, LogBytesPerInt);
3214   ld(x_xstart, Address(t0, 0));
3215   ror_imm(x_xstart, x_xstart, 32); // convert big-endian to little-endian
3216 
3217   bind(L_first_loop);
3218   subw(idx, idx, 1);
3219   bltz(idx, L_first_loop_exit);
3220   subw(idx, idx, 1);
3221   bltz(idx, L_one_y);
3222 
3223   shadd(t0, idx, y, t0, LogBytesPerInt);
3224   ld(y_idx, Address(t0, 0));
3225   ror_imm(y_idx, y_idx, 32); // convert big-endian to little-endian
3226   bind(L_multiply);
3227 
3228   mulhu(t0, x_xstart, y_idx);
3229   mul(product, x_xstart, y_idx);
3230   cad(product, product, carry, t1);
3231   adc(carry, t0, zr, t1);
3232 
3233   subw(kdx, kdx, 2);
3234   ror_imm(product, product, 32); // back to big-endian
3235   shadd(t0, kdx, z, t0, LogBytesPerInt);
3236   sd(product, Address(t0, 0));
3237 
3238   j(L_first_loop);
3239 
3240   bind(L_one_y);
3241   lwu(y_idx, Address(y, 0));
3242   j(L_multiply);
3243 
3244   bind(L_one_x);
3245   lwu(x_xstart, Address(x, 0));
3246   j(L_first_loop);
3247 
3248   bind(L_first_loop_exit);
3249 }
3250 
3251 /**
3252  * Multiply 128 bit by 128 bit. Unrolled inner loop.
3253  *
3254  */
3255 void MacroAssembler::multiply_128_x_128_loop(Register y, Register z,
3256                                              Register carry, Register carry2,
3257                                              Register idx, Register jdx,
3258                                              Register yz_idx1, Register yz_idx2,
3259                                              Register tmp, Register tmp3, Register tmp4,
3260                                              Register tmp6, Register product_hi)
3261 {
3262   //   jlong carry, x[], y[], z[];
3263   //   int kdx = xstart+1;
3264   //   for (int idx=ystart-2; idx >= 0; idx -= 2) { // Third loop
3265   //     huge_128 tmp3 = (y[idx+1] * product_hi) + z[kdx+idx+1] + carry;
3266   //     jlong carry2  = (jlong)(tmp3 >>> 64);
3267   //     huge_128 tmp4 = (y[idx]   * product_hi) + z[kdx+idx] + carry2;
3268   //     carry  = (jlong)(tmp4 >>> 64);
3269   //     z[kdx+idx+1] = (jlong)tmp3;
3270   //     z[kdx+idx] = (jlong)tmp4;
3271   //   }
3272   //   idx += 2;
3273   //   if (idx > 0) {
3274   //     yz_idx1 = (y[idx] * product_hi) + z[kdx+idx] + carry;
3275   //     z[kdx+idx] = (jlong)yz_idx1;
3276   //     carry  = (jlong)(yz_idx1 >>> 64);
3277   //   }
3278   //
3279 
3280   Label L_third_loop, L_third_loop_exit, L_post_third_loop_done;
3281 
3282   srliw(jdx, idx, 2);
3283 
3284   bind(L_third_loop);
3285 
3286   subw(jdx, jdx, 1);
3287   bltz(jdx, L_third_loop_exit);
3288   subw(idx, idx, 4);
3289 
3290   shadd(t0, idx, y, t0, LogBytesPerInt);
3291   ld(yz_idx2, Address(t0, 0));
3292   ld(yz_idx1, Address(t0, wordSize));
3293 
3294   shadd(tmp6, idx, z, t0, LogBytesPerInt);
3295 
3296   ror_imm(yz_idx1, yz_idx1, 32); // convert big-endian to little-endian
3297   ror_imm(yz_idx2, yz_idx2, 32);
3298 
3299   ld(t1, Address(tmp6, 0));
3300   ld(t0, Address(tmp6, wordSize));
3301 
3302   mul(tmp3, product_hi, yz_idx1); //  yz_idx1 * product_hi -> tmp4:tmp3
3303   mulhu(tmp4, product_hi, yz_idx1);
3304 
3305   ror_imm(t0, t0, 32, tmp); // convert big-endian to little-endian
3306   ror_imm(t1, t1, 32, tmp);
3307 
3308   mul(tmp, product_hi, yz_idx2); //  yz_idx2 * product_hi -> carry2:tmp
3309   mulhu(carry2, product_hi, yz_idx2);
3310 
3311   cad(tmp3, tmp3, carry, carry);
3312   adc(tmp4, tmp4, zr, carry);
3313   cad(tmp3, tmp3, t0, t0);
3314   cadc(tmp4, tmp4, tmp, t0);
3315   adc(carry, carry2, zr, t0);
3316   cad(tmp4, tmp4, t1, carry2);
3317   adc(carry, carry, zr, carry2);
3318 
3319   ror_imm(tmp3, tmp3, 32); // convert little-endian to big-endian
3320   ror_imm(tmp4, tmp4, 32);
3321   sd(tmp4, Address(tmp6, 0));
3322   sd(tmp3, Address(tmp6, wordSize));
3323 
3324   j(L_third_loop);
3325 
3326   bind(L_third_loop_exit);
3327 
3328   andi(idx, idx, 0x3);
3329   beqz(idx, L_post_third_loop_done);
3330 
3331   Label L_check_1;
3332   subw(idx, idx, 2);
3333   bltz(idx, L_check_1);
3334 
3335   shadd(t0, idx, y, t0, LogBytesPerInt);
3336   ld(yz_idx1, Address(t0, 0));
3337   ror_imm(yz_idx1, yz_idx1, 32);
3338 
3339   mul(tmp3, product_hi, yz_idx1); //  yz_idx1 * product_hi -> tmp4:tmp3
3340   mulhu(tmp4, product_hi, yz_idx1);
3341 
3342   shadd(t0, idx, z, t0, LogBytesPerInt);
3343   ld(yz_idx2, Address(t0, 0));
3344   ror_imm(yz_idx2, yz_idx2, 32, tmp);
3345 
3346   add2_with_carry(carry, tmp4, tmp3, carry, yz_idx2, tmp);
3347 
3348   ror_imm(tmp3, tmp3, 32, tmp);
3349   sd(tmp3, Address(t0, 0));
3350 
3351   bind(L_check_1);
3352 
3353   andi(idx, idx, 0x1);
3354   subw(idx, idx, 1);
3355   bltz(idx, L_post_third_loop_done);
3356   shadd(t0, idx, y, t0, LogBytesPerInt);
3357   lwu(tmp4, Address(t0, 0));
3358   mul(tmp3, tmp4, product_hi); //  tmp4 * product_hi -> carry2:tmp3
3359   mulhu(carry2, tmp4, product_hi);
3360 
3361   shadd(t0, idx, z, t0, LogBytesPerInt);
3362   lwu(tmp4, Address(t0, 0));
3363 
3364   add2_with_carry(carry2, carry2, tmp3, tmp4, carry, t0);
3365 
3366   shadd(t0, idx, z, t0, LogBytesPerInt);
3367   sw(tmp3, Address(t0, 0));
3368 
3369   slli(t0, carry2, 32);
3370   srli(carry, tmp3, 32);
3371   orr(carry, carry, t0);
3372 
3373   bind(L_post_third_loop_done);
3374 }
3375 
3376 /**
3377  * Code for BigInteger::multiplyToLen() intrinsic.
3378  *
3379  * x10: x
3380  * x11: xlen
3381  * x12: y
3382  * x13: ylen
3383  * x14: z
3384  * x15: zlen
3385  * x16: tmp1
3386  * x17: tmp2
3387  * x7:  tmp3
3388  * x28: tmp4
3389  * x29: tmp5
3390  * x30: tmp6
3391  * x31: tmp7
3392  */
3393 void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Register ylen,
3394                                      Register z, Register zlen,
3395                                      Register tmp1, Register tmp2, Register tmp3, Register tmp4,
3396                                      Register tmp5, Register tmp6, Register product_hi)
3397 {
3398   assert_different_registers(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6);
3399 
3400   const Register idx = tmp1;
3401   const Register kdx = tmp2;
3402   const Register xstart = tmp3;
3403 
3404   const Register y_idx = tmp4;
3405   const Register carry = tmp5;
3406   const Register product = xlen;
3407   const Register x_xstart = zlen; // reuse register
3408 
3409   mv(idx, ylen); // idx = ylen;
3410   mv(kdx, zlen); // kdx = xlen+ylen;
3411   mv(carry, zr); // carry = 0;
3412 
3413   Label L_multiply_64_x_64_loop, L_done;
3414 
3415   subw(xstart, xlen, 1);
3416   bltz(xstart, L_done);
3417 
3418   const Register jdx = tmp1;
3419 
3420   if (AvoidUnalignedAccesses) {
3421     // Check if x and y are both 8-byte aligned.
3422     orr(t0, xlen, ylen);
3423     andi(t0, t0, 0x1);
3424     beqz(t0, L_multiply_64_x_64_loop);
3425 
3426     multiply_32_x_32_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx);
3427     shadd(t0, xstart, z, t0, LogBytesPerInt);
3428     sw(carry, Address(t0, 0));
3429 
3430     Label L_second_loop_unaligned;
3431     bind(L_second_loop_unaligned);
3432     mv(carry, zr);
3433     mv(jdx, ylen);
3434     subw(xstart, xstart, 1);
3435     bltz(xstart, L_done);
3436     sub(sp, sp, 2 * wordSize);
3437     sd(z, Address(sp, 0));
3438     sd(zr, Address(sp, wordSize));
3439     shadd(t0, xstart, z, t0, LogBytesPerInt);
3440     addi(z, t0, 4);
3441     shadd(t0, xstart, x, t0, LogBytesPerInt);
3442     lwu(product, Address(t0, 0));
3443     Label L_third_loop, L_third_loop_exit;
3444 
3445     blez(jdx, L_third_loop_exit);
3446 
3447     bind(L_third_loop);
3448     subw(jdx, jdx, 1);
3449     shadd(t0, jdx, y, t0, LogBytesPerInt);
3450     lwu(t0, Address(t0, 0));
3451     mul(t1, t0, product);
3452     add(t0, t1, carry);
3453     shadd(tmp6, jdx, z, t1, LogBytesPerInt);
3454     lwu(t1, Address(tmp6, 0));
3455     add(t0, t0, t1);
3456     sw(t0, Address(tmp6, 0));
3457     srli(carry, t0, 32);
3458     bgtz(jdx, L_third_loop);
3459 
3460     bind(L_third_loop_exit);
3461     ld(z, Address(sp, 0));
3462     addi(sp, sp, 2 * wordSize);
3463     shadd(t0, xstart, z, t0, LogBytesPerInt);
3464     sw(carry, Address(t0, 0));
3465 
3466     j(L_second_loop_unaligned);
3467   }
3468 
3469   bind(L_multiply_64_x_64_loop);
3470   multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx);
3471 
3472   Label L_second_loop_aligned;
3473   beqz(kdx, L_second_loop_aligned);
3474 
3475   Label L_carry;
3476   subw(kdx, kdx, 1);
3477   beqz(kdx, L_carry);
3478 
3479   shadd(t0, kdx, z, t0, LogBytesPerInt);
3480   sw(carry, Address(t0, 0));
3481   srli(carry, carry, 32);
3482   subw(kdx, kdx, 1);
3483 
3484   bind(L_carry);
3485   shadd(t0, kdx, z, t0, LogBytesPerInt);
3486   sw(carry, Address(t0, 0));
3487 
3488   // Second and third (nested) loops.
3489   //
3490   // for (int i = xstart-1; i >= 0; i--) { // Second loop
3491   //   carry = 0;
3492   //   for (int jdx=ystart, k=ystart+1+i; jdx >= 0; jdx--, k--) { // Third loop
3493   //     long product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) +
3494   //                    (z[k] & LONG_MASK) + carry;
3495   //     z[k] = (int)product;
3496   //     carry = product >>> 32;
3497   //   }
3498   //   z[i] = (int)carry;
3499   // }
3500   //
3501   // i = xlen, j = tmp1, k = tmp2, carry = tmp5, x[i] = product_hi
3502 
3503   bind(L_second_loop_aligned);
3504   mv(carry, zr); // carry = 0;
3505   mv(jdx, ylen); // j = ystart+1
3506 
3507   subw(xstart, xstart, 1); // i = xstart-1;
3508   bltz(xstart, L_done);
3509 
3510   sub(sp, sp, 4 * wordSize);
3511   sd(z, Address(sp, 0));
3512 
3513   Label L_last_x;
3514   shadd(t0, xstart, z, t0, LogBytesPerInt);
3515   addi(z, t0, 4);
3516   subw(xstart, xstart, 1); // i = xstart-1;
3517   bltz(xstart, L_last_x);
3518 
3519   shadd(t0, xstart, x, t0, LogBytesPerInt);
3520   ld(product_hi, Address(t0, 0));
3521   ror_imm(product_hi, product_hi, 32); // convert big-endian to little-endian
3522 
3523   Label L_third_loop_prologue;
3524   bind(L_third_loop_prologue);
3525 
3526   sd(ylen, Address(sp, wordSize));
3527   sd(x, Address(sp, 2 * wordSize));
3528   sd(xstart, Address(sp, 3 * wordSize));
3529   multiply_128_x_128_loop(y, z, carry, x, jdx, ylen, product,
3530                           tmp2, x_xstart, tmp3, tmp4, tmp6, product_hi);
3531   ld(z, Address(sp, 0));
3532   ld(ylen, Address(sp, wordSize));
3533   ld(x, Address(sp, 2 * wordSize));
3534   ld(xlen, Address(sp, 3 * wordSize)); // copy old xstart -> xlen
3535   addi(sp, sp, 4 * wordSize);
3536 
3537   addiw(tmp3, xlen, 1);
3538   shadd(t0, tmp3, z, t0, LogBytesPerInt);
3539   sw(carry, Address(t0, 0));
3540 
3541   subw(tmp3, tmp3, 1);
3542   bltz(tmp3, L_done);
3543 
3544   srli(carry, carry, 32);
3545   shadd(t0, tmp3, z, t0, LogBytesPerInt);
3546   sw(carry, Address(t0, 0));
3547   j(L_second_loop_aligned);
3548 
3549   // Next infrequent code is moved outside loops.
3550   bind(L_last_x);
3551   lwu(product_hi, Address(x, 0));
3552   j(L_third_loop_prologue);
3553 
3554   bind(L_done);
3555 }
3556 #endif
3557 
3558 // Count the bit positions of trailing zero chars from lsb to msb until the
3559 // first non-zero element is found. In the LL case each element is one byte,
3560 // so we step 8 bits at a time; in all other cases we step 16 bits at a time.
3561 void MacroAssembler::ctzc_bit(Register Rd, Register Rs, bool isLL, Register tmp1, Register tmp2)
3562 {
3563   if (UseRVB) {
3564     assert_different_registers(Rd, Rs, tmp1);
3565     int step = isLL ? 8 : 16;
3566     ctz(Rd, Rs);
3567     andi(tmp1, Rd, step - 1);
3568     sub(Rd, Rd, tmp1);
3569     return;
3570   }
3571   assert_different_registers(Rd, Rs, tmp1, tmp2);
3572   Label Loop;
3573   int step = isLL ? 8 : 16;
3574   li(Rd, -step);
3575   mv(tmp2, Rs);
3576 
3577   bind(Loop);
3578   addi(Rd, Rd, step);
3579   andi(tmp1, tmp2, ((1 << step) - 1));
3580   srli(tmp2, tmp2, step);
3581   beqz(tmp1, Loop);
3582 }
3583 
3584 // Read four adjacent bytes from the lower half of the source register and
3585 // inflate them into the destination register, one byte per halfword, e.g.:
3586 // Rs: A7A6A5A4A3A2A1A0
3587 // Rd: 00A300A200A100A0
3588 void MacroAssembler::inflate_lo32(Register Rd, Register Rs, Register tmp1, Register tmp2)
3589 {
3590   assert_different_registers(Rd, Rs, tmp1, tmp2);
3591   li(tmp1, 0xFF);
3592   mv(Rd, zr);
3593   for (int i = 0; i <= 3; i++)
3594   {
3595     andr(tmp2, Rs, tmp1);
3596     if (i) {
3597       slli(tmp2, tmp2, i * 8);
3598     }
3599     orr(Rd, Rd, tmp2);
3600     if (i != 3) {
3601       slli(tmp1, tmp1, 8);
3602     }
3603   }
3604 }
3605 
3606 // Read four adjacent bytes from the upper half of the source register and
3607 // inflate them into the destination register, one byte per halfword, e.g.:
3608 // Rs: A7A6A5A4A3A2A1A0
3609 // Rd: 00A700A600A500A4
3610 void MacroAssembler::inflate_hi32(Register Rd, Register Rs, Register tmp1, Register tmp2)
3611 {
3612   assert_different_registers(Rd, Rs, tmp1, tmp2);
3613   li(tmp1, 0xFF00000000);
3614   mv(Rd, zr);
3615   for (int i = 0; i <= 3; i++)
3616   {
3617     andr(tmp2, Rs, tmp1);
3618     orr(Rd, Rd, tmp2);
3619     srli(Rd, Rd, 8);
3620     if (i != 3) {
3621       slli(tmp1, tmp1, 8);
3622     }
3623   }
3624 }
3625 
3626 // The size of the blocks zeroed by the zero_blocks stub.  We must
3627 // handle anything smaller than this ourselves in zero_words().
3628 const int MacroAssembler::zero_words_block_size = 8;
3629 
3630 // zero_words() is used by C2 ClearArray patterns.  It is as small as
3631 // possible, handling small word counts locally and delegating
3632 // anything larger to the zero_blocks stub.  It is expanded many times
3633 // in compiled code, so it is important to keep it short.
3634 
3635 // ptr:   Address of a buffer to be zeroed.
3636 // cnt:   Count in HeapWords.
3637 //
3638 // ptr, cnt, and t0 are clobbered.
3639 address MacroAssembler::zero_words(Register ptr, Register cnt)
3640 {
3641   assert(is_power_of_2(zero_words_block_size), "adjust this");
3642   assert(ptr == x28 && cnt == x29, "mismatch in register usage");
3643   assert_different_registers(cnt, t0);
3644 
3645   BLOCK_COMMENT("zero_words {");
3646   mv(t0, zero_words_block_size);
3647   Label around, done, done16;
3648   bltu(cnt, t0, around);
3649   {
3650     RuntimeAddress zero_blocks = RuntimeAddress(StubRoutines::riscv::zero_blocks());
3651     assert(zero_blocks.target() != NULL, "zero_blocks stub has not been generated");
3652     if (StubRoutines::riscv::complete()) {
3653       address tpc = trampoline_call(zero_blocks);
3654       if (tpc == NULL) {
3655         DEBUG_ONLY(reset_labels(around));
3656         postcond(pc() == badAddress);
3657         return NULL;
3658       }
3659     } else {
3660       jal(zero_blocks);
3661     }
3662   }
3663   bind(around);
3664   for (int i = zero_words_block_size >> 1; i > 1; i >>= 1) {
3665     Label l;
3666     andi(t0, cnt, i);
3667     beqz(t0, l);
3668     for (int j = 0; j < i; j++) {
3669       sd(zr, Address(ptr, 0));
3670       addi(ptr, ptr, 8);
3671     }
3672     bind(l);
3673   }
3674   {
3675     Label l;
3676     andi(t0, cnt, 1);
3677     beqz(t0, l);
3678     sd(zr, Address(ptr, 0));
3679     bind(l);
3680   }
3681   BLOCK_COMMENT("} zero_words");
3682   postcond(pc() != badAddress);
3683   return pc();
3684 }
3685 
3686 #define SmallArraySize (18 * BytesPerLong)
3687 
3688 // base:  Address of a buffer to be zeroed, 8 bytes aligned.
3689 // cnt:   Immediate count in HeapWords.
3690 void MacroAssembler::zero_words(Register base, u_int64_t cnt)
3691 {
3692   assert_different_registers(base, t0, t1);
3693 
3694   BLOCK_COMMENT("zero_words {");
3695 
3696   if (cnt <= SmallArraySize / BytesPerLong) {
3697     for (int i = 0; i < (int)cnt; i++) {
3698       sd(zr, Address(base, i * wordSize));
3699     }
3700   } else {
3701     const int unroll = 8; // Number of sd(zr, adr) instructions we'll unroll
3702     int remainder = cnt % unroll;
3703     for (int i = 0; i < remainder; i++) {
3704       sd(zr, Address(base, i * wordSize));
3705     }
3706 
3707     Label loop;
3708     Register cnt_reg = t0;
3709     Register loop_base = t1;
3710     cnt = cnt - remainder;
3711     li(cnt_reg, cnt);
3712     add(loop_base, base, remainder * wordSize);
3713     bind(loop);
3714     sub(cnt_reg, cnt_reg, unroll);
3715     for (int i = 0; i < unroll; i++) {
3716       sd(zr, Address(loop_base, i * wordSize));
3717     }
3718     add(loop_base, loop_base, unroll * wordSize);
3719     bnez(cnt_reg, loop);
3720   }
3721 
3722   BLOCK_COMMENT("} zero_words");
3723 }
3724 
3725 // base:   Address of a buffer to be filled, 8 bytes aligned.
3726 // cnt:    Count in 8-byte unit.
3727 // value:  Value to be filled with.
3728 // base will point to the end of the buffer after filling.
3729 void MacroAssembler::fill_words(Register base, Register cnt, Register value)
3730 {
3731 //  Algorithm:
3732 //
3733 //    t0 = cnt & 7
3734 //    cnt -= t0
3735 //    p += t0
3736 //    switch (t0):
3737 //      switch start:
3738 //      do while cnt
3739 //        cnt -= 8
3740 //          p[-8] = value
3741 //        case 7:
3742 //          p[-7] = value
3743 //        case 6:
3744 //          p[-6] = value
3745 //          // ...
3746 //        case 1:
3747 //          p[-1] = value
3748 //        case 0:
3749 //          p += 8
3750 //      do-while end
3751 //    switch end
3752 
3753   assert_different_registers(base, cnt, value, t0, t1);
3754 
3755   Label fini, skip, entry, loop;
3756   const int unroll = 8; // Number of sd instructions we'll unroll
3757 
3758   beqz(cnt, fini);
3759 
3760   andi(t0, cnt, unroll - 1);
3761   sub(cnt, cnt, t0);
3762   // Duff's device: store the first cnt % 8 words by jumping into the tail of the unrolled loop below.
3763   shadd(base, t0, base, t1, 3);
3764   la(t1, entry);
3765   slli(t0, t0, 2); // each sd is 4 bytes: back up (cnt % 8) * 4 bytes from entry
3766   sub(t1, t1, t0);
3767   jr(t1);
3768 
3769   bind(loop);
3770   add(base, base, unroll * 8);
3771   for (int i = -unroll; i < 0; i++) {
3772     sd(value, Address(base, i * 8));
3773   }
3774   bind(entry);
3775   sub(cnt, cnt, unroll);
3776   bgez(cnt, loop);
3777 
3778   bind(fini);
3779 }
3780 
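// Emit a float/double -> integer conversion that returns 0 for NaN inputs,
// as Java requires: clear fcsr, convert, and if any exception flag other
// than inexact was raised, compare src with itself to detect NaN and zero
// out dst (overflowed non-NaN inputs keep the saturated conversion result).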
3781 #define FCVT_SAFE(FLOATCVT, FLOATEQ)                                                             \
3782 void MacroAssembler:: FLOATCVT##_safe(Register dst, FloatRegister src, Register tmp) {           \
3783   Label L_Okay;                                                                                  \
3784   fscsr(zr);                                                                                     \
3785   FLOATCVT(dst, src);                                                                            \
3786   frcsr(tmp);                                                                                    \
3787   andi(tmp, tmp, 0x1E);                                                                          \
3788   beqz(tmp, L_Okay);                                                                             \
3789   FLOATEQ(tmp, src, src);                                                                        \
3790   bnez(tmp, L_Okay);                                                                             \
3791   mv(dst, zr);                                                                                   \
3792   bind(L_Okay);                                                                                  \
3793 }
3794 
3795 FCVT_SAFE(fcvt_w_s, feq_s)
3796 FCVT_SAFE(fcvt_l_s, feq_s)
3797 FCVT_SAFE(fcvt_w_d, feq_d)
3798 FCVT_SAFE(fcvt_l_d, feq_d)
3799 
3800 #undef FCVT_SAFE
3801 
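// Emit a three-way floating-point compare leaving -1/0/1 in `result`. The
// sign of unordered_result selects where NaN sorts: negative maps unordered
// to -1 (Java's fcmpl/dcmpl), otherwise unordered maps to +1 (fcmpg/dcmpg).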
3802 #define FCMP(FLOATTYPE, FLOATSIG)                                                       \
3803 void MacroAssembler::FLOATTYPE##_compare(Register result, FloatRegister Rs1,            \
3804                                          FloatRegister Rs2, int unordered_result) {     \
3805   Label Ldone;                                                                          \
3806   if (unordered_result < 0) {                                                           \
3807     /* we want -1 for unordered or less than, 0 for equal and 1 for greater than. */    \
3808     /* installs 1 if gt else 0 */                                                       \
3809     flt_##FLOATSIG(result, Rs2, Rs1);                                                   \
3810     /* Rs1 > Rs2, install 1 */                                                          \
3811     bgtz(result, Ldone);                                                                \
3812     feq_##FLOATSIG(result, Rs1, Rs2);                                                   \
3813     addi(result, result, -1);                                                           \
3814     /* Rs1 = Rs2, install 0 */                                                          \
3815     /* NaN or Rs1 < Rs2, install -1 */                                                  \
3816     bind(Ldone);                                                                        \
3817   } else {                                                                              \
3818     /* we want -1 for less than, 0 for equal and 1 for unordered or greater than. */    \
3819     /* installs 1 if gt or unordered else 0 */                                          \
3820     flt_##FLOATSIG(result, Rs1, Rs2);                                                   \
3821     /* Rs1 < Rs2, install -1 */                                                         \
3822     bgtz(result, Ldone);                                                                \
3823     feq_##FLOATSIG(result, Rs1, Rs2);                                                   \
3824     addi(result, result, -1);                                                           \
3825     /* Rs1 = Rs2, install 0 */                                                          \
3826     /* NaN or Rs1 > Rs2, install 1 */                                                   \
3827     bind(Ldone);                                                                        \
3828     neg(result, result);                                                                \
3829   }                                                                                     \
3830 }
3831 
3832 FCMP(float, s);
3833 FCMP(double, d);
3834 
3835 #undef FCMP
3836 
3837 // Zero words; len is in bytes.
3838 // Clobbers len and tmp, plus t0 and t1; addr is preserved.
3839 // len must be a nonzero multiple of wordSize.
3840 void MacroAssembler::zero_memory(Register addr, Register len, Register tmp) {
3841   assert_different_registers(addr, len, tmp, t0, t1);
3842 
3843 #ifdef ASSERT
3844   {
3845     Label L;
3846     andi(t0, len, BytesPerWord - 1);
3847     beqz(t0, L);
3848     stop("len is not a multiple of BytesPerWord");
3849     bind(L);
3850   }
3851 #endif // ASSERT
3852 
3853 #ifndef PRODUCT
3854   block_comment("zero memory");
3855 #endif // PRODUCT
3856 
3857   Label loop;
3858   Label entry;
3859 
3860   // Algorithm:
3861   //
3862   //  t0 = cnt & 7
3863   //  cnt -= t0
3864   //  p += t0
3865   //  switch (t0) {
3866   //    do {
3867   //      cnt -= 8
3868   //        p[-8] = 0
3869   //      case 7:
3870   //        p[-7] = 0
3871   //      case 6:
3872   //        p[-6] = 0
3873   //        ...
3874   //      case 1:
3875   //        p[-1] = 0
3876   //      case 0:
3877   //        p += 8
3878   //     } while (cnt)
3879   //  }
3880 
3881   const int unroll = 8;   // Number of sd(zr) instructions we'll unroll
3882 
3883   srli(len, len, LogBytesPerWord);
3884   andi(t0, len, unroll - 1);  // t0 = cnt % unroll
3885   sub(len, len, t0);          // cnt -= t0 (len is now a multiple of unroll)
3886   // tmp always points to the end of the region we're about to zero
3887   shadd(tmp, t0, addr, t1, LogBytesPerWord);
3888   la(t1, entry);
3889   slli(t0, t0, 2);
3890   sub(t1, t1, t0);
3891   jr(t1);
3892   bind(loop);
3893   sub(len, len, unroll);
3894   for (int i = -unroll; i < 0; i++) {
3895     Assembler::sd(zr, Address(tmp, i * wordSize));
3896   }
3897   bind(entry);
3898   add(tmp, tmp, unroll * wordSize);
3899   bnez(len, loop);
3900 }
3901 
3902 // shift left by shamt and add
3903 // Rd = (Rs1 << shamt) + Rs2
3904 void MacroAssembler::shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt) {
3905   if (UseRVB) {
3906     if (shamt == 1) {
3907       sh1add(Rd, Rs1, Rs2);
3908       return;
3909     } else if (shamt == 2) {
3910       sh2add(Rd, Rs1, Rs2);
3911       return;
3912     } else if (shamt == 3) {
3913       sh3add(Rd, Rs1, Rs2);
3914       return;
3915     }
3916   }
3917 
3918   if (shamt != 0) {
3919     slli(tmp, Rs1, shamt);
3920     add(Rd, Rs2, tmp);
3921   } else {
3922     add(Rd, Rs1, Rs2);
3923   }
3924 }
3925 
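// Zero-extend the low `bits` bits of src into dst. With the bitmanip
// extension (UseRVB) the 16- and 32-bit cases are single zext.h/zext.w
// instructions; otherwise a slli/srli shift pair is used.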
3926 void MacroAssembler::zero_extend(Register dst, Register src, int bits) {
3927   if (UseRVB) {
3928     if (bits == 16) {
3929       zext_h(dst, src);
3930       return;
3931     } else if (bits == 32) {
3932       zext_w(dst, src);
3933       return;
3934     }
3935   }
3936 
3937   if (bits == 8) {
3938     zext_b(dst, src);
3939   } else {
3940     slli(dst, src, XLEN - bits);
3941     srli(dst, dst, XLEN - bits);
3942   }
3943 }
3944 
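// Sign-extend the low `bits` bits of src into dst. sext.b/sext.h need the
// bitmanip extension; the 32-bit case always has sext.w available (an
// addiw alias), so only the remaining widths use a slli/srai shift pair.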
3945 void MacroAssembler::sign_extend(Register dst, Register src, int bits) {
3946   if (UseRVB) {
3947     if (bits == 8) {
3948       sext_b(dst, src);
3949       return;
3950     } else if (bits == 16) {
3951       sext_h(dst, src);
3952       return;
3953     }
3954   }
3955 
3956   if (bits == 32) {
3957     sext_w(dst, src);
3958   } else {
3959     slli(dst, src, XLEN - bits);
3960     srai(dst, dst, XLEN - bits);
3961   }
3962 }
3963 
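// Three-way signed comparison of two longs, as in Java's lcmp bytecode:
// dst = (src1 > src2) ? 1 : (src1 < src2) ? -1 : 0. tmp is used to preserve
// an input that aliases dst.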
3964 void MacroAssembler::cmp_l2i(Register dst, Register src1, Register src2, Register tmp)
3965 {
3966   if (src1 == src2) {
3967     mv(dst, zr);
3968     return;
3969   }
3970   Label done;
3971   Register left = src1;
3972   Register right = src2;
3973   if (dst == src1) {
3974     assert_different_registers(dst, src2, tmp);
3975     mv(tmp, src1);
3976     left = tmp;
3977   } else if (dst == src2) {
3978     assert_different_registers(dst, src1, tmp);
3979     mv(tmp, src2);
3980     right = tmp;
3981   }
3982 
3983   // install 1 if gt, else 0
3984   slt(dst, right, left);
3985   bnez(dst, done);
3986   slt(dst, left, right);
3987   // dst = -1 if lt; dst = 0 if eq
3988   neg(dst, dst);
3989   bind(done);
3990 }
3991 
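// Instruction fence used at safepoints: it makes code modified by other
// threads (cross-modifying code) visible to this hart before execution
// continues.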
3992 void MacroAssembler::safepoint_ifence() {
3993   ifence();
3994 #ifndef PRODUCT
3995   if (VerifyCrossModifyFence) {
3996     // Clear the thread state.
3997     sb(zr, Address(xthread, in_bytes(JavaThread::requires_cross_modify_fence_offset())));
3998   }
3999 #endif
4000 }
4001 
4002 #ifndef PRODUCT
4003 void MacroAssembler::verify_cross_modify_fence_not_required() {
4004   if (VerifyCrossModifyFence) {
4005     // Check if thread needs a cross modify fence.
4006     lbu(t0, Address(xthread, in_bytes(JavaThread::requires_cross_modify_fence_offset())));
4007     Label fence_not_required;
4008     beqz(t0, fence_not_required);
4009     // If it does then fail.
4010     la(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::verify_cross_modify_fence_failure)));
4011     mv(c_rarg0, xthread);
4012     jalr(t0);
4013     bind(fence_not_required);
4014   }
4015 }
4016 #endif