1 /*
   2  * Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved.
   3  * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
   4  * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
   5  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   6  *
   7  * This code is free software; you can redistribute it and/or modify it
   8  * under the terms of the GNU General Public License version 2 only, as
   9  * published by the Free Software Foundation.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  *
  25  */
  26 
  27 #include "precompiled.hpp"
  28 #include "asm/assembler.hpp"
  29 #include "asm/assembler.inline.hpp"
  30 #include "compiler/disassembler.hpp"
  31 #include "gc/shared/barrierSet.hpp"
  32 #include "gc/shared/barrierSetAssembler.hpp"
  33 #include "gc/shared/cardTable.hpp"
  34 #include "gc/shared/cardTableBarrierSet.hpp"
  35 #include "interpreter/bytecodeHistogram.hpp"
  36 #include "interpreter/interpreter.hpp"
  37 #include "memory/resourceArea.hpp"
  38 #include "memory/universe.hpp"
  39 #include "nativeInst_riscv.hpp"
  40 #include "oops/accessDecorators.hpp"
  41 #include "oops/compressedOops.inline.hpp"
  42 #include "oops/klass.inline.hpp"
  43 #include "oops/oop.hpp"
  44 #include "runtime/interfaceSupport.inline.hpp"
  45 #include "runtime/javaThread.hpp"
  46 #include "runtime/jniHandles.inline.hpp"
  47 #include "runtime/sharedRuntime.hpp"
  48 #include "runtime/stubRoutines.hpp"
  49 #include "utilities/powerOfTwo.hpp"
  50 #ifdef COMPILER2
  51 #include "opto/compile.hpp"
  52 #include "opto/node.hpp"
  53 #include "opto/output.hpp"
  54 #endif
  55 
  56 #ifdef PRODUCT
  57 #define BLOCK_COMMENT(str) /* nothing */
  58 #else
  59 #define BLOCK_COMMENT(str) block_comment(str)
  60 #endif
  61 #define BIND(label) bind(label); __ BLOCK_COMMENT(#label ":")
  62 
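// Helpers that move a value into the C calling-convention argument registers
// (c_rarg0..c_rarg3) ahead of a runtime call, eliding the move when the value
// is already in place. Callers must order these so that an argument register
// is not clobbered before it is read (see the asserts in the call_VM variants).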
  63 static void pass_arg0(MacroAssembler* masm, Register arg) {
  64   if (c_rarg0 != arg) {
  65     masm->mv(c_rarg0, arg);
  66   }
  67 }
  68 
  69 static void pass_arg1(MacroAssembler* masm, Register arg) {
  70   if (c_rarg1 != arg) {
  71     masm->mv(c_rarg1, arg);
  72   }
  73 }
  74 
  75 static void pass_arg2(MacroAssembler* masm, Register arg) {
  76   if (c_rarg2 != arg) {
  77     masm->mv(c_rarg2, arg);
  78   }
  79 }
  80 
  81 static void pass_arg3(MacroAssembler* masm, Register arg) {
  82   if (c_rarg3 != arg) {
  83     masm->mv(c_rarg3, arg);
  84   }
  85 }
  86 
  87 int MacroAssembler::align(int modulus, int extra_offset) {
  88   CompressibleRegion cr(this);
  89   intptr_t before = offset();
  90   while ((offset() + extra_offset) % modulus != 0) { nop(); }
  91   return (int)(offset() - before);
  92 }
  93 
  94 void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
  95   call_VM_base(oop_result, noreg, noreg, entry_point, number_of_arguments, check_exceptions);
  96 }
  97 
  98 // Implementation of call_VM versions
  99 
 100 void MacroAssembler::call_VM(Register oop_result,
 101                              address entry_point,
 102                              bool check_exceptions) {
 103   call_VM_helper(oop_result, entry_point, 0, check_exceptions);
 104 }
 105 
 106 void MacroAssembler::call_VM(Register oop_result,
 107                              address entry_point,
 108                              Register arg_1,
 109                              bool check_exceptions) {
 110   pass_arg1(this, arg_1);
 111   call_VM_helper(oop_result, entry_point, 1, check_exceptions);
 112 }
 113 
 114 void MacroAssembler::call_VM(Register oop_result,
 115                              address entry_point,
 116                              Register arg_1,
 117                              Register arg_2,
 118                              bool check_exceptions) {
 119   assert(arg_1 != c_rarg2, "smashed arg");
 120   pass_arg2(this, arg_2);
 121   pass_arg1(this, arg_1);
 122   call_VM_helper(oop_result, entry_point, 2, check_exceptions);
 123 }
 124 
 125 void MacroAssembler::call_VM(Register oop_result,
 126                              address entry_point,
 127                              Register arg_1,
 128                              Register arg_2,
 129                              Register arg_3,
 130                              bool check_exceptions) {
 131   assert(arg_1 != c_rarg3, "smashed arg");
 132   assert(arg_2 != c_rarg3, "smashed arg");
 133   pass_arg3(this, arg_3);
 134 
 135   assert(arg_1 != c_rarg2, "smashed arg");
 136   pass_arg2(this, arg_2);
 137 
 138   pass_arg1(this, arg_1);
 139   call_VM_helper(oop_result, entry_point, 3, check_exceptions);
 140 }
 141 
 142 void MacroAssembler::call_VM(Register oop_result,
 143                              Register last_java_sp,
 144                              address entry_point,
 145                              int number_of_arguments,
 146                              bool check_exceptions) {
 147   call_VM_base(oop_result, xthread, last_java_sp, entry_point, number_of_arguments, check_exceptions);
 148 }
 149 
 150 void MacroAssembler::call_VM(Register oop_result,
 151                              Register last_java_sp,
 152                              address entry_point,
 153                              Register arg_1,
 154                              bool check_exceptions) {
 155   pass_arg1(this, arg_1);
 156   call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
 157 }
 158 
 159 void MacroAssembler::call_VM(Register oop_result,
 160                              Register last_java_sp,
 161                              address entry_point,
 162                              Register arg_1,
 163                              Register arg_2,
 164                              bool check_exceptions) {
 165 
 166   assert(arg_1 != c_rarg2, "smashed arg");
 167   pass_arg2(this, arg_2);
 168   pass_arg1(this, arg_1);
 169   call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
 170 }
 171 
 172 void MacroAssembler::call_VM(Register oop_result,
 173                              Register last_java_sp,
 174                              address entry_point,
 175                              Register arg_1,
 176                              Register arg_2,
 177                              Register arg_3,
 178                              bool check_exceptions) {
 179   assert(arg_1 != c_rarg3, "smashed arg");
 180   assert(arg_2 != c_rarg3, "smashed arg");
 181   pass_arg3(this, arg_3);
 182   assert(arg_1 != c_rarg2, "smashed arg");
 183   pass_arg2(this, arg_2);
 184   pass_arg1(this, arg_1);
 185   call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
 186 }
 187 
 188 // these are no-ops overridden by InterpreterMacroAssembler
 189 void MacroAssembler::check_and_handle_earlyret(Register java_thread) {}
 190 void MacroAssembler::check_and_handle_popframe(Register java_thread) {}
 191 
 192 // Calls to C land
 193 //
// When entering C land, the fp and sp of the last Java frame have to be recorded
 195 // in the (thread-local) JavaThread object. When leaving C land, the last Java fp
 196 // has to be reset to 0. This is required to allow proper stack traversal.
 197 void MacroAssembler::set_last_Java_frame(Register last_java_sp,
 198                                          Register last_java_fp,
 199                                          Register last_java_pc,
 200                                          Register tmp) {
 201 
 202   if (last_java_pc->is_valid()) {
 203       sd(last_java_pc, Address(xthread,
 204                                JavaThread::frame_anchor_offset() +
 205                                JavaFrameAnchor::last_Java_pc_offset()));
 206   }
 207 
 208   // determine last_java_sp register
 209   if (last_java_sp == sp) {
 210     mv(tmp, sp);
 211     last_java_sp = tmp;
 212   } else if (!last_java_sp->is_valid()) {
 213     last_java_sp = esp;
 214   }
 215 
 216   sd(last_java_sp, Address(xthread, JavaThread::last_Java_sp_offset()));
 217 
 218   // last_java_fp is optional
 219   if (last_java_fp->is_valid()) {
 220     sd(last_java_fp, Address(xthread, JavaThread::last_Java_fp_offset()));
 221   }
 222 }
 223 
 224 void MacroAssembler::set_last_Java_frame(Register last_java_sp,
 225                                          Register last_java_fp,
 226                                          address  last_java_pc,
 227                                          Register tmp) {
 228   assert(last_java_pc != NULL, "must provide a valid PC");
 229 
 230   la(tmp, last_java_pc);
 231   sd(tmp, Address(xthread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()));
 232 
 233   set_last_Java_frame(last_java_sp, last_java_fp, noreg, tmp);
 234 }
 235 
 236 void MacroAssembler::set_last_Java_frame(Register last_java_sp,
 237                                          Register last_java_fp,
 238                                          Label &L,
 239                                          Register tmp) {
 240   if (L.is_bound()) {
 241     set_last_Java_frame(last_java_sp, last_java_fp, target(L), tmp);
 242   } else {
 243     L.add_patch_at(code(), locator());
 244     set_last_Java_frame(last_java_sp, last_java_fp, pc() /* Patched later */, tmp);
 245   }
 246 }
 247 
 248 void MacroAssembler::reset_last_Java_frame(bool clear_fp) {
 249   // we must set sp to zero to clear frame
 250   sd(zr, Address(xthread, JavaThread::last_Java_sp_offset()));
 251 
 252   // must clear fp, so that compiled frames are not confused; it is
 253   // possible that we need it only for debugging
 254   if (clear_fp) {
 255     sd(zr, Address(xthread, JavaThread::last_Java_fp_offset()));
 256   }
 257 
 258   // Always clear the pc because it could have been set by make_walkable()
 259   sd(zr, Address(xthread, JavaThread::last_Java_pc_offset()));
 260 }
 261 
 262 void MacroAssembler::call_VM_base(Register oop_result,
 263                                   Register java_thread,
 264                                   Register last_java_sp,
 265                                   address  entry_point,
 266                                   int      number_of_arguments,
 267                                   bool     check_exceptions) {
 268    // determine java_thread register
 269   if (!java_thread->is_valid()) {
 270     java_thread = xthread;
 271   }
 272   // determine last_java_sp register
 273   if (!last_java_sp->is_valid()) {
 274     last_java_sp = esp;
 275   }
 276 
 277   // debugging support
 278   assert(number_of_arguments >= 0   , "cannot have negative number of arguments");
 279   assert(java_thread == xthread, "unexpected register");
 280 
 281   assert(java_thread != oop_result  , "cannot use the same register for java_thread & oop_result");
 282   assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp");
 283 
 284   // push java thread (becomes first argument of C function)
 285   mv(c_rarg0, java_thread);
 286 
 287   // set last Java frame before call
 288   assert(last_java_sp != fp, "can't use fp");
 289 
 290   Label l;
 291   set_last_Java_frame(last_java_sp, fp, l, t0);
 292 
 293   // do the call, remove parameters
 294   MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments, &l);
 295 
 296   // reset last Java frame
 297   // Only interpreter should have to clear fp
 298   reset_last_Java_frame(true);
 299 
 300    // C++ interp handles this in the interpreter
 301   check_and_handle_popframe(java_thread);
 302   check_and_handle_earlyret(java_thread);
 303 
 304   if (check_exceptions) {
 305     // check for pending exceptions (java_thread is set upon return)
 306     ld(t0, Address(java_thread, in_bytes(Thread::pending_exception_offset())));
 307     Label ok;
 308     beqz(t0, ok);
 309     int32_t offset = 0;
 310     la_patchable(t0, RuntimeAddress(StubRoutines::forward_exception_entry()), offset);
 311     jalr(x0, t0, offset);
 312     bind(ok);
 313   }
 314 
 315   // get oop result if there is one and reset the value in the thread
 316   if (oop_result->is_valid()) {
 317     get_vm_result(oop_result, java_thread);
 318   }
 319 }
 320 
 321 void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) {
 322   ld(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
 323   sd(zr, Address(java_thread, JavaThread::vm_result_offset()));
 324   verify_oop_msg(oop_result, "broken oop in call_VM_base");
 325 }
 326 
 327 void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) {
 328   ld(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset()));
 329   sd(zr, Address(java_thread, JavaThread::vm_result_2_offset()));
 330 }
 331 
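// Class-initialization barrier: a static access may only proceed if the klass
// is fully initialized, or if it is currently being initialized by this thread
// (recursive access from <clinit>). Everything else goes to the slow path,
// which calls into the runtime.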
 332 void MacroAssembler::clinit_barrier(Register klass, Register tmp, Label* L_fast_path, Label* L_slow_path) {
 333   assert(L_fast_path != NULL || L_slow_path != NULL, "at least one is required");
 334   assert_different_registers(klass, xthread, tmp);
 335 
 336   Label L_fallthrough, L_tmp;
 337   if (L_fast_path == NULL) {
 338     L_fast_path = &L_fallthrough;
 339   } else if (L_slow_path == NULL) {
 340     L_slow_path = &L_fallthrough;
 341   }
 342 
 343   // Fast path check: class is fully initialized
 344   lbu(tmp, Address(klass, InstanceKlass::init_state_offset()));
 345   sub(tmp, tmp, InstanceKlass::fully_initialized);
 346   beqz(tmp, *L_fast_path);
 347 
 348   // Fast path check: current thread is initializer thread
 349   ld(tmp, Address(klass, InstanceKlass::init_thread_offset()));
 350 
 351   if (L_slow_path == &L_fallthrough) {
 352     beq(xthread, tmp, *L_fast_path);
 353     bind(*L_slow_path);
 354   } else if (L_fast_path == &L_fallthrough) {
 355     bne(xthread, tmp, *L_slow_path);
 356     bind(*L_fast_path);
 357   } else {
 358     Unimplemented();
 359   }
 360 }
 361 
 362 void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file, int line) {
 363   if (!VerifyOops) { return; }
 364 
 365   // Pass register number to verify_oop_subroutine
 366   const char* b = NULL;
 367   {
 368     ResourceMark rm;
 369     stringStream ss;
 370     ss.print("verify_oop: %s: %s (%s:%d)", reg->name(), s, file, line);
 371     b = code_string(ss.as_string());
 372   }
 373   BLOCK_COMMENT("verify_oop {");
 374 
 375   push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp);
 376 
 377   mv(c_rarg0, reg); // c_rarg0 : x10
 378   // The length of the instruction sequence emitted should be independent
 379   // of the value of the local char buffer address so that the size of mach
 380   // nodes for scratch emit and normal emit matches.
 381   movptr(t0, (address)b);
 382 
 383   // call indirectly to solve generation ordering problem
 384   int32_t offset = 0;
 385   la_patchable(t1, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()), offset);
 386   ld(t1, Address(t1, offset));
 387   jalr(t1);
 388 
 389   pop_reg(RegSet::of(ra, t0, t1, c_rarg0), sp);
 390 
 391   BLOCK_COMMENT("} verify_oop");
 392 }
 393 
 394 void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* file, int line) {
 395   if (!VerifyOops) {
 396     return;
 397   }
 398 
 399   const char* b = NULL;
 400   {
 401     ResourceMark rm;
 402     stringStream ss;
 403     ss.print("verify_oop_addr: %s (%s:%d)", s, file, line);
 404     b = code_string(ss.as_string());
 405   }
 406   BLOCK_COMMENT("verify_oop_addr {");
 407 
 408   push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp);
 409 
 410   if (addr.uses(sp)) {
 411     la(x10, addr);
 412     ld(x10, Address(x10, 4 * wordSize));
 413   } else {
 414     ld(x10, addr);
 415   }
 416 
 417   // The length of the instruction sequence emitted should be independent
 418   // of the value of the local char buffer address so that the size of mach
 419   // nodes for scratch emit and normal emit matches.
 420   movptr(t0, (address)b);
 421 
 422   // call indirectly to solve generation ordering problem
 423   int32_t offset = 0;
 424   la_patchable(t1, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()), offset);
 425   ld(t1, Address(t1, offset));
 426   jalr(t1);
 427 
 428   pop_reg(RegSet::of(ra, t0, t1, c_rarg0), sp);
 429 
 430   BLOCK_COMMENT("} verify_oop_addr");
 431 }
 432 
 433 Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
 434                                          int extra_slot_offset) {
 435   // cf. TemplateTable::prepare_invoke(), if (load_receiver).
 436   int stackElementSize = Interpreter::stackElementSize;
 437   int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
 438 #ifdef ASSERT
 439   int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
 440   assert(offset1 - offset == stackElementSize, "correct arithmetic");
 441 #endif
 442   if (arg_slot.is_constant()) {
 443     return Address(esp, arg_slot.as_constant() * stackElementSize + offset);
 444   } else {
 445     assert_different_registers(t0, arg_slot.as_register());
 446     shadd(t0, arg_slot.as_register(), esp, t0, exact_log2(stackElementSize));
 447     return Address(t0, offset);
 448   }
 449 }
 450 
 451 #ifndef PRODUCT
 452 extern "C" void findpc(intptr_t x);
 453 #endif
 454 
 455 void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[])
 456 {
  // In order to get locks to work, we need to fake an in_VM state
 458   if (ShowMessageBoxOnError) {
 459     JavaThread* thread = JavaThread::current();
 460     JavaThreadState saved_state = thread->thread_state();
 461     thread->set_thread_state(_thread_in_vm);
 462 #ifndef PRODUCT
 463     if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
 464       ttyLocker ttyl;
 465       BytecodeCounter::print();
 466     }
 467 #endif
 468     if (os::message_box(msg, "Execution stopped, print registers?")) {
 469       ttyLocker ttyl;
 470       tty->print_cr(" pc = 0x%016lx", pc);
 471 #ifndef PRODUCT
 472       tty->cr();
 473       findpc(pc);
 474       tty->cr();
 475 #endif
 476       tty->print_cr(" x0 = 0x%016lx", regs[0]);
 477       tty->print_cr(" x1 = 0x%016lx", regs[1]);
 478       tty->print_cr(" x2 = 0x%016lx", regs[2]);
 479       tty->print_cr(" x3 = 0x%016lx", regs[3]);
 480       tty->print_cr(" x4 = 0x%016lx", regs[4]);
 481       tty->print_cr(" x5 = 0x%016lx", regs[5]);
 482       tty->print_cr(" x6 = 0x%016lx", regs[6]);
 483       tty->print_cr(" x7 = 0x%016lx", regs[7]);
 484       tty->print_cr(" x8 = 0x%016lx", regs[8]);
 485       tty->print_cr(" x9 = 0x%016lx", regs[9]);
 486       tty->print_cr("x10 = 0x%016lx", regs[10]);
 487       tty->print_cr("x11 = 0x%016lx", regs[11]);
 488       tty->print_cr("x12 = 0x%016lx", regs[12]);
 489       tty->print_cr("x13 = 0x%016lx", regs[13]);
 490       tty->print_cr("x14 = 0x%016lx", regs[14]);
 491       tty->print_cr("x15 = 0x%016lx", regs[15]);
 492       tty->print_cr("x16 = 0x%016lx", regs[16]);
 493       tty->print_cr("x17 = 0x%016lx", regs[17]);
 494       tty->print_cr("x18 = 0x%016lx", regs[18]);
 495       tty->print_cr("x19 = 0x%016lx", regs[19]);
 496       tty->print_cr("x20 = 0x%016lx", regs[20]);
 497       tty->print_cr("x21 = 0x%016lx", regs[21]);
 498       tty->print_cr("x22 = 0x%016lx", regs[22]);
 499       tty->print_cr("x23 = 0x%016lx", regs[23]);
 500       tty->print_cr("x24 = 0x%016lx", regs[24]);
 501       tty->print_cr("x25 = 0x%016lx", regs[25]);
 502       tty->print_cr("x26 = 0x%016lx", regs[26]);
 503       tty->print_cr("x27 = 0x%016lx", regs[27]);
      tty->print_cr("x28 = 0x%016lx", regs[28]);
      tty->print_cr("x29 = 0x%016lx", regs[29]);
      tty->print_cr("x30 = 0x%016lx", regs[30]);
      tty->print_cr("x31 = 0x%016lx", regs[31]);
 507       BREAKPOINT;
 508     }
 509   }
 510   fatal("DEBUG MESSAGE: %s", msg);
 511 }
 512 
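// JNI handles are tagged pointers: weak global handles have the low
// weak_tag_value bit set and must be resolved with a phantom-ref load barrier
// (ON_PHANTOM_OOP_REF), while ordinary local/global handles are loaded as
// plain native oops.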
 513 void MacroAssembler::resolve_jobject(Register value, Register tmp1, Register tmp2) {
 514   Label done, not_weak;
 515   beqz(value, done);           // Use NULL as-is.
 516 
 517   // Test for jweak tag.
 518   andi(t0, value, JNIHandles::weak_tag_mask);
 519   beqz(t0, not_weak);
 520 
 521   // Resolve jweak.
 522   access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, value,
 523                  Address(value, -JNIHandles::weak_tag_value), tmp1, tmp2);
 524   verify_oop(value);
 525   j(done);
 526 
 527   bind(not_weak);
 528   // Resolve (untagged) jobject.
 529   access_load_at(T_OBJECT, IN_NATIVE, value, Address(value, 0), tmp1, tmp2);
 530   verify_oop(value);
 531   bind(done);
 532 }
 533 
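// Halt execution with a message: emit an instruction that is guaranteed to
// trap, immediately followed by the 64-bit address of the message in the code
// stream, so the signal handler can pull the text back out and report it.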
 534 void MacroAssembler::stop(const char* msg) {
 535   BLOCK_COMMENT(msg);
 536   illegal_instruction(Assembler::csr::time);
 537   emit_int64((uintptr_t)msg);
 538 }
 539 
 540 void MacroAssembler::unimplemented(const char* what) {
 541   const char* buf = NULL;
 542   {
 543     ResourceMark rm;
 544     stringStream ss;
 545     ss.print("unimplemented: %s", what);
 546     buf = code_string(ss.as_string());
 547   }
 548   stop(buf);
 549 }
 550 
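// The mov_metadata/movptr calls below emit fixed-length instruction sequences,
// giving the stub a known layout whose fields (the Method* and the entry
// point) can be patched in place later.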
 551 void MacroAssembler::emit_static_call_stub() {
 552   // CompiledDirectStaticCall::set_to_interpreted knows the
 553   // exact layout of this stub.
 554 
 555   mov_metadata(xmethod, (Metadata*)NULL);
 556 
 557   // Jump to the entry point of the i2c stub.
 558   int32_t offset = 0;
 559   movptr(t0, 0, offset);
 560   jalr(x0, t0, offset);
 561 }
 562 
 563 void MacroAssembler::call_VM_leaf_base(address entry_point,
 564                                        int number_of_arguments,
 565                                        Label *retaddr) {
 566   int32_t offset = 0;
 567   push_reg(RegSet::of(t0, xmethod), sp);   // push << t0 & xmethod >> to sp
 568   movptr(t0, entry_point, offset);
 569   jalr(x1, t0, offset);
 570   if (retaddr != NULL) {
 571     bind(*retaddr);
 572   }
 573   pop_reg(RegSet::of(t0, xmethod), sp);   // pop << t0 & xmethod >> from sp
 574 }
 575 
 576 void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
 577   call_VM_leaf_base(entry_point, number_of_arguments);
 578 }
 579 
 580 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) {
 581   pass_arg0(this, arg_0);
 582   call_VM_leaf_base(entry_point, 1);
 583 }
 584 
 585 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
 586   pass_arg0(this, arg_0);
 587   pass_arg1(this, arg_1);
 588   call_VM_leaf_base(entry_point, 2);
 589 }
 590 
 591 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0,
 592                                   Register arg_1, Register arg_2) {
 593   pass_arg0(this, arg_0);
 594   pass_arg1(this, arg_1);
 595   pass_arg2(this, arg_2);
 596   call_VM_leaf_base(entry_point, 3);
 597 }
 598 
 599 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) {
 600   pass_arg0(this, arg_0);
 601   MacroAssembler::call_VM_leaf_base(entry_point, 1);
 602 }
 603 
 604 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
 605 
 606   assert(arg_0 != c_rarg1, "smashed arg");
 607   pass_arg1(this, arg_1);
 608   pass_arg0(this, arg_0);
 609   MacroAssembler::call_VM_leaf_base(entry_point, 2);
 610 }
 611 
 612 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
 613   assert(arg_0 != c_rarg2, "smashed arg");
 614   assert(arg_1 != c_rarg2, "smashed arg");
 615   pass_arg2(this, arg_2);
 616   assert(arg_0 != c_rarg1, "smashed arg");
 617   pass_arg1(this, arg_1);
 618   pass_arg0(this, arg_0);
 619   MacroAssembler::call_VM_leaf_base(entry_point, 3);
 620 }
 621 
 622 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) {
 623   assert(arg_0 != c_rarg3, "smashed arg");
 624   assert(arg_1 != c_rarg3, "smashed arg");
 625   assert(arg_2 != c_rarg3, "smashed arg");
 626   pass_arg3(this, arg_3);
 627   assert(arg_0 != c_rarg2, "smashed arg");
 628   assert(arg_1 != c_rarg2, "smashed arg");
 629   pass_arg2(this, arg_2);
 630   assert(arg_0 != c_rarg1, "smashed arg");
 631   pass_arg1(this, arg_1);
 632   pass_arg0(this, arg_0);
 633   MacroAssembler::call_VM_leaf_base(entry_point, 4);
 634 }
 635 
 636 void MacroAssembler::nop() {
 637   addi(x0, x0, 0);
 638 }
 639 
 640 void MacroAssembler::mv(Register Rd, Register Rs) {
 641   if (Rd != Rs) {
 642     addi(Rd, Rs, 0);
 643   }
 644 }
 645 
 646 void MacroAssembler::notr(Register Rd, Register Rs) {
 647   xori(Rd, Rs, -1);
 648 }
 649 
 650 void MacroAssembler::neg(Register Rd, Register Rs) {
 651   sub(Rd, x0, Rs);
 652 }
 653 
 654 void MacroAssembler::negw(Register Rd, Register Rs) {
 655   subw(Rd, x0, Rs);
 656 }
 657 
 658 void MacroAssembler::sext_w(Register Rd, Register Rs) {
 659   addiw(Rd, Rs, 0);
 660 }
 661 
 662 void MacroAssembler::zext_b(Register Rd, Register Rs) {
 663   andi(Rd, Rs, 0xFF);
 664 }
 665 
 666 void MacroAssembler::seqz(Register Rd, Register Rs) {
 667   sltiu(Rd, Rs, 1);
 668 }
 669 
 670 void MacroAssembler::snez(Register Rd, Register Rs) {
 671   sltu(Rd, x0, Rs);
 672 }
 673 
 674 void MacroAssembler::sltz(Register Rd, Register Rs) {
 675   slt(Rd, Rs, x0);
 676 }
 677 
 678 void MacroAssembler::sgtz(Register Rd, Register Rs) {
 679   slt(Rd, x0, Rs);
 680 }
 681 
 682 void MacroAssembler::fmv_s(FloatRegister Rd, FloatRegister Rs) {
 683   if (Rd != Rs) {
 684     fsgnj_s(Rd, Rs, Rs);
 685   }
 686 }
 687 
 688 void MacroAssembler::fabs_s(FloatRegister Rd, FloatRegister Rs) {
 689   fsgnjx_s(Rd, Rs, Rs);
 690 }
 691 
 692 void MacroAssembler::fneg_s(FloatRegister Rd, FloatRegister Rs) {
 693   fsgnjn_s(Rd, Rs, Rs);
 694 }
 695 
 696 void MacroAssembler::fmv_d(FloatRegister Rd, FloatRegister Rs) {
 697   if (Rd != Rs) {
 698     fsgnj_d(Rd, Rs, Rs);
 699   }
 700 }
 701 
 702 void MacroAssembler::fabs_d(FloatRegister Rd, FloatRegister Rs) {
 703   fsgnjx_d(Rd, Rs, Rs);
 704 }
 705 
 706 void MacroAssembler::fneg_d(FloatRegister Rd, FloatRegister Rs) {
 707   fsgnjn_d(Rd, Rs, Rs);
 708 }
 709 
 710 void MacroAssembler::vmnot_m(VectorRegister vd, VectorRegister vs) {
 711   vmnand_mm(vd, vs, vs);
 712 }
 713 
 714 void MacroAssembler::vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm) {
 715   vnsrl_wx(vd, vs, x0, vm);
 716 }
 717 
 718 void MacroAssembler::vfneg_v(VectorRegister vd, VectorRegister vs) {
 719   vfsgnjn_vv(vd, vs, vs);
 720 }
 721 
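// Load the address `dest` into Rd. For targets within a 32-bit pc-relative
// range this uses auipc + addi; adding 0x800 before taking the upper 20 bits
// compensates for the sign extension of the low 12-bit addi immediate.
// Out-of-range targets fall back to the full movptr sequence.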
 722 void MacroAssembler::la(Register Rd, const address &dest) {
 723   int64_t offset = dest - pc();
 724   if (is_offset_in_range(offset, 32)) {
    auipc(Rd, (int32_t)offset + 0x800);  // 0x800 rounds for the sign bit of the 12-bit addi immediate below
 726     addi(Rd, Rd, ((int64_t)offset << 52) >> 52);
 727   } else {
 728     movptr(Rd, dest);
 729   }
 730 }
 731 
 732 void MacroAssembler::la(Register Rd, const Address &adr) {
 733   switch (adr.getMode()) {
 734     case Address::literal: {
 735       relocInfo::relocType rtype = adr.rspec().reloc()->type();
 736       if (rtype == relocInfo::none) {
 737         mv(Rd, (intptr_t)(adr.target()));
 738       } else {
 739         relocate(adr.rspec());
 740         movptr(Rd, adr.target());
 741       }
 742       break;
 743     }
 744     case Address::base_plus_offset: {
 745       int32_t offset = 0;
 746       baseOffset(Rd, adr, offset);
 747       addi(Rd, Rd, offset);
 748       break;
 749     }
 750     default:
 751       ShouldNotReachHere();
 752   }
 753 }
 754 
 755 void MacroAssembler::la(Register Rd, Label &label) {
 756   la(Rd, target(label));
 757 }
 758 
 759 #define INSN(NAME)                                                                \
 760   void MacroAssembler::NAME##z(Register Rs, const address &dest) {                \
 761     NAME(Rs, zr, dest);                                                           \
 762   }                                                                               \
 763   void MacroAssembler::NAME##z(Register Rs, Label &l, bool is_far) {              \
 764     NAME(Rs, zr, l, is_far);                                                      \
 765   }                                                                               \
 766 
 767   INSN(beq);
 768   INSN(bne);
 769   INSN(blt);
 770   INSN(ble);
 771   INSN(bge);
 772   INSN(bgt);
 773 
 774 #undef INSN
 775 
 776 // Float compare branch instructions
 777 
 778 #define INSN(NAME, FLOATCMP, BRANCH)                                                                                   \
 779   void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far, bool is_unordered) {  \
 780     FLOATCMP##_s(t0, Rs1, Rs2);                                                                                        \
 781     BRANCH(t0, l, is_far);                                                                                             \
 782   }                                                                                                                    \
 783   void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far, bool is_unordered) { \
 784     FLOATCMP##_d(t0, Rs1, Rs2);                                                                                        \
 785     BRANCH(t0, l, is_far);                                                                                             \
 786   }
 787 
 788   INSN(beq, feq, bnez);
 789   INSN(bne, feq, beqz);
 790 
 791 #undef INSN
 792 
 793 
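// fle/flt return 0 when either operand is NaN. The unordered ("or NaN")
// variants therefore compare with the operands swapped and branch on a zero
// result, which covers both the expected condition and the unordered case.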
 794 #define INSN(NAME, FLOATCMP1, FLOATCMP2)                                              \
 795   void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l,   \
 796                                     bool is_far, bool is_unordered) {                 \
 797     if (is_unordered) {                                                               \
 798       /* jump if either source is NaN or condition is expected */                     \
 799       FLOATCMP2##_s(t0, Rs2, Rs1);                                                    \
 800       beqz(t0, l, is_far);                                                            \
 801     } else {                                                                          \
 802       /* jump if no NaN in source and condition is expected */                        \
 803       FLOATCMP1##_s(t0, Rs1, Rs2);                                                    \
 804       bnez(t0, l, is_far);                                                            \
 805     }                                                                                 \
 806   }                                                                                   \
 807   void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l,  \
 808                                      bool is_far, bool is_unordered) {                \
 809     if (is_unordered) {                                                               \
 810       /* jump if either source is NaN or condition is expected */                     \
 811       FLOATCMP2##_d(t0, Rs2, Rs1);                                                    \
 812       beqz(t0, l, is_far);                                                            \
 813     } else {                                                                          \
 814       /* jump if no NaN in source and condition is expected */                        \
 815       FLOATCMP1##_d(t0, Rs1, Rs2);                                                    \
 816       bnez(t0, l, is_far);                                                            \
 817     }                                                                                 \
 818   }
 819 
 820   INSN(ble, fle, flt);
 821   INSN(blt, flt, fle);
 822 
 823 #undef INSN
 824 
 825 #define INSN(NAME, CMP)                                                              \
 826   void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l,  \
 827                                     bool is_far, bool is_unordered) {                \
 828     float_##CMP(Rs2, Rs1, l, is_far, is_unordered);                                  \
 829   }                                                                                  \
 830   void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \
 831                                      bool is_far, bool is_unordered) {               \
 832     double_##CMP(Rs2, Rs1, l, is_far, is_unordered);                                 \
 833   }
 834 
 835   INSN(bgt, blt);
 836   INSN(bge, ble);
 837 
 838 #undef INSN
 839 
 840 
 841 #define INSN(NAME, CSR)                       \
 842   void MacroAssembler::NAME(Register Rd) {    \
 843     csrr(Rd, CSR);                            \
 844   }
 845 
 846   INSN(rdinstret,  CSR_INSTERT);
 847   INSN(rdcycle,    CSR_CYCLE);
 848   INSN(rdtime,     CSR_TIME);
 849   INSN(frcsr,      CSR_FCSR);
 850   INSN(frrm,       CSR_FRM);
 851   INSN(frflags,    CSR_FFLAGS);
 852 
 853 #undef INSN
 854 
 855 void MacroAssembler::csrr(Register Rd, unsigned csr) {
 856   csrrs(Rd, csr, x0);
 857 }
 858 
 859 #define INSN(NAME, OPFUN)                                      \
 860   void MacroAssembler::NAME(unsigned csr, Register Rs) {       \
 861     OPFUN(x0, csr, Rs);                                        \
 862   }
 863 
 864   INSN(csrw, csrrw);
 865   INSN(csrs, csrrs);
 866   INSN(csrc, csrrc);
 867 
 868 #undef INSN
 869 
 870 #define INSN(NAME, OPFUN)                                      \
 871   void MacroAssembler::NAME(unsigned csr, unsigned imm) {      \
 872     OPFUN(x0, csr, imm);                                       \
 873   }
 874 
 875   INSN(csrwi, csrrwi);
 876   INSN(csrsi, csrrsi);
 877   INSN(csrci, csrrci);
 878 
 879 #undef INSN
 880 
 881 #define INSN(NAME, CSR)                                      \
 882   void MacroAssembler::NAME(Register Rd, Register Rs) {      \
 883     csrrw(Rd, CSR, Rs);                                      \
 884   }
 885 
 886   INSN(fscsr,   CSR_FCSR);
 887   INSN(fsrm,    CSR_FRM);
 888   INSN(fsflags, CSR_FFLAGS);
 889 
 890 #undef INSN
 891 
 892 #define INSN(NAME)                              \
 893   void MacroAssembler::NAME(Register Rs) {      \
 894     NAME(x0, Rs);                               \
 895   }
 896 
 897   INSN(fscsr);
 898   INSN(fsrm);
 899   INSN(fsflags);
 900 
 901 #undef INSN
 902 
 903 void MacroAssembler::fsrmi(Register Rd, unsigned imm) {
 904   guarantee(imm < 5, "Rounding Mode is invalid in Rounding Mode register");
 905   csrrwi(Rd, CSR_FRM, imm);
 906 }
 907 
 908 void MacroAssembler::fsflagsi(Register Rd, unsigned imm) {
  csrrwi(Rd, CSR_FFLAGS, imm);
 910 }
 911 
 912 #define INSN(NAME)                             \
 913   void MacroAssembler::NAME(unsigned imm) {    \
 914     NAME(x0, imm);                             \
 915   }
 916 
 917   INSN(fsrmi);
 918   INSN(fsflagsi);
 919 
 920 #undef INSN
 921 
 922 void MacroAssembler::push_reg(Register Rs)
 923 {
 924   addi(esp, esp, 0 - wordSize);
 925   sd(Rs, Address(esp, 0));
 926 }
 927 
 928 void MacroAssembler::pop_reg(Register Rd)
 929 {
 930   ld(Rd, esp, 0);
 931   addi(esp, esp, wordSize);
 932 }
 933 
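// Decode a register bitmask into an array of register encodings, highest
// register number first, and return how many were present. Used by the
// push/pop helpers below.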
 934 int MacroAssembler::bitset_to_regs(unsigned int bitset, unsigned char* regs) {
 935   int count = 0;
  // Scan bitset to accumulate register numbers
 937   for (int reg = 31; reg >= 0; reg--) {
 938     if ((1U << 31) & bitset) {
 939       regs[count++] = reg;
 940     }
 941     bitset <<= 1;
 942   }
 943   return count;
 944 }
 945 
 946 // Push integer registers in the bitset supplied. Don't push sp.
 947 // Return the number of words pushed
 948 int MacroAssembler::push_reg(unsigned int bitset, Register stack) {
 949   DEBUG_ONLY(int words_pushed = 0;)
 950   CompressibleRegion cr(this);
 951 
 952   unsigned char regs[32];
 953   int count = bitset_to_regs(bitset, regs);
 954   // reserve one slot to align for odd count
 955   int offset = is_even(count) ? 0 : wordSize;
 956 
 957   if (count) {
 958     addi(stack, stack, -count * wordSize - offset);
 959   }
 960   for (int i = count - 1; i >= 0; i--) {
 961     sd(as_Register(regs[i]), Address(stack, (count - 1 - i) * wordSize + offset));
 962     DEBUG_ONLY(words_pushed++;)
 963   }
 964 
 965   assert(words_pushed == count, "oops, pushed != count");
 966 
 967   return count;
 968 }
 969 
 970 int MacroAssembler::pop_reg(unsigned int bitset, Register stack) {
 971   DEBUG_ONLY(int words_popped = 0;)
 972   CompressibleRegion cr(this);
 973 
 974   unsigned char regs[32];
 975   int count = bitset_to_regs(bitset, regs);
 976   // reserve one slot to align for odd count
 977   int offset = is_even(count) ? 0 : wordSize;
 978 
 979   for (int i = count - 1; i >= 0; i--) {
 980     ld(as_Register(regs[i]), Address(stack, (count - 1 - i) * wordSize + offset));
 981     DEBUG_ONLY(words_popped++;)
 982   }
 983 
 984   if (count) {
 985     addi(stack, stack, count * wordSize + offset);
 986   }
 987   assert(words_popped == count, "oops, popped != count");
 988 
 989   return count;
 990 }
 991 
 992 // Push floating-point registers in the bitset supplied.
 993 // Return the number of words pushed
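// An odd register count is rounded up to an even number of slots so that the
// 16-byte stack alignment required by the ABI is preserved.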
 994 int MacroAssembler::push_fp(unsigned int bitset, Register stack) {
 995   CompressibleRegion cr(this);
 996   DEBUG_ONLY(int words_pushed = 0;)
 997   unsigned char regs[32];
 998   int count = bitset_to_regs(bitset, regs);
 999   int push_slots = count + (count & 1);
1000 
1001   if (count) {
1002     addi(stack, stack, -push_slots * wordSize);
1003   }
1004 
1005   for (int i = count - 1; i >= 0; i--) {
1006     fsd(as_FloatRegister(regs[i]), Address(stack, (push_slots - 1 - i) * wordSize));
1007     DEBUG_ONLY(words_pushed++;)
1008   }
1009 
1010   assert(words_pushed == count, "oops, pushed(%d) != count(%d)", words_pushed, count);
1011 
1012   return count;
1013 }
1014 
1015 int MacroAssembler::pop_fp(unsigned int bitset, Register stack) {
1016   CompressibleRegion cr(this);
1017   DEBUG_ONLY(int words_popped = 0;)
1018   unsigned char regs[32];
1019   int count = bitset_to_regs(bitset, regs);
1020   int pop_slots = count + (count & 1);
1021 
1022   for (int i = count - 1; i >= 0; i--) {
1023     fld(as_FloatRegister(regs[i]), Address(stack, (pop_slots - 1 - i) * wordSize));
1024     DEBUG_ONLY(words_popped++;)
1025   }
1026 
1027   if (count) {
1028     addi(stack, stack, pop_slots * wordSize);
1029   }
1030 
1031   assert(words_popped == count, "oops, popped(%d) != count(%d)", words_popped, count);
1032 
1033   return count;
1034 }
1035 
1036 #ifdef COMPILER2
1037 // Push vector registers in the bitset supplied.
1038 // Return the number of words pushed
1039 int MacroAssembler::push_v(unsigned int bitset, Register stack) {
1040   CompressibleRegion cr(this);
1041   int vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE);
1042 
  // Scan bitset to accumulate registers
1044   unsigned char regs[32];
1045   int count = bitset_to_regs(bitset, regs);
1046 
1047   for (int i = 0; i < count; i++) {
1048     sub(stack, stack, vector_size_in_bytes);
1049     vs1r_v(as_VectorRegister(regs[i]), stack);
1050   }
1051 
1052   return count * vector_size_in_bytes / wordSize;
1053 }
1054 
1055 int MacroAssembler::pop_v(unsigned int bitset, Register stack) {
1056   CompressibleRegion cr(this);
1057   int vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE);
1058 
  // Scan bitset to accumulate registers
1060   unsigned char regs[32];
1061   int count = bitset_to_regs(bitset, regs);
1062 
1063   for (int i = count - 1; i >= 0; i--) {
1064     vl1r_v(as_VectorRegister(regs[i]), stack);
1065     add(stack, stack, vector_size_in_bytes);
1066   }
1067 
1068   return count * vector_size_in_bytes / wordSize;
1069 }
1070 #endif // COMPILER2
1071 
1072 void MacroAssembler::push_call_clobbered_registers_except(RegSet exclude) {
1073   CompressibleRegion cr(this);
1074   // Push integer registers x7, x10-x17, x28-x31.
1075   push_reg(RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31) - exclude, sp);
1076 
1077   // Push float registers f0-f7, f10-f17, f28-f31.
1078   addi(sp, sp, - wordSize * 20);
1079   int offset = 0;
1080   for (int i = 0; i < 32; i++) {
1081     if (i <= f7->encoding() || i >= f28->encoding() || (i >= f10->encoding() && i <= f17->encoding())) {
1082       fsd(as_FloatRegister(i), Address(sp, wordSize * (offset++)));
1083     }
1084   }
1085 }
1086 
1087 void MacroAssembler::pop_call_clobbered_registers_except(RegSet exclude) {
1088   CompressibleRegion cr(this);
1089   int offset = 0;
1090   for (int i = 0; i < 32; i++) {
1091     if (i <= f7->encoding() || i >= f28->encoding() || (i >= f10->encoding() && i <= f17->encoding())) {
1092       fld(as_FloatRegister(i), Address(sp, wordSize * (offset++)));
1093     }
1094   }
1095   addi(sp, sp, wordSize * 20);
1096 
1097   pop_reg(RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31) - exclude, sp);
1098 }
1099 
1100 void MacroAssembler::push_CPU_state(bool save_vectors, int vector_size_in_bytes) {
1101   CompressibleRegion cr(this);
1102   // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4)
1103   push_reg(RegSet::range(x5, x31), sp);
1104 
1105   // float registers
1106   addi(sp, sp, - 32 * wordSize);
1107   for (int i = 0; i < 32; i++) {
1108     fsd(as_FloatRegister(i), Address(sp, i * wordSize));
1109   }
1110 
1111   // vector registers
1112   if (save_vectors) {
1113     sub(sp, sp, vector_size_in_bytes * VectorRegister::number_of_registers);
1114     vsetvli(t0, x0, Assembler::e64, Assembler::m8);
1115     for (int i = 0; i < VectorRegister::number_of_registers; i += 8) {
1116       add(t0, sp, vector_size_in_bytes * i);
1117       vse64_v(as_VectorRegister(i), t0);
1118     }
1119   }
1120 }
1121 
1122 void MacroAssembler::pop_CPU_state(bool restore_vectors, int vector_size_in_bytes) {
1123   CompressibleRegion cr(this);
1124   // vector registers
1125   if (restore_vectors) {
1126     vsetvli(t0, x0, Assembler::e64, Assembler::m8);
1127     for (int i = 0; i < VectorRegister::number_of_registers; i += 8) {
1128       vle64_v(as_VectorRegister(i), sp);
1129       add(sp, sp, vector_size_in_bytes * 8);
1130     }
1131   }
1132 
1133   // float registers
1134   for (int i = 0; i < 32; i++) {
1135     fld(as_FloatRegister(i), Address(sp, i * wordSize));
1136   }
1137   addi(sp, sp, 32 * wordSize);
1138 
1139   // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4)
1140   pop_reg(RegSet::range(x5, x31), sp);
1141 }
1142 
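// The jal immediate is a 21-bit signed offset (in multiples of 2 bytes, so
// +/-1 MiB), scattered over instruction bits 31..12 as imm[20|10:1|11|19:12];
// the helpers below patch it and read it back in place.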
1143 static int patch_offset_in_jal(address branch, int64_t offset) {
  assert(is_imm_in_range(offset, 20, 1), "offset is too large to be patched in one jal instruction!\n");
1145   Assembler::patch(branch, 31, 31, (offset >> 20) & 0x1);                       // offset[20]    ==> branch[31]
1146   Assembler::patch(branch, 30, 21, (offset >> 1)  & 0x3ff);                     // offset[10:1]  ==> branch[30:21]
1147   Assembler::patch(branch, 20, 20, (offset >> 11) & 0x1);                       // offset[11]    ==> branch[20]
1148   Assembler::patch(branch, 19, 12, (offset >> 12) & 0xff);                      // offset[19:12] ==> branch[19:12]
1149   return NativeInstruction::instruction_size;                                   // only one instruction
1150 }
1151 
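// Conditional branches carry a 13-bit signed offset (+/-4 KiB), encoded as
// imm[12|10:5] in bits 31..25 and imm[4:1|11] in bits 11..7.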
1152 static int patch_offset_in_conditional_branch(address branch, int64_t offset) {
  assert(is_imm_in_range(offset, 12, 1), "offset is too large to be patched in one beq/bge/bgeu/blt/bltu/bne instruction!\n");
1154   Assembler::patch(branch, 31, 31, (offset >> 12) & 0x1);                       // offset[12]    ==> branch[31]
1155   Assembler::patch(branch, 30, 25, (offset >> 5)  & 0x3f);                      // offset[10:5]  ==> branch[30:25]
1156   Assembler::patch(branch, 7,  7,  (offset >> 11) & 0x1);                       // offset[11]    ==> branch[7]
1157   Assembler::patch(branch, 11, 8,  (offset >> 1)  & 0xf);                       // offset[4:1]   ==> branch[11:8]
1158   return NativeInstruction::instruction_size;                                   // only one instruction
1159 }
1160 
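// auipc + addi/jalr/load pairs carry a 32-bit pc-relative offset; 0x800 is
// added before taking the upper 20 bits to compensate for the sign extension
// of the low 12-bit immediate in the second instruction.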
1161 static int patch_offset_in_pc_relative(address branch, int64_t offset) {
1162   const int PC_RELATIVE_INSTRUCTION_NUM = 2;                                    // auipc, addi/jalr/load
1163   Assembler::patch(branch, 31, 12, ((offset + 0x800) >> 12) & 0xfffff);         // Auipc.          offset[31:12]  ==> branch[31:12]
1164   Assembler::patch(branch + 4, 31, 20, offset & 0xfff);                         // Addi/Jalr/Load. offset[11:0]   ==> branch[31:20]
1165   return PC_RELATIVE_INSTRUCTION_NUM * NativeInstruction::instruction_size;
1166 }
1167 
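// movptr materializes an (up to) 48-bit address with a fixed six-instruction
// sequence (lui + addi + slli + addi + slli + addi/jalr/load), so the target
// can be re-patched in place without changing the code size.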
1168 static int patch_addr_in_movptr(address branch, address target) {
1169   const int MOVPTR_INSTRUCTIONS_NUM = 6;                                        // lui + addi + slli + addi + slli + addi/jalr/load
1170   int32_t lower = ((intptr_t)target << 35) >> 35;
1171   int64_t upper = ((intptr_t)target - lower) >> 29;
1172   Assembler::patch(branch + 0,  31, 12, upper & 0xfffff);                       // Lui.             target[48:29] + target[28] ==> branch[31:12]
1173   Assembler::patch(branch + 4,  31, 20, (lower >> 17) & 0xfff);                 // Addi.            target[28:17] ==> branch[31:20]
1174   Assembler::patch(branch + 12, 31, 20, (lower >> 6) & 0x7ff);                  // Addi.            target[16: 6] ==> branch[31:20]
1175   Assembler::patch(branch + 20, 31, 20, lower & 0x3f);                          // Addi/Jalr/Load.  target[ 5: 0] ==> branch[31:20]
1176   return MOVPTR_INSTRUCTIONS_NUM * NativeInstruction::instruction_size;
1177 }
1178 
1179 static int patch_imm_in_li64(address branch, address target) {
1180   const int LI64_INSTRUCTIONS_NUM = 8;                                          // lui + addi + slli + addi + slli + addi + slli + addi
1181   int64_t lower = (intptr_t)target & 0xffffffff;
1182   lower = lower - ((lower << 44) >> 44);
1183   int64_t tmp_imm = ((uint64_t)((intptr_t)target & 0xffffffff00000000)) + (uint64_t)lower;
1184   int32_t upper =  (tmp_imm - (int32_t)lower) >> 32;
1185   int64_t tmp_upper = upper, tmp_lower = upper;
1186   tmp_lower = (tmp_lower << 52) >> 52;
1187   tmp_upper -= tmp_lower;
1188   tmp_upper >>= 12;
1189   // Load upper 32 bits. Upper = target[63:32], but if target[31] = 1 or (target[31:20] == 0x7ff && target[19] == 1),
1190   // upper = target[63:32] + 1.
1191   Assembler::patch(branch + 0,  31, 12, tmp_upper & 0xfffff);                       // Lui.
1192   Assembler::patch(branch + 4,  31, 20, tmp_lower & 0xfff);                         // Addi.
1193   // Load the rest 32 bits.
1194   Assembler::patch(branch + 12, 31, 20, ((int32_t)lower >> 20) & 0xfff);            // Addi.
1195   Assembler::patch(branch + 20, 31, 20, (((intptr_t)target << 44) >> 52) & 0xfff);  // Addi.
1196   Assembler::patch(branch + 28, 31, 20, (intptr_t)target & 0xff);                   // Addi.
1197   return LI64_INSTRUCTIONS_NUM * NativeInstruction::instruction_size;
1198 }
1199 
1200 static int patch_imm_in_li32(address branch, int32_t target) {
1201   const int LI32_INSTRUCTIONS_NUM = 2;                                          // lui + addiw
1202   int64_t upper = (intptr_t)target;
1203   int32_t lower = (((int32_t)target) << 20) >> 20;
1204   upper -= lower;
1205   upper = (int32_t)upper;
1206   Assembler::patch(branch + 0,  31, 12, (upper >> 12) & 0xfffff);               // Lui.
1207   Assembler::patch(branch + 4,  31, 20, lower & 0xfff);                         // Addiw.
1208   return LI32_INSTRUCTIONS_NUM * NativeInstruction::instruction_size;
1209 }
1210 
1211 static long get_offset_of_jal(address insn_addr) {
1212   assert_cond(insn_addr != NULL);
1213   long offset = 0;
1214   unsigned insn = *(unsigned*)insn_addr;
1215   long val = (long)Assembler::sextract(insn, 31, 12);
1216   offset |= ((val >> 19) & 0x1) << 20;
1217   offset |= (val & 0xff) << 12;
1218   offset |= ((val >> 8) & 0x1) << 11;
1219   offset |= ((val >> 9) & 0x3ff) << 1;
1220   offset = (offset << 43) >> 43;
1221   return offset;
1222 }
1223 
1224 static long get_offset_of_conditional_branch(address insn_addr) {
1225   long offset = 0;
1226   assert_cond(insn_addr != NULL);
1227   unsigned insn = *(unsigned*)insn_addr;
1228   offset = (long)Assembler::sextract(insn, 31, 31);
1229   offset = (offset << 12) | (((long)(Assembler::sextract(insn, 7, 7) & 0x1)) << 11);
1230   offset = offset | (((long)(Assembler::sextract(insn, 30, 25) & 0x3f)) << 5);
1231   offset = offset | (((long)(Assembler::sextract(insn, 11, 8) & 0xf)) << 1);
1232   offset = (offset << 41) >> 41;
1233   return offset;
1234 }
1235 
1236 static long get_offset_of_pc_relative(address insn_addr) {
1237   long offset = 0;
1238   assert_cond(insn_addr != NULL);
1239   offset = ((long)(Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12))) << 12;                                  // Auipc.
1240   offset += ((long)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20));                                         // Addi/Jalr/Load.
1241   offset = (offset << 32) >> 32;
1242   return offset;
1243 }
1244 
1245 static address get_target_of_movptr(address insn_addr) {
1246   assert_cond(insn_addr != NULL);
1247   intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 29;    // Lui.
1248   target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)) << 17;                        // Addi.
1249   target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[3], 31, 20)) << 6;                         // Addi.
1250   target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[5], 31, 20));                              // Addi/Jalr/Load.
1251   return (address) target_address;
1252 }
1253 
1254 static address get_target_of_li64(address insn_addr) {
1255   assert_cond(insn_addr != NULL);
1256   intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 44;    // Lui.
1257   target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)) << 32;                        // Addi.
1258   target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[3], 31, 20)) << 20;                        // Addi.
1259   target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[5], 31, 20)) << 8;                         // Addi.
1260   target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[7], 31, 20));                              // Addi.
1261   return (address)target_address;
1262 }
1263 
1264 static address get_target_of_li32(address insn_addr) {
1265   assert_cond(insn_addr != NULL);
1266   intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 12;    // Lui.
1267   target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20));                              // Addiw.
1268   return (address)target_address;
1269 }
1270 
1271 // Patch any kind of instruction; there may be several instructions.
1272 // Return the total length (in bytes) of the instructions.
1273 int MacroAssembler::pd_patch_instruction_size(address branch, address target) {
1274   assert_cond(branch != NULL);
1275   int64_t offset = target - branch;
1276   if (NativeInstruction::is_jal_at(branch)) {                         // jal
1277     return patch_offset_in_jal(branch, offset);
1278   } else if (NativeInstruction::is_branch_at(branch)) {               // beq/bge/bgeu/blt/bltu/bne
1279     return patch_offset_in_conditional_branch(branch, offset);
1280   } else if (NativeInstruction::is_pc_relative_at(branch)) {          // auipc, addi/jalr/load
1281     return patch_offset_in_pc_relative(branch, offset);
1282   } else if (NativeInstruction::is_movptr_at(branch)) {               // movptr
1283     return patch_addr_in_movptr(branch, target);
1284   } else if (NativeInstruction::is_li64_at(branch)) {                 // li64
1285     return patch_imm_in_li64(branch, target);
1286   } else if (NativeInstruction::is_li32_at(branch)) {                 // li32
1287     int64_t imm = (intptr_t)target;
1288     return patch_imm_in_li32(branch, (int32_t)imm);
1289   } else {
1290 #ifdef ASSERT
1291     tty->print_cr("pd_patch_instruction_size: instruction 0x%x at " INTPTR_FORMAT " could not be patched!\n",
1292                   *(unsigned*)branch, p2i(branch));
1293     Disassembler::decode(branch - 16, branch + 16);
1294 #endif
1295     ShouldNotReachHere();
1296     return -1;
1297   }
1298 }
1299 
1300 address MacroAssembler::target_addr_for_insn(address insn_addr) {
1301   long offset = 0;
1302   assert_cond(insn_addr != NULL);
1303   if (NativeInstruction::is_jal_at(insn_addr)) {                     // jal
1304     offset = get_offset_of_jal(insn_addr);
1305   } else if (NativeInstruction::is_branch_at(insn_addr)) {           // beq/bge/bgeu/blt/bltu/bne
1306     offset = get_offset_of_conditional_branch(insn_addr);
1307   } else if (NativeInstruction::is_pc_relative_at(insn_addr)) {      // auipc, addi/jalr/load
1308     offset = get_offset_of_pc_relative(insn_addr);
1309   } else if (NativeInstruction::is_movptr_at(insn_addr)) {           // movptr
1310     return get_target_of_movptr(insn_addr);
1311   } else if (NativeInstruction::is_li64_at(insn_addr)) {             // li64
1312     return get_target_of_li64(insn_addr);
1313   } else if (NativeInstruction::is_li32_at(insn_addr)) {             // li32
1314     return get_target_of_li32(insn_addr);
1315   } else {
1316     ShouldNotReachHere();
1317   }
1318   return address(((uintptr_t)insn_addr + offset));
1319 }
1320 
1321 int MacroAssembler::patch_oop(address insn_addr, address o) {
1322   // OOPs are either narrow (32 bits) or wide (48 bits).  We encode
1323   // narrow OOPs by setting the upper 16 bits in the first
1324   // instruction.
1325   if (NativeInstruction::is_li32_at(insn_addr)) {
1326     // Move narrow OOP
1327     uint32_t n = CompressedOops::narrow_oop_value(cast_to_oop(o));
1328     return patch_imm_in_li32(insn_addr, (int32_t)n);
1329   } else if (NativeInstruction::is_movptr_at(insn_addr)) {
1330     // Move wide OOP
1331     return patch_addr_in_movptr(insn_addr, o);
1332   }
1333   ShouldNotReachHere();
1334   return -1;
1335 }
1336 
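// Reload xheapbase with the compressed-oops base. Once the heap is fully
// initialized the base is a constant and can be materialized directly; before
// that it has to be loaded indirectly, since its value may still change.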
1337 void MacroAssembler::reinit_heapbase() {
1338   if (UseCompressedOops) {
1339     if (Universe::is_fully_initialized()) {
1340       mv(xheapbase, CompressedOops::ptrs_base());
1341     } else {
1342       int32_t offset = 0;
1343       la_patchable(xheapbase, ExternalAddress(CompressedOops::ptrs_base_addr()), offset);
1344       ld(xheapbase, Address(xheapbase, offset));
1345     }
1346   }
1347 }
1348 
1349 void MacroAssembler::mv(Register Rd, Address dest) {
1350   assert(dest.getMode() == Address::literal, "Address mode should be Address::literal");
1351   relocate(dest.rspec());
1352   movptr(Rd, dest.target());
1353 }
1354 
1355 void MacroAssembler::mv(Register Rd, RegisterOrConstant src) {
1356   if (src.is_register()) {
1357     mv(Rd, src.as_register());
1358   } else {
1359     mv(Rd, src.as_constant());
1360   }
1361 }
1362 
1363 void MacroAssembler::andrw(Register Rd, Register Rs1, Register Rs2) {
1364   andr(Rd, Rs1, Rs2);
1365   // addw: The result is clipped to 32 bits, then the sign bit is extended,
1366   // and the result is stored in Rd
1367   addw(Rd, Rd, zr);
1368 }
1369 
1370 void MacroAssembler::orrw(Register Rd, Register Rs1, Register Rs2) {
1371   orr(Rd, Rs1, Rs2);
1372   // addw: The result is clipped to 32 bits, then the sign bit is extended,
1373   // and the result is stored in Rd
1374   addw(Rd, Rd, zr);
1375 }
1376 
1377 void MacroAssembler::xorrw(Register Rd, Register Rs1, Register Rs2) {
1378   xorr(Rd, Rs1, Rs2);
1379   // addw: The result is clipped to 32 bits, then the sign bit is extended,
1380   // and the result is stored in Rd
1381   addw(Rd, Rd, zr);
1382 }
1383 
1384 // Note: load_unsigned_short used to be called load_unsigned_word.
1385 int MacroAssembler::load_unsigned_short(Register dst, Address src) {
1386   int off = offset();
1387   lhu(dst, src);
1388   return off;
1389 }
1390 
1391 int MacroAssembler::load_unsigned_byte(Register dst, Address src) {
1392   int off = offset();
1393   lbu(dst, src);
1394   return off;
1395 }
1396 
1397 int MacroAssembler::load_signed_short(Register dst, Address src) {
1398   int off = offset();
1399   lh(dst, src);
1400   return off;
1401 }
1402 
1403 int MacroAssembler::load_signed_byte(Register dst, Address src) {
1404   int off = offset();
1405   lb(dst, src);
1406   return off;
1407 }
1408 
1409 void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) {
1410   switch (size_in_bytes) {
1411     case  8:  ld(dst, src); break;
1412     case  4:  is_signed ? lw(dst, src) : lwu(dst, src); break;
1413     case  2:  is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break;
1414     case  1:  is_signed ? load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break;
1415     default:  ShouldNotReachHere();
1416   }
1417 }
1418 
1419 void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) {
1420   switch (size_in_bytes) {
1421     case  8:  sd(src, dst); break;
1422     case  4:  sw(src, dst); break;
1423     case  2:  sh(src, dst); break;
1424     case  1:  sb(src, dst); break;
1425     default:  ShouldNotReachHere();
1426   }
1427 }
1428 
1429 // reverse bytes in halfword in lower 16 bits and sign-extend
1430 // Rd[15:0] = Rs[7:0] Rs[15:8] (sign-extend to 64 bits)
1431 void MacroAssembler::revb_h_h(Register Rd, Register Rs, Register tmp) {
1432   if (UseZbb) {
1433     rev8(Rd, Rs);
1434     srai(Rd, Rd, 48);
1435     return;
1436   }
1437   assert_different_registers(Rs, tmp);
1438   assert_different_registers(Rd, tmp);
1439   srli(tmp, Rs, 8);
1440   andi(tmp, tmp, 0xFF);
1441   slli(Rd, Rs, 56);
1442   srai(Rd, Rd, 48); // sign-extend
1443   orr(Rd, Rd, tmp);
1444 }
1445 
1446 // reverse bytes in lower word and sign-extend
1447 // Rd[31:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] (sign-extend to 64 bits)
1448 void MacroAssembler::revb_w_w(Register Rd, Register Rs, Register tmp1, Register tmp2) {
1449   if (UseZbb) {
1450     rev8(Rd, Rs);
1451     srai(Rd, Rd, 32);
1452     return;
1453   }
1454   assert_different_registers(Rs, tmp1, tmp2);
1455   assert_different_registers(Rd, tmp1, tmp2);
1456   revb_h_w_u(Rd, Rs, tmp1, tmp2);
1457   slli(tmp2, Rd, 48);
1458   srai(tmp2, tmp2, 32); // sign-extend
1459   srli(Rd, Rd, 16);
1460   orr(Rd, Rd, tmp2);
1461 }
1462 
1463 // reverse bytes in halfword in lower 16 bits and zero-extend
1464 // Rd[15:0] = Rs[7:0] Rs[15:8] (zero-extend to 64 bits)
1465 void MacroAssembler::revb_h_h_u(Register Rd, Register Rs, Register tmp) {
1466   if (UseZbb) {
1467     rev8(Rd, Rs);
1468     srli(Rd, Rd, 48);
1469     return;
1470   }
1471   assert_different_registers(Rs, tmp);
1472   assert_different_registers(Rd, tmp);
1473   srli(tmp, Rs, 8);
1474   andi(tmp, tmp, 0xFF);
1475   andi(Rd, Rs, 0xFF);
1476   slli(Rd, Rd, 8);
1477   orr(Rd, Rd, tmp);
1478 }
1479 
1480 // reverse bytes in halfwords in lower 32 bits and zero-extend
1481 // Rd[31:0] = Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8] (zero-extend to 64 bits)
1482 void MacroAssembler::revb_h_w_u(Register Rd, Register Rs, Register tmp1, Register tmp2) {
1483   if (UseZbb) {
1484     rev8(Rd, Rs);
1485     rori(Rd, Rd, 32);
1486     roriw(Rd, Rd, 16);
1487     zero_extend(Rd, Rd, 32);
1488     return;
1489   }
1490   assert_different_registers(Rs, tmp1, tmp2);
1491   assert_different_registers(Rd, tmp1, tmp2);
1492   srli(tmp2, Rs, 16);
1493   revb_h_h_u(tmp2, tmp2, tmp1);
1494   revb_h_h_u(Rd, Rs, tmp1);
1495   slli(tmp2, tmp2, 16);
1496   orr(Rd, Rd, tmp2);
1497 }
1498 
1499 // This method is only used for revb_h
1500 // Rd = Rs[47:0] Rs[55:48] Rs[63:56]
1501 void MacroAssembler::revb_h_helper(Register Rd, Register Rs, Register tmp1, Register tmp2) {
1502   assert_different_registers(Rs, tmp1, tmp2);
1503   assert_different_registers(Rd, tmp1);
1504   srli(tmp1, Rs, 48);
1505   andi(tmp2, tmp1, 0xFF);
1506   slli(tmp2, tmp2, 8);
1507   srli(tmp1, tmp1, 8);
1508   orr(tmp1, tmp1, tmp2);
1509   slli(Rd, Rs, 16);
1510   orr(Rd, Rd, tmp1);
1511 }
1512 
1513 // reverse bytes in each halfword
1514 // Rd[63:0] = Rs[55:48] Rs[63:56] Rs[39:32] Rs[47:40] Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8]
1515 void MacroAssembler::revb_h(Register Rd, Register Rs, Register tmp1, Register tmp2) {
1516   if (UseZbb) {
1517     assert_different_registers(Rs, tmp1);
1518     assert_different_registers(Rd, tmp1);
1519     rev8(Rd, Rs);
1520     zero_extend(tmp1, Rd, 32);
1521     roriw(tmp1, tmp1, 16);
1522     slli(tmp1, tmp1, 32);
1523     srli(Rd, Rd, 32);
1524     roriw(Rd, Rd, 16);
1525     zero_extend(Rd, Rd, 32);
1526     orr(Rd, Rd, tmp1);
1527     return;
1528   }
1529   assert_different_registers(Rs, tmp1, tmp2);
1530   assert_different_registers(Rd, tmp1, tmp2);
1531   revb_h_helper(Rd, Rs, tmp1, tmp2);
1532   for (int i = 0; i < 3; ++i) {
1533     revb_h_helper(Rd, Rd, tmp1, tmp2);
1534   }
1535 }
1536 
1537 // reverse bytes in each word
1538 // Rd[63:0] = Rs[39:32] Rs[47:40] Rs[55:48] Rs[63:56] Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24]
1539 void MacroAssembler::revb_w(Register Rd, Register Rs, Register tmp1, Register tmp2) {
1540   if (UseZbb) {
1541     rev8(Rd, Rs);
1542     rori(Rd, Rd, 32);
1543     return;
1544   }
1545   assert_different_registers(Rs, tmp1, tmp2);
1546   assert_different_registers(Rd, tmp1, tmp2);
1547   revb(Rd, Rs, tmp1, tmp2);
1548   ror_imm(Rd, Rd, 32);
1549 }
1550 
1551 // reverse bytes in doubleword
1552 // Rd[63:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] Rs[39:32] Rs[47:40] Rs[55:48] Rs[63:56]
1553 void MacroAssembler::revb(Register Rd, Register Rs, Register tmp1, Register tmp2) {
1554   if (UseZbb) {
1555     rev8(Rd, Rs);
1556     return;
1557   }
1558   assert_different_registers(Rs, tmp1, tmp2);
1559   assert_different_registers(Rd, tmp1, tmp2);
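       // Accumulate the bytes of Rs from least significant to most significant
       // into tmp1, shifting the partial result left by 8 bits each step, so
       // that the byte order ends up reversed; the top byte of Rs is OR'ed in
       // last as the lowest byte of Rd.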
1560   andi(tmp1, Rs, 0xFF);
1561   slli(tmp1, tmp1, 8);
1562   for (int step = 8; step < 56; step += 8) {
1563     srli(tmp2, Rs, step);
1564     andi(tmp2, tmp2, 0xFF);
1565     orr(tmp1, tmp1, tmp2);
1566     slli(tmp1, tmp1, 8);
1567   }
1568   srli(Rd, Rs, 56);
1569   andi(Rd, Rd, 0xFF);
1570   orr(Rd, tmp1, Rd);
1571 }
1572 
1573 // rotate right by 'shift' bits
1574 void MacroAssembler::ror_imm(Register dst, Register src, uint32_t shift, Register tmp)
1575 {
1576   if (UseZbb) {
1577     rori(dst, src, shift);
1578     return;
1579   }
1580 
1581   assert_different_registers(dst, tmp);
1582   assert_different_registers(src, tmp);
1583   assert(shift < 64, "shift amount must be < 64");
1584   slli(tmp, src, 64 - shift);
1585   srli(dst, src, shift);
1586   orr(dst, dst, tmp);
1587 }
1588 
1589 void MacroAssembler::andi(Register Rd, Register Rn, int64_t imm, Register tmp) {
1590   if (is_imm_in_range(imm, 12, 0)) {
1591     and_imm12(Rd, Rn, imm);
1592   } else {
1593     assert_different_registers(Rn, tmp);
1594     mv(tmp, imm);
1595     andr(Rd, Rn, tmp);
1596   }
1597 }
1598 
1599 void MacroAssembler::orptr(Address adr, RegisterOrConstant src, Register tmp1, Register tmp2) {
1600   ld(tmp1, adr);
1601   if (src.is_register()) {
1602     orr(tmp1, tmp1, src.as_register());
1603   } else {
1604     if (is_imm_in_range(src.as_constant(), 12, 0)) {
1605       ori(tmp1, tmp1, src.as_constant());
1606     } else {
1607       assert_different_registers(tmp1, tmp2);
1608       mv(tmp2, src.as_constant());
1609       orr(tmp1, tmp1, tmp2);
1610     }
1611   }
1612   sd(tmp1, adr);
1613 }
1614 
1615 void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp, Label &L) {
1616   if (UseCompressedClassPointers) {
1617     lwu(tmp, Address(oop, oopDesc::klass_offset_in_bytes()));
1618     if (CompressedKlassPointers::base() == NULL) {
1619       slli(tmp, tmp, CompressedKlassPointers::shift());
1620       beq(trial_klass, tmp, L);
1621       return;
1622     }
1623     decode_klass_not_null(tmp);
1624   } else {
1625     ld(tmp, Address(oop, oopDesc::klass_offset_in_bytes()));
1626   }
1627   beq(trial_klass, tmp, L);
1628 }
1629 
1630 // Move an oop into a register.
1631 void MacroAssembler::movoop(Register dst, jobject obj) {
1632   int oop_index;
1633   if (obj == NULL) {
1634     oop_index = oop_recorder()->allocate_oop_index(obj);
1635   } else {
1636 #ifdef ASSERT
1637     {
1638       ThreadInVMfromUnknown tiv;
1639       assert(Universe::heap()->is_in(JNIHandles::resolve(obj)), "should be real oop");
1640     }
1641 #endif
1642     oop_index = oop_recorder()->find_index(obj);
1643   }
1644   RelocationHolder rspec = oop_Relocation::spec(oop_index);
1645 
1646   if (BarrierSet::barrier_set()->barrier_set_assembler()->supports_instruction_patching()) {
1647     mv(dst, Address((address)obj, rspec));
1648   } else {
1649     address dummy = address(uintptr_t(pc()) & -wordSize); // A nearby aligned address
1650     ld_constant(dst, Address(dummy, rspec));
1651   }
1652 }
1653 
1654 // Move a metadata address into a register.
1655 void MacroAssembler::mov_metadata(Register dst, Metadata* obj) {
1656   int oop_index;
1657   if (obj == NULL) {
1658     oop_index = oop_recorder()->allocate_metadata_index(obj);
1659   } else {
1660     oop_index = oop_recorder()->find_index(obj);
1661   }
1662   RelocationHolder rspec = metadata_Relocation::spec(oop_index);
1663   mv(dst, Address((address)obj, rspec));
1664 }
1665 
1666 // Writes to successive stack pages until the given offset is reached, to
1667 // check for stack overflow + shadow pages.  This clobbers tmp.
1668 void MacroAssembler::bang_stack_size(Register size, Register tmp) {
1669   assert_different_registers(tmp, size, t0);
1670   // Bang stack for total size given plus shadow page size.
1671   // Bang one page at a time because large size can bang beyond yellow and
1672   // red zones.
1673   mv(t0, os::vm_page_size());
1674   Label loop;
1675   bind(loop);
1676   sub(tmp, sp, t0);
1677   subw(size, size, t0);
1678   sd(size, Address(tmp));
1679   bgtz(size, loop);
1680 
1681   // Bang down shadow pages too.
1682   // At this point, (tmp-0) is the last address touched, so don't
1683   // touch it again.  (It was touched as (tmp-pagesize) but then tmp
1684   // was post-decremented.)  Skip this address by starting at i=1, and
1685   // touch a few more pages below.  N.B.  It is important to touch all
1686   // the way down to and including i=StackShadowPages.
1687   for (int i = 0; i < (int)(StackOverflow::stack_shadow_zone_size() / os::vm_page_size()) - 1; i++) {
1688     // this could be any sized move but this can be a debugging crumb
1689     // so the bigger the better.
1690     sub(tmp, tmp, os::vm_page_size());
1691     sd(size, Address(tmp, 0));
1692   }
1693 }
1694 
1695 SkipIfEqual::SkipIfEqual(MacroAssembler* masm, const bool* flag_addr, bool value) {
1696   int32_t offset = 0;
1697   _masm = masm;
1698   _masm->la_patchable(t0, ExternalAddress((address)flag_addr), offset);
1699   _masm->lbu(t0, Address(t0, offset));
1700   _masm->beqz(t0, _label);
1701 }
1702 
1703 SkipIfEqual::~SkipIfEqual() {
1704   _masm->bind(_label);
1705   _masm = NULL;
1706 }
1707 
1708 void MacroAssembler::load_mirror(Register dst, Register method, Register tmp1, Register tmp2) {
1709   const int mirror_offset = in_bytes(Klass::java_mirror_offset());
1710   ld(dst, Address(xmethod, Method::const_offset()));
1711   ld(dst, Address(dst, ConstMethod::constants_offset()));
1712   ld(dst, Address(dst, ConstantPool::pool_holder_offset_in_bytes()));
1713   ld(dst, Address(dst, mirror_offset));
1714   resolve_oop_handle(dst, tmp1, tmp2);
1715 }
1716 
1717 void MacroAssembler::resolve_oop_handle(Register result, Register tmp1, Register tmp2) {
1718   // OopHandle::resolve is an indirection.
1719   assert_different_registers(result, tmp1, tmp2);
1720   access_load_at(T_OBJECT, IN_NATIVE, result, Address(result, 0), tmp1, tmp2);
1721 }
1722 
1723 // ((WeakHandle)result).resolve()
1724 void MacroAssembler::resolve_weak_handle(Register result, Register tmp1, Register tmp2) {
1725   assert_different_registers(result, tmp1, tmp2);
1726   Label resolved;
1727 
1728   // A null weak handle resolves to null.
1729   beqz(result, resolved);
1730 
1731   // Only 64 bit platforms support GCs that require a tmp register
1732   // Only IN_HEAP loads require a thread_tmp register
1733   // WeakHandle::resolve is an indirection like jweak.
1734   access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF,
1735                  result, Address(result), tmp1, tmp2);
1736   bind(resolved);
1737 }
1738 
1739 void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators,
1740                                     Register dst, Address src,
1741                                     Register tmp1, Register tmp2) {
1742   BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
1743   decorators = AccessInternal::decorator_fixup(decorators);
1744   bool as_raw = (decorators & AS_RAW) != 0;
1745   if (as_raw) {
1746     bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, tmp2);
1747   } else {
1748     bs->load_at(this, decorators, type, dst, src, tmp1, tmp2);
1749   }
1750 }
1751 
1752 void MacroAssembler::null_check(Register reg, int offset) {
1753   if (needs_explicit_null_check(offset)) {
1754     // provoke OS NULL exception if reg = NULL by
1755     // accessing M[reg] w/o changing any registers
1756     // NOTE: this is plenty to provoke a segv
1757     ld(zr, Address(reg, 0));
1758   } else {
1759     // nothing to do, (later) access of M[reg + offset]
1760     // will provoke OS NULL exception if reg = NULL
1761   }
1762 }
1763 
1764 void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators,
1765                                      Address dst, Register src,
1766                                      Register tmp1, Register tmp2, Register tmp3) {
1767   BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
1768   decorators = AccessInternal::decorator_fixup(decorators);
1769   bool as_raw = (decorators & AS_RAW) != 0;
1770   if (as_raw) {
1771     bs->BarrierSetAssembler::store_at(this, decorators, type, dst, src, tmp1, tmp2, tmp3);
1772   } else {
1773     bs->store_at(this, decorators, type, dst, src, tmp1, tmp2, tmp3);
1774   }
1775 }
1776 
1777 // Algorithm must match CompressedOops::encode.
1778 void MacroAssembler::encode_heap_oop(Register d, Register s) {
1779   verify_oop_msg(s, "broken oop in encode_heap_oop");
1780   if (CompressedOops::base() == NULL) {
1781     if (CompressedOops::shift() != 0) {
1782       assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong");
1783       srli(d, s, LogMinObjAlignmentInBytes);
1784     } else {
1785       mv(d, s);
1786     }
1787   } else {
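         // A null oop (which lies below the heap base) must encode to zero;
         // any other oop is translated to its offset from the heap base.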
1788     Label notNull;
1789     sub(d, s, xheapbase);
1790     bgez(d, notNull);
1791     mv(d, zr);
1792     bind(notNull);
1793     if (CompressedOops::shift() != 0) {
1794       assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong");
1795       srli(d, d, CompressedOops::shift());
1796     }
1797   }
1798 }
1799 
1800 void MacroAssembler::load_klass(Register dst, Register src) {
1801   if (UseCompressedClassPointers) {
1802     lwu(dst, Address(src, oopDesc::klass_offset_in_bytes()));
1803     decode_klass_not_null(dst);
1804   } else {
1805     ld(dst, Address(src, oopDesc::klass_offset_in_bytes()));
1806   }
1807 }
1808 
1809 void MacroAssembler::store_klass(Register dst, Register src) {
1810   // FIXME: Should this be a store release? Concurrent GCs assume the
1811   // klass length is valid if the klass field is not null.
1812   if (UseCompressedClassPointers) {
1813     encode_klass_not_null(src);
1814     sw(src, Address(dst, oopDesc::klass_offset_in_bytes()));
1815   } else {
1816     sd(src, Address(dst, oopDesc::klass_offset_in_bytes()));
1817   }
1818 }
1819 
1820 void MacroAssembler::store_klass_gap(Register dst, Register src) {
1821   if (UseCompressedClassPointers) {
1822     // Store to klass gap in destination
1823     sw(src, Address(dst, oopDesc::klass_gap_offset_in_bytes()));
1824   }
1825 }
1826 
1827 void  MacroAssembler::decode_klass_not_null(Register r) {
1828   decode_klass_not_null(r, r);
1829 }
1830 
1831 void MacroAssembler::decode_klass_not_null(Register dst, Register src, Register tmp) {
1832   assert(UseCompressedClassPointers, "should only be used for compressed headers");
1833 
1834   if (CompressedKlassPointers::base() == NULL) {
1835     if (CompressedKlassPointers::shift() != 0) {
1836       assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong");
1837       slli(dst, src, LogKlassAlignmentInBytes);
1838     } else {
1839       mv(dst, src);
1840     }
1841     return;
1842   }
1843 
1844   Register xbase = dst;
1845   if (dst == src) {
1846     xbase = tmp;
1847   }
1848 
1849   assert_different_registers(src, xbase);
1850   mv(xbase, (uintptr_t)CompressedKlassPointers::base());
1851 
1852   if (CompressedKlassPointers::shift() != 0) {
1853     assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong");
1854     assert_different_registers(t0, xbase);
1855     shadd(dst, src, xbase, t0, LogKlassAlignmentInBytes);
1856   } else {
1857     add(dst, xbase, src);
1858   }
1859 
1860   if (xbase == xheapbase) { reinit_heapbase(); }
1861 }
1862 
1863 void MacroAssembler::encode_klass_not_null(Register r) {
1864   encode_klass_not_null(r, r);
1865 }
1866 
1867 void MacroAssembler::encode_klass_not_null(Register dst, Register src, Register tmp) {
1868   assert(UseCompressedClassPointers, "should only be used for compressed headers");
1869 
1870   if (CompressedKlassPointers::base() == NULL) {
1871     if (CompressedKlassPointers::shift() != 0) {
1872       assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong");
1873       srli(dst, src, LogKlassAlignmentInBytes);
1874     } else {
1875       mv(dst, src);
1876     }
1877     return;
1878   }
1879 
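       // If the base has no bits set in its low 32 bits and the shift is zero,
       // the encoded value is simply the low 32 bits of the klass pointer.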
1880   if (((uint64_t)(uintptr_t)CompressedKlassPointers::base() & 0xffffffff) == 0 &&
1881       CompressedKlassPointers::shift() == 0) {
1882     zero_extend(dst, src, 32);
1883     return;
1884   }
1885 
1886   Register xbase = dst;
1887   if (dst == src) {
1888     xbase = tmp;
1889   }
1890 
1891   assert_different_registers(src, xbase);
1892   mv(xbase, (intptr_t)CompressedKlassPointers::base());
1893   sub(dst, src, xbase);
1894   if (CompressedKlassPointers::shift() != 0) {
1895     assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong");
1896     srli(dst, dst, LogKlassAlignmentInBytes);
1897   }
1898   if (xbase == xheapbase) {
1899     reinit_heapbase();
1900   }
1901 }
1902 
1903 void  MacroAssembler::decode_heap_oop_not_null(Register r) {
1904   decode_heap_oop_not_null(r, r);
1905 }
1906 
1907 void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
1908   assert(UseCompressedOops, "should only be used for compressed headers");
1909   assert(Universe::heap() != NULL, "java heap should be initialized");
1910   // Cannot assert, unverified entry point counts instructions (see .ad file)
1911   // vtableStubs also counts instructions in pd_code_size_limit.
1912   // Also do not verify_oop as this is called by verify_oop.
1913   if (CompressedOops::shift() != 0) {
1914     assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong");
1915     slli(dst, src, LogMinObjAlignmentInBytes);
1916     if (CompressedOops::base() != NULL) {
1917       add(dst, xheapbase, dst);
1918     }
1919   } else {
1920     assert(CompressedOops::base() == NULL, "sanity");
1921     mv(dst, src);
1922   }
1923 }
1924 
1925 void  MacroAssembler::decode_heap_oop(Register d, Register s) {
1926   if (CompressedOops::base() == NULL) {
1927     if (CompressedOops::shift() != 0 || d != s) {
1928       slli(d, s, CompressedOops::shift());
1929     }
1930   } else {
1931     Label done;
1932     mv(d, s);
1933     beqz(s, done);
1934     shadd(d, s, xheapbase, d, LogMinObjAlignmentInBytes);
1935     bind(done);
1936   }
1937   verify_oop_msg(d, "broken oop in decode_heap_oop");
1938 }
1939 
1940 void MacroAssembler::store_heap_oop(Address dst, Register src, Register tmp1,
1941                                     Register tmp2, Register tmp3, DecoratorSet decorators) {
1942   access_store_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, tmp2, tmp3);
1943 }
1944 
1945 void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1,
1946                                    Register tmp2, DecoratorSet decorators) {
1947   access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, tmp2);
1948 }
1949 
1950 void MacroAssembler::load_heap_oop_not_null(Register dst, Address src, Register tmp1,
1951                                             Register tmp2, DecoratorSet decorators) {
1952   access_load_at(T_OBJECT, IN_HEAP | IS_NOT_NULL, dst, src, tmp1, tmp2);
1953 }
1954 
1955 // Used for storing NULLs.
1956 void MacroAssembler::store_heap_oop_null(Address dst) {
1957   access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg, noreg);
1958 }
1959 
1960 int MacroAssembler::corrected_idivl(Register result, Register rs1, Register rs2,
1961                                     bool want_remainder)
1962 {
1963   // Full implementation of Java idiv and irem.  The function
1964   // returns the (pc) offset of the div instruction - may be needed
1965   // for implicit exceptions.
1966   //
1967   // input : rs1: dividend
1968   //         rs2: divisor
1969   //
1970   // result: either
1971   //         quotient  (= rs1 idiv rs2)
1972   //         remainder (= rs1 irem rs2)
1973 
1974 
1975   int idivl_offset = offset();
1976   if (!want_remainder) {
1977     divw(result, rs1, rs2);
1978   } else {
1979     remw(result, rs1, rs2); // result = rs1 % rs2;
1980   }
1981   return idivl_offset;
1982 }
1983 
1984 int MacroAssembler::corrected_idivq(Register result, Register rs1, Register rs2,
1985                                     bool want_remainder)
1986 {
1987   // Full implementation of Java ldiv and lrem.  The function
1988   // returns the (pc) offset of the div instruction - may be needed
1989   // for implicit exceptions.
1990   //
1991   // input : rs1: dividend
1992   //         rs2: divisor
1993   //
1994   // result: either
1995   //         quotient  (= rs1 idiv rs2)
1996   //         remainder (= rs1 irem rs2)
1997 
1998   int idivq_offset = offset();
1999   if (!want_remainder) {
2000     div(result, rs1, rs2);
2001   } else {
2002     rem(result, rs1, rs2); // result = rs1 % rs2;
2003   }
2004   return idivq_offset;
2005 }
2006 
2007 // Look up the method for a megamorphic invokeinterface call.
2008 // The target method is determined by <intf_klass, itable_index>.
2009 // The receiver klass is in recv_klass.
2010 // On success, the result will be in method_result, and execution falls through.
2011 // On failure, execution transfers to the given label.
2012 void MacroAssembler::lookup_interface_method(Register recv_klass,
2013                                              Register intf_klass,
2014                                              RegisterOrConstant itable_index,
2015                                              Register method_result,
2016                                              Register scan_tmp,
2017                                              Label& L_no_such_interface,
2018                                              bool return_method) {
2019   assert_different_registers(recv_klass, intf_klass, scan_tmp);
2020   assert_different_registers(method_result, intf_klass, scan_tmp);
2021   assert(recv_klass != method_result || !return_method,
2022          "recv_klass can be destroyed when method isn't needed");
2023   assert(itable_index.is_constant() || itable_index.as_register() == method_result,
2024          "caller must be same register for non-constant itable index as for method");
2025 
2026   // Compute start of first itableOffsetEntry (which is at the end of the vtable).
2027   int vtable_base = in_bytes(Klass::vtable_start_offset());
2028   int itentry_off = itableMethodEntry::method_offset_in_bytes();
2029   int scan_step   = itableOffsetEntry::size() * wordSize;
2030   int vte_size    = vtableEntry::size_in_bytes();
2031   assert(vte_size == wordSize, "else adjust times_vte_scale");
2032 
2033   lwu(scan_tmp, Address(recv_klass, Klass::vtable_length_offset()));
2034 
2035   // %%% Could store the aligned, prescaled offset in the klassoop.
2036   shadd(scan_tmp, scan_tmp, recv_klass, scan_tmp, 3);
2037   add(scan_tmp, scan_tmp, vtable_base);
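       // scan_tmp now points to the first itableOffsetEntry:
       // recv_klass + vtable_start_offset + vtable_length * wordSize.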
2038 
2039   if (return_method) {
2040     // Adjust recv_klass by scaled itable_index, so we can free itable_index.
2041     assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
2042     if (itable_index.is_register()) {
2043       slli(t0, itable_index.as_register(), 3);
2044     } else {
2045       mv(t0, itable_index.as_constant() << 3);
2046     }
2047     add(recv_klass, recv_klass, t0);
2048     if (itentry_off) {
2049       add(recv_klass, recv_klass, itentry_off);
2050     }
2051   }
2052 
2053   Label search, found_method;
2054 
2055   ld(method_result, Address(scan_tmp, itableOffsetEntry::interface_offset_in_bytes()));
2056   beq(intf_klass, method_result, found_method);
2057   bind(search);
2058   // Check that the previous entry is non-null. A null entry means that
2059   // the receiver class doesn't implement the interface, and wasn't the
2060   // same as when the caller was compiled.
2061   beqz(method_result, L_no_such_interface, /* is_far */ true);
2062   addi(scan_tmp, scan_tmp, scan_step);
2063   ld(method_result, Address(scan_tmp, itableOffsetEntry::interface_offset_in_bytes()));
2064   bne(intf_klass, method_result, search);
2065 
2066   bind(found_method);
2067 
2068   // Got a hit.
2069   if (return_method) {
2070     lwu(scan_tmp, Address(scan_tmp, itableOffsetEntry::offset_offset_in_bytes()));
2071     add(method_result, recv_klass, scan_tmp);
2072     ld(method_result, Address(method_result));
2073   }
2074 }
2075 
2076 // virtual method calling
2077 void MacroAssembler::lookup_virtual_method(Register recv_klass,
2078                                            RegisterOrConstant vtable_index,
2079                                            Register method_result) {
2080   const int base = in_bytes(Klass::vtable_start_offset());
2081   assert(vtableEntry::size() * wordSize == 8,
2082          "adjust the scaling in the code below");
2083   int vtable_offset_in_bytes = base + vtableEntry::method_offset_in_bytes();
2084 
2085   if (vtable_index.is_register()) {
2086     shadd(method_result, vtable_index.as_register(), recv_klass, method_result, LogBytesPerWord);
2087     ld(method_result, Address(method_result, vtable_offset_in_bytes));
2088   } else {
2089     vtable_offset_in_bytes += vtable_index.as_constant() * wordSize;
2090     ld(method_result, form_address(method_result, recv_klass, vtable_offset_in_bytes));
2091   }
2092 }
2093 
2094 void MacroAssembler::membar(uint32_t order_constraint) {
2095   address prev = pc() - NativeMembar::instruction_size;
2096   address last = code()->last_insn();
2097 
2098   if (last != NULL && nativeInstruction_at(last)->is_membar() && prev == last) {
2099     NativeMembar *bar = NativeMembar_at(prev);
2100     // We are merging two memory barrier instructions.  On RISCV we
2101     // can do this simply by ORing them together.
2102     bar->set_kind(bar->get_kind() | order_constraint);
2103     BLOCK_COMMENT("merged membar");
2104   } else {
2105     code()->set_last_insn(pc());
2106 
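         // Translate the HotSpot memory ordering constraint into RISC-V fence
         // predecessor/successor sets and emit a single fence instruction.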
2107     uint32_t predecessor = 0;
2108     uint32_t successor = 0;
2109 
2110     membar_mask_to_pred_succ(order_constraint, predecessor, successor);
2111     fence(predecessor, successor);
2112   }
2113 }
2114 
2115 // Form an address from base + offset in Rd. Rd may or may not
2116 // actually be used: you must use the Address that is returned. It
2117 // is up to you to ensure that the shift provided matches the size
2118 // of your data.
2119 Address MacroAssembler::form_address(Register Rd, Register base, long byte_offset) {
2120   if (is_offset_in_range(byte_offset, 12)) { // 12: imm in range 2^12
2121     return Address(base, byte_offset);
2122   }
2123 
2124   // Do it the hard way
2125   mv(Rd, byte_offset);
2126   add(Rd, base, Rd);
2127   return Address(Rd);
2128 }
2129 
2130 void MacroAssembler::check_klass_subtype(Register sub_klass,
2131                                          Register super_klass,
2132                                          Register tmp_reg,
2133                                          Label& L_success) {
2134   Label L_failure;
2135   check_klass_subtype_fast_path(sub_klass, super_klass, tmp_reg, &L_success, &L_failure, NULL);
2136   check_klass_subtype_slow_path(sub_klass, super_klass, tmp_reg, noreg, &L_success, NULL);
2137   bind(L_failure);
2138 }
2139 
2140 void MacroAssembler::safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod) {
2141   ld(t0, Address(xthread, JavaThread::polling_word_offset()));
2142   if (acquire) {
2143     membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
2144   }
2145   if (at_return) {
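         // At a return poll the polling word also serves as the stack
         // watermark: branch to the slow path if the current frame
         // (sp, or fp when not in an nmethod) is above it.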
2146     bgtu(in_nmethod ? sp : fp, t0, slow_path, true /* is_far */);
2147   } else {
2148     andi(t0, t0, SafepointMechanism::poll_bit());
2149     bnez(t0, slow_path, true /* is_far */);
2150   }
2151 }
2152 
2153 void MacroAssembler::cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp,
2154                                 Label &succeed, Label *fail) {
2155   // oldv holds comparison value
2156   // newv holds value to write in exchange
2157   // addr identifies memory word to compare against/update
2158   Label retry_load, nope;
2159   bind(retry_load);
2160   // Load reserved from the memory location
2161   lr_d(tmp, addr, Assembler::aqrl);
2162   // Fail and exit if it is not what we expect
2163   bne(tmp, oldv, nope);
2164   // If the store conditional succeeds, tmp will be zero
2165   sc_d(tmp, newv, addr, Assembler::rl);
2166   beqz(tmp, succeed);
2167   // Retry only when the store conditional failed
2168   j(retry_load);
2169 
2170   bind(nope);
2171   membar(AnyAny);
2172   mv(oldv, tmp);
2173   if (fail != NULL) {
2174     j(*fail);
2175   }
2176 }
2177 
2178 void MacroAssembler::load_reserved(Register addr,
2179                                    enum operand_size size,
2180                                    Assembler::Aqrl acquire) {
2181   switch (size) {
2182     case int64:
2183       lr_d(t0, addr, acquire);
2184       break;
2185     case int32:
2186       lr_w(t0, addr, acquire);
2187       break;
2188     case uint32:
2189       lr_w(t0, addr, acquire);
2190       zero_extend(t0, t0, 32);
2191       break;
2192     default:
2193       ShouldNotReachHere();
2194   }
2195 }
2196 
2197 void MacroAssembler::store_conditional(Register addr,
2198                                        Register new_val,
2199                                        enum operand_size size,
2200                                        Assembler::Aqrl release) {
2201   switch (size) {
2202     case int64:
2203       sc_d(t0, new_val, addr, release);
2204       break;
2205     case int32:
2206     case uint32:
2207       sc_w(t0, new_val, addr, release);
2208       break;
2209     default:
2210       ShouldNotReachHere();
2211   }
2212 }
2213 
2214 
2215 void MacroAssembler::cmpxchg_narrow_value_helper(Register addr, Register expected,
2216                                                  Register new_val,
2217                                                  enum operand_size size,
2218                                                  Register tmp1, Register tmp2, Register tmp3) {
2219   assert(size == int8 || size == int16, "unsupported operand size");
2220 
2221   Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3;
2222 
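       // Compute the bit offset of the narrow value within its aligned
       // 32-bit word: shift = (addr & 3) * 8.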
2223   andi(shift, addr, 3);
2224   slli(shift, shift, 3);
2225 
2226   andi(aligned_addr, addr, ~3);
2227 
2228   if (size == int8) {
2229     mv(mask, 0xff);
2230   } else {
2231     // size == int16 case
2232     mv(mask, -1);
2233     zero_extend(mask, mask, 16);
2234   }
2235   sll(mask, mask, shift);
2236 
2237   xori(not_mask, mask, -1);
2238 
2239   sll(expected, expected, shift);
2240   andr(expected, expected, mask);
2241 
2242   sll(new_val, new_val, shift);
2243   andr(new_val, new_val, mask);
2244 }
2245 
2246 // cmpxchg_narrow_value will kill t0, t1, expected, new_val and tmps.
2247 // It's designed to implement compare and swap byte/boolean/char/short by lr.w/sc.w,
2248 // which are forced to work with 4-byte aligned address.
2249 void MacroAssembler::cmpxchg_narrow_value(Register addr, Register expected,
2250                                           Register new_val,
2251                                           enum operand_size size,
2252                                           Assembler::Aqrl acquire, Assembler::Aqrl release,
2253                                           Register result, bool result_as_bool,
2254                                           Register tmp1, Register tmp2, Register tmp3) {
2255   Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3, old = result, tmp = t0;
2256   assert_different_registers(addr, old, mask, not_mask, new_val, expected, shift, tmp);
2257   cmpxchg_narrow_value_helper(addr, expected, new_val, size, tmp1, tmp2, tmp3);
2258 
2259   Label retry, fail, done;
2260 
2261   bind(retry);
2262   lr_w(old, aligned_addr, acquire);
2263   andr(tmp, old, mask);
2264   bne(tmp, expected, fail);
2265 
2266   andr(tmp, old, not_mask);
2267   orr(tmp, tmp, new_val);
2268   sc_w(tmp, tmp, aligned_addr, release);
2269   bnez(tmp, retry);
2270 
2271   if (result_as_bool) {
2272     mv(result, 1);
2273     j(done);
2274 
2275     bind(fail);
2276     mv(result, zr);
2277 
2278     bind(done);
2279   } else {
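         // Extract the old narrow value into 'result': on success it is
         // recomputed from 'old'; on the failure path 'tmp' already holds
         // the masked memory value when control reaches the label below.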
2280     andr(tmp, old, mask);
2281 
2282     bind(fail);
2283     srl(result, tmp, shift);
2284 
2285     if (size == int8) {
2286       sign_extend(result, result, 8);
2287     } else {
2288       // size == int16 case
2289       sign_extend(result, result, 16);
2290     }
2291   }
2292 }
2293 
2294 // weak_cmpxchg_narrow_value is a weak version of cmpxchg_narrow_value, to implement
2295 // the weak CAS. The major difference is that it simply fails when the store
2296 // conditional fails.
2297 void MacroAssembler::weak_cmpxchg_narrow_value(Register addr, Register expected,
2298                                                Register new_val,
2299                                                enum operand_size size,
2300                                                Assembler::Aqrl acquire, Assembler::Aqrl release,
2301                                                Register result,
2302                                                Register tmp1, Register tmp2, Register tmp3) {
2303   Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3, old = result, tmp = t0;
2304   assert_different_registers(addr, old, mask, not_mask, new_val, expected, shift, tmp);
2305   cmpxchg_narrow_value_helper(addr, expected, new_val, size, tmp1, tmp2, tmp3);
2306 
2307   Label fail, done;
2308 
2309   lr_w(old, aligned_addr, acquire);
2310   andr(tmp, old, mask);
2311   bne(tmp, expected, fail);
2312 
2313   andr(tmp, old, not_mask);
2314   orr(tmp, tmp, new_val);
2315   sc_w(tmp, tmp, aligned_addr, release);
2316   bnez(tmp, fail);
2317 
2318   // Success
2319   mv(result, 1);
2320   j(done);
2321 
2322   // Fail
2323   bind(fail);
2324   mv(result, zr);
2325 
2326   bind(done);
2327 }
2328 
2329 void MacroAssembler::cmpxchg(Register addr, Register expected,
2330                              Register new_val,
2331                              enum operand_size size,
2332                              Assembler::Aqrl acquire, Assembler::Aqrl release,
2333                              Register result, bool result_as_bool) {
2334   assert(size != int8 && size != int16, "unsupported operand size");
2335 
2336   Label retry_load, done, ne_done;
2337   bind(retry_load);
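       // load_reserved leaves the loaded value in t0; store_conditional leaves
       // its success flag in t0 (zero means the store succeeded).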
2338   load_reserved(addr, size, acquire);
2339   bne(t0, expected, ne_done);
2340   store_conditional(addr, new_val, size, release);
2341   bnez(t0, retry_load);
2342 
2343   // equal, succeed
2344   if (result_as_bool) {
2345     mv(result, 1);
2346   } else {
2347     mv(result, expected);
2348   }
2349   j(done);
2350 
2351   // not equal, failed
2352   bind(ne_done);
2353   if (result_as_bool) {
2354     mv(result, zr);
2355   } else {
2356     mv(result, t0);
2357   }
2358 
2359   bind(done);
2360 }
2361 
2362 void MacroAssembler::cmpxchg_weak(Register addr, Register expected,
2363                                   Register new_val,
2364                                   enum operand_size size,
2365                                   Assembler::Aqrl acquire, Assembler::Aqrl release,
2366                                   Register result) {
2367   Label fail, done;
2368   load_reserved(addr, size, acquire);
2369   bne(t0, expected, fail);
2370   store_conditional(addr, new_val, size, release);
2371   bnez(t0, fail);
2372 
2373   // Success
2374   mv(result, 1);
2375   j(done);
2376 
2377   // Fail
2378   bind(fail);
2379   mv(result, zr);
2380 
2381   bind(done);
2382 }
2383 
2384 #define ATOMIC_OP(NAME, AOP, ACQUIRE, RELEASE)                                              \
2385 void MacroAssembler::atomic_##NAME(Register prev, RegisterOrConstant incr, Register addr) { \
2386   prev = prev->is_valid() ? prev : zr;                                                      \
2387   if (incr.is_register()) {                                                                 \
2388     AOP(prev, addr, incr.as_register(), (Assembler::Aqrl)(ACQUIRE | RELEASE));              \
2389   } else {                                                                                  \
2390     mv(t0, incr.as_constant());                                                             \
2391     AOP(prev, addr, t0, (Assembler::Aqrl)(ACQUIRE | RELEASE));                              \
2392   }                                                                                         \
2393   return;                                                                                   \
2394 }
2395 
2396 ATOMIC_OP(add, amoadd_d, Assembler::relaxed, Assembler::relaxed)
2397 ATOMIC_OP(addw, amoadd_w, Assembler::relaxed, Assembler::relaxed)
2398 ATOMIC_OP(addal, amoadd_d, Assembler::aq, Assembler::rl)
2399 ATOMIC_OP(addalw, amoadd_w, Assembler::aq, Assembler::rl)
2400 
2401 #undef ATOMIC_OP
2402 
2403 #define ATOMIC_XCHG(OP, AOP, ACQUIRE, RELEASE)                                       \
2404 void MacroAssembler::atomic_##OP(Register prev, Register newv, Register addr) {      \
2405   prev = prev->is_valid() ? prev : zr;                                               \
2406   AOP(prev, addr, newv, (Assembler::Aqrl)(ACQUIRE | RELEASE));                       \
2407   return;                                                                            \
2408 }
2409 
2410 ATOMIC_XCHG(xchg, amoswap_d, Assembler::relaxed, Assembler::relaxed)
2411 ATOMIC_XCHG(xchgw, amoswap_w, Assembler::relaxed, Assembler::relaxed)
2412 ATOMIC_XCHG(xchgal, amoswap_d, Assembler::aq, Assembler::rl)
2413 ATOMIC_XCHG(xchgalw, amoswap_w, Assembler::aq, Assembler::rl)
2414 
2415 #undef ATOMIC_XCHG
2416 
2417 #define ATOMIC_XCHGU(OP1, OP2)                                                       \
2418 void MacroAssembler::atomic_##OP1(Register prev, Register newv, Register addr) {     \
2419   atomic_##OP2(prev, newv, addr);                                                    \
2420   zero_extend(prev, prev, 32);                                                       \
2421   return;                                                                            \
2422 }
2423 
2424 ATOMIC_XCHGU(xchgwu, xchgw)
2425 ATOMIC_XCHGU(xchgalwu, xchgalw)
2426 
2427 #undef ATOMIC_XCHGU
2428 
2429 void MacroAssembler::far_jump(Address entry, Register tmp) {
2430   assert(ReservedCodeCacheSize < 4*G, "branch out of range");
2431   assert(CodeCache::find_blob(entry.target()) != NULL,
2432          "destination of far call not found in code cache");
2433   assert(entry.rspec().type() == relocInfo::external_word_type
2434         || entry.rspec().type() == relocInfo::runtime_call_type
2435         || entry.rspec().type() == relocInfo::none, "wrong entry relocInfo type");
2436   int32_t offset = 0;
2437   if (far_branches()) {
2438     // We can use auipc + jalr here because we know that the total size of
2439     // the code cache cannot exceed 2Gb.
2440     la_patchable(tmp, entry, offset);
2441     jalr(x0, tmp, offset);
2442   } else {
2443     j(entry);
2444   }
2445 }
2446 
2447 void MacroAssembler::far_call(Address entry, Register tmp) {
2448   assert(ReservedCodeCacheSize < 4*G, "branch out of range");
2449   assert(CodeCache::find_blob(entry.target()) != NULL,
2450          "destination of far call not found in code cache");
2451   assert(entry.rspec().type() == relocInfo::external_word_type
2452         || entry.rspec().type() == relocInfo::runtime_call_type
2453         || entry.rspec().type() == relocInfo::none, "wrong entry relocInfo type");
2454   int32_t offset = 0;
2455   if (far_branches()) {
2456     // We can use auipc + jalr here because we know that the total size of
2457     // the code cache cannot exceed 2Gb.
2458     la_patchable(tmp, entry, offset);
2459     jalr(x1, tmp, offset); // link
2460   } else {
2461     jal(entry); // link
2462   }
2463 }
2464 
2465 void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
2466                                                    Register super_klass,
2467                                                    Register tmp_reg,
2468                                                    Label* L_success,
2469                                                    Label* L_failure,
2470                                                    Label* L_slow_path,
2471                                                    Register super_check_offset) {
2472   assert_different_registers(sub_klass, super_klass, tmp_reg);
2473   bool must_load_sco = (super_check_offset == noreg);
2474   if (must_load_sco) {
2475     assert(tmp_reg != noreg, "supply either a temp or a register offset");
2476   } else {
2477     assert_different_registers(sub_klass, super_klass, super_check_offset);
2478   }
2479 
2480   Label L_fallthrough;
2481   int label_nulls = 0;
2482   if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
2483   if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
2484   if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
2485   assert(label_nulls <= 1, "at most one NULL in batch");
2486 
2487   int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
2488   int sco_offset = in_bytes(Klass::super_check_offset_offset());
2489   Address super_check_offset_addr(super_klass, sco_offset);
2490 
2491   // Hacked jmp, which may only be used just before L_fallthrough.
2492 #define final_jmp(label)                                                \
2493   if (&(label) == &L_fallthrough) { /*do nothing*/ }                    \
2494   else                            j(label)             /*omit semi*/
2495 
2496   // If the pointers are equal, we are done (e.g., String[] elements).
2497   // This self-check enables sharing of secondary supertype arrays among
2498   // non-primary types such as array-of-interface. Otherwise, each such
2499   // type would need its own customized SSA.
2500   // We move this check to the front of the fast path because many
2501   // type checks are in fact trivially successful in this manner,
2502   // so we get a nicely predicted branch right at the start of the check.
2503   beq(sub_klass, super_klass, *L_success);
2504 
2505   // Check the supertype display:
2506   if (must_load_sco) {
2507     lwu(tmp_reg, super_check_offset_addr);
2508     super_check_offset = tmp_reg;
2509   }
2510   add(t0, sub_klass, super_check_offset);
2511   Address super_check_addr(t0);
2512   ld(t0, super_check_addr); // load displayed supertype
2513 
2514   // This check has worked decisively for primary supers.
2515   // Secondary supers are sought in the super_cache ('super_cache_addr').
2516   // (Secondary supers are interfaces and very deeply nested subtypes.)
2517   // This works in the same check above because of a tricky aliasing
2518   // between the super_cache and the primary super display elements.
2519   // (The 'super_check_addr' can address either, as the case requires.)
2520   // Note that the cache is updated below if it does not help us find
2521   // what we need immediately.
2522   // So if it was a primary super, we can just fail immediately.
2523   // Otherwise, it's the slow path for us (no success at this point).
2524 
2525   beq(super_klass, t0, *L_success);
2526   mv(t1, sc_offset);
2527   if (L_failure == &L_fallthrough) {
2528     beq(super_check_offset, t1, *L_slow_path);
2529   } else {
2530     bne(super_check_offset, t1, *L_failure, /* is_far */ true);
2531     final_jmp(*L_slow_path);
2532   }
2533 
2534   bind(L_fallthrough);
2535 
2536 #undef final_jmp
2537 }
2538 
2539 // Scans 'count' pointer-sized words at [addr] for an occurrence of 'value';
2540 // generic.
2541 void MacroAssembler::repne_scan(Register addr, Register value, Register count,
2542                                 Register tmp) {
2543   Label Lloop, Lexit;
2544   beqz(count, Lexit);
2545   bind(Lloop);
2546   ld(tmp, addr);
2547   beq(value, tmp, Lexit);
2548   add(addr, addr, wordSize);
2549   sub(count, count, 1);
2550   bnez(count, Lloop);
2551   bind(Lexit);
2552 }
2553 
2554 void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
2555                                                    Register super_klass,
2556                                                    Register tmp1_reg,
2557                                                    Register tmp2_reg,
2558                                                    Label* L_success,
2559                                                    Label* L_failure) {
2560   assert_different_registers(sub_klass, super_klass, tmp1_reg);
2561   if (tmp2_reg != noreg) {
2562     assert_different_registers(sub_klass, super_klass, tmp1_reg, tmp2_reg, t0);
2563   }
2564 #define IS_A_TEMP(reg) ((reg) == tmp1_reg || (reg) == tmp2_reg)
2565 
2566   Label L_fallthrough;
2567   int label_nulls = 0;
2568   if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
2569   if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
2570 
2571   assert(label_nulls <= 1, "at most one NULL in the batch");
2572 
2573   // A couple of useful fields in sub_klass:
2574   int ss_offset = in_bytes(Klass::secondary_supers_offset());
2575   int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
2576   Address secondary_supers_addr(sub_klass, ss_offset);
2577   Address super_cache_addr(     sub_klass, sc_offset);
2578 
2579   BLOCK_COMMENT("check_klass_subtype_slow_path");
2580 
2581   // Do a linear scan of the secondary super-klass chain.
2582   // This code is rarely used, so simplicity is a virtue here.
2583   // The repne_scan instruction uses fixed registers, which we must spill.
2584   // Don't worry too much about pre-existing connections with the input regs.
2585 
2586   assert(sub_klass != x10, "killed reg"); // killed by mv(x10, super)
2587   assert(sub_klass != x12, "killed reg"); // killed by la(x12, &pst_counter)
2588 
2589   RegSet pushed_registers;
2590   if (!IS_A_TEMP(x12)) {
2591     pushed_registers += x12;
2592   }
2593   if (!IS_A_TEMP(x15)) {
2594     pushed_registers += x15;
2595   }
2596 
2597   if (super_klass != x10) {
2598     if (!IS_A_TEMP(x10)) {
2599       pushed_registers += x10;
2600     }
2601   }
2602 
2603   push_reg(pushed_registers, sp);
2604 
2605   // Get super_klass value into x10 (even if it was in x15 or x12)
2606   mv(x10, super_klass);
2607 
2608 #ifndef PRODUCT
2609   mv(t1, (address)&SharedRuntime::_partial_subtype_ctr);
2610   Address pst_counter_addr(t1);
2611   ld(t0, pst_counter_addr);
2612   add(t0, t0, 1);
2613   sd(t0, pst_counter_addr);
2614 #endif // PRODUCT
2615 
2616   // We will consult the secondary-super array.
2617   ld(x15, secondary_supers_addr);
2618   // Load the array length.
2619   lwu(x12, Address(x15, Array<Klass*>::length_offset_in_bytes()));
2620   // Skip to start of data.
2621   add(x15, x15, Array<Klass*>::base_offset_in_bytes());
2622 
2623   // Set t0 to an obvious invalid value, falling through by default
2624   mv(t0, -1);
2625   // Scan X12 words at [X15] for an occurrence of X10.
2626   repne_scan(x15, x10, x12, t0);
2627 
2628   // pop will restore x10, so we should use a temp register to keep its value
2629   mv(t1, x10);
2630 
2631   // Unspill the temp registers:
2632   pop_reg(pushed_registers, sp);
2633 
2634   bne(t1, t0, *L_failure);
2635 
2636   // Success. Cache the super we found and proceed in triumph.
2637   sd(super_klass, super_cache_addr);
2638 
2639   if (L_success != &L_fallthrough) {
2640     j(*L_success);
2641   }
2642 
2643 #undef IS_A_TEMP
2644 
2645   bind(L_fallthrough);
2646 }
2647 
2648 // Defines obj, preserves var_size_in_bytes, okay for tmp2 == var_size_in_bytes.
2649 void MacroAssembler::tlab_allocate(Register obj,
2650                                    Register var_size_in_bytes,
2651                                    int con_size_in_bytes,
2652                                    Register tmp1,
2653                                    Register tmp2,
2654                                    Label& slow_case,
2655                                    bool is_far) {
2656   BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
2657   bs->tlab_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, tmp1, tmp2, slow_case, is_far);
2658 }
2659 
2660 // get_thread() can be called anywhere inside generated code so we
2661 // need to save whatever non-callee save context might get clobbered
2662 // by the call to Thread::current() or, indeed, the call setup code.
2663 void MacroAssembler::get_thread(Register thread) {
2664   // save all call-clobbered regs except thread
2665   RegSet saved_regs = RegSet::range(x5, x7) + RegSet::range(x10, x17) +
2666                       RegSet::range(x28, x31) + ra - thread;
2667   push_reg(saved_regs, sp);
2668 
2669   mv(ra, CAST_FROM_FN_PTR(address, Thread::current));
2670   jalr(ra);
2671   if (thread != c_rarg0) {
2672     mv(thread, c_rarg0);
2673   }
2674 
2675   // restore pushed registers
2676   pop_reg(saved_regs, sp);
2677 }
2678 
2679 void MacroAssembler::load_byte_map_base(Register reg) {
2680   CardTable::CardValue* byte_map_base =
2681     ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base();
2682   mv(reg, (uint64_t)byte_map_base);
2683 }
2684 
2685 void MacroAssembler::la_patchable(Register reg1, const Address &dest, int32_t &offset) {
2686   unsigned long low_address = (uintptr_t)CodeCache::low_bound();
2687   unsigned long high_address = (uintptr_t)CodeCache::high_bound();
2688   unsigned long dest_address = (uintptr_t)dest.target();
2689   long offset_low = dest_address - low_address;
2690   long offset_high = dest_address - high_address;
2691 
2692   assert(is_valid_riscv64_address(dest.target()), "bad address");
2693   assert(dest.getMode() == Address::literal, "la_patchable must be applied to a literal address");
2694 
2695   relocate(dest.rspec());
2696   // RISC-V doesn't compute a page-aligned address, in order to partially
2697   // compensate for the use of *signed* offsets in its base+disp12
2698   // addressing mode (RISC-V's PC-relative reach remains asymmetric:
2699   // [-(2G + 2K), 2G - 2K)).
2700   if (offset_high >= -((1L << 31) + (1L << 11)) && offset_low < (1L << 31) - (1L << 11)) {
2701     int64_t distance = dest.target() - pc();
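         // auipc materializes only the upper 20 bits of the pc-relative
         // distance; adding 0x800 first rounds to the nearest 4 KiB so that the
         // signed low 12 bits returned in 'offset' reconstruct the exact target.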
2702     auipc(reg1, (int32_t)distance + 0x800);
2703     offset = ((int32_t)distance << 20) >> 20;
2704   } else {
2705     movptr(reg1, dest.target(), offset);
2706   }
2707 }
2708 
2709 void MacroAssembler::build_frame(int framesize) {
2710   assert(framesize >= 2, "framesize must include space for FP/RA");
2711   assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment");
2712   sub(sp, sp, framesize);
2713   sd(fp, Address(sp, framesize - 2 * wordSize));
2714   sd(ra, Address(sp, framesize - wordSize));
2715   if (PreserveFramePointer) { add(fp, sp, framesize); }
2716 }
2717 
2718 void MacroAssembler::remove_frame(int framesize) {
2719   assert(framesize >= 2, "framesize must include space for FP/RA");
2720   assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment");
2721   ld(fp, Address(sp, framesize - 2 * wordSize));
2722   ld(ra, Address(sp, framesize - wordSize));
2723   add(sp, sp, framesize);
2724 }
2725 
2726 void MacroAssembler::reserved_stack_check() {
2727     // testing if reserved zone needs to be enabled
2728     Label no_reserved_zone_enabling;
2729 
2730     ld(t0, Address(xthread, JavaThread::reserved_stack_activation_offset()));
2731     bltu(sp, t0, no_reserved_zone_enabling);
2732 
2733     enter();   // RA and FP are live.
2734     mv(c_rarg0, xthread);
2735     int32_t offset = 0;
2736     la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone)), offset);
2737     jalr(x1, t0, offset);
2738     leave();
2739 
2740     // We have already removed our own frame.
2741     // throw_delayed_StackOverflowError will think that it's been
2742     // called by our caller.
2743     offset = 0;
2744     la_patchable(t0, RuntimeAddress(StubRoutines::throw_delayed_StackOverflowError_entry()), offset);
2745     jalr(x0, t0, offset);
2746     should_not_reach_here();
2747 
2748     bind(no_reserved_zone_enabling);
2749 }
2750 
2751 // Move the address of the polling page into dest.
2752 void MacroAssembler::get_polling_page(Register dest, relocInfo::relocType rtype) {
2753   ld(dest, Address(xthread, JavaThread::polling_page_offset()));
2754 }
2755 
2756 // Read the polling page.  The address of the polling page must
2757 // already be in r.
2758 void MacroAssembler::read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype) {
2759   relocate(rtype);
2760   lwu(zr, Address(r, offset));
2761 }
2762 
2763 void  MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
2764 #ifdef ASSERT
2765   {
2766     ThreadInVMfromUnknown tiv;
2767     assert (UseCompressedOops, "should only be used for compressed oops");
2768     assert (Universe::heap() != NULL, "java heap should be initialized");
2769     assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
2770     assert(Universe::heap()->is_in(JNIHandles::resolve(obj)), "should be real oop");
2771   }
2772 #endif
2773   int oop_index = oop_recorder()->find_index(obj);
2774   relocate(oop_Relocation::spec(oop_index));
2775   li32(dst, 0xDEADBEEF);
2776   zero_extend(dst, dst, 32);
2777 }
2778 
2779 void  MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
2780   assert (UseCompressedClassPointers, "should only be used for compressed headers");
2781   assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
2782   int index = oop_recorder()->find_index(k);
2783   assert(!Universe::heap()->is_in(k), "should not be an oop");
2784 
2785   narrowKlass nk = CompressedKlassPointers::encode(k);
2786   relocate(metadata_Relocation::spec(index));
2787   li32(dst, nk);
2788   zero_extend(dst, dst, 32);
2789 }
2790 
2791 // Maybe emit a call via a trampoline.  If the code cache is small
2792 // trampolines won't be emitted.
2793 address MacroAssembler::trampoline_call(Address entry) {
2794   assert(JavaThread::current()->is_Compiler_thread(), "just checking");
2795   assert(entry.rspec().type() == relocInfo::runtime_call_type ||
2796          entry.rspec().type() == relocInfo::opt_virtual_call_type ||
2797          entry.rspec().type() == relocInfo::static_call_type ||
2798          entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type");
2799 
2800   // We need a trampoline if branches are far.
2801   if (far_branches()) {
2802     bool in_scratch_emit_size = false;
2803 #ifdef COMPILER2
2804     // We don't want to emit a trampoline if C2 is generating dummy
2805     // code during its branch shortening phase.
2806     CompileTask* task = ciEnv::current()->task();
2807     in_scratch_emit_size =
2808       (task != NULL && is_c2_compile(task->comp_level()) &&
2809        Compile::current()->output()->in_scratch_emit_size());
2810 #endif
2811     if (!in_scratch_emit_size) {
2812       address stub = emit_trampoline_stub(offset(), entry.target());
2813       if (stub == NULL) {
2814         postcond(pc() == badAddress);
2815         return NULL; // CodeCache is full
2816       }
2817     }
2818   }
2819 
2820   address call_pc = pc();
2821 #ifdef ASSERT
2822   if (entry.rspec().type() != relocInfo::runtime_call_type) {
2823     assert_alignment(call_pc);
2824   }
2825 #endif
2826   relocate(entry.rspec());
2827   if (!far_branches()) {
2828     jal(entry.target());
2829   } else {
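         // Branches are far: emit jal to the current pc as a placeholder. It is expected
         // to be patched later to reach the real target, going through the trampoline
         // stub emitted above when the target is out of direct jal range.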
2830     jal(pc());
2831   }
2832 
2833   postcond(pc() != badAddress);
2834   return call_pc;
2835 }
2836 
2837 address MacroAssembler::ic_call(address entry, jint method_index) {
2838   RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index);
2839   movptr(t1, (address)Universe::non_oop_word());
2840   assert_cond(entry != NULL);
2841   return trampoline_call(Address(entry, rh));
2842 }
2843 
2844 // Emit a trampoline stub for a call to a target which is too far away.
2845 //
2846 // code sequences:
2847 //
2848 // call-site:
2849 //   branch-and-link to <destination> or <trampoline stub>
2850 //
2851 // Related trampoline stub for this call site in the stub section:
2852 //   load the call target from the constant pool
2853 //   branch (RA still points to the call site above)
2854 
2855 address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset,
2856                                              address dest) {
2857   address stub = start_a_stub(NativeInstruction::instruction_size
2858                             + NativeCallTrampolineStub::instruction_size);
2859   if (stub == NULL) {
2860     return NULL;  // CodeBuffer::expand failed
2861   }
2862 
2863   // Create a trampoline stub relocation which relates this trampoline stub
2864   // with the call instruction at insts_call_instruction_offset in the
2865   // instructions code-section.
2866 
2867   // Make sure the destination address, which follows the 3 instructions below, is 8-byte aligned.
2868   align(wordSize, NativeCallTrampolineStub::data_offset);
2869 
2870   relocate(trampoline_stub_Relocation::spec(code()->insts()->start() +
2871                                             insts_call_instruction_offset));
2872   const int stub_start_offset = offset();
2873 
2874   // Now, create the trampoline stub's code:
2875   // - load the call target from the data word that follows
2876   // - jump to it
2877   Label target;
2878   ld(t0, target);  // auipc + ld
2879   jr(t0);          // jalr
2880   bind(target);
2881   assert(offset() - stub_start_offset == NativeCallTrampolineStub::data_offset,
2882          "should be");
2883   assert(offset() % wordSize == 0, "bad alignment");
2884   emit_int64((intptr_t)dest);
2885 
2886   const address stub_start_addr = addr_at(stub_start_offset);
2887 
2888   assert(is_NativeCallTrampolineStub_at(stub_start_addr), "doesn't look like a trampoline");
2889 
2890   end_a_stub();
2891   return stub_start_addr;
2892 }
2893 
2894 Address MacroAssembler::add_memory_helper(const Address dst) {
2895   switch (dst.getMode()) {
2896     case Address::base_plus_offset:
2897       // This is the expected mode, although we allow all the other
2898       // forms below.
2899       return form_address(t1, dst.base(), dst.offset());
2900     default:
2901       la(t1, dst);
2902       return Address(t1);
2903   }
2904 }
2905 
2906 void MacroAssembler::increment(const Address dst, int64_t value) {
2907   assert(((dst.getMode() == Address::base_plus_offset &&
2908            is_offset_in_range(dst.offset(), 12)) || is_imm_in_range(value, 12, 0)),
2909           "invalid value and address mode combination");
2910   Address adr = add_memory_helper(dst);
2911   assert(!adr.uses(t0), "invalid dst for address increment");
2912   ld(t0, adr);
2913   add(t0, t0, value, t1);
2914   sd(t0, adr);
2915 }
2916 
2917 void MacroAssembler::incrementw(const Address dst, int32_t value) {
2918   assert(((dst.getMode() == Address::base_plus_offset &&
2919            is_offset_in_range(dst.offset(), 12)) || is_imm_in_range(value, 12, 0)),
2920           "invalid value and address mode combination");
2921   Address adr = add_memory_helper(dst);
2922   assert(!adr.uses(t0), "invalid dst for address increment");
2923   lwu(t0, adr);
2924   addw(t0, t0, value, t1);
2925   sw(t0, adr);
2926 }
2927 
2928 void MacroAssembler::decrement(const Address dst, int64_t value) {
2929   assert(((dst.getMode() == Address::base_plus_offset &&
2930            is_offset_in_range(dst.offset(), 12)) || is_imm_in_range(value, 12, 0)),
2931           "invalid value and address mode combination");
2932   Address adr = add_memory_helper(dst);
2933   assert(!adr.uses(t0), "invalid dst for address decrement");
2934   ld(t0, adr);
2935   sub(t0, t0, value, t1);
2936   sd(t0, adr);
2937 }
2938 
2939 void MacroAssembler::decrementw(const Address dst, int32_t value) {
2940   assert(((dst.getMode() == Address::base_plus_offset &&
2941            is_offset_in_range(dst.offset(), 12)) || is_imm_in_range(value, 12, 0)),
2942           "invalid value and address mode combination");
2943   Address adr = add_memory_helper(dst);
2944   assert(!adr.uses(t0), "invalid dst for address decrement");
2945   lwu(t0, adr);
2946   subw(t0, t0, value, t1);
2947   sw(t0, adr);
2948 }
2949 
2950 void MacroAssembler::cmpptr(Register src1, Address src2, Label& equal) {
2951   assert_different_registers(src1, t0);
2952   int32_t offset;
2953   la_patchable(t0, src2, offset);
2954   ld(t0, Address(t0, offset));
2955   beq(src1, t0, equal);
2956 }
2957 
2958 void MacroAssembler::load_method_holder_cld(Register result, Register method) {
2959   load_method_holder(result, method);
2960   ld(result, Address(result, InstanceKlass::class_loader_data_offset()));
2961 }
2962 
2963 void MacroAssembler::load_method_holder(Register holder, Register method) {
2964   ld(holder, Address(method, Method::const_offset()));                      // ConstMethod*
2965   ld(holder, Address(holder, ConstMethod::constants_offset()));             // ConstantPool*
2966   ld(holder, Address(holder, ConstantPool::pool_holder_offset_in_bytes())); // InstanceKlass*
2967 }
2968 
2969 // String indexOf helper:
2970 // compute the index of the match from the trailing-zero count of the match mask
2971 void MacroAssembler::compute_index(Register haystack, Register trailing_zeros,
2972                                    Register match_mask, Register result,
2973                                    Register ch2, Register tmp,
2974                                    bool haystack_isL)
2975 {
2976   int haystack_chr_shift = haystack_isL ? 0 : 1;
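       // trailing_zeros is the bit index of the 0x80/0x8000 match produced by compute_match_mask.
       // Shift that match (and everything below it) out of match_mask, then advance haystack and
       // result to the matched element; ch2 receives the 8 bytes of haystack at the match position.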
2977   srl(match_mask, match_mask, trailing_zeros);
2978   srli(match_mask, match_mask, 1);
2979   srli(tmp, trailing_zeros, LogBitsPerByte);
2980   if (!haystack_isL) andi(tmp, tmp, 0xE);
2981   add(haystack, haystack, tmp);
2982   ld(ch2, Address(haystack));
2983   if (!haystack_isL) srli(tmp, tmp, haystack_chr_shift);
2984   add(result, result, tmp);
2985 }
2986 
2987 // String indexOf helper:
2988 // find the pattern element in src and compute the match mask;
2989 // only the lowest occurrence of 0x80/0x8000 is the valid match index
2990 // match mask patterns and corresponding indices would be like:
2991 // - 0x8080808080808080 (Latin1)
2992 // -   7 6 5 4 3 2 1 0  (match index)
2993 // - 0x8000800080008000 (UTF16)
2994 // -   3   2   1   0    (match index)
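     // mask1/mask2 are expected to hold 0x0101...01 / 0x7f7f...7f (Latin1) or
     // 0x0001...0001 / 0x7fff...7fff (UTF16); the sequence below is the classic SWAR
     // zero-element test applied to (src ^ pattern), so every element of src equal to
     // pattern produces a 0x80/0x8000 bit in match_mask.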
2995 void MacroAssembler::compute_match_mask(Register src, Register pattern, Register match_mask,
2996                                         Register mask1, Register mask2)
2997 {
2998   xorr(src, pattern, src);
2999   sub(match_mask, src, mask1);
3000   orr(src, src, mask2);
3001   notr(src, src);
3002   andr(match_mask, match_mask, src);
3003 }
3004 
3005 #ifdef COMPILER2
3006 // Code for BigInteger::mulAdd intrinsic
3007 // out     = x10
3008 // in      = x11
3009 // offset  = x12  (already out.length-offset)
3010 // len     = x13
3011 // k       = x14
3012 // tmp     = x28
3013 //
3014 // pseudo code from java implementation:
3015 // long kLong = k & LONG_MASK;
3016 // carry = 0;
3017 // offset = out.length-offset - 1;
3018 // for (int j = len - 1; j >= 0; j--) {
3019 //     product = (in[j] & LONG_MASK) * kLong + (out[offset] & LONG_MASK) + carry;
3020 //     out[offset--] = (int)product;
3021 //     carry = product >>> 32;
3022 // }
3023 // return (int)carry;
3024 void MacroAssembler::mul_add(Register out, Register in, Register offset,
3025                              Register len, Register k, Register tmp) {
3026   Label L_tail_loop, L_unroll, L_end;
3027   mv(tmp, out);
3028   mv(out, zr);
3029   blez(len, L_end);
3030   zero_extend(k, k, 32);
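       // Convert offset and len into end pointers: offset = &out[offset] and in = &in[len]
       // (using the original out pointer saved in tmp); the loops below walk backwards,
       // pre-decrementing both pointers.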
3031   slliw(t0, offset, LogBytesPerInt);
3032   add(offset, tmp, t0);
3033   slliw(t0, len, LogBytesPerInt);
3034   add(in, in, t0);
3035 
3036   const int unroll = 8;
3037   mv(tmp, unroll);
3038   blt(len, tmp, L_tail_loop);
3039   bind(L_unroll);
3040   for (int i = 0; i < unroll; i++) {
3041     sub(in, in, BytesPerInt);
3042     lwu(t0, Address(in, 0));
3043     mul(t1, t0, k);
3044     add(t0, t1, out);
3045     sub(offset, offset, BytesPerInt);
3046     lwu(t1, Address(offset, 0));
3047     add(t0, t0, t1);
3048     sw(t0, Address(offset, 0));
3049     srli(out, t0, 32);
3050   }
3051   subw(len, len, tmp);
3052   bge(len, tmp, L_unroll);
3053 
3054   bind(L_tail_loop);
3055   blez(len, L_end);
3056   sub(in, in, BytesPerInt);
3057   lwu(t0, Address(in, 0));
3058   mul(t1, t0, k);
3059   add(t0, t1, out);
3060   sub(offset, offset, BytesPerInt);
3061   lwu(t1, Address(offset, 0));
3062   add(t0, t0, t1);
3063   sw(t0, Address(offset, 0));
3064   srli(out, t0, 32);
3065   subw(len, len, 1);
3066   j(L_tail_loop);
3067 
3068   bind(L_end);
3069 }
3070 
3071 // Add two unsigned inputs and output the carry.
3072 void MacroAssembler::cad(Register dst, Register src1, Register src2, Register carry)
3073 {
3074   assert_different_registers(dst, carry);
3075   assert_different_registers(dst, src2);
3076   add(dst, src1, src2);
3077   sltu(carry, dst, src2);
3078 }
3079 
3080 // Add two inputs plus the input carry.
3081 void MacroAssembler::adc(Register dst, Register src1, Register src2, Register carry)
3082 {
3083   assert_different_registers(dst, carry);
3084   add(dst, src1, src2);
3085   add(dst, dst, carry);
3086 }
3087 
3088 // Add two unsigned inputs plus the input carry, and output the carry.
3089 void MacroAssembler::cadc(Register dst, Register src1, Register src2, Register carry)
3090 {
3091   assert_different_registers(dst, src2);
3092   adc(dst, src1, src2, carry);
3093   sltu(carry, dst, src2);
3094 }
3095 
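     // final_dest_hi:dest_lo = dest_hi:dest_lo + src1 + src2 (unsigned);
     // carry is only used as a scratch register for the intermediate carries.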
3096 void MacroAssembler::add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo,
3097                                      Register src1, Register src2, Register carry)
3098 {
3099   cad(dest_lo, dest_lo, src1, carry);
3100   add(dest_hi, dest_hi, carry);
3101   cad(dest_lo, dest_lo, src2, carry);
3102   add(final_dest_hi, dest_hi, carry);
3103 }
3104 
3105 /**
3106  * Multiply 32 bit by 32 bit first loop.
3107  */
3108 void MacroAssembler::multiply_32_x_32_loop(Register x, Register xstart, Register x_xstart,
3109                                            Register y, Register y_idx, Register z,
3110                                            Register carry, Register product,
3111                                            Register idx, Register kdx)
3112 {
3113   // jlong carry, x[], y[], z[];
3114   // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) {
3115   //     long product = y[idx] * x[xstart] + carry;
3116   //     z[kdx] = (int)product;
3117   //     carry = product >>> 32;
3118   // }
3119   // z[xstart] = (int)carry;
3120 
3121   Label L_first_loop, L_first_loop_exit;
3122   blez(idx, L_first_loop_exit);
3123 
3124   shadd(t0, xstart, x, t0, LogBytesPerInt);
3125   lwu(x_xstart, Address(t0, 0));
3126 
3127   bind(L_first_loop);
3128   subw(idx, idx, 1);
3129   shadd(t0, idx, y, t0, LogBytesPerInt);
3130   lwu(y_idx, Address(t0, 0));
3131   mul(product, x_xstart, y_idx);
3132   add(product, product, carry);
3133   srli(carry, product, 32);
3134   subw(kdx, kdx, 1);
3135   shadd(t0, kdx, z, t0, LogBytesPerInt);
3136   sw(product, Address(t0, 0));
3137   bgtz(idx, L_first_loop);
3138 
3139   bind(L_first_loop_exit);
3140 }
3141 
3142 /**
3143  * Multiply 64 bit by 64 bit first loop.
3144  */
3145 void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart,
3146                                            Register y, Register y_idx, Register z,
3147                                            Register carry, Register product,
3148                                            Register idx, Register kdx)
3149 {
3150   //
3151   //  jlong carry, x[], y[], z[];
3152   //  for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) {
3153   //    huge_128 product = y[idx] * x[xstart] + carry;
3154   //    z[kdx] = (jlong)product;
3155   //    carry  = (jlong)(product >>> 64);
3156   //  }
3157   //  z[xstart] = carry;
3158   //
3159 
3160   Label L_first_loop, L_first_loop_exit;
3161   Label L_one_x, L_one_y, L_multiply;
3162 
3163   subw(xstart, xstart, 1);
3164   bltz(xstart, L_one_x);
3165 
3166   shadd(t0, xstart, x, t0, LogBytesPerInt);
3167   ld(x_xstart, Address(t0, 0));
3168   ror_imm(x_xstart, x_xstart, 32); // convert big-endian to little-endian
3169 
3170   bind(L_first_loop);
3171   subw(idx, idx, 1);
3172   bltz(idx, L_first_loop_exit);
3173   subw(idx, idx, 1);
3174   bltz(idx, L_one_y);
3175 
3176   shadd(t0, idx, y, t0, LogBytesPerInt);
3177   ld(y_idx, Address(t0, 0));
3178   ror_imm(y_idx, y_idx, 32); // convert big-endian to little-endian
3179   bind(L_multiply);
3180 
3181   mulhu(t0, x_xstart, y_idx);
3182   mul(product, x_xstart, y_idx);
3183   cad(product, product, carry, t1);
3184   adc(carry, t0, zr, t1);
3185 
3186   subw(kdx, kdx, 2);
3187   ror_imm(product, product, 32); // back to big-endian
3188   shadd(t0, kdx, z, t0, LogBytesPerInt);
3189   sd(product, Address(t0, 0));
3190 
3191   j(L_first_loop);
3192 
3193   bind(L_one_y);
3194   lwu(y_idx, Address(y, 0));
3195   j(L_multiply);
3196 
3197   bind(L_one_x);
3198   lwu(x_xstart, Address(x, 0));
3199   j(L_first_loop);
3200 
3201   bind(L_first_loop_exit);
3202 }
3203 
3204 /**
3205  * Multiply 128 bit by 128 bit. Unrolled inner loop.
3206  *
3207  */
3208 void MacroAssembler::multiply_128_x_128_loop(Register y, Register z,
3209                                              Register carry, Register carry2,
3210                                              Register idx, Register jdx,
3211                                              Register yz_idx1, Register yz_idx2,
3212                                              Register tmp, Register tmp3, Register tmp4,
3213                                              Register tmp6, Register product_hi)
3214 {
3215   //   jlong carry, x[], y[], z[];
3216   //   int kdx = xstart+1;
3217   //   for (int idx=ystart-2; idx >= 0; idx -= 2) { // Third loop
3218   //     huge_128 tmp3 = (y[idx+1] * product_hi) + z[kdx+idx+1] + carry;
3219   //     jlong carry2  = (jlong)(tmp3 >>> 64);
3220   //     huge_128 tmp4 = (y[idx]   * product_hi) + z[kdx+idx] + carry2;
3221   //     carry  = (jlong)(tmp4 >>> 64);
3222   //     z[kdx+idx+1] = (jlong)tmp3;
3223   //     z[kdx+idx] = (jlong)tmp4;
3224   //   }
3225   //   idx += 2;
3226   //   if (idx > 0) {
3227   //     yz_idx1 = (y[idx] * product_hi) + z[kdx+idx] + carry;
3228   //     z[kdx+idx] = (jlong)yz_idx1;
3229   //     carry  = (jlong)(yz_idx1 >>> 64);
3230   //   }
3231   //
3232 
3233   Label L_third_loop, L_third_loop_exit, L_post_third_loop_done;
3234 
3235   srliw(jdx, idx, 2);
3236 
3237   bind(L_third_loop);
3238 
3239   subw(jdx, jdx, 1);
3240   bltz(jdx, L_third_loop_exit);
3241   subw(idx, idx, 4);
3242 
3243   shadd(t0, idx, y, t0, LogBytesPerInt);
3244   ld(yz_idx2, Address(t0, 0));
3245   ld(yz_idx1, Address(t0, wordSize));
3246 
3247   shadd(tmp6, idx, z, t0, LogBytesPerInt);
3248 
3249   ror_imm(yz_idx1, yz_idx1, 32); // convert big-endian to little-endian
3250   ror_imm(yz_idx2, yz_idx2, 32);
3251 
3252   ld(t1, Address(tmp6, 0));
3253   ld(t0, Address(tmp6, wordSize));
3254 
3255   mul(tmp3, product_hi, yz_idx1); //  yz_idx1 * product_hi -> tmp4:tmp3
3256   mulhu(tmp4, product_hi, yz_idx1);
3257 
3258   ror_imm(t0, t0, 32, tmp); // convert big-endian to little-endian
3259   ror_imm(t1, t1, 32, tmp);
3260 
3261   mul(tmp, product_hi, yz_idx2); //  yz_idx2 * product_hi -> carry2:tmp
3262   mulhu(carry2, product_hi, yz_idx2);
3263 
3264   cad(tmp3, tmp3, carry, carry);
3265   adc(tmp4, tmp4, zr, carry);
3266   cad(tmp3, tmp3, t0, t0);
3267   cadc(tmp4, tmp4, tmp, t0);
3268   adc(carry, carry2, zr, t0);
3269   cad(tmp4, tmp4, t1, carry2);
3270   adc(carry, carry, zr, carry2);
3271 
3272   ror_imm(tmp3, tmp3, 32); // convert little-endian to big-endian
3273   ror_imm(tmp4, tmp4, 32);
3274   sd(tmp4, Address(tmp6, 0));
3275   sd(tmp3, Address(tmp6, wordSize));
3276 
3277   j(L_third_loop);
3278 
3279   bind(L_third_loop_exit);
3280 
3281   andi(idx, idx, 0x3);
3282   beqz(idx, L_post_third_loop_done);
3283 
3284   Label L_check_1;
3285   subw(idx, idx, 2);
3286   bltz(idx, L_check_1);
3287 
3288   shadd(t0, idx, y, t0, LogBytesPerInt);
3289   ld(yz_idx1, Address(t0, 0));
3290   ror_imm(yz_idx1, yz_idx1, 32);
3291 
3292   mul(tmp3, product_hi, yz_idx1); //  yz_idx1 * product_hi -> tmp4:tmp3
3293   mulhu(tmp4, product_hi, yz_idx1);
3294 
3295   shadd(t0, idx, z, t0, LogBytesPerInt);
3296   ld(yz_idx2, Address(t0, 0));
3297   ror_imm(yz_idx2, yz_idx2, 32, tmp);
3298 
3299   add2_with_carry(carry, tmp4, tmp3, carry, yz_idx2, tmp);
3300 
3301   ror_imm(tmp3, tmp3, 32, tmp);
3302   sd(tmp3, Address(t0, 0));
3303 
3304   bind(L_check_1);
3305 
3306   andi(idx, idx, 0x1);
3307   subw(idx, idx, 1);
3308   bltz(idx, L_post_third_loop_done);
3309   shadd(t0, idx, y, t0, LogBytesPerInt);
3310   lwu(tmp4, Address(t0, 0));
3311   mul(tmp3, tmp4, product_hi); //  tmp4 * product_hi -> carry2:tmp3
3312   mulhu(carry2, tmp4, product_hi);
3313 
3314   shadd(t0, idx, z, t0, LogBytesPerInt);
3315   lwu(tmp4, Address(t0, 0));
3316 
3317   add2_with_carry(carry2, carry2, tmp3, tmp4, carry, t0);
3318 
3319   shadd(t0, idx, z, t0, LogBytesPerInt);
3320   sw(tmp3, Address(t0, 0));
3321 
3322   slli(t0, carry2, 32);
3323   srli(carry, tmp3, 32);
3324   orr(carry, carry, t0);
3325 
3326   bind(L_post_third_loop_done);
3327 }
3328 
3329 /**
3330  * Code for BigInteger::multiplyToLen() intrinsic.
3331  *
3332  * x10: x
3333  * x11: xlen
3334  * x12: y
3335  * x13: ylen
3336  * x14: z
3337  * x15: zlen
3338  * x16: tmp1
3339  * x17: tmp2
3340  * x7:  tmp3
3341  * x28: tmp4
3342  * x29: tmp5
3343  * x30: tmp6
3344  * x31: tmp7
3345  */
3346 void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Register ylen,
3347                                      Register z, Register zlen,
3348                                      Register tmp1, Register tmp2, Register tmp3, Register tmp4,
3349                                      Register tmp5, Register tmp6, Register product_hi)
3350 {
3351   assert_different_registers(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6);
3352 
3353   const Register idx = tmp1;
3354   const Register kdx = tmp2;
3355   const Register xstart = tmp3;
3356 
3357   const Register y_idx = tmp4;
3358   const Register carry = tmp5;
3359   const Register product = xlen;
3360   const Register x_xstart = zlen; // reuse register
3361 
3362   mv(idx, ylen); // idx = ylen;
3363   mv(kdx, zlen); // kdx = xlen+ylen;
3364   mv(carry, zr); // carry = 0;
3365 
3366   Label L_multiply_64_x_64_loop, L_done;
3367 
3368   subw(xstart, xlen, 1);
3369   bltz(xstart, L_done);
3370 
3371   const Register jdx = tmp1;
3372 
3373   if (AvoidUnalignedAccesses) {
3374     // Check whether xlen and ylen are both even, so that 8-byte accesses to x and y stay aligned.
3375     orr(t0, xlen, ylen);
3376     andi(t0, t0, 0x1);
3377     beqz(t0, L_multiply_64_x_64_loop);
3378 
3379     multiply_32_x_32_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx);
3380     shadd(t0, xstart, z, t0, LogBytesPerInt);
3381     sw(carry, Address(t0, 0));
3382 
3383     Label L_second_loop_unaligned;
3384     bind(L_second_loop_unaligned);
3385     mv(carry, zr);
3386     mv(jdx, ylen);
3387     subw(xstart, xstart, 1);
3388     bltz(xstart, L_done);
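         // Save z on the stack; it is repointed at the current output column for the
         // inner loop below and restored at L_third_loop_exit.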
3389     sub(sp, sp, 2 * wordSize);
3390     sd(z, Address(sp, 0));
3391     sd(zr, Address(sp, wordSize));
3392     shadd(t0, xstart, z, t0, LogBytesPerInt);
3393     addi(z, t0, 4);
3394     shadd(t0, xstart, x, t0, LogBytesPerInt);
3395     lwu(product, Address(t0, 0));
3396     Label L_third_loop, L_third_loop_exit;
3397 
3398     blez(jdx, L_third_loop_exit);
3399 
3400     bind(L_third_loop);
3401     subw(jdx, jdx, 1);
3402     shadd(t0, jdx, y, t0, LogBytesPerInt);
3403     lwu(t0, Address(t0, 0));
3404     mul(t1, t0, product);
3405     add(t0, t1, carry);
3406     shadd(tmp6, jdx, z, t1, LogBytesPerInt);
3407     lwu(t1, Address(tmp6, 0));
3408     add(t0, t0, t1);
3409     sw(t0, Address(tmp6, 0));
3410     srli(carry, t0, 32);
3411     bgtz(jdx, L_third_loop);
3412 
3413     bind(L_third_loop_exit);
3414     ld(z, Address(sp, 0));
3415     addi(sp, sp, 2 * wordSize);
3416     shadd(t0, xstart, z, t0, LogBytesPerInt);
3417     sw(carry, Address(t0, 0));
3418 
3419     j(L_second_loop_unaligned);
3420   }
3421 
3422   bind(L_multiply_64_x_64_loop);
3423   multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx);
3424 
3425   Label L_second_loop_aligned;
3426   beqz(kdx, L_second_loop_aligned);
3427 
3428   Label L_carry;
3429   subw(kdx, kdx, 1);
3430   beqz(kdx, L_carry);
3431 
3432   shadd(t0, kdx, z, t0, LogBytesPerInt);
3433   sw(carry, Address(t0, 0));
3434   srli(carry, carry, 32);
3435   subw(kdx, kdx, 1);
3436 
3437   bind(L_carry);
3438   shadd(t0, kdx, z, t0, LogBytesPerInt);
3439   sw(carry, Address(t0, 0));
3440 
3441   // Second and third (nested) loops.
3442   //
3443   // for (int i = xstart-1; i >= 0; i--) { // Second loop
3444   //   carry = 0;
3445   //   for (int jdx=ystart, k=ystart+1+i; jdx >= 0; jdx--, k--) { // Third loop
3446   //     long product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) +
3447   //                    (z[k] & LONG_MASK) + carry;
3448   //     z[k] = (int)product;
3449   //     carry = product >>> 32;
3450   //   }
3451   //   z[i] = (int)carry;
3452   // }
3453   //
3454   // i = xlen, j = tmp1, k = tmp2, carry = tmp5, x[i] = product_hi
3455 
3456   bind(L_second_loop_aligned);
3457   mv(carry, zr); // carry = 0;
3458   mv(jdx, ylen); // j = ystart+1
3459 
3460   subw(xstart, xstart, 1); // i = xstart-1;
3461   bltz(xstart, L_done);
3462 
3463   sub(sp, sp, 4 * wordSize);
3464   sd(z, Address(sp, 0));
3465 
3466   Label L_last_x;
3467   shadd(t0, xstart, z, t0, LogBytesPerInt);
3468   addi(z, t0, 4);
3469   subw(xstart, xstart, 1); // i = xstart-1;
3470   bltz(xstart, L_last_x);
3471 
3472   shadd(t0, xstart, x, t0, LogBytesPerInt);
3473   ld(product_hi, Address(t0, 0));
3474   ror_imm(product_hi, product_hi, 32); // convert big-endian to little-endian
3475 
3476   Label L_third_loop_prologue;
3477   bind(L_third_loop_prologue);
3478 
3479   sd(ylen, Address(sp, wordSize));
3480   sd(x, Address(sp, 2 * wordSize));
3481   sd(xstart, Address(sp, 3 * wordSize));
3482   multiply_128_x_128_loop(y, z, carry, x, jdx, ylen, product,
3483                           tmp2, x_xstart, tmp3, tmp4, tmp6, product_hi);
3484   ld(z, Address(sp, 0));
3485   ld(ylen, Address(sp, wordSize));
3486   ld(x, Address(sp, 2 * wordSize));
3487   ld(xlen, Address(sp, 3 * wordSize)); // copy old xstart -> xlen
3488   addi(sp, sp, 4 * wordSize);
3489 
3490   addiw(tmp3, xlen, 1);
3491   shadd(t0, tmp3, z, t0, LogBytesPerInt);
3492   sw(carry, Address(t0, 0));
3493 
3494   subw(tmp3, tmp3, 1);
3495   bltz(tmp3, L_done);
3496 
3497   srli(carry, carry, 32);
3498   shadd(t0, tmp3, z, t0, LogBytesPerInt);
3499   sw(carry, Address(t0, 0));
3500   j(L_second_loop_aligned);
3501 
3502   // Next infrequent code is moved outside loops.
3503   bind(L_last_x);
3504   lwu(product_hi, Address(x, 0));
3505   j(L_third_loop_prologue);
3506 
3507   bind(L_done);
3508 }
3509 #endif
3510 
3511 // Count the bits of trailing zero elements from lsb to msb until the first non-zero element is found.
3512 // For the LL case an element is one byte, so we step 8 bits at a time; for the other cases
3513 // we step 16 bits at a time.
3514 void MacroAssembler::ctzc_bit(Register Rd, Register Rs, bool isLL, Register tmp1, Register tmp2)
3515 {
3516   if (UseZbb) {
3517     assert_different_registers(Rd, Rs, tmp1);
3518     int step = isLL ? 8 : 16;
3519     ctz(Rd, Rs);
3520     andi(tmp1, Rd, step - 1);
3521     sub(Rd, Rd, tmp1);
3522     return;
3523   }
3524   assert_different_registers(Rd, Rs, tmp1, tmp2);
3525   Label Loop;
3526   int step = isLL ? 8 : 16;
3527   mv(Rd, -step);
3528   mv(tmp2, Rs);
3529 
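       // Scan one element (step bits) per iteration; Rd starts at -step so that, when the
       // loop exits on the first non-zero element, Rd holds the number of bits in the
       // zero elements that were skipped.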
3530   bind(Loop);
3531   addi(Rd, Rd, step);
3532   andi(tmp1, tmp2, ((1 << step) - 1));
3533   srli(tmp2, tmp2, step);
3534   beqz(tmp1, Loop);
3535 }
3536 
3537 // This routine reads the 4 adjacent bytes in the lower half of the source register and
3538 // inflates them into the destination register, for example:
3539 // Rs: A7A6A5A4A3A2A1A0
3540 // Rd: 00A300A200A100A0
3541 void MacroAssembler::inflate_lo32(Register Rd, Register Rs, Register tmp1, Register tmp2)
3542 {
3543   assert_different_registers(Rd, Rs, tmp1, tmp2);
3544   mv(tmp1, 0xFF);
3545   mv(Rd, zr);
3546   for (int i = 0; i <= 3; i++)
3547   {
3548     andr(tmp2, Rs, tmp1);
3549     if (i) {
3550       slli(tmp2, tmp2, i * 8);
3551     }
3552     orr(Rd, Rd, tmp2);
3553     if (i != 3) {
3554       slli(tmp1, tmp1, 8);
3555     }
3556   }
3557 }
3558 
3559 // This routine reads the 4 adjacent bytes in the upper half of the source register and
3560 // inflates them into the destination register, for example:
3561 // Rs: A7A6A5A4A3A2A1A0
3562 // Rd: 00A700A600A500A4
3563 void MacroAssembler::inflate_hi32(Register Rd, Register Rs, Register tmp1, Register tmp2)
3564 {
3565   assert_different_registers(Rd, Rs, tmp1, tmp2);
3566   mv(tmp1, 0xFF00000000);
3567   mv(Rd, zr);
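       // Pick up bytes A4..A7 from low to high; shifting the partial result right by 8
       // after each step leaves the final layout 00A700A600A500A4.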
3568   for (int i = 0; i <= 3; i++)
3569   {
3570     andr(tmp2, Rs, tmp1);
3571     orr(Rd, Rd, tmp2);
3572     srli(Rd, Rd, 8);
3573     if (i != 3) {
3574       slli(tmp1, tmp1, 8);
3575     }
3576   }
3577 }
3578 
3579 // The size of the blocks erased by the zero_blocks stub.  We must
3580 // handle anything smaller than this ourselves in zero_words().
3581 const int MacroAssembler::zero_words_block_size = 8;
3582 
3583 // zero_words() is used by C2 ClearArray patterns.  It is as small as
3584 // possible, handling small word counts locally and delegating
3585 // anything larger to the zero_blocks stub.  It is expanded many times
3586 // in compiled code, so it is important to keep it short.
3587 
3588 // ptr:   Address of a buffer to be zeroed.
3589 // cnt:   Count in HeapWords.
3590 //
3591 // ptr, cnt, and t0 are clobbered.
3592 address MacroAssembler::zero_words(Register ptr, Register cnt)
3593 {
3594   assert(is_power_of_2(zero_words_block_size), "adjust this");
3595   assert(ptr == x28 && cnt == x29, "mismatch in register usage");
3596   assert_different_registers(cnt, t0);
3597 
3598   BLOCK_COMMENT("zero_words {");
3599   mv(t0, zero_words_block_size);
3600   Label around, done, done16;
3601   bltu(cnt, t0, around);
3602   {
3603     RuntimeAddress zero_blocks = RuntimeAddress(StubRoutines::riscv::zero_blocks());
3604     assert(zero_blocks.target() != NULL, "zero_blocks stub has not been generated");
3605     if (StubRoutines::riscv::complete()) {
3606       address tpc = trampoline_call(zero_blocks);
3607       if (tpc == NULL) {
3608         DEBUG_ONLY(reset_labels(around));
3609         postcond(pc() == badAddress);
3610         return NULL;
3611       }
3612     } else {
3613       jal(zero_blocks);
3614     }
3615   }
3616   bind(around);
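       // Handle the remaining words: for each power-of-two chunk smaller than the block
       // size, store that many zero words if the corresponding bit of cnt is set.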
3617   for (int i = zero_words_block_size >> 1; i > 1; i >>= 1) {
3618     Label l;
3619     andi(t0, cnt, i);
3620     beqz(t0, l);
3621     for (int j = 0; j < i; j++) {
3622       sd(zr, Address(ptr, 0));
3623       addi(ptr, ptr, 8);
3624     }
3625     bind(l);
3626   }
3627   {
3628     Label l;
3629     andi(t0, cnt, 1);
3630     beqz(t0, l);
3631     sd(zr, Address(ptr, 0));
3632     bind(l);
3633   }
3634   BLOCK_COMMENT("} zero_words");
3635   postcond(pc() != badAddress);
3636   return pc();
3637 }
3638 
3639 #define SmallArraySize (18 * BytesPerLong)
3640 
3641 // base:  Address of a buffer to be zeroed, 8-byte aligned.
3642 // cnt:   Immediate count in HeapWords.
3643 void MacroAssembler::zero_words(Register base, u_int64_t cnt)
3644 {
3645   assert_different_registers(base, t0, t1);
3646 
3647   BLOCK_COMMENT("zero_words {");
3648 
3649   if (cnt <= SmallArraySize / BytesPerLong) {
3650     for (int i = 0; i < (int)cnt; i++) {
3651       sd(zr, Address(base, i * wordSize));
3652     }
3653   } else {
3654     const int unroll = 8; // Number of sd(zr, adr) instructions we'll unroll
3655     int remainder = cnt % unroll;
3656     for (int i = 0; i < remainder; i++) {
3657       sd(zr, Address(base, i * wordSize));
3658     }
3659 
3660     Label loop;
3661     Register cnt_reg = t0;
3662     Register loop_base = t1;
3663     cnt = cnt - remainder;
3664     mv(cnt_reg, cnt);
3665     add(loop_base, base, remainder * wordSize);
3666     bind(loop);
3667     sub(cnt_reg, cnt_reg, unroll);
3668     for (int i = 0; i < unroll; i++) {
3669       sd(zr, Address(loop_base, i * wordSize));
3670     }
3671     add(loop_base, loop_base, unroll * wordSize);
3672     bnez(cnt_reg, loop);
3673   }
3674 
3675   BLOCK_COMMENT("} zero_words");
3676 }
3677 
3678 // base:   Address of a buffer to be filled, 8-byte aligned.
3679 // cnt:    Count in 8-byte units.
3680 // value:  Value to fill the buffer with.
3681 // base will point to the end of the buffer after filling.
3682 void MacroAssembler::fill_words(Register base, Register cnt, Register value)
3683 {
3684 //  Algorithm:
3685 //
3686 //    t0 = cnt & 7
3687 //    cnt -= t0
3688 //    p += t0
3689 //    switch (t0):
3690 //      switch start:
3691 //      do while cnt
3692 //        cnt -= 8
3693 //          p[-8] = value
3694 //        case 7:
3695 //          p[-7] = value
3696 //        case 6:
3697 //          p[-6] = value
3698 //          // ...
3699 //        case 1:
3700 //          p[-1] = value
3701 //        case 0:
3702 //          p += 8
3703 //      do-while end
3704 //    switch end
3705 
3706   assert_different_registers(base, cnt, value, t0, t1);
3707 
3708   Label fini, skip, entry, loop;
3709   const int unroll = 8; // Number of sd instructions we'll unroll
3710 
3711   beqz(cnt, fini);
3712 
3713   andi(t0, cnt, unroll - 1);
3714   sub(cnt, cnt, t0);
3715   // base += (cnt % 8) * 8: the first cnt % 8 words are stored by jumping into the tail of the unrolled loop below.
3716   shadd(base, t0, base, t1, 3);
3717   la(t1, entry);
3718   slli(t0, t0, 2); // each sd instruction is 4 bytes; jump back over (cnt % 8) of them: t1 = entry - (cnt % 8) * 4
3719   sub(t1, t1, t0);
3720   jr(t1);
3721 
3722   bind(loop);
3723   add(base, base, unroll * 8);
3724   for (int i = -unroll; i < 0; i++) {
3725     sd(value, Address(base, i * 8));
3726   }
3727   bind(entry);
3728   sub(cnt, cnt, unroll);
3729   bgez(cnt, loop);
3730 
3731   bind(fini);
3732 }
3733 
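     // RISC-V fcvt converts NaN to the largest positive representable integer and raises the
     // invalid flag, while Java defines the result of converting NaN as 0. Clear fcsr before
     // the conversion; if any exception flag other than inexact was raised and the source
     // compares unequal to itself (i.e. it is NaN), force the result to zero.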
3734 #define FCVT_SAFE(FLOATCVT, FLOATEQ)                                                             \
3735 void MacroAssembler:: FLOATCVT##_safe(Register dst, FloatRegister src, Register tmp) {           \
3736   Label L_Okay;                                                                                  \
3737   fscsr(zr);                                                                                     \
3738   FLOATCVT(dst, src);                                                                            \
3739   frcsr(tmp);                                                                                    \
3740   andi(tmp, tmp, 0x1E);                                                                          \
3741   beqz(tmp, L_Okay);                                                                             \
3742   FLOATEQ(tmp, src, src);                                                                        \
3743   bnez(tmp, L_Okay);                                                                             \
3744   mv(dst, zr);                                                                                   \
3745   bind(L_Okay);                                                                                  \
3746 }
3747 
3748 FCVT_SAFE(fcvt_w_s, feq_s)
3749 FCVT_SAFE(fcvt_l_s, feq_s)
3750 FCVT_SAFE(fcvt_w_d, feq_d)
3751 FCVT_SAFE(fcvt_l_d, feq_d)
3752 
3753 #undef FCVT_SAFE
3754 
3755 #define FCMP(FLOATTYPE, FLOATSIG)                                                       \
3756 void MacroAssembler::FLOATTYPE##_compare(Register result, FloatRegister Rs1,            \
3757                                          FloatRegister Rs2, int unordered_result) {     \
3758   Label Ldone;                                                                          \
3759   if (unordered_result < 0) {                                                           \
3760     /* we want -1 for unordered or less than, 0 for equal and 1 for greater than. */    \
3761     /* installs 1 if gt else 0 */                                                       \
3762     flt_##FLOATSIG(result, Rs2, Rs1);                                                   \
3763     /* Rs1 > Rs2, install 1 */                                                          \
3764     bgtz(result, Ldone);                                                                \
3765     feq_##FLOATSIG(result, Rs1, Rs2);                                                   \
3766     addi(result, result, -1);                                                           \
3767     /* Rs1 = Rs2, install 0 */                                                          \
3768     /* NaN or Rs1 < Rs2, install -1 */                                                  \
3769     bind(Ldone);                                                                        \
3770   } else {                                                                              \
3771     /* we want -1 for less than, 0 for equal and 1 for unordered or greater than. */    \
3772     /* installs 1 if gt or unordered else 0 */                                          \
3773     flt_##FLOATSIG(result, Rs1, Rs2);                                                   \
3774     /* Rs1 < Rs2, install -1 */                                                         \
3775     bgtz(result, Ldone);                                                                \
3776     feq_##FLOATSIG(result, Rs1, Rs2);                                                   \
3777     addi(result, result, -1);                                                           \
3778     /* Rs1 = Rs2, install 0 */                                                          \
3779     /* NaN or Rs1 > Rs2, install 1 */                                                   \
3780     bind(Ldone);                                                                        \
3781     neg(result, result);                                                                \
3782   }                                                                                     \
3783 }
3784 
3785 FCMP(float, s);
3786 FCMP(double, d);
3787 
3788 #undef FCMP
3789 
3790 // Zero words; len is in bytes
3791 // Destroys all registers except addr
3792 // len must be a nonzero multiple of wordSize
3793 void MacroAssembler::zero_memory(Register addr, Register len, Register tmp) {
3794   assert_different_registers(addr, len, tmp, t0, t1);
3795 
3796 #ifdef ASSERT
3797   {
3798     Label L;
3799     andi(t0, len, BytesPerWord - 1);
3800     beqz(t0, L);
3801     stop("len is not a multiple of BytesPerWord");
3802     bind(L);
3803   }
3804 #endif // ASSERT
3805 
3806 #ifndef PRODUCT
3807   block_comment("zero memory");
3808 #endif // PRODUCT
3809 
3810   Label loop;
3811   Label entry;
3812 
3813   // Algorithm:
3814   //
3815   //  t0 = cnt & 7
3816   //  cnt -= t0
3817   //  p += t0
3818   //  switch (t0) {
3819   //    do {
3820   //      cnt -= 8
3821   //        p[-8] = 0
3822   //      case 7:
3823   //        p[-7] = 0
3824   //      case 6:
3825   //        p[-6] = 0
3826   //        ...
3827   //      case 1:
3828   //        p[-1] = 0
3829   //      case 0:
3830   //        p += 8
3831   //     } while (cnt)
3832   //  }
3833 
3834   const int unroll = 8;   // Number of sd(zr) instructions we'll unroll
3835 
3836   srli(len, len, LogBytesPerWord);
3837   andi(t0, len, unroll - 1);  // t0 = cnt % unroll
3838   sub(len, len, t0);          // cnt -= unroll
3839   // tmp always points to the end of the region we're about to zero
3840   shadd(tmp, t0, addr, t1, LogBytesPerWord);
3841   la(t1, entry);
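       // Each sd below is 4 bytes; jump back over (len % unroll) of them so the tail of
       // the unrolled loop zeroes the leading len % unroll words.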
3842   slli(t0, t0, 2);
3843   sub(t1, t1, t0);
3844   jr(t1);
3845   bind(loop);
3846   sub(len, len, unroll);
3847   for (int i = -unroll; i < 0; i++) {
3848     Assembler::sd(zr, Address(tmp, i * wordSize));
3849   }
3850   bind(entry);
3851   add(tmp, tmp, unroll * wordSize);
3852   bnez(len, loop);
3853 }
3854 
3855 // shift left by shamt and add
3856 // Rd = (Rs1 << shamt) + Rs2
3857 void MacroAssembler::shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt) {
3858   if (UseZba) {
3859     if (shamt == 1) {
3860       sh1add(Rd, Rs1, Rs2);
3861       return;
3862     } else if (shamt == 2) {
3863       sh2add(Rd, Rs1, Rs2);
3864       return;
3865     } else if (shamt == 3) {
3866       sh3add(Rd, Rs1, Rs2);
3867       return;
3868     }
3869   }
3870 
3871   if (shamt != 0) {
3872     slli(tmp, Rs1, shamt);
3873     add(Rd, Rs2, tmp);
3874   } else {
3875     add(Rd, Rs1, Rs2);
3876   }
3877 }
3878 
3879 void MacroAssembler::zero_extend(Register dst, Register src, int bits) {
3880   if (UseZba && bits == 32) {
3881     zext_w(dst, src);
3882     return;
3883   }
3884 
3885   if (UseZbb && bits == 16) {
3886     zext_h(dst, src);
3887     return;
3888   }
3889 
3890   if (bits == 8) {
3891     zext_b(dst, src);
3892   } else {
3893     slli(dst, src, XLEN - bits);
3894     srli(dst, dst, XLEN - bits);
3895   }
3896 }
3897 
3898 void MacroAssembler::sign_extend(Register dst, Register src, int bits) {
3899   if (UseZbb) {
3900     if (bits == 8) {
3901       sext_b(dst, src);
3902       return;
3903     } else if (bits == 16) {
3904       sext_h(dst, src);
3905       return;
3906     }
3907   }
3908 
3909   if (bits == 32) {
3910     sext_w(dst, src);
3911   } else {
3912     slli(dst, src, XLEN - bits);
3913     srai(dst, dst, XLEN - bits);
3914   }
3915 }
3916 
3917 void MacroAssembler::cmp_l2i(Register dst, Register src1, Register src2, Register tmp)
3918 {
3919   if (src1 == src2) {
3920     mv(dst, zr);
3921     return;
3922   }
3923   Label done;
3924   Register left = src1;
3925   Register right = src2;
3926   if (dst == src1) {
3927     assert_different_registers(dst, src2, tmp);
3928     mv(tmp, src1);
3929     left = tmp;
3930   } else if (dst == src2) {
3931     assert_different_registers(dst, src1, tmp);
3932     mv(tmp, src2);
3933     right = tmp;
3934   }
3935 
3936   // installs 1 if gt else 0
3937   slt(dst, right, left);
3938   bnez(dst, done);
3939   slt(dst, left, right);
3940   // dst = -1 if lt; dst = 0 if eq
3941   neg(dst, dst);
3942   bind(done);
3943 }
3944 
3945 // The java_calling_convention describes stack locations as ideal slots on
3946 // a frame with no abi restrictions. Since we must observe abi restrictions
3947 // (like the placement of the register window) the slots must be biased by
3948 // the following value.
3949 static int reg2offset_in(VMReg r) {
3950   // Account for saved fp and ra
3951   // This should really be in_preserve_stack_slots
3952   return r->reg2stack() * VMRegImpl::stack_slot_size;
3953 }
3954 
3955 static int reg2offset_out(VMReg r) {
3956   return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
3957 }
3958 
3959 // On 64-bit we will store integer-like items to the stack as
3960 // 64-bit items (riscv64 ABI) even though Java would only store
3961 // 32 bits for a parameter. On 32-bit it would simply be 32 bits,
3962 // so this routine does 32->32 on 32-bit and 32->64 on 64-bit.
3963 void MacroAssembler::move32_64(VMRegPair src, VMRegPair dst, Register tmp) {
3964   if (src.first()->is_stack()) {
3965     if (dst.first()->is_stack()) {
3966       // stack to stack
3967       ld(tmp, Address(fp, reg2offset_in(src.first())));
3968       sd(tmp, Address(sp, reg2offset_out(dst.first())));
3969     } else {
3970       // stack to reg
3971       lw(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first())));
3972     }
3973   } else if (dst.first()->is_stack()) {
3974     // reg to stack
3975     sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first())));
3976   } else {
3977     if (dst.first() != src.first()) {
3978       // sign-extend the 32-bit value
3979       addw(dst.first()->as_Register(), src.first()->as_Register(), zr);
3980     }
3981   }
3982 }
3983 
3984 // An oop arg. Must pass a handle, not the oop itself
3985 void MacroAssembler::object_move(OopMap* map,
3986                                  int oop_handle_offset,
3987                                  int framesize_in_slots,
3988                                  VMRegPair src,
3989                                  VMRegPair dst,
3990                                  bool is_receiver,
3991                                  int* receiver_offset) {
3992   assert_cond(map != NULL && receiver_offset != NULL);
3993   // must pass a handle. First figure out the location we use as a handle
3994   Register rHandle = dst.first()->is_stack() ? t1 : dst.first()->as_Register();
3995 
3996   // See if the oop is NULL; if it is, we need no handle.
3997 
3998   if (src.first()->is_stack()) {
3999     // Oop is already on the stack as an argument
4000     int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
4001     map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots));
4002     if (is_receiver) {
4003       *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size;
4004     }
4005 
4006     ld(t0, Address(fp, reg2offset_in(src.first())));
4007     la(rHandle, Address(fp, reg2offset_in(src.first())));
4008     // conditionally move a NULL
4009     Label notZero1;
4010     bnez(t0, notZero1);
4011     mv(rHandle, zr);
4012     bind(notZero1);
4013   } else {
4014 
4015     // The oop is in a register; we must store it to the space we reserve
4016     // on the stack for oop_handles, and pass a handle if the oop is non-NULL.
4017 
4018     const Register rOop = src.first()->as_Register();
4019     int oop_slot = -1;
4020     if (rOop == j_rarg0) {
4021       oop_slot = 0;
4022     } else if (rOop == j_rarg1) {
4023       oop_slot = 1;
4024     } else if (rOop == j_rarg2) {
4025       oop_slot = 2;
4026     } else if (rOop == j_rarg3) {
4027       oop_slot = 3;
4028     } else if (rOop == j_rarg4) {
4029       oop_slot = 4;
4030     } else if (rOop == j_rarg5) {
4031       oop_slot = 5;
4032     } else if (rOop == j_rarg6) {
4033       oop_slot = 6;
4034     } else {
4035       assert(rOop == j_rarg7, "wrong register");
4036       oop_slot = 7;
4037     }
4038 
4039     oop_slot = oop_slot * VMRegImpl::slots_per_word + oop_handle_offset;
4040     int offset = oop_slot * VMRegImpl::stack_slot_size;
4041 
4042     map->set_oop(VMRegImpl::stack2reg(oop_slot));
4043     // Store oop in handle area, may be NULL
4044     sd(rOop, Address(sp, offset));
4045     if (is_receiver) {
4046       *receiver_offset = offset;
4047     }
4048 
4049     // rOop may be the same as rHandle
4050     if (rOop == rHandle) {
4051       Label isZero;
4052       beqz(rOop, isZero);
4053       la(rHandle, Address(sp, offset));
4054       bind(isZero);
4055     } else {
4056       Label notZero2;
4057       la(rHandle, Address(sp, offset));
4058       bnez(rOop, notZero2);
4059       mv(rHandle, zr);
4060       bind(notZero2);
4061     }
4062   }
4063 
4064   // If the arg is on the stack then place it, otherwise it is already in the correct reg.
4065   if (dst.first()->is_stack()) {
4066     sd(rHandle, Address(sp, reg2offset_out(dst.first())));
4067   }
4068 }
4069 
4070 // A float arg may have to do a float reg to int reg conversion
4071 void MacroAssembler::float_move(VMRegPair src, VMRegPair dst, Register tmp) {
4072   assert(src.first()->is_stack() && dst.first()->is_stack() ||
4073          src.first()->is_reg() && dst.first()->is_reg() ||
4074          src.first()->is_stack() && dst.first()->is_reg(), "Unexpected error");
4075   if (src.first()->is_stack()) {
4076     if (dst.first()->is_stack()) {
4077       lwu(tmp, Address(fp, reg2offset_in(src.first())));
4078       sw(tmp, Address(sp, reg2offset_out(dst.first())));
4079     } else if (dst.first()->is_Register()) {
4080       lwu(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first())));
4081     } else {
4082       ShouldNotReachHere();
4083     }
4084   } else if (src.first() != dst.first()) {
4085     if (src.is_single_phys_reg() && dst.is_single_phys_reg()) {
4086       fmv_s(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister());
4087     } else {
4088       ShouldNotReachHere();
4089     }
4090   }
4091 }
4092 
4093 // A long move
4094 void MacroAssembler::long_move(VMRegPair src, VMRegPair dst, Register tmp) {
4095   if (src.first()->is_stack()) {
4096     if (dst.first()->is_stack()) {
4097       // stack to stack
4098       ld(tmp, Address(fp, reg2offset_in(src.first())));
4099       sd(tmp, Address(sp, reg2offset_out(dst.first())));
4100     } else {
4101       // stack to reg
4102       ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first())));
4103     }
4104   } else if (dst.first()->is_stack()) {
4105     // reg to stack
4106     sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first())));
4107   } else {
4108     if (dst.first() != src.first()) {
4109       mv(dst.first()->as_Register(), src.first()->as_Register());
4110     }
4111   }
4112 }
4113 
4114 // A double move
4115 void MacroAssembler::double_move(VMRegPair src, VMRegPair dst, Register tmp) {
4116   assert(src.first()->is_stack() && dst.first()->is_stack() ||
4117          src.first()->is_reg() && dst.first()->is_reg() ||
4118          src.first()->is_stack() && dst.first()->is_reg(), "Unexpected error");
4119   if (src.first()->is_stack()) {
4120     if (dst.first()->is_stack()) {
4121       ld(tmp, Address(fp, reg2offset_in(src.first())));
4122       sd(tmp, Address(sp, reg2offset_out(dst.first())));
4123     } else if (dst.first()->is_Register()) {
4124       ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first())));
4125     } else {
4126       ShouldNotReachHere();
4127     }
4128   } else if (src.first() != dst.first()) {
4129     if (src.is_single_phys_reg() && dst.is_single_phys_reg()) {
4130       fmv_d(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister());
4131     } else {
4132       ShouldNotReachHere();
4133     }
4134   }
4135 }
4136 
4137 void MacroAssembler::rt_call(address dest, Register tmp) {
4138   CodeBlob *cb = CodeCache::find_blob(dest);
4139   if (cb) {
4140     far_call(RuntimeAddress(dest));
4141   } else {
4142     int32_t offset = 0;
4143     la_patchable(tmp, RuntimeAddress(dest), offset);
4144     jalr(x1, tmp, offset);
4145   }
4146 }
4147 
4148 // Attempt to fast-lock an object. Fall-through on success, branch to slow label
4149 // on failure.
4150 // Registers:
4151 //  - obj: the object to be locked
4152 //  - hdr: the header, already loaded from obj, will be destroyed
4153 //  - tmp1, tmp2, tmp3: temporary registers, will be destroyed
4154 void MacroAssembler::fast_lock(Register obj, Register hdr, Register tmp1, Register tmp2, Register tmp3, Label& slow) {
4155   // Check if we would have space on lock-stack for the object.
4156   ld(tmp1, Address(xthread, Thread::lock_stack_current_offset()));
4157   ld(tmp2, Address(xthread, Thread::lock_stack_limit_offset()));
4158   bge(tmp1, tmp2, slow, true);
4159 
4160   // Load (object->mark() | 1) into hdr
4161   ori(hdr, hdr, markWord::unlocked_value);
4162   // Clear lock-bits, into tmp2
4163   xori(tmp2, hdr, markWord::unlocked_value);
4164   // Try to swing header from unlocked to locked
4165   Label success;
4166   cmpxchgptr(hdr, tmp2, obj, tmp3, success, &slow);
4167   bind(success);
4168 
4169   // After successful lock, push object on lock-stack
4170   sd(obj, Address(tmp1, 0));
4171   add(tmp1, tmp1, oopSize);
4172   sd(tmp1, Address(xthread, Thread::lock_stack_current_offset()));
4173 }
4174 
4175 void MacroAssembler::fast_unlock(Register obj, Register hdr, Register tmp1, Register tmp2, Label& slow) {
4176   // Load the expected old header (lock-bits cleared to indicate 'locked') into hdr
4177   mv(tmp1, ~markWord::lock_mask_in_place);
4178   andr(hdr, hdr, tmp1);
4179 
4180   // Load the new header (unlocked) into tmp1
4181   ori(tmp1, hdr, markWord::unlocked_value);
4182 
4183   // Try to swing header from locked to unlocked
4184   Label success;
4185   cmpxchgptr(hdr, tmp1, obj, tmp2, success, &slow);
4186   bind(success);
4187 
4188   // After successful unlock, pop object from lock-stack
4189   ld(tmp1, Address(xthread, Thread::lock_stack_current_offset()));
4190   sub(tmp1, tmp1, oopSize);
4191   sd(tmp1, Address(xthread, Thread::lock_stack_current_offset()));
4192 }