1 /*
   2  * Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved.
   3  * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
   4  * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
   5  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   6  *
   7  * This code is free software; you can redistribute it and/or modify it
   8  * under the terms of the GNU General Public License version 2 only, as
   9  * published by the Free Software Foundation.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  *
  25  */
  26 
  27 #include "precompiled.hpp"
  28 #include "asm/assembler.hpp"
  29 #include "asm/assembler.inline.hpp"
  30 #include "compiler/disassembler.hpp"
  31 #include "gc/shared/barrierSet.hpp"
  32 #include "gc/shared/barrierSetAssembler.hpp"
  33 #include "gc/shared/cardTable.hpp"
  34 #include "gc/shared/cardTableBarrierSet.hpp"
  35 #include "interpreter/bytecodeHistogram.hpp"
  36 #include "interpreter/interpreter.hpp"
  37 #include "memory/resourceArea.hpp"
  38 #include "memory/universe.hpp"
  39 #include "nativeInst_riscv.hpp"
  40 #include "oops/accessDecorators.hpp"
  41 #include "oops/compressedOops.inline.hpp"
  42 #include "oops/klass.inline.hpp"
  43 #include "oops/oop.hpp"
  44 #include "runtime/interfaceSupport.inline.hpp"
  45 #include "runtime/javaThread.hpp"
  46 #include "runtime/jniHandles.inline.hpp"
  47 #include "runtime/sharedRuntime.hpp"
  48 #include "runtime/stubRoutines.hpp"
  49 #include "utilities/powerOfTwo.hpp"
  50 #ifdef COMPILER2
  51 #include "opto/compile.hpp"
  52 #include "opto/node.hpp"
  53 #include "opto/output.hpp"
  54 #endif
  55 
  56 #ifdef PRODUCT
  57 #define BLOCK_COMMENT(str) /* nothing */
  58 #else
  59 #define BLOCK_COMMENT(str) block_comment(str)
  60 #endif
  61 #define BIND(label) bind(label); __ BLOCK_COMMENT(#label ":")
  62 
  63 static void pass_arg0(MacroAssembler* masm, Register arg) {
  64   if (c_rarg0 != arg) {
  65     masm->mv(c_rarg0, arg);
  66   }
  67 }
  68 
  69 static void pass_arg1(MacroAssembler* masm, Register arg) {
  70   if (c_rarg1 != arg) {
  71     masm->mv(c_rarg1, arg);
  72   }
  73 }
  74 
  75 static void pass_arg2(MacroAssembler* masm, Register arg) {
  76   if (c_rarg2 != arg) {
  77     masm->mv(c_rarg2, arg);
  78   }
  79 }
  80 
  81 static void pass_arg3(MacroAssembler* masm, Register arg) {
  82   if (c_rarg3 != arg) {
  83     masm->mv(c_rarg3, arg);
  84   }
  85 }
  86 
  87 int MacroAssembler::align(int modulus, int extra_offset) {
  88   CompressibleRegion cr(this);
  89   intptr_t before = offset();
  90   while ((offset() + extra_offset) % modulus != 0) { nop(); }
  91   return (int)(offset() - before);
  92 }
  93 
  94 void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
  95   call_VM_base(oop_result, noreg, noreg, entry_point, number_of_arguments, check_exceptions);
  96 }
  97 
  98 // Implementation of call_VM versions
  99 
 100 void MacroAssembler::call_VM(Register oop_result,
 101                              address entry_point,
 102                              bool check_exceptions) {
 103   call_VM_helper(oop_result, entry_point, 0, check_exceptions);
 104 }
 105 
 106 void MacroAssembler::call_VM(Register oop_result,
 107                              address entry_point,
 108                              Register arg_1,
 109                              bool check_exceptions) {
 110   pass_arg1(this, arg_1);
 111   call_VM_helper(oop_result, entry_point, 1, check_exceptions);
 112 }
 113 
 114 void MacroAssembler::call_VM(Register oop_result,
 115                              address entry_point,
 116                              Register arg_1,
 117                              Register arg_2,
 118                              bool check_exceptions) {
 119   assert(arg_1 != c_rarg2, "smashed arg");
 120   pass_arg2(this, arg_2);
 121   pass_arg1(this, arg_1);
 122   call_VM_helper(oop_result, entry_point, 2, check_exceptions);
 123 }
 124 
 125 void MacroAssembler::call_VM(Register oop_result,
 126                              address entry_point,
 127                              Register arg_1,
 128                              Register arg_2,
 129                              Register arg_3,
 130                              bool check_exceptions) {
 131   assert(arg_1 != c_rarg3, "smashed arg");
 132   assert(arg_2 != c_rarg3, "smashed arg");
 133   pass_arg3(this, arg_3);
 134 
 135   assert(arg_1 != c_rarg2, "smashed arg");
 136   pass_arg2(this, arg_2);
 137 
 138   pass_arg1(this, arg_1);
 139   call_VM_helper(oop_result, entry_point, 3, check_exceptions);
 140 }
 141 
 142 void MacroAssembler::call_VM(Register oop_result,
 143                              Register last_java_sp,
 144                              address entry_point,
 145                              int number_of_arguments,
 146                              bool check_exceptions) {
 147   call_VM_base(oop_result, xthread, last_java_sp, entry_point, number_of_arguments, check_exceptions);
 148 }
 149 
 150 void MacroAssembler::call_VM(Register oop_result,
 151                              Register last_java_sp,
 152                              address entry_point,
 153                              Register arg_1,
 154                              bool check_exceptions) {
 155   pass_arg1(this, arg_1);
 156   call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
 157 }
 158 
 159 void MacroAssembler::call_VM(Register oop_result,
 160                              Register last_java_sp,
 161                              address entry_point,
 162                              Register arg_1,
 163                              Register arg_2,
 164                              bool check_exceptions) {
 165 
 166   assert(arg_1 != c_rarg2, "smashed arg");
 167   pass_arg2(this, arg_2);
 168   pass_arg1(this, arg_1);
 169   call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
 170 }
 171 
 172 void MacroAssembler::call_VM(Register oop_result,
 173                              Register last_java_sp,
 174                              address entry_point,
 175                              Register arg_1,
 176                              Register arg_2,
 177                              Register arg_3,
 178                              bool check_exceptions) {
 179   assert(arg_1 != c_rarg3, "smashed arg");
 180   assert(arg_2 != c_rarg3, "smashed arg");
 181   pass_arg3(this, arg_3);
 182   assert(arg_1 != c_rarg2, "smashed arg");
 183   pass_arg2(this, arg_2);
 184   pass_arg1(this, arg_1);
 185   call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
 186 }
 187 
 188 // these are no-ops overridden by InterpreterMacroAssembler
 189 void MacroAssembler::check_and_handle_earlyret(Register java_thread) {}
 190 void MacroAssembler::check_and_handle_popframe(Register java_thread) {}
 191 
 192 // Calls to C land
 193 //
 194 // When entering C land, the fp, & esp of the last Java frame have to be recorded
 195 // in the (thread-local) JavaThread object. When leaving C land, the last Java fp
 196 // has to be reset to 0. This is required to allow proper stack traversal.
 197 void MacroAssembler::set_last_Java_frame(Register last_java_sp,
 198                                          Register last_java_fp,
 199                                          Register last_java_pc,
 200                                          Register tmp) {
 201 
 202   if (last_java_pc->is_valid()) {
 203       sd(last_java_pc, Address(xthread,
 204                                JavaThread::frame_anchor_offset() +
 205                                JavaFrameAnchor::last_Java_pc_offset()));
 206   }
 207 
 208   // determine last_java_sp register
 209   if (last_java_sp == sp) {
 210     mv(tmp, sp);
 211     last_java_sp = tmp;
 212   } else if (!last_java_sp->is_valid()) {
 213     last_java_sp = esp;
 214   }
 215 
 216   sd(last_java_sp, Address(xthread, JavaThread::last_Java_sp_offset()));
 217 
 218   // last_java_fp is optional
 219   if (last_java_fp->is_valid()) {
 220     sd(last_java_fp, Address(xthread, JavaThread::last_Java_fp_offset()));
 221   }
 222 }
 223 
 224 void MacroAssembler::set_last_Java_frame(Register last_java_sp,
 225                                          Register last_java_fp,
 226                                          address  last_java_pc,
 227                                          Register tmp) {
 228   assert(last_java_pc != NULL, "must provide a valid PC");
 229 
 230   la(tmp, last_java_pc);
 231   sd(tmp, Address(xthread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()));
 232 
 233   set_last_Java_frame(last_java_sp, last_java_fp, noreg, tmp);
 234 }
 235 
 236 void MacroAssembler::set_last_Java_frame(Register last_java_sp,
 237                                          Register last_java_fp,
 238                                          Label &L,
 239                                          Register tmp) {
 240   if (L.is_bound()) {
 241     set_last_Java_frame(last_java_sp, last_java_fp, target(L), tmp);
 242   } else {
 243     L.add_patch_at(code(), locator());
 244     set_last_Java_frame(last_java_sp, last_java_fp, pc() /* Patched later */, tmp);
 245   }
 246 }
 247 
 248 void MacroAssembler::reset_last_Java_frame(bool clear_fp) {
 249   // we must set sp to zero to clear frame
 250   sd(zr, Address(xthread, JavaThread::last_Java_sp_offset()));
 251 
 252   // must clear fp, so that compiled frames are not confused; it is
 253   // possible that we need it only for debugging
 254   if (clear_fp) {
 255     sd(zr, Address(xthread, JavaThread::last_Java_fp_offset()));
 256   }
 257 
 258   // Always clear the pc because it could have been set by make_walkable()
 259   sd(zr, Address(xthread, JavaThread::last_Java_pc_offset()));
 260 }
 261 
 262 void MacroAssembler::call_VM_base(Register oop_result,
 263                                   Register java_thread,
 264                                   Register last_java_sp,
 265                                   address  entry_point,
 266                                   int      number_of_arguments,
 267                                   bool     check_exceptions) {
 268    // determine java_thread register
 269   if (!java_thread->is_valid()) {
 270     java_thread = xthread;
 271   }
 272   // determine last_java_sp register
 273   if (!last_java_sp->is_valid()) {
 274     last_java_sp = esp;
 275   }
 276 
 277   // debugging support
 278   assert(number_of_arguments >= 0   , "cannot have negative number of arguments");
 279   assert(java_thread == xthread, "unexpected register");
 280 
 281   assert(java_thread != oop_result  , "cannot use the same register for java_thread & oop_result");
 282   assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp");
 283 
 284   // push java thread (becomes first argument of C function)
 285   mv(c_rarg0, java_thread);
 286 
 287   // set last Java frame before call
 288   assert(last_java_sp != fp, "can't use fp");
 289 
 290   Label l;
 291   set_last_Java_frame(last_java_sp, fp, l, t0);
 292 
 293   // do the call, remove parameters
 294   MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments, &l);
 295 
 296   // reset last Java frame
 297   // Only interpreter should have to clear fp
 298   reset_last_Java_frame(true);
 299 
 300    // C++ interp handles this in the interpreter
 301   check_and_handle_popframe(java_thread);
 302   check_and_handle_earlyret(java_thread);
 303 
 304   if (check_exceptions) {
 305     // check for pending exceptions (java_thread is set upon return)
 306     ld(t0, Address(java_thread, in_bytes(Thread::pending_exception_offset())));
 307     Label ok;
 308     beqz(t0, ok);
 309     int32_t offset = 0;
 310     la_patchable(t0, RuntimeAddress(StubRoutines::forward_exception_entry()), offset);
 311     jalr(x0, t0, offset);
 312     bind(ok);
 313   }
 314 
 315   // get oop result if there is one and reset the value in the thread
 316   if (oop_result->is_valid()) {
 317     get_vm_result(oop_result, java_thread);
 318   }
 319 }
 320 
 321 void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) {
 322   ld(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
 323   sd(zr, Address(java_thread, JavaThread::vm_result_offset()));
 324   verify_oop_msg(oop_result, "broken oop in call_VM_base");
 325 }
 326 
 327 void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) {
 328   ld(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset()));
 329   sd(zr, Address(java_thread, JavaThread::vm_result_2_offset()));
 330 }
 331 
 332 void MacroAssembler::clinit_barrier(Register klass, Register tmp, Label* L_fast_path, Label* L_slow_path) {
 333   assert(L_fast_path != NULL || L_slow_path != NULL, "at least one is required");
 334   assert_different_registers(klass, xthread, tmp);
 335 
 336   Label L_fallthrough, L_tmp;
 337   if (L_fast_path == NULL) {
 338     L_fast_path = &L_fallthrough;
 339   } else if (L_slow_path == NULL) {
 340     L_slow_path = &L_fallthrough;
 341   }
 342 
 343   // Fast path check: class is fully initialized
 344   lbu(tmp, Address(klass, InstanceKlass::init_state_offset()));
 345   sub(tmp, tmp, InstanceKlass::fully_initialized);
 346   beqz(tmp, *L_fast_path);
 347 
 348   // Fast path check: current thread is initializer thread
 349   ld(tmp, Address(klass, InstanceKlass::init_thread_offset()));
 350 
 351   if (L_slow_path == &L_fallthrough) {
 352     beq(xthread, tmp, *L_fast_path);
 353     bind(*L_slow_path);
 354   } else if (L_fast_path == &L_fallthrough) {
 355     bne(xthread, tmp, *L_slow_path);
 356     bind(*L_fast_path);
 357   } else {
 358     Unimplemented();
 359   }
 360 }
 361 
 362 void MacroAssembler::_verify_oop(Register reg, const char* s, const char* file, int line) {
 363   if (!VerifyOops) { return; }
 364 
 365   // Pass register number to verify_oop_subroutine
 366   const char* b = NULL;
 367   {
 368     ResourceMark rm;
 369     stringStream ss;
 370     ss.print("verify_oop: %s: %s (%s:%d)", reg->name(), s, file, line);
 371     b = code_string(ss.as_string());
 372   }
 373   BLOCK_COMMENT("verify_oop {");
 374 
 375   push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp);
 376 
 377   mv(c_rarg0, reg); // c_rarg0 : x10
 378   // The length of the instruction sequence emitted should be independent
 379   // of the value of the local char buffer address so that the size of mach
 380   // nodes for scratch emit and normal emit matches.
 381   movptr(t0, (address)b);
 382 
 383   // call indirectly to solve generation ordering problem
 384   int32_t offset = 0;
 385   la_patchable(t1, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()), offset);
 386   ld(t1, Address(t1, offset));
 387   jalr(t1);
 388 
 389   pop_reg(RegSet::of(ra, t0, t1, c_rarg0), sp);
 390 
 391   BLOCK_COMMENT("} verify_oop");
 392 }
 393 
 394 void MacroAssembler::_verify_oop_addr(Address addr, const char* s, const char* file, int line) {
 395   if (!VerifyOops) {
 396     return;
 397   }
 398 
 399   const char* b = NULL;
 400   {
 401     ResourceMark rm;
 402     stringStream ss;
 403     ss.print("verify_oop_addr: %s (%s:%d)", s, file, line);
 404     b = code_string(ss.as_string());
 405   }
 406   BLOCK_COMMENT("verify_oop_addr {");
 407 
 408   push_reg(RegSet::of(ra, t0, t1, c_rarg0), sp);
 409 
 410   if (addr.uses(sp)) {
 411     la(x10, addr);
 412     ld(x10, Address(x10, 4 * wordSize));
 413   } else {
 414     ld(x10, addr);
 415   }
 416 
 417   // The length of the instruction sequence emitted should be independent
 418   // of the value of the local char buffer address so that the size of mach
 419   // nodes for scratch emit and normal emit matches.
 420   movptr(t0, (address)b);
 421 
 422   // call indirectly to solve generation ordering problem
 423   int32_t offset = 0;
 424   la_patchable(t1, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()), offset);
 425   ld(t1, Address(t1, offset));
 426   jalr(t1);
 427 
 428   pop_reg(RegSet::of(ra, t0, t1, c_rarg0), sp);
 429 
 430   BLOCK_COMMENT("} verify_oop_addr");
 431 }
 432 
 433 Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
 434                                          int extra_slot_offset) {
 435   // cf. TemplateTable::prepare_invoke(), if (load_receiver).
 436   int stackElementSize = Interpreter::stackElementSize;
 437   int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
 438 #ifdef ASSERT
 439   int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
 440   assert(offset1 - offset == stackElementSize, "correct arithmetic");
 441 #endif
 442   if (arg_slot.is_constant()) {
 443     return Address(esp, arg_slot.as_constant() * stackElementSize + offset);
 444   } else {
 445     assert_different_registers(t0, arg_slot.as_register());
 446     shadd(t0, arg_slot.as_register(), esp, t0, exact_log2(stackElementSize));
 447     return Address(t0, offset);
 448   }
 449 }
 450 
 451 #ifndef PRODUCT
 452 extern "C" void findpc(intptr_t x);
 453 #endif
 454 
 455 void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[])
 456 {
 457   // In order to get locks to work, we need to fake a in_VM state
 458   if (ShowMessageBoxOnError) {
 459     JavaThread* thread = JavaThread::current();
 460     JavaThreadState saved_state = thread->thread_state();
 461     thread->set_thread_state(_thread_in_vm);
 462 #ifndef PRODUCT
 463     if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
 464       ttyLocker ttyl;
 465       BytecodeCounter::print();
 466     }
 467 #endif
 468     if (os::message_box(msg, "Execution stopped, print registers?")) {
 469       ttyLocker ttyl;
 470       tty->print_cr(" pc = 0x%016lx", pc);
 471 #ifndef PRODUCT
 472       tty->cr();
 473       findpc(pc);
 474       tty->cr();
 475 #endif
 476       tty->print_cr(" x0 = 0x%016lx", regs[0]);
 477       tty->print_cr(" x1 = 0x%016lx", regs[1]);
 478       tty->print_cr(" x2 = 0x%016lx", regs[2]);
 479       tty->print_cr(" x3 = 0x%016lx", regs[3]);
 480       tty->print_cr(" x4 = 0x%016lx", regs[4]);
 481       tty->print_cr(" x5 = 0x%016lx", regs[5]);
 482       tty->print_cr(" x6 = 0x%016lx", regs[6]);
 483       tty->print_cr(" x7 = 0x%016lx", regs[7]);
 484       tty->print_cr(" x8 = 0x%016lx", regs[8]);
 485       tty->print_cr(" x9 = 0x%016lx", regs[9]);
 486       tty->print_cr("x10 = 0x%016lx", regs[10]);
 487       tty->print_cr("x11 = 0x%016lx", regs[11]);
 488       tty->print_cr("x12 = 0x%016lx", regs[12]);
 489       tty->print_cr("x13 = 0x%016lx", regs[13]);
 490       tty->print_cr("x14 = 0x%016lx", regs[14]);
 491       tty->print_cr("x15 = 0x%016lx", regs[15]);
 492       tty->print_cr("x16 = 0x%016lx", regs[16]);
 493       tty->print_cr("x17 = 0x%016lx", regs[17]);
 494       tty->print_cr("x18 = 0x%016lx", regs[18]);
 495       tty->print_cr("x19 = 0x%016lx", regs[19]);
 496       tty->print_cr("x20 = 0x%016lx", regs[20]);
 497       tty->print_cr("x21 = 0x%016lx", regs[21]);
 498       tty->print_cr("x22 = 0x%016lx", regs[22]);
 499       tty->print_cr("x23 = 0x%016lx", regs[23]);
 500       tty->print_cr("x24 = 0x%016lx", regs[24]);
 501       tty->print_cr("x25 = 0x%016lx", regs[25]);
 502       tty->print_cr("x26 = 0x%016lx", regs[26]);
 503       tty->print_cr("x27 = 0x%016lx", regs[27]);
 504       tty->print_cr("x28 = 0x%016lx", regs[28]);
 505       tty->print_cr("x30 = 0x%016lx", regs[30]);
 506       tty->print_cr("x31 = 0x%016lx", regs[31]);
 507       BREAKPOINT;
 508     }
 509   }
 510   fatal("DEBUG MESSAGE: %s", msg);
 511 }
 512 
 513 void MacroAssembler::resolve_jobject(Register value, Register tmp1, Register tmp2) {
 514   Label done, not_weak;
 515   beqz(value, done);           // Use NULL as-is.
 516 
 517   // Test for jweak tag.
 518   andi(t0, value, JNIHandles::weak_tag_mask);
 519   beqz(t0, not_weak);
 520 
 521   // Resolve jweak.
 522   access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, value,
 523                  Address(value, -JNIHandles::weak_tag_value), tmp1, tmp2);
 524   verify_oop(value);
 525   j(done);
 526 
 527   bind(not_weak);
 528   // Resolve (untagged) jobject.
 529   access_load_at(T_OBJECT, IN_NATIVE, value, Address(value, 0), tmp1, tmp2);
 530   verify_oop(value);
 531   bind(done);
 532 }
 533 
 534 void MacroAssembler::stop(const char* msg) {
 535   BLOCK_COMMENT(msg);
 536   illegal_instruction(Assembler::csr::time);
 537   emit_int64((uintptr_t)msg);
 538 }
 539 
 540 void MacroAssembler::unimplemented(const char* what) {
 541   const char* buf = NULL;
 542   {
 543     ResourceMark rm;
 544     stringStream ss;
 545     ss.print("unimplemented: %s", what);
 546     buf = code_string(ss.as_string());
 547   }
 548   stop(buf);
 549 }
 550 
 551 void MacroAssembler::emit_static_call_stub() {
 552   // CompiledDirectStaticCall::set_to_interpreted knows the
 553   // exact layout of this stub.
 554 
 555   mov_metadata(xmethod, (Metadata*)NULL);
 556 
 557   // Jump to the entry point of the i2c stub.
 558   int32_t offset = 0;
 559   movptr(t0, 0, offset);
 560   jalr(x0, t0, offset);
 561 }
 562 
 563 void MacroAssembler::call_VM_leaf_base(address entry_point,
 564                                        int number_of_arguments,
 565                                        Label *retaddr) {
 566   int32_t offset = 0;
 567   push_reg(RegSet::of(t0, xmethod), sp);   // push << t0 & xmethod >> to sp
 568   movptr(t0, entry_point, offset);
 569   jalr(x1, t0, offset);
 570   if (retaddr != NULL) {
 571     bind(*retaddr);
 572   }
 573   pop_reg(RegSet::of(t0, xmethod), sp);   // pop << t0 & xmethod >> from sp
 574 }
 575 
 576 void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
 577   call_VM_leaf_base(entry_point, number_of_arguments);
 578 }
 579 
 580 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) {
 581   pass_arg0(this, arg_0);
 582   call_VM_leaf_base(entry_point, 1);
 583 }
 584 
 585 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
 586   pass_arg0(this, arg_0);
 587   pass_arg1(this, arg_1);
 588   call_VM_leaf_base(entry_point, 2);
 589 }
 590 
 591 void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0,
 592                                   Register arg_1, Register arg_2) {
 593   pass_arg0(this, arg_0);
 594   pass_arg1(this, arg_1);
 595   pass_arg2(this, arg_2);
 596   call_VM_leaf_base(entry_point, 3);
 597 }
 598 
 599 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) {
 600   pass_arg0(this, arg_0);
 601   MacroAssembler::call_VM_leaf_base(entry_point, 1);
 602 }
 603 
 604 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
 605 
 606   assert(arg_0 != c_rarg1, "smashed arg");
 607   pass_arg1(this, arg_1);
 608   pass_arg0(this, arg_0);
 609   MacroAssembler::call_VM_leaf_base(entry_point, 2);
 610 }
 611 
 612 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
 613   assert(arg_0 != c_rarg2, "smashed arg");
 614   assert(arg_1 != c_rarg2, "smashed arg");
 615   pass_arg2(this, arg_2);
 616   assert(arg_0 != c_rarg1, "smashed arg");
 617   pass_arg1(this, arg_1);
 618   pass_arg0(this, arg_0);
 619   MacroAssembler::call_VM_leaf_base(entry_point, 3);
 620 }
 621 
 622 void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) {
 623   assert(arg_0 != c_rarg3, "smashed arg");
 624   assert(arg_1 != c_rarg3, "smashed arg");
 625   assert(arg_2 != c_rarg3, "smashed arg");
 626   pass_arg3(this, arg_3);
 627   assert(arg_0 != c_rarg2, "smashed arg");
 628   assert(arg_1 != c_rarg2, "smashed arg");
 629   pass_arg2(this, arg_2);
 630   assert(arg_0 != c_rarg1, "smashed arg");
 631   pass_arg1(this, arg_1);
 632   pass_arg0(this, arg_0);
 633   MacroAssembler::call_VM_leaf_base(entry_point, 4);
 634 }
 635 
 636 void MacroAssembler::nop() {
 637   addi(x0, x0, 0);
 638 }
 639 
 640 void MacroAssembler::mv(Register Rd, Register Rs) {
 641   if (Rd != Rs) {
 642     addi(Rd, Rs, 0);
 643   }
 644 }
 645 
 646 void MacroAssembler::notr(Register Rd, Register Rs) {
 647   xori(Rd, Rs, -1);
 648 }
 649 
 650 void MacroAssembler::neg(Register Rd, Register Rs) {
 651   sub(Rd, x0, Rs);
 652 }
 653 
 654 void MacroAssembler::negw(Register Rd, Register Rs) {
 655   subw(Rd, x0, Rs);
 656 }
 657 
 658 void MacroAssembler::sext_w(Register Rd, Register Rs) {
 659   addiw(Rd, Rs, 0);
 660 }
 661 
 662 void MacroAssembler::zext_b(Register Rd, Register Rs) {
 663   andi(Rd, Rs, 0xFF);
 664 }
 665 
 666 void MacroAssembler::seqz(Register Rd, Register Rs) {
 667   sltiu(Rd, Rs, 1);
 668 }
 669 
 670 void MacroAssembler::snez(Register Rd, Register Rs) {
 671   sltu(Rd, x0, Rs);
 672 }
 673 
 674 void MacroAssembler::sltz(Register Rd, Register Rs) {
 675   slt(Rd, Rs, x0);
 676 }
 677 
 678 void MacroAssembler::sgtz(Register Rd, Register Rs) {
 679   slt(Rd, x0, Rs);
 680 }
 681 
 682 void MacroAssembler::fmv_s(FloatRegister Rd, FloatRegister Rs) {
 683   if (Rd != Rs) {
 684     fsgnj_s(Rd, Rs, Rs);
 685   }
 686 }
 687 
 688 void MacroAssembler::fabs_s(FloatRegister Rd, FloatRegister Rs) {
 689   fsgnjx_s(Rd, Rs, Rs);
 690 }
 691 
 692 void MacroAssembler::fneg_s(FloatRegister Rd, FloatRegister Rs) {
 693   fsgnjn_s(Rd, Rs, Rs);
 694 }
 695 
 696 void MacroAssembler::fmv_d(FloatRegister Rd, FloatRegister Rs) {
 697   if (Rd != Rs) {
 698     fsgnj_d(Rd, Rs, Rs);
 699   }
 700 }
 701 
 702 void MacroAssembler::fabs_d(FloatRegister Rd, FloatRegister Rs) {
 703   fsgnjx_d(Rd, Rs, Rs);
 704 }
 705 
 706 void MacroAssembler::fneg_d(FloatRegister Rd, FloatRegister Rs) {
 707   fsgnjn_d(Rd, Rs, Rs);
 708 }
 709 
 710 void MacroAssembler::vmnot_m(VectorRegister vd, VectorRegister vs) {
 711   vmnand_mm(vd, vs, vs);
 712 }
 713 
 714 void MacroAssembler::vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm) {
 715   vnsrl_wx(vd, vs, x0, vm);
 716 }
 717 
 718 void MacroAssembler::vfneg_v(VectorRegister vd, VectorRegister vs) {
 719   vfsgnjn_vv(vd, vs, vs);
 720 }
 721 
 722 void MacroAssembler::la(Register Rd, const address &dest) {
 723   int64_t offset = dest - pc();
 724   if (is_offset_in_range(offset, 32)) {
 725     auipc(Rd, (int32_t)offset + 0x800);  //0x800, Note:the 11th sign bit
 726     addi(Rd, Rd, ((int64_t)offset << 52) >> 52);
 727   } else {
 728     movptr(Rd, dest);
 729   }
 730 }
 731 
 732 void MacroAssembler::la(Register Rd, const Address &adr) {
 733   switch (adr.getMode()) {
 734     case Address::literal: {
 735       relocInfo::relocType rtype = adr.rspec().reloc()->type();
 736       if (rtype == relocInfo::none) {
 737         mv(Rd, (intptr_t)(adr.target()));
 738       } else {
 739         relocate(adr.rspec());
 740         movptr(Rd, adr.target());
 741       }
 742       break;
 743     }
 744     case Address::base_plus_offset: {
 745       int32_t offset = 0;
 746       baseOffset(Rd, adr, offset);
 747       addi(Rd, Rd, offset);
 748       break;
 749     }
 750     default:
 751       ShouldNotReachHere();
 752   }
 753 }
 754 
 755 void MacroAssembler::la(Register Rd, Label &label) {
 756   la(Rd, target(label));
 757 }
 758 
 759 #define INSN(NAME)                                                                \
 760   void MacroAssembler::NAME##z(Register Rs, const address &dest) {                \
 761     NAME(Rs, zr, dest);                                                           \
 762   }                                                                               \
 763   void MacroAssembler::NAME##z(Register Rs, Label &l, bool is_far) {              \
 764     NAME(Rs, zr, l, is_far);                                                      \
 765   }                                                                               \
 766 
 767   INSN(beq);
 768   INSN(bne);
 769   INSN(blt);
 770   INSN(ble);
 771   INSN(bge);
 772   INSN(bgt);
 773 
 774 #undef INSN
 775 
 776 // Float compare branch instructions
 777 
 778 #define INSN(NAME, FLOATCMP, BRANCH)                                                                                   \
 779   void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far, bool is_unordered) {  \
 780     FLOATCMP##_s(t0, Rs1, Rs2);                                                                                        \
 781     BRANCH(t0, l, is_far);                                                                                             \
 782   }                                                                                                                    \
 783   void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far, bool is_unordered) { \
 784     FLOATCMP##_d(t0, Rs1, Rs2);                                                                                        \
 785     BRANCH(t0, l, is_far);                                                                                             \
 786   }
 787 
 788   INSN(beq, feq, bnez);
 789   INSN(bne, feq, beqz);
 790 
 791 #undef INSN
 792 
 793 
 794 #define INSN(NAME, FLOATCMP1, FLOATCMP2)                                              \
 795   void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l,   \
 796                                     bool is_far, bool is_unordered) {                 \
 797     if (is_unordered) {                                                               \
 798       /* jump if either source is NaN or condition is expected */                     \
 799       FLOATCMP2##_s(t0, Rs2, Rs1);                                                    \
 800       beqz(t0, l, is_far);                                                            \
 801     } else {                                                                          \
 802       /* jump if no NaN in source and condition is expected */                        \
 803       FLOATCMP1##_s(t0, Rs1, Rs2);                                                    \
 804       bnez(t0, l, is_far);                                                            \
 805     }                                                                                 \
 806   }                                                                                   \
 807   void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l,  \
 808                                      bool is_far, bool is_unordered) {                \
 809     if (is_unordered) {                                                               \
 810       /* jump if either source is NaN or condition is expected */                     \
 811       FLOATCMP2##_d(t0, Rs2, Rs1);                                                    \
 812       beqz(t0, l, is_far);                                                            \
 813     } else {                                                                          \
 814       /* jump if no NaN in source and condition is expected */                        \
 815       FLOATCMP1##_d(t0, Rs1, Rs2);                                                    \
 816       bnez(t0, l, is_far);                                                            \
 817     }                                                                                 \
 818   }
 819 
 820   INSN(ble, fle, flt);
 821   INSN(blt, flt, fle);
 822 
 823 #undef INSN
 824 
 825 #define INSN(NAME, CMP)                                                              \
 826   void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l,  \
 827                                     bool is_far, bool is_unordered) {                \
 828     float_##CMP(Rs2, Rs1, l, is_far, is_unordered);                                  \
 829   }                                                                                  \
 830   void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \
 831                                      bool is_far, bool is_unordered) {               \
 832     double_##CMP(Rs2, Rs1, l, is_far, is_unordered);                                 \
 833   }
 834 
 835   INSN(bgt, blt);
 836   INSN(bge, ble);
 837 
 838 #undef INSN
 839 
 840 
 841 #define INSN(NAME, CSR)                       \
 842   void MacroAssembler::NAME(Register Rd) {    \
 843     csrr(Rd, CSR);                            \
 844   }
 845 
 846   INSN(rdinstret,  CSR_INSTERT);
 847   INSN(rdcycle,    CSR_CYCLE);
 848   INSN(rdtime,     CSR_TIME);
 849   INSN(frcsr,      CSR_FCSR);
 850   INSN(frrm,       CSR_FRM);
 851   INSN(frflags,    CSR_FFLAGS);
 852 
 853 #undef INSN
 854 
 855 void MacroAssembler::csrr(Register Rd, unsigned csr) {
 856   csrrs(Rd, csr, x0);
 857 }
 858 
 859 #define INSN(NAME, OPFUN)                                      \
 860   void MacroAssembler::NAME(unsigned csr, Register Rs) {       \
 861     OPFUN(x0, csr, Rs);                                        \
 862   }
 863 
 864   INSN(csrw, csrrw);
 865   INSN(csrs, csrrs);
 866   INSN(csrc, csrrc);
 867 
 868 #undef INSN
 869 
 870 #define INSN(NAME, OPFUN)                                      \
 871   void MacroAssembler::NAME(unsigned csr, unsigned imm) {      \
 872     OPFUN(x0, csr, imm);                                       \
 873   }
 874 
 875   INSN(csrwi, csrrwi);
 876   INSN(csrsi, csrrsi);
 877   INSN(csrci, csrrci);
 878 
 879 #undef INSN
 880 
 881 #define INSN(NAME, CSR)                                      \
 882   void MacroAssembler::NAME(Register Rd, Register Rs) {      \
 883     csrrw(Rd, CSR, Rs);                                      \
 884   }
 885 
 886   INSN(fscsr,   CSR_FCSR);
 887   INSN(fsrm,    CSR_FRM);
 888   INSN(fsflags, CSR_FFLAGS);
 889 
 890 #undef INSN
 891 
 892 #define INSN(NAME)                              \
 893   void MacroAssembler::NAME(Register Rs) {      \
 894     NAME(x0, Rs);                               \
 895   }
 896 
 897   INSN(fscsr);
 898   INSN(fsrm);
 899   INSN(fsflags);
 900 
 901 #undef INSN
 902 
 903 void MacroAssembler::fsrmi(Register Rd, unsigned imm) {
 904   guarantee(imm < 5, "Rounding Mode is invalid in Rounding Mode register");
 905   csrrwi(Rd, CSR_FRM, imm);
 906 }
 907 
 908 void MacroAssembler::fsflagsi(Register Rd, unsigned imm) {
 909    csrrwi(Rd, CSR_FFLAGS, imm);
 910 }
 911 
 912 #define INSN(NAME)                             \
 913   void MacroAssembler::NAME(unsigned imm) {    \
 914     NAME(x0, imm);                             \
 915   }
 916 
 917   INSN(fsrmi);
 918   INSN(fsflagsi);
 919 
 920 #undef INSN
 921 
 922 void MacroAssembler::push_reg(Register Rs)
 923 {
 924   addi(esp, esp, 0 - wordSize);
 925   sd(Rs, Address(esp, 0));
 926 }
 927 
 928 void MacroAssembler::pop_reg(Register Rd)
 929 {
 930   ld(Rd, esp, 0);
 931   addi(esp, esp, wordSize);
 932 }
 933 
 934 int MacroAssembler::bitset_to_regs(unsigned int bitset, unsigned char* regs) {
 935   int count = 0;
 936   // Scan bitset to accumulate register pairs
 937   for (int reg = 31; reg >= 0; reg--) {
 938     if ((1U << 31) & bitset) {
 939       regs[count++] = reg;
 940     }
 941     bitset <<= 1;
 942   }
 943   return count;
 944 }
 945 
 946 // Push integer registers in the bitset supplied. Don't push sp.
 947 // Return the number of words pushed
 948 int MacroAssembler::push_reg(unsigned int bitset, Register stack) {
 949   DEBUG_ONLY(int words_pushed = 0;)
 950   CompressibleRegion cr(this);
 951 
 952   unsigned char regs[32];
 953   int count = bitset_to_regs(bitset, regs);
 954   // reserve one slot to align for odd count
 955   int offset = is_even(count) ? 0 : wordSize;
 956 
 957   if (count) {
 958     addi(stack, stack, -count * wordSize - offset);
 959   }
 960   for (int i = count - 1; i >= 0; i--) {
 961     sd(as_Register(regs[i]), Address(stack, (count - 1 - i) * wordSize + offset));
 962     DEBUG_ONLY(words_pushed++;)
 963   }
 964 
 965   assert(words_pushed == count, "oops, pushed != count");
 966 
 967   return count;
 968 }
 969 
 970 int MacroAssembler::pop_reg(unsigned int bitset, Register stack) {
 971   DEBUG_ONLY(int words_popped = 0;)
 972   CompressibleRegion cr(this);
 973 
 974   unsigned char regs[32];
 975   int count = bitset_to_regs(bitset, regs);
 976   // reserve one slot to align for odd count
 977   int offset = is_even(count) ? 0 : wordSize;
 978 
 979   for (int i = count - 1; i >= 0; i--) {
 980     ld(as_Register(regs[i]), Address(stack, (count - 1 - i) * wordSize + offset));
 981     DEBUG_ONLY(words_popped++;)
 982   }
 983 
 984   if (count) {
 985     addi(stack, stack, count * wordSize + offset);
 986   }
 987   assert(words_popped == count, "oops, popped != count");
 988 
 989   return count;
 990 }
 991 
 992 // Push floating-point registers in the bitset supplied.
 993 // Return the number of words pushed
 994 int MacroAssembler::push_fp(unsigned int bitset, Register stack) {
 995   CompressibleRegion cr(this);
 996   DEBUG_ONLY(int words_pushed = 0;)
 997   unsigned char regs[32];
 998   int count = bitset_to_regs(bitset, regs);
 999   int push_slots = count + (count & 1);
1000 
1001   if (count) {
1002     addi(stack, stack, -push_slots * wordSize);
1003   }
1004 
1005   for (int i = count - 1; i >= 0; i--) {
1006     fsd(as_FloatRegister(regs[i]), Address(stack, (push_slots - 1 - i) * wordSize));
1007     DEBUG_ONLY(words_pushed++;)
1008   }
1009 
1010   assert(words_pushed == count, "oops, pushed(%d) != count(%d)", words_pushed, count);
1011 
1012   return count;
1013 }
1014 
1015 int MacroAssembler::pop_fp(unsigned int bitset, Register stack) {
1016   CompressibleRegion cr(this);
1017   DEBUG_ONLY(int words_popped = 0;)
1018   unsigned char regs[32];
1019   int count = bitset_to_regs(bitset, regs);
1020   int pop_slots = count + (count & 1);
1021 
1022   for (int i = count - 1; i >= 0; i--) {
1023     fld(as_FloatRegister(regs[i]), Address(stack, (pop_slots - 1 - i) * wordSize));
1024     DEBUG_ONLY(words_popped++;)
1025   }
1026 
1027   if (count) {
1028     addi(stack, stack, pop_slots * wordSize);
1029   }
1030 
1031   assert(words_popped == count, "oops, popped(%d) != count(%d)", words_popped, count);
1032 
1033   return count;
1034 }
1035 
1036 #ifdef COMPILER2
1037 // Push vector registers in the bitset supplied.
1038 // Return the number of words pushed
1039 int MacroAssembler::push_v(unsigned int bitset, Register stack) {
1040   CompressibleRegion cr(this);
1041   int vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE);
1042 
1043   // Scan bitset to accumulate register pairs
1044   unsigned char regs[32];
1045   int count = bitset_to_regs(bitset, regs);
1046 
1047   for (int i = 0; i < count; i++) {
1048     sub(stack, stack, vector_size_in_bytes);
1049     vs1r_v(as_VectorRegister(regs[i]), stack);
1050   }
1051 
1052   return count * vector_size_in_bytes / wordSize;
1053 }
1054 
1055 int MacroAssembler::pop_v(unsigned int bitset, Register stack) {
1056   CompressibleRegion cr(this);
1057   int vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE);
1058 
1059   // Scan bitset to accumulate register pairs
1060   unsigned char regs[32];
1061   int count = bitset_to_regs(bitset, regs);
1062 
1063   for (int i = count - 1; i >= 0; i--) {
1064     vl1r_v(as_VectorRegister(regs[i]), stack);
1065     add(stack, stack, vector_size_in_bytes);
1066   }
1067 
1068   return count * vector_size_in_bytes / wordSize;
1069 }
1070 #endif // COMPILER2
1071 
1072 void MacroAssembler::push_call_clobbered_registers_except(RegSet exclude) {
1073   CompressibleRegion cr(this);
1074   // Push integer registers x7, x10-x17, x28-x31.
1075   push_reg(RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31) - exclude, sp);
1076 
1077   // Push float registers f0-f7, f10-f17, f28-f31.
1078   addi(sp, sp, - wordSize * 20);
1079   int offset = 0;
1080   for (int i = 0; i < 32; i++) {
1081     if (i <= f7->encoding() || i >= f28->encoding() || (i >= f10->encoding() && i <= f17->encoding())) {
1082       fsd(as_FloatRegister(i), Address(sp, wordSize * (offset++)));
1083     }
1084   }
1085 }
1086 
1087 void MacroAssembler::pop_call_clobbered_registers_except(RegSet exclude) {
1088   CompressibleRegion cr(this);
1089   int offset = 0;
1090   for (int i = 0; i < 32; i++) {
1091     if (i <= f7->encoding() || i >= f28->encoding() || (i >= f10->encoding() && i <= f17->encoding())) {
1092       fld(as_FloatRegister(i), Address(sp, wordSize * (offset++)));
1093     }
1094   }
1095   addi(sp, sp, wordSize * 20);
1096 
1097   pop_reg(RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31) - exclude, sp);
1098 }
1099 
1100 void MacroAssembler::push_CPU_state(bool save_vectors, int vector_size_in_bytes) {
1101   CompressibleRegion cr(this);
1102   // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4)
1103   push_reg(RegSet::range(x5, x31), sp);
1104 
1105   // float registers
1106   addi(sp, sp, - 32 * wordSize);
1107   for (int i = 0; i < 32; i++) {
1108     fsd(as_FloatRegister(i), Address(sp, i * wordSize));
1109   }
1110 
1111   // vector registers
1112   if (save_vectors) {
1113     sub(sp, sp, vector_size_in_bytes * VectorRegister::number_of_registers);
1114     vsetvli(t0, x0, Assembler::e64, Assembler::m8);
1115     for (int i = 0; i < VectorRegister::number_of_registers; i += 8) {
1116       add(t0, sp, vector_size_in_bytes * i);
1117       vse64_v(as_VectorRegister(i), t0);
1118     }
1119   }
1120 }
1121 
1122 void MacroAssembler::pop_CPU_state(bool restore_vectors, int vector_size_in_bytes) {
1123   CompressibleRegion cr(this);
1124   // vector registers
1125   if (restore_vectors) {
1126     vsetvli(t0, x0, Assembler::e64, Assembler::m8);
1127     for (int i = 0; i < VectorRegister::number_of_registers; i += 8) {
1128       vle64_v(as_VectorRegister(i), sp);
1129       add(sp, sp, vector_size_in_bytes * 8);
1130     }
1131   }
1132 
1133   // float registers
1134   for (int i = 0; i < 32; i++) {
1135     fld(as_FloatRegister(i), Address(sp, i * wordSize));
1136   }
1137   addi(sp, sp, 32 * wordSize);
1138 
1139   // integer registers, except zr(x0) & ra(x1) & sp(x2) & gp(x3) & tp(x4)
1140   pop_reg(RegSet::range(x5, x31), sp);
1141 }
1142 
1143 static int patch_offset_in_jal(address branch, int64_t offset) {
1144   assert(is_imm_in_range(offset, 20, 1), "offset is too large to be patched in one jal insrusction!\n");
1145   Assembler::patch(branch, 31, 31, (offset >> 20) & 0x1);                       // offset[20]    ==> branch[31]
1146   Assembler::patch(branch, 30, 21, (offset >> 1)  & 0x3ff);                     // offset[10:1]  ==> branch[30:21]
1147   Assembler::patch(branch, 20, 20, (offset >> 11) & 0x1);                       // offset[11]    ==> branch[20]
1148   Assembler::patch(branch, 19, 12, (offset >> 12) & 0xff);                      // offset[19:12] ==> branch[19:12]
1149   return NativeInstruction::instruction_size;                                   // only one instruction
1150 }
1151 
1152 static int patch_offset_in_conditional_branch(address branch, int64_t offset) {
1153   assert(is_imm_in_range(offset, 12, 1), "offset is too large to be patched in one beq/bge/bgeu/blt/bltu/bne insrusction!\n");
1154   Assembler::patch(branch, 31, 31, (offset >> 12) & 0x1);                       // offset[12]    ==> branch[31]
1155   Assembler::patch(branch, 30, 25, (offset >> 5)  & 0x3f);                      // offset[10:5]  ==> branch[30:25]
1156   Assembler::patch(branch, 7,  7,  (offset >> 11) & 0x1);                       // offset[11]    ==> branch[7]
1157   Assembler::patch(branch, 11, 8,  (offset >> 1)  & 0xf);                       // offset[4:1]   ==> branch[11:8]
1158   return NativeInstruction::instruction_size;                                   // only one instruction
1159 }
1160 
1161 static int patch_offset_in_pc_relative(address branch, int64_t offset) {
1162   const int PC_RELATIVE_INSTRUCTION_NUM = 2;                                    // auipc, addi/jalr/load
1163   Assembler::patch(branch, 31, 12, ((offset + 0x800) >> 12) & 0xfffff);         // Auipc.          offset[31:12]  ==> branch[31:12]
1164   Assembler::patch(branch + 4, 31, 20, offset & 0xfff);                         // Addi/Jalr/Load. offset[11:0]   ==> branch[31:20]
1165   return PC_RELATIVE_INSTRUCTION_NUM * NativeInstruction::instruction_size;
1166 }
1167 
1168 static int patch_addr_in_movptr(address branch, address target) {
1169   const int MOVPTR_INSTRUCTIONS_NUM = 6;                                        // lui + addi + slli + addi + slli + addi/jalr/load
1170   int32_t lower = ((intptr_t)target << 35) >> 35;
1171   int64_t upper = ((intptr_t)target - lower) >> 29;
1172   Assembler::patch(branch + 0,  31, 12, upper & 0xfffff);                       // Lui.             target[48:29] + target[28] ==> branch[31:12]
1173   Assembler::patch(branch + 4,  31, 20, (lower >> 17) & 0xfff);                 // Addi.            target[28:17] ==> branch[31:20]
1174   Assembler::patch(branch + 12, 31, 20, (lower >> 6) & 0x7ff);                  // Addi.            target[16: 6] ==> branch[31:20]
1175   Assembler::patch(branch + 20, 31, 20, lower & 0x3f);                          // Addi/Jalr/Load.  target[ 5: 0] ==> branch[31:20]
1176   return MOVPTR_INSTRUCTIONS_NUM * NativeInstruction::instruction_size;
1177 }
1178 
1179 static int patch_imm_in_li64(address branch, address target) {
1180   const int LI64_INSTRUCTIONS_NUM = 8;                                          // lui + addi + slli + addi + slli + addi + slli + addi
1181   int64_t lower = (intptr_t)target & 0xffffffff;
1182   lower = lower - ((lower << 44) >> 44);
1183   int64_t tmp_imm = ((uint64_t)((intptr_t)target & 0xffffffff00000000)) + (uint64_t)lower;
1184   int32_t upper =  (tmp_imm - (int32_t)lower) >> 32;
1185   int64_t tmp_upper = upper, tmp_lower = upper;
1186   tmp_lower = (tmp_lower << 52) >> 52;
1187   tmp_upper -= tmp_lower;
1188   tmp_upper >>= 12;
1189   // Load upper 32 bits. Upper = target[63:32], but if target[31] = 1 or (target[31:20] == 0x7ff && target[19] == 1),
1190   // upper = target[63:32] + 1.
1191   Assembler::patch(branch + 0,  31, 12, tmp_upper & 0xfffff);                       // Lui.
1192   Assembler::patch(branch + 4,  31, 20, tmp_lower & 0xfff);                         // Addi.
1193   // Load the rest 32 bits.
1194   Assembler::patch(branch + 12, 31, 20, ((int32_t)lower >> 20) & 0xfff);            // Addi.
1195   Assembler::patch(branch + 20, 31, 20, (((intptr_t)target << 44) >> 52) & 0xfff);  // Addi.
1196   Assembler::patch(branch + 28, 31, 20, (intptr_t)target & 0xff);                   // Addi.
1197   return LI64_INSTRUCTIONS_NUM * NativeInstruction::instruction_size;
1198 }
1199 
1200 static int patch_imm_in_li32(address branch, int32_t target) {
1201   const int LI32_INSTRUCTIONS_NUM = 2;                                          // lui + addiw
1202   int64_t upper = (intptr_t)target;
1203   int32_t lower = (((int32_t)target) << 20) >> 20;
1204   upper -= lower;
1205   upper = (int32_t)upper;
1206   Assembler::patch(branch + 0,  31, 12, (upper >> 12) & 0xfffff);               // Lui.
1207   Assembler::patch(branch + 4,  31, 20, lower & 0xfff);                         // Addiw.
1208   return LI32_INSTRUCTIONS_NUM * NativeInstruction::instruction_size;
1209 }
1210 
1211 static long get_offset_of_jal(address insn_addr) {
1212   assert_cond(insn_addr != NULL);
1213   long offset = 0;
1214   unsigned insn = *(unsigned*)insn_addr;
1215   long val = (long)Assembler::sextract(insn, 31, 12);
1216   offset |= ((val >> 19) & 0x1) << 20;
1217   offset |= (val & 0xff) << 12;
1218   offset |= ((val >> 8) & 0x1) << 11;
1219   offset |= ((val >> 9) & 0x3ff) << 1;
1220   offset = (offset << 43) >> 43;
1221   return offset;
1222 }
1223 
1224 static long get_offset_of_conditional_branch(address insn_addr) {
1225   long offset = 0;
1226   assert_cond(insn_addr != NULL);
1227   unsigned insn = *(unsigned*)insn_addr;
1228   offset = (long)Assembler::sextract(insn, 31, 31);
1229   offset = (offset << 12) | (((long)(Assembler::sextract(insn, 7, 7) & 0x1)) << 11);
1230   offset = offset | (((long)(Assembler::sextract(insn, 30, 25) & 0x3f)) << 5);
1231   offset = offset | (((long)(Assembler::sextract(insn, 11, 8) & 0xf)) << 1);
1232   offset = (offset << 41) >> 41;
1233   return offset;
1234 }
1235 
1236 static long get_offset_of_pc_relative(address insn_addr) {
1237   long offset = 0;
1238   assert_cond(insn_addr != NULL);
1239   offset = ((long)(Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12))) << 12;                                  // Auipc.
1240   offset += ((long)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20));                                         // Addi/Jalr/Load.
1241   offset = (offset << 32) >> 32;
1242   return offset;
1243 }
1244 
1245 static address get_target_of_movptr(address insn_addr) {
1246   assert_cond(insn_addr != NULL);
1247   intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 29;    // Lui.
1248   target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)) << 17;                        // Addi.
1249   target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[3], 31, 20)) << 6;                         // Addi.
1250   target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[5], 31, 20));                              // Addi/Jalr/Load.
1251   return (address) target_address;
1252 }
1253 
1254 static address get_target_of_li64(address insn_addr) {
1255   assert_cond(insn_addr != NULL);
1256   intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 44;    // Lui.
1257   target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)) << 32;                        // Addi.
1258   target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[3], 31, 20)) << 20;                        // Addi.
1259   target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[5], 31, 20)) << 8;                         // Addi.
1260   target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[7], 31, 20));                              // Addi.
1261   return (address)target_address;
1262 }
1263 
1264 static address get_target_of_li32(address insn_addr) {
1265   assert_cond(insn_addr != NULL);
1266   intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 12;    // Lui.
1267   target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20));                              // Addiw.
1268   return (address)target_address;
1269 }
1270 
1271 // Patch any kind of instruction; there may be several instructions.
1272 // Return the total length (in bytes) of the instructions.
1273 int MacroAssembler::pd_patch_instruction_size(address branch, address target) {
1274   assert_cond(branch != NULL);
1275   int64_t offset = target - branch;
1276   if (NativeInstruction::is_jal_at(branch)) {                         // jal
1277     return patch_offset_in_jal(branch, offset);
1278   } else if (NativeInstruction::is_branch_at(branch)) {               // beq/bge/bgeu/blt/bltu/bne
1279     return patch_offset_in_conditional_branch(branch, offset);
1280   } else if (NativeInstruction::is_pc_relative_at(branch)) {          // auipc, addi/jalr/load
1281     return patch_offset_in_pc_relative(branch, offset);
1282   } else if (NativeInstruction::is_movptr_at(branch)) {               // movptr
1283     return patch_addr_in_movptr(branch, target);
1284   } else if (NativeInstruction::is_li64_at(branch)) {                 // li64
1285     return patch_imm_in_li64(branch, target);
1286   } else if (NativeInstruction::is_li32_at(branch)) {                 // li32
1287     int64_t imm = (intptr_t)target;
1288     return patch_imm_in_li32(branch, (int32_t)imm);
1289   } else {
1290 #ifdef ASSERT
1291     tty->print_cr("pd_patch_instruction_size: instruction 0x%x at " INTPTR_FORMAT " could not be patched!\n",
1292                   *(unsigned*)branch, p2i(branch));
1293     Disassembler::decode(branch - 16, branch + 16);
1294 #endif
1295     ShouldNotReachHere();
1296     return -1;
1297   }
1298 }
1299 
1300 address MacroAssembler::target_addr_for_insn(address insn_addr) {
1301   long offset = 0;
1302   assert_cond(insn_addr != NULL);
1303   if (NativeInstruction::is_jal_at(insn_addr)) {                     // jal
1304     offset = get_offset_of_jal(insn_addr);
1305   } else if (NativeInstruction::is_branch_at(insn_addr)) {           // beq/bge/bgeu/blt/bltu/bne
1306     offset = get_offset_of_conditional_branch(insn_addr);
1307   } else if (NativeInstruction::is_pc_relative_at(insn_addr)) {      // auipc, addi/jalr/load
1308     offset = get_offset_of_pc_relative(insn_addr);
1309   } else if (NativeInstruction::is_movptr_at(insn_addr)) {           // movptr
1310     return get_target_of_movptr(insn_addr);
1311   } else if (NativeInstruction::is_li64_at(insn_addr)) {             // li64
1312     return get_target_of_li64(insn_addr);
1313   } else if (NativeInstruction::is_li32_at(insn_addr)) {             // li32
1314     return get_target_of_li32(insn_addr);
1315   } else {
1316     ShouldNotReachHere();
1317   }
1318   return address(((uintptr_t)insn_addr + offset));
1319 }
1320 
1321 int MacroAssembler::patch_oop(address insn_addr, address o) {
1322   // OOPs are either narrow (32 bits) or wide (48 bits).  We encode
1323   // narrow OOPs by setting the upper 16 bits in the first
1324   // instruction.
1325   if (NativeInstruction::is_li32_at(insn_addr)) {
1326     // Move narrow OOP
1327     uint32_t n = CompressedOops::narrow_oop_value(cast_to_oop(o));
1328     return patch_imm_in_li32(insn_addr, (int32_t)n);
1329   } else if (NativeInstruction::is_movptr_at(insn_addr)) {
1330     // Move wide OOP
1331     return patch_addr_in_movptr(insn_addr, o);
1332   }
1333   ShouldNotReachHere();
1334   return -1;
1335 }
1336 
1337 void MacroAssembler::reinit_heapbase() {
1338   if (UseCompressedOops) {
1339     if (Universe::is_fully_initialized()) {
1340       mv(xheapbase, CompressedOops::ptrs_base());
1341     } else {
1342       int32_t offset = 0;
1343       la_patchable(xheapbase, ExternalAddress(CompressedOops::ptrs_base_addr()), offset);
1344       ld(xheapbase, Address(xheapbase, offset));
1345     }
1346   }
1347 }
1348 
1349 void MacroAssembler::mv(Register Rd, Address dest) {
1350   assert(dest.getMode() == Address::literal, "Address mode should be Address::literal");
1351   relocate(dest.rspec());
1352   movptr(Rd, dest.target());
1353 }
1354 
1355 void MacroAssembler::mv(Register Rd, RegisterOrConstant src) {
1356   if (src.is_register()) {
1357     mv(Rd, src.as_register());
1358   } else {
1359     mv(Rd, src.as_constant());
1360   }
1361 }
1362 
1363 void MacroAssembler::andrw(Register Rd, Register Rs1, Register Rs2) {
1364   andr(Rd, Rs1, Rs2);
1365   // addw: The result is clipped to 32 bits, then the sign bit is extended,
1366   // and the result is stored in Rd
1367   addw(Rd, Rd, zr);
1368 }
1369 
1370 void MacroAssembler::orrw(Register Rd, Register Rs1, Register Rs2) {
1371   orr(Rd, Rs1, Rs2);
1372   // addw: The result is clipped to 32 bits, then the sign bit is extended,
1373   // and the result is stored in Rd
1374   addw(Rd, Rd, zr);
1375 }
1376 
1377 void MacroAssembler::xorrw(Register Rd, Register Rs1, Register Rs2) {
1378   xorr(Rd, Rs1, Rs2);
1379   // addw: The result is clipped to 32 bits, then the sign bit is extended,
1380   // and the result is stored in Rd
1381   addw(Rd, Rd, zr);
1382 }
1383 
1384 // Note: load_unsigned_short used to be called load_unsigned_word.
1385 int MacroAssembler::load_unsigned_short(Register dst, Address src) {
1386   int off = offset();
1387   lhu(dst, src);
1388   return off;
1389 }
1390 
1391 int MacroAssembler::load_unsigned_byte(Register dst, Address src) {
1392   int off = offset();
1393   lbu(dst, src);
1394   return off;
1395 }
1396 
1397 int MacroAssembler::load_signed_short(Register dst, Address src) {
1398   int off = offset();
1399   lh(dst, src);
1400   return off;
1401 }
1402 
1403 int MacroAssembler::load_signed_byte(Register dst, Address src) {
1404   int off = offset();
1405   lb(dst, src);
1406   return off;
1407 }
1408 
1409 void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) {
1410   switch (size_in_bytes) {
1411     case  8:  ld(dst, src); break;
1412     case  4:  is_signed ? lw(dst, src) : lwu(dst, src); break;
1413     case  2:  is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break;
1414     case  1:  is_signed ? load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break;
1415     default:  ShouldNotReachHere();
1416   }
1417 }
1418 
1419 void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) {
1420   switch (size_in_bytes) {
1421     case  8:  sd(src, dst); break;
1422     case  4:  sw(src, dst); break;
1423     case  2:  sh(src, dst); break;
1424     case  1:  sb(src, dst); break;
1425     default:  ShouldNotReachHere();
1426   }
1427 }
1428 
1429 // reverse bytes in halfword in lower 16 bits and sign-extend
1430 // Rd[15:0] = Rs[7:0] Rs[15:8] (sign-extend to 64 bits)
1431 void MacroAssembler::revb_h_h(Register Rd, Register Rs, Register tmp) {
1432   if (UseZbb) {
1433     rev8(Rd, Rs);
1434     srai(Rd, Rd, 48);
1435     return;
1436   }
1437   assert_different_registers(Rs, tmp);
1438   assert_different_registers(Rd, tmp);
1439   srli(tmp, Rs, 8);
1440   andi(tmp, tmp, 0xFF);
1441   slli(Rd, Rs, 56);
1442   srai(Rd, Rd, 48); // sign-extend
1443   orr(Rd, Rd, tmp);
1444 }
1445 
1446 // reverse bytes in lower word and sign-extend
1447 // Rd[31:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] (sign-extend to 64 bits)
1448 void MacroAssembler::revb_w_w(Register Rd, Register Rs, Register tmp1, Register tmp2) {
1449   if (UseZbb) {
1450     rev8(Rd, Rs);
1451     srai(Rd, Rd, 32);
1452     return;
1453   }
1454   assert_different_registers(Rs, tmp1, tmp2);
1455   assert_different_registers(Rd, tmp1, tmp2);
1456   revb_h_w_u(Rd, Rs, tmp1, tmp2);
1457   slli(tmp2, Rd, 48);
1458   srai(tmp2, tmp2, 32); // sign-extend
1459   srli(Rd, Rd, 16);
1460   orr(Rd, Rd, tmp2);
1461 }
1462 
1463 // reverse bytes in halfword in lower 16 bits and zero-extend
1464 // Rd[15:0] = Rs[7:0] Rs[15:8] (zero-extend to 64 bits)
1465 void MacroAssembler::revb_h_h_u(Register Rd, Register Rs, Register tmp) {
1466   if (UseZbb) {
1467     rev8(Rd, Rs);
1468     srli(Rd, Rd, 48);
1469     return;
1470   }
1471   assert_different_registers(Rs, tmp);
1472   assert_different_registers(Rd, tmp);
1473   srli(tmp, Rs, 8);
1474   andi(tmp, tmp, 0xFF);
1475   andi(Rd, Rs, 0xFF);
1476   slli(Rd, Rd, 8);
1477   orr(Rd, Rd, tmp);
1478 }
1479 
1480 // reverse bytes in halfwords in lower 32 bits and zero-extend
1481 // Rd[31:0] = Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8] (zero-extend to 64 bits)
1482 void MacroAssembler::revb_h_w_u(Register Rd, Register Rs, Register tmp1, Register tmp2) {
1483   if (UseZbb) {
1484     rev8(Rd, Rs);
1485     rori(Rd, Rd, 32);
1486     roriw(Rd, Rd, 16);
1487     zero_extend(Rd, Rd, 32);
1488     return;
1489   }
1490   assert_different_registers(Rs, tmp1, tmp2);
1491   assert_different_registers(Rd, tmp1, tmp2);
1492   srli(tmp2, Rs, 16);
1493   revb_h_h_u(tmp2, tmp2, tmp1);
1494   revb_h_h_u(Rd, Rs, tmp1);
1495   slli(tmp2, tmp2, 16);
1496   orr(Rd, Rd, tmp2);
1497 }
1498 
1499 // This method is only used for revb_h
1500 // Rd = Rs[47:0] Rs[55:48] Rs[63:56]
1501 void MacroAssembler::revb_h_helper(Register Rd, Register Rs, Register tmp1, Register tmp2) {
1502   assert_different_registers(Rs, tmp1, tmp2);
1503   assert_different_registers(Rd, tmp1);
1504   srli(tmp1, Rs, 48);
1505   andi(tmp2, tmp1, 0xFF);
1506   slli(tmp2, tmp2, 8);
1507   srli(tmp1, tmp1, 8);
1508   orr(tmp1, tmp1, tmp2);
1509   slli(Rd, Rs, 16);
1510   orr(Rd, Rd, tmp1);
1511 }
1512 
1513 // reverse bytes in each halfword
1514 // Rd[63:0] = Rs[55:48] Rs[63:56] Rs[39:32] Rs[47:40] Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8]
1515 void MacroAssembler::revb_h(Register Rd, Register Rs, Register tmp1, Register tmp2) {
1516   if (UseZbb) {
1517     assert_different_registers(Rs, tmp1);
1518     assert_different_registers(Rd, tmp1);
1519     rev8(Rd, Rs);
1520     zero_extend(tmp1, Rd, 32);
1521     roriw(tmp1, tmp1, 16);
1522     slli(tmp1, tmp1, 32);
1523     srli(Rd, Rd, 32);
1524     roriw(Rd, Rd, 16);
1525     zero_extend(Rd, Rd, 32);
1526     orr(Rd, Rd, tmp1);
1527     return;
1528   }
1529   assert_different_registers(Rs, tmp1, tmp2);
1530   assert_different_registers(Rd, tmp1, tmp2);
1531   revb_h_helper(Rd, Rs, tmp1, tmp2);
1532   for (int i = 0; i < 3; ++i) {
1533     revb_h_helper(Rd, Rd, tmp1, tmp2);
1534   }
1535 }
1536 
1537 // reverse bytes in each word
1538 // Rd[63:0] = Rs[39:32] Rs[47:40] Rs[55:48] Rs[63:56] Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24]
1539 void MacroAssembler::revb_w(Register Rd, Register Rs, Register tmp1, Register tmp2) {
1540   if (UseZbb) {
1541     rev8(Rd, Rs);
1542     rori(Rd, Rd, 32);
1543     return;
1544   }
1545   assert_different_registers(Rs, tmp1, tmp2);
1546   assert_different_registers(Rd, tmp1, tmp2);
1547   revb(Rd, Rs, tmp1, tmp2);
1548   ror_imm(Rd, Rd, 32);
1549 }
1550 
1551 // reverse bytes in doubleword
1552 // Rd[63:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] Rs[39:32] Rs[47,40] Rs[55,48] Rs[63:56]
1553 void MacroAssembler::revb(Register Rd, Register Rs, Register tmp1, Register tmp2) {
1554   if (UseZbb) {
1555     rev8(Rd, Rs);
1556     return;
1557   }
1558   assert_different_registers(Rs, tmp1, tmp2);
1559   assert_different_registers(Rd, tmp1, tmp2);
1560   andi(tmp1, Rs, 0xFF);
1561   slli(tmp1, tmp1, 8);
1562   for (int step = 8; step < 56; step += 8) {
1563     srli(tmp2, Rs, step);
1564     andi(tmp2, tmp2, 0xFF);
1565     orr(tmp1, tmp1, tmp2);
1566     slli(tmp1, tmp1, 8);
1567   }
1568   srli(Rd, Rs, 56);
1569   andi(Rd, Rd, 0xFF);
1570   orr(Rd, tmp1, Rd);
1571 }
1572 
1573 // rotate right with shift bits
1574 void MacroAssembler::ror_imm(Register dst, Register src, uint32_t shift, Register tmp)
1575 {
1576   if (UseZbb) {
1577     rori(dst, src, shift);
1578     return;
1579   }
1580 
1581   assert_different_registers(dst, tmp);
1582   assert_different_registers(src, tmp);
1583   assert(shift < 64, "shift amount must be < 64");
1584   slli(tmp, src, 64 - shift);
1585   srli(dst, src, shift);
1586   orr(dst, dst, tmp);
1587 }
1588 
1589 void MacroAssembler::andi(Register Rd, Register Rn, int64_t imm, Register tmp) {
1590   if (is_imm_in_range(imm, 12, 0)) {
1591     and_imm12(Rd, Rn, imm);
1592   } else {
1593     assert_different_registers(Rn, tmp);
1594     mv(tmp, imm);
1595     andr(Rd, Rn, tmp);
1596   }
1597 }
1598 
1599 void MacroAssembler::orptr(Address adr, RegisterOrConstant src, Register tmp1, Register tmp2) {
1600   ld(tmp1, adr);
1601   if (src.is_register()) {
1602     orr(tmp1, tmp1, src.as_register());
1603   } else {
1604     if (is_imm_in_range(src.as_constant(), 12, 0)) {
1605       ori(tmp1, tmp1, src.as_constant());
1606     } else {
1607       assert_different_registers(tmp1, tmp2);
1608       mv(tmp2, src.as_constant());
1609       orr(tmp1, tmp1, tmp2);
1610     }
1611   }
1612   sd(tmp1, adr);
1613 }
1614 
1615 void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp, Label &L) {
1616   if (UseCompressedClassPointers) {
1617       lwu(tmp, Address(oop, oopDesc::klass_offset_in_bytes()));
1618     if (CompressedKlassPointers::base() == NULL) {
1619       slli(tmp, tmp, CompressedKlassPointers::shift());
1620       beq(trial_klass, tmp, L);
1621       return;
1622     }
1623     decode_klass_not_null(tmp);
1624   } else {
1625     ld(tmp, Address(oop, oopDesc::klass_offset_in_bytes()));
1626   }
1627   beq(trial_klass, tmp, L);
1628 }
1629 
1630 // Move an oop into a register.
1631 void MacroAssembler::movoop(Register dst, jobject obj) {
1632   int oop_index;
1633   if (obj == NULL) {
1634     oop_index = oop_recorder()->allocate_oop_index(obj);
1635   } else {
1636 #ifdef ASSERT
1637     {
1638       ThreadInVMfromUnknown tiv;
1639       assert(Universe::heap()->is_in(JNIHandles::resolve(obj)), "should be real oop");
1640     }
1641 #endif
1642     oop_index = oop_recorder()->find_index(obj);
1643   }
1644   RelocationHolder rspec = oop_Relocation::spec(oop_index);
1645 
1646   if (BarrierSet::barrier_set()->barrier_set_assembler()->supports_instruction_patching()) {
1647     mv(dst, Address((address)obj, rspec));
1648   } else {
1649     address dummy = address(uintptr_t(pc()) & -wordSize); // A nearby aligned address
1650     ld_constant(dst, Address(dummy, rspec));
1651   }
1652 }
1653 
1654 // Move a metadata address into a register.
1655 void MacroAssembler::mov_metadata(Register dst, Metadata* obj) {
1656   int oop_index;
1657   if (obj == NULL) {
1658     oop_index = oop_recorder()->allocate_metadata_index(obj);
1659   } else {
1660     oop_index = oop_recorder()->find_index(obj);
1661   }
1662   RelocationHolder rspec = metadata_Relocation::spec(oop_index);
1663   mv(dst, Address((address)obj, rspec));
1664 }
1665 
1666 // Writes to stack successive pages until offset reached to check for
1667 // stack overflow + shadow pages.  This clobbers tmp.
1668 void MacroAssembler::bang_stack_size(Register size, Register tmp) {
1669   assert_different_registers(tmp, size, t0);
1670   // Bang stack for total size given plus shadow page size.
1671   // Bang one page at a time because large size can bang beyond yellow and
1672   // red zones.
1673   mv(t0, os::vm_page_size());
1674   Label loop;
1675   bind(loop);
1676   sub(tmp, sp, t0);
1677   subw(size, size, t0);
1678   sd(size, Address(tmp));
1679   bgtz(size, loop);
1680 
1681   // Bang down shadow pages too.
1682   // At this point, (tmp-0) is the last address touched, so don't
1683   // touch it again.  (It was touched as (tmp-pagesize) but then tmp
1684   // was post-decremented.)  Skip this address by starting at i=1, and
1685   // touch a few more pages below.  N.B.  It is important to touch all
1686   // the way down to and including i=StackShadowPages.
1687   for (int i = 0; i < (int)(StackOverflow::stack_shadow_zone_size() / os::vm_page_size()) - 1; i++) {
1688     // this could be any sized move but this is can be a debugging crumb
1689     // so the bigger the better.
1690     sub(tmp, tmp, os::vm_page_size());
1691     sd(size, Address(tmp, 0));
1692   }
1693 }
1694 
1695 SkipIfEqual::SkipIfEqual(MacroAssembler* masm, const bool* flag_addr, bool value) {
1696   int32_t offset = 0;
1697   _masm = masm;
1698   _masm->la_patchable(t0, ExternalAddress((address)flag_addr), offset);
1699   _masm->lbu(t0, Address(t0, offset));
1700   _masm->beqz(t0, _label);
1701 }
1702 
1703 SkipIfEqual::~SkipIfEqual() {
1704   _masm->bind(_label);
1705   _masm = NULL;
1706 }
1707 
1708 void MacroAssembler::load_mirror(Register dst, Register method, Register tmp1, Register tmp2) {
1709   const int mirror_offset = in_bytes(Klass::java_mirror_offset());
1710   ld(dst, Address(xmethod, Method::const_offset()));
1711   ld(dst, Address(dst, ConstMethod::constants_offset()));
1712   ld(dst, Address(dst, ConstantPool::pool_holder_offset_in_bytes()));
1713   ld(dst, Address(dst, mirror_offset));
1714   resolve_oop_handle(dst, tmp1, tmp2);
1715 }
1716 
1717 void MacroAssembler::resolve_oop_handle(Register result, Register tmp1, Register tmp2) {
1718   // OopHandle::resolve is an indirection.
1719   assert_different_registers(result, tmp1, tmp2);
1720   access_load_at(T_OBJECT, IN_NATIVE, result, Address(result, 0), tmp1, tmp2);
1721 }
1722 
1723 // ((WeakHandle)result).resolve()
1724 void MacroAssembler::resolve_weak_handle(Register result, Register tmp1, Register tmp2) {
1725   assert_different_registers(result, tmp1, tmp2);
1726   Label resolved;
1727 
1728   // A null weak handle resolves to null.
1729   beqz(result, resolved);
1730 
1731   // Only 64 bit platforms support GCs that require a tmp register
1732   // Only IN_HEAP loads require a thread_tmp register
1733   // WeakHandle::resolve is an indirection like jweak.
1734   access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF,
1735                  result, Address(result), tmp1, tmp2);
1736   bind(resolved);
1737 }
1738 
1739 void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators,
1740                                     Register dst, Address src,
1741                                     Register tmp1, Register tmp2) {
1742   BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
1743   decorators = AccessInternal::decorator_fixup(decorators);
1744   bool as_raw = (decorators & AS_RAW) != 0;
1745   if (as_raw) {
1746     bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, tmp2);
1747   } else {
1748     bs->load_at(this, decorators, type, dst, src, tmp1, tmp2);
1749   }
1750 }
1751 
1752 void MacroAssembler::null_check(Register reg, int offset) {
1753   if (needs_explicit_null_check(offset)) {
1754     // provoke OS NULL exception if reg = NULL by
1755     // accessing M[reg] w/o changing any registers
1756     // NOTE: this is plenty to provoke a segv
1757     ld(zr, Address(reg, 0));
1758   } else {
1759     // nothing to do, (later) access of M[reg + offset]
1760     // will provoke OS NULL exception if reg = NULL
1761   }
1762 }
1763 
1764 void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators,
1765                                      Address dst, Register src,
1766                                      Register tmp1, Register tmp2, Register tmp3) {
1767   BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
1768   decorators = AccessInternal::decorator_fixup(decorators);
1769   bool as_raw = (decorators & AS_RAW) != 0;
1770   if (as_raw) {
1771     bs->BarrierSetAssembler::store_at(this, decorators, type, dst, src, tmp1, tmp2, tmp3);
1772   } else {
1773     bs->store_at(this, decorators, type, dst, src, tmp1, tmp2, tmp3);
1774   }
1775 }
1776 
1777 // Algorithm must match CompressedOops::encode.
1778 void MacroAssembler::encode_heap_oop(Register d, Register s) {
1779   verify_oop_msg(s, "broken oop in encode_heap_oop");
1780   if (CompressedOops::base() == NULL) {
1781     if (CompressedOops::shift() != 0) {
1782       assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong");
1783       srli(d, s, LogMinObjAlignmentInBytes);
1784     } else {
1785       mv(d, s);
1786     }
1787   } else {
1788     Label notNull;
1789     sub(d, s, xheapbase);
1790     bgez(d, notNull);
1791     mv(d, zr);
1792     bind(notNull);
1793     if (CompressedOops::shift() != 0) {
1794       assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong");
1795       srli(d, d, CompressedOops::shift());
1796     }
1797   }
1798 }
1799 
1800 void MacroAssembler::load_klass(Register dst, Register src) {
1801   if (UseCompressedClassPointers) {
1802     lwu(dst, Address(src, oopDesc::klass_offset_in_bytes()));
1803     decode_klass_not_null(dst);
1804   } else {
1805     ld(dst, Address(src, oopDesc::klass_offset_in_bytes()));
1806   }
1807 }
1808 
1809 void MacroAssembler::store_klass(Register dst, Register src) {
1810   // FIXME: Should this be a store release? concurrent gcs assumes
1811   // klass length is valid if klass field is not null.
1812   if (UseCompressedClassPointers) {
1813     encode_klass_not_null(src);
1814     sw(src, Address(dst, oopDesc::klass_offset_in_bytes()));
1815   } else {
1816     sd(src, Address(dst, oopDesc::klass_offset_in_bytes()));
1817   }
1818 }
1819 
1820 void MacroAssembler::store_klass_gap(Register dst, Register src) {
1821   if (UseCompressedClassPointers) {
1822     // Store to klass gap in destination
1823     sw(src, Address(dst, oopDesc::klass_gap_offset_in_bytes()));
1824   }
1825 }
1826 
1827 void  MacroAssembler::decode_klass_not_null(Register r) {
1828   decode_klass_not_null(r, r);
1829 }
1830 
1831 void MacroAssembler::decode_klass_not_null(Register dst, Register src, Register tmp) {
1832   assert(UseCompressedClassPointers, "should only be used for compressed headers");
1833 
1834   if (CompressedKlassPointers::base() == NULL) {
1835     if (CompressedKlassPointers::shift() != 0) {
1836       assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong");
1837       slli(dst, src, LogKlassAlignmentInBytes);
1838     } else {
1839       mv(dst, src);
1840     }
1841     return;
1842   }
1843 
1844   Register xbase = dst;
1845   if (dst == src) {
1846     xbase = tmp;
1847   }
1848 
1849   assert_different_registers(src, xbase);
1850   mv(xbase, (uintptr_t)CompressedKlassPointers::base());
1851 
1852   if (CompressedKlassPointers::shift() != 0) {
1853     assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong");
1854     assert_different_registers(t0, xbase);
1855     shadd(dst, src, xbase, t0, LogKlassAlignmentInBytes);
1856   } else {
1857     add(dst, xbase, src);
1858   }
1859 
1860   if (xbase == xheapbase) { reinit_heapbase(); }
1861 }
1862 
1863 void MacroAssembler::encode_klass_not_null(Register r) {
1864   encode_klass_not_null(r, r);
1865 }
1866 
1867 void MacroAssembler::encode_klass_not_null(Register dst, Register src, Register tmp) {
1868   assert(UseCompressedClassPointers, "should only be used for compressed headers");
1869 
1870   if (CompressedKlassPointers::base() == NULL) {
1871     if (CompressedKlassPointers::shift() != 0) {
1872       assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong");
1873       srli(dst, src, LogKlassAlignmentInBytes);
1874     } else {
1875       mv(dst, src);
1876     }
1877     return;
1878   }
1879 
1880   if (((uint64_t)(uintptr_t)CompressedKlassPointers::base() & 0xffffffff) == 0 &&
1881       CompressedKlassPointers::shift() == 0) {
1882     zero_extend(dst, src, 32);
1883     return;
1884   }
1885 
1886   Register xbase = dst;
1887   if (dst == src) {
1888     xbase = tmp;
1889   }
1890 
1891   assert_different_registers(src, xbase);
1892   mv(xbase, (intptr_t)CompressedKlassPointers::base());
1893   sub(dst, src, xbase);
1894   if (CompressedKlassPointers::shift() != 0) {
1895     assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong");
1896     srli(dst, dst, LogKlassAlignmentInBytes);
1897   }
1898   if (xbase == xheapbase) {
1899     reinit_heapbase();
1900   }
1901 }
1902 
1903 void  MacroAssembler::decode_heap_oop_not_null(Register r) {
1904   decode_heap_oop_not_null(r, r);
1905 }
1906 
1907 void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
1908   assert(UseCompressedOops, "should only be used for compressed headers");
1909   assert(Universe::heap() != NULL, "java heap should be initialized");
1910   // Cannot assert, unverified entry point counts instructions (see .ad file)
1911   // vtableStubs also counts instructions in pd_code_size_limit.
1912   // Also do not verify_oop as this is called by verify_oop.
1913   if (CompressedOops::shift() != 0) {
1914     assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong");
1915     slli(dst, src, LogMinObjAlignmentInBytes);
1916     if (CompressedOops::base() != NULL) {
1917       add(dst, xheapbase, dst);
1918     }
1919   } else {
1920     assert(CompressedOops::base() == NULL, "sanity");
1921     mv(dst, src);
1922   }
1923 }
1924 
1925 void  MacroAssembler::decode_heap_oop(Register d, Register s) {
1926   if (CompressedOops::base() == NULL) {
1927     if (CompressedOops::shift() != 0 || d != s) {
1928       slli(d, s, CompressedOops::shift());
1929     }
1930   } else {
1931     Label done;
1932     mv(d, s);
1933     beqz(s, done);
1934     shadd(d, s, xheapbase, d, LogMinObjAlignmentInBytes);
1935     bind(done);
1936   }
1937   verify_oop_msg(d, "broken oop in decode_heap_oop");
1938 }
1939 
1940 void MacroAssembler::store_heap_oop(Address dst, Register src, Register tmp1,
1941                                     Register tmp2, Register tmp3, DecoratorSet decorators) {
1942   access_store_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, tmp2, tmp3);
1943 }
1944 
1945 void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1,
1946                                    Register tmp2, DecoratorSet decorators) {
1947   access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, tmp2);
1948 }
1949 
1950 void MacroAssembler::load_heap_oop_not_null(Register dst, Address src, Register tmp1,
1951                                             Register tmp2, DecoratorSet decorators) {
1952   access_load_at(T_OBJECT, IN_HEAP | IS_NOT_NULL, dst, src, tmp1, tmp2);
1953 }
1954 
1955 // Used for storing NULLs.
1956 void MacroAssembler::store_heap_oop_null(Address dst) {
1957   access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg, noreg);
1958 }
1959 
1960 int MacroAssembler::corrected_idivl(Register result, Register rs1, Register rs2,
1961                                     bool want_remainder)
1962 {
1963   // Full implementation of Java idiv and irem.  The function
1964   // returns the (pc) offset of the div instruction - may be needed
1965   // for implicit exceptions.
1966   //
1967   // input : rs1: dividend
1968   //         rs2: divisor
1969   //
1970   // result: either
1971   //         quotient  (= rs1 idiv rs2)
1972   //         remainder (= rs1 irem rs2)
1973 
1974 
1975   int idivl_offset = offset();
1976   if (!want_remainder) {
1977     divw(result, rs1, rs2);
1978   } else {
1979     remw(result, rs1, rs2); // result = rs1 % rs2;
1980   }
1981   return idivl_offset;
1982 }
1983 
1984 int MacroAssembler::corrected_idivq(Register result, Register rs1, Register rs2,
1985                                     bool want_remainder)
1986 {
1987   // Full implementation of Java ldiv and lrem.  The function
1988   // returns the (pc) offset of the div instruction - may be needed
1989   // for implicit exceptions.
1990   //
1991   // input : rs1: dividend
1992   //         rs2: divisor
1993   //
1994   // result: either
1995   //         quotient  (= rs1 idiv rs2)
1996   //         remainder (= rs1 irem rs2)
1997 
1998   int idivq_offset = offset();
1999   if (!want_remainder) {
2000     div(result, rs1, rs2);
2001   } else {
2002     rem(result, rs1, rs2); // result = rs1 % rs2;
2003   }
2004   return idivq_offset;
2005 }
2006 
2007 // Look up the method for a megamorpic invkkeinterface call.
2008 // The target method is determined by <intf_klass, itable_index>.
2009 // The receiver klass is in recv_klass.
2010 // On success, the result will be in method_result, and execution falls through.
2011 // On failure, execution transfers to the given label.
2012 void MacroAssembler::lookup_interface_method(Register recv_klass,
2013                                              Register intf_klass,
2014                                              RegisterOrConstant itable_index,
2015                                              Register method_result,
2016                                              Register scan_tmp,
2017                                              Label& L_no_such_interface,
2018                                              bool return_method) {
2019   assert_different_registers(recv_klass, intf_klass, scan_tmp);
2020   assert_different_registers(method_result, intf_klass, scan_tmp);
2021   assert(recv_klass != method_result || !return_method,
2022          "recv_klass can be destroyed when mehtid isn't needed");
2023   assert(itable_index.is_constant() || itable_index.as_register() == method_result,
2024          "caller must be same register for non-constant itable index as for method");
2025 
2026   // Compute start of first itableOffsetEntry (which is at the end of the vtable).
2027   int vtable_base = in_bytes(Klass::vtable_start_offset());
2028   int itentry_off = itableMethodEntry::method_offset_in_bytes();
2029   int scan_step   = itableOffsetEntry::size() * wordSize;
2030   int vte_size    = vtableEntry::size_in_bytes();
2031   assert(vte_size == wordSize, "else adjust times_vte_scale");
2032 
2033   lwu(scan_tmp, Address(recv_klass, Klass::vtable_length_offset()));
2034 
2035   // %%% Could store the aligned, prescaled offset in the klassoop.
2036   shadd(scan_tmp, scan_tmp, recv_klass, scan_tmp, 3);
2037   add(scan_tmp, scan_tmp, vtable_base);
2038 
2039   if (return_method) {
2040     // Adjust recv_klass by scaled itable_index, so we can free itable_index.
2041     assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
2042     if (itable_index.is_register()) {
2043       slli(t0, itable_index.as_register(), 3);
2044     } else {
2045       mv(t0, itable_index.as_constant() << 3);
2046     }
2047     add(recv_klass, recv_klass, t0);
2048     if (itentry_off) {
2049       add(recv_klass, recv_klass, itentry_off);
2050     }
2051   }
2052 
2053   Label search, found_method;
2054 
2055   ld(method_result, Address(scan_tmp, itableOffsetEntry::interface_offset_in_bytes()));
2056   beq(intf_klass, method_result, found_method);
2057   bind(search);
2058   // Check that the previous entry is non-null. A null entry means that
2059   // the receiver class doesn't implement the interface, and wasn't the
2060   // same as when the caller was compiled.
2061   beqz(method_result, L_no_such_interface, /* is_far */ true);
2062   addi(scan_tmp, scan_tmp, scan_step);
2063   ld(method_result, Address(scan_tmp, itableOffsetEntry::interface_offset_in_bytes()));
2064   bne(intf_klass, method_result, search);
2065 
2066   bind(found_method);
2067 
2068   // Got a hit.
2069   if (return_method) {
2070     lwu(scan_tmp, Address(scan_tmp, itableOffsetEntry::offset_offset_in_bytes()));
2071     add(method_result, recv_klass, scan_tmp);
2072     ld(method_result, Address(method_result));
2073   }
2074 }
2075 
2076 // virtual method calling
2077 void MacroAssembler::lookup_virtual_method(Register recv_klass,
2078                                            RegisterOrConstant vtable_index,
2079                                            Register method_result) {
2080   const int base = in_bytes(Klass::vtable_start_offset());
2081   assert(vtableEntry::size() * wordSize == 8,
2082          "adjust the scaling in the code below");
2083   int vtable_offset_in_bytes = base + vtableEntry::method_offset_in_bytes();
2084 
2085   if (vtable_index.is_register()) {
2086     shadd(method_result, vtable_index.as_register(), recv_klass, method_result, LogBytesPerWord);
2087     ld(method_result, Address(method_result, vtable_offset_in_bytes));
2088   } else {
2089     vtable_offset_in_bytes += vtable_index.as_constant() * wordSize;
2090     ld(method_result, form_address(method_result, recv_klass, vtable_offset_in_bytes));
2091   }
2092 }
2093 
2094 void MacroAssembler::membar(uint32_t order_constraint) {
2095   address prev = pc() - NativeMembar::instruction_size;
2096   address last = code()->last_insn();
2097 
2098   if (last != NULL && nativeInstruction_at(last)->is_membar() && prev == last) {
2099     NativeMembar *bar = NativeMembar_at(prev);
2100     // We are merging two memory barrier instructions.  On RISCV we
2101     // can do this simply by ORing them together.
2102     bar->set_kind(bar->get_kind() | order_constraint);
2103     BLOCK_COMMENT("merged membar");
2104   } else {
2105     code()->set_last_insn(pc());
2106 
2107     uint32_t predecessor = 0;
2108     uint32_t successor = 0;
2109 
2110     membar_mask_to_pred_succ(order_constraint, predecessor, successor);
2111     fence(predecessor, successor);
2112   }
2113 }
2114 
2115 // Form an address from base + offset in Rd. Rd my or may not
2116 // actually be used: you must use the Address that is returned. It
2117 // is up to you to ensure that the shift provided matches the size
2118 // of your data.
2119 Address MacroAssembler::form_address(Register Rd, Register base, long byte_offset) {
2120   if (is_offset_in_range(byte_offset, 12)) { // 12: imm in range 2^12
2121     return Address(base, byte_offset);
2122   }
2123 
2124   // Do it the hard way
2125   mv(Rd, byte_offset);
2126   add(Rd, base, Rd);
2127   return Address(Rd);
2128 }
2129 
2130 void MacroAssembler::check_klass_subtype(Register sub_klass,
2131                                          Register super_klass,
2132                                          Register tmp_reg,
2133                                          Label& L_success) {
2134   Label L_failure;
2135   check_klass_subtype_fast_path(sub_klass, super_klass, tmp_reg, &L_success, &L_failure, NULL);
2136   check_klass_subtype_slow_path(sub_klass, super_klass, tmp_reg, noreg, &L_success, NULL);
2137   bind(L_failure);
2138 }
2139 
2140 void MacroAssembler::safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod) {
2141   ld(t0, Address(xthread, JavaThread::polling_word_offset()));
2142   if (acquire) {
2143     membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
2144   }
2145   if (at_return) {
2146     bgtu(in_nmethod ? sp : fp, t0, slow_path, true /* is_far */);
2147   } else {
2148     andi(t0, t0, SafepointMechanism::poll_bit());
2149     bnez(t0, slow_path, true /* is_far */);
2150   }
2151 }
2152 
2153 void MacroAssembler::cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp,
2154                                 Label &succeed, Label *fail) {
2155   // oldv holds comparison value
2156   // newv holds value to write in exchange
2157   // addr identifies memory word to compare against/update
2158   Label retry_load, nope;
2159   bind(retry_load);
2160   // Load reserved from the memory location
2161   lr_d(tmp, addr, Assembler::aqrl);
2162   // Fail and exit if it is not what we expect
2163   bne(tmp, oldv, nope);
2164   // If the store conditional succeeds, tmp will be zero
2165   sc_d(tmp, newv, addr, Assembler::rl);
2166   beqz(tmp, succeed);
2167   // Retry only when the store conditional failed
2168   j(retry_load);
2169 
2170   bind(nope);
2171   membar(AnyAny);
2172   mv(oldv, tmp);
2173   if (fail != NULL) {
2174     j(*fail);
2175   }
2176 }
2177 
2178 void MacroAssembler::cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp,
2179                                         Label &succeed, Label *fail) {
2180   assert(oopDesc::mark_offset_in_bytes() == 0, "assumption");
2181   cmpxchgptr(oldv, newv, obj, tmp, succeed, fail);
2182 }
2183 
2184 void MacroAssembler::load_reserved(Register addr,
2185                                    enum operand_size size,
2186                                    Assembler::Aqrl acquire) {
2187   switch (size) {
2188     case int64:
2189       lr_d(t0, addr, acquire);
2190       break;
2191     case int32:
2192       lr_w(t0, addr, acquire);
2193       break;
2194     case uint32:
2195       lr_w(t0, addr, acquire);
2196       zero_extend(t0, t0, 32);
2197       break;
2198     default:
2199       ShouldNotReachHere();
2200   }
2201 }
2202 
2203 void MacroAssembler::store_conditional(Register addr,
2204                                        Register new_val,
2205                                        enum operand_size size,
2206                                        Assembler::Aqrl release) {
2207   switch (size) {
2208     case int64:
2209       sc_d(t0, new_val, addr, release);
2210       break;
2211     case int32:
2212     case uint32:
2213       sc_w(t0, new_val, addr, release);
2214       break;
2215     default:
2216       ShouldNotReachHere();
2217   }
2218 }
2219 
2220 
2221 void MacroAssembler::cmpxchg_narrow_value_helper(Register addr, Register expected,
2222                                                  Register new_val,
2223                                                  enum operand_size size,
2224                                                  Register tmp1, Register tmp2, Register tmp3) {
2225   assert(size == int8 || size == int16, "unsupported operand size");
2226 
2227   Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3;
2228 
2229   andi(shift, addr, 3);
2230   slli(shift, shift, 3);
2231 
2232   andi(aligned_addr, addr, ~3);
2233 
2234   if (size == int8) {
2235     mv(mask, 0xff);
2236   } else {
2237     // size == int16 case
2238     mv(mask, -1);
2239     zero_extend(mask, mask, 16);
2240   }
2241   sll(mask, mask, shift);
2242 
2243   xori(not_mask, mask, -1);
2244 
2245   sll(expected, expected, shift);
2246   andr(expected, expected, mask);
2247 
2248   sll(new_val, new_val, shift);
2249   andr(new_val, new_val, mask);
2250 }
2251 
2252 // cmpxchg_narrow_value will kill t0, t1, expected, new_val and tmps.
2253 // It's designed to implement compare and swap byte/boolean/char/short by lr.w/sc.w,
2254 // which are forced to work with 4-byte aligned address.
2255 void MacroAssembler::cmpxchg_narrow_value(Register addr, Register expected,
2256                                           Register new_val,
2257                                           enum operand_size size,
2258                                           Assembler::Aqrl acquire, Assembler::Aqrl release,
2259                                           Register result, bool result_as_bool,
2260                                           Register tmp1, Register tmp2, Register tmp3) {
2261   Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3, old = result, tmp = t0;
2262   assert_different_registers(addr, old, mask, not_mask, new_val, expected, shift, tmp);
2263   cmpxchg_narrow_value_helper(addr, expected, new_val, size, tmp1, tmp2, tmp3);
2264 
2265   Label retry, fail, done;
2266 
2267   bind(retry);
2268   lr_w(old, aligned_addr, acquire);
2269   andr(tmp, old, mask);
2270   bne(tmp, expected, fail);
2271 
2272   andr(tmp, old, not_mask);
2273   orr(tmp, tmp, new_val);
2274   sc_w(tmp, tmp, aligned_addr, release);
2275   bnez(tmp, retry);
2276 
2277   if (result_as_bool) {
2278     mv(result, 1);
2279     j(done);
2280 
2281     bind(fail);
2282     mv(result, zr);
2283 
2284     bind(done);
2285   } else {
2286     andr(tmp, old, mask);
2287 
2288     bind(fail);
2289     srl(result, tmp, shift);
2290 
2291     if (size == int8) {
2292       sign_extend(result, result, 8);
2293     } else {
2294       // size == int16 case
2295       sign_extend(result, result, 16);
2296     }
2297   }
2298 }
2299 
2300 // weak_cmpxchg_narrow_value is a weak version of cmpxchg_narrow_value, to implement
2301 // the weak CAS stuff. The major difference is that it just failed when store conditional
2302 // failed.
2303 void MacroAssembler::weak_cmpxchg_narrow_value(Register addr, Register expected,
2304                                                Register new_val,
2305                                                enum operand_size size,
2306                                                Assembler::Aqrl acquire, Assembler::Aqrl release,
2307                                                Register result,
2308                                                Register tmp1, Register tmp2, Register tmp3) {
2309   Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3, old = result, tmp = t0;
2310   assert_different_registers(addr, old, mask, not_mask, new_val, expected, shift, tmp);
2311   cmpxchg_narrow_value_helper(addr, expected, new_val, size, tmp1, tmp2, tmp3);
2312 
2313   Label fail, done;
2314 
2315   lr_w(old, aligned_addr, acquire);
2316   andr(tmp, old, mask);
2317   bne(tmp, expected, fail);
2318 
2319   andr(tmp, old, not_mask);
2320   orr(tmp, tmp, new_val);
2321   sc_w(tmp, tmp, aligned_addr, release);
2322   bnez(tmp, fail);
2323 
2324   // Success
2325   mv(result, 1);
2326   j(done);
2327 
2328   // Fail
2329   bind(fail);
2330   mv(result, zr);
2331 
2332   bind(done);
2333 }
2334 
2335 void MacroAssembler::cmpxchg(Register addr, Register expected,
2336                              Register new_val,
2337                              enum operand_size size,
2338                              Assembler::Aqrl acquire, Assembler::Aqrl release,
2339                              Register result, bool result_as_bool) {
2340   assert(size != int8 && size != int16, "unsupported operand size");
2341 
2342   Label retry_load, done, ne_done;
2343   bind(retry_load);
2344   load_reserved(addr, size, acquire);
2345   bne(t0, expected, ne_done);
2346   store_conditional(addr, new_val, size, release);
2347   bnez(t0, retry_load);
2348 
2349   // equal, succeed
2350   if (result_as_bool) {
2351     mv(result, 1);
2352   } else {
2353     mv(result, expected);
2354   }
2355   j(done);
2356 
2357   // not equal, failed
2358   bind(ne_done);
2359   if (result_as_bool) {
2360     mv(result, zr);
2361   } else {
2362     mv(result, t0);
2363   }
2364 
2365   bind(done);
2366 }
2367 
2368 void MacroAssembler::cmpxchg_weak(Register addr, Register expected,
2369                                   Register new_val,
2370                                   enum operand_size size,
2371                                   Assembler::Aqrl acquire, Assembler::Aqrl release,
2372                                   Register result) {
2373   Label fail, done;
2374   load_reserved(addr, size, acquire);
2375   bne(t0, expected, fail);
2376   store_conditional(addr, new_val, size, release);
2377   bnez(t0, fail);
2378 
2379   // Success
2380   mv(result, 1);
2381   j(done);
2382 
2383   // Fail
2384   bind(fail);
2385   mv(result, zr);
2386 
2387   bind(done);
2388 }
2389 
2390 #define ATOMIC_OP(NAME, AOP, ACQUIRE, RELEASE)                                              \
2391 void MacroAssembler::atomic_##NAME(Register prev, RegisterOrConstant incr, Register addr) { \
2392   prev = prev->is_valid() ? prev : zr;                                                      \
2393   if (incr.is_register()) {                                                                 \
2394     AOP(prev, addr, incr.as_register(), (Assembler::Aqrl)(ACQUIRE | RELEASE));              \
2395   } else {                                                                                  \
2396     mv(t0, incr.as_constant());                                                             \
2397     AOP(prev, addr, t0, (Assembler::Aqrl)(ACQUIRE | RELEASE));                              \
2398   }                                                                                         \
2399   return;                                                                                   \
2400 }
2401 
2402 ATOMIC_OP(add, amoadd_d, Assembler::relaxed, Assembler::relaxed)
2403 ATOMIC_OP(addw, amoadd_w, Assembler::relaxed, Assembler::relaxed)
2404 ATOMIC_OP(addal, amoadd_d, Assembler::aq, Assembler::rl)
2405 ATOMIC_OP(addalw, amoadd_w, Assembler::aq, Assembler::rl)
2406 
2407 #undef ATOMIC_OP
2408 
2409 #define ATOMIC_XCHG(OP, AOP, ACQUIRE, RELEASE)                                       \
2410 void MacroAssembler::atomic_##OP(Register prev, Register newv, Register addr) {      \
2411   prev = prev->is_valid() ? prev : zr;                                               \
2412   AOP(prev, addr, newv, (Assembler::Aqrl)(ACQUIRE | RELEASE));                       \
2413   return;                                                                            \
2414 }
2415 
2416 ATOMIC_XCHG(xchg, amoswap_d, Assembler::relaxed, Assembler::relaxed)
2417 ATOMIC_XCHG(xchgw, amoswap_w, Assembler::relaxed, Assembler::relaxed)
2418 ATOMIC_XCHG(xchgal, amoswap_d, Assembler::aq, Assembler::rl)
2419 ATOMIC_XCHG(xchgalw, amoswap_w, Assembler::aq, Assembler::rl)
2420 
2421 #undef ATOMIC_XCHG
2422 
2423 #define ATOMIC_XCHGU(OP1, OP2)                                                       \
2424 void MacroAssembler::atomic_##OP1(Register prev, Register newv, Register addr) {     \
2425   atomic_##OP2(prev, newv, addr);                                                    \
2426   zero_extend(prev, prev, 32);                                                       \
2427   return;                                                                            \
2428 }
2429 
2430 ATOMIC_XCHGU(xchgwu, xchgw)
2431 ATOMIC_XCHGU(xchgalwu, xchgalw)
2432 
2433 #undef ATOMIC_XCHGU
2434 
2435 void MacroAssembler::far_jump(Address entry, Register tmp) {
2436   assert(ReservedCodeCacheSize < 4*G, "branch out of range");
2437   assert(CodeCache::find_blob(entry.target()) != NULL,
2438          "destination of far call not found in code cache");
2439   assert(entry.rspec().type() == relocInfo::external_word_type
2440         || entry.rspec().type() == relocInfo::runtime_call_type
2441         || entry.rspec().type() == relocInfo::none, "wrong entry relocInfo type");
2442   int32_t offset = 0;
2443   if (far_branches()) {
2444     // We can use auipc + jalr here because we know that the total size of
2445     // the code cache cannot exceed 2Gb.
2446     la_patchable(tmp, entry, offset);
2447     jalr(x0, tmp, offset);
2448   } else {
2449     j(entry);
2450   }
2451 }
2452 
2453 void MacroAssembler::far_call(Address entry, Register tmp) {
2454   assert(ReservedCodeCacheSize < 4*G, "branch out of range");
2455   assert(CodeCache::find_blob(entry.target()) != NULL,
2456          "destination of far call not found in code cache");
2457   assert(entry.rspec().type() == relocInfo::external_word_type
2458         || entry.rspec().type() == relocInfo::runtime_call_type
2459         || entry.rspec().type() == relocInfo::none, "wrong entry relocInfo type");
2460   int32_t offset = 0;
2461   if (far_branches()) {
2462     // We can use auipc + jalr here because we know that the total size of
2463     // the code cache cannot exceed 2Gb.
2464     la_patchable(tmp, entry, offset);
2465     jalr(x1, tmp, offset); // link
2466   } else {
2467     jal(entry); // link
2468   }
2469 }
2470 
2471 void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
2472                                                    Register super_klass,
2473                                                    Register tmp_reg,
2474                                                    Label* L_success,
2475                                                    Label* L_failure,
2476                                                    Label* L_slow_path,
2477                                                    Register super_check_offset) {
2478   assert_different_registers(sub_klass, super_klass, tmp_reg);
2479   bool must_load_sco = (super_check_offset == noreg);
2480   if (must_load_sco) {
2481     assert(tmp_reg != noreg, "supply either a temp or a register offset");
2482   } else {
2483     assert_different_registers(sub_klass, super_klass, super_check_offset);
2484   }
2485 
2486   Label L_fallthrough;
2487   int label_nulls = 0;
2488   if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
2489   if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
2490   if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
2491   assert(label_nulls <= 1, "at most one NULL in batch");
2492 
2493   int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
2494   int sco_offset = in_bytes(Klass::super_check_offset_offset());
2495   Address super_check_offset_addr(super_klass, sco_offset);
2496 
2497   // Hacked jmp, which may only be used just before L_fallthrough.
2498 #define final_jmp(label)                                                \
2499   if (&(label) == &L_fallthrough) { /*do nothing*/ }                    \
2500   else                            j(label)             /*omit semi*/
2501 
2502   // If the pointers are equal, we are done (e.g., String[] elements).
2503   // This self-check enables sharing of secondary supertype arrays among
2504   // non-primary types such as array-of-interface. Otherwise, each such
2505   // type would need its own customized SSA.
2506   // We move this check to the front of the fast path because many
2507   // type checks are in fact trivially successful in this manner,
2508   // so we get a nicely predicted branch right at the start of the check.
2509   beq(sub_klass, super_klass, *L_success);
2510 
2511   // Check the supertype display:
2512   if (must_load_sco) {
2513     lwu(tmp_reg, super_check_offset_addr);
2514     super_check_offset = tmp_reg;
2515   }
2516   add(t0, sub_klass, super_check_offset);
2517   Address super_check_addr(t0);
2518   ld(t0, super_check_addr); // load displayed supertype
2519 
2520   // This check has worked decisively for primary supers.
2521   // Secondary supers are sought in the super_cache ('super_cache_addr').
2522   // (Secondary supers are interfaces and very deeply nested subtypes.)
2523   // This works in the same check above because of a tricky aliasing
2524   // between the super_Cache and the primary super display elements.
2525   // (The 'super_check_addr' can address either, as the case requires.)
2526   // Note that the cache is updated below if it does not help us find
2527   // what we need immediately.
2528   // So if it was a primary super, we can just fail immediately.
2529   // Otherwise, it's the slow path for us (no success at this point).
2530 
2531   beq(super_klass, t0, *L_success);
2532   mv(t1, sc_offset);
2533   if (L_failure == &L_fallthrough) {
2534     beq(super_check_offset, t1, *L_slow_path);
2535   } else {
2536     bne(super_check_offset, t1, *L_failure, /* is_far */ true);
2537     final_jmp(*L_slow_path);
2538   }
2539 
2540   bind(L_fallthrough);
2541 
2542 #undef final_jmp
2543 }
2544 
2545 // Scans count pointer sized words at [addr] for occurrence of value,
2546 // generic
2547 void MacroAssembler::repne_scan(Register addr, Register value, Register count,
2548                                 Register tmp) {
2549   Label Lloop, Lexit;
2550   beqz(count, Lexit);
2551   bind(Lloop);
2552   ld(tmp, addr);
2553   beq(value, tmp, Lexit);
2554   add(addr, addr, wordSize);
2555   sub(count, count, 1);
2556   bnez(count, Lloop);
2557   bind(Lexit);
2558 }
2559 
2560 void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
2561                                                    Register super_klass,
2562                                                    Register tmp1_reg,
2563                                                    Register tmp2_reg,
2564                                                    Label* L_success,
2565                                                    Label* L_failure) {
2566   assert_different_registers(sub_klass, super_klass, tmp1_reg);
2567   if (tmp2_reg != noreg) {
2568     assert_different_registers(sub_klass, super_klass, tmp1_reg, tmp2_reg, t0);
2569   }
2570 #define IS_A_TEMP(reg) ((reg) == tmp1_reg || (reg) == tmp2_reg)
2571 
2572   Label L_fallthrough;
2573   int label_nulls = 0;
2574   if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
2575   if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
2576 
2577   assert(label_nulls <= 1, "at most one NULL in the batch");
2578 
2579   // A couple of useful fields in sub_klass:
2580   int ss_offset = in_bytes(Klass::secondary_supers_offset());
2581   int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
2582   Address secondary_supers_addr(sub_klass, ss_offset);
2583   Address super_cache_addr(     sub_klass, sc_offset);
2584 
2585   BLOCK_COMMENT("check_klass_subtype_slow_path");
2586 
2587   // Do a linear scan of the secondary super-klass chain.
2588   // This code is rarely used, so simplicity is a virtue here.
2589   // The repne_scan instruction uses fixed registers, which we must spill.
2590   // Don't worry too much about pre-existing connections with the input regs.
2591 
2592   assert(sub_klass != x10, "killed reg"); // killed by mv(x10, super)
2593   assert(sub_klass != x12, "killed reg"); // killed by la(x12, &pst_counter)
2594 
2595   RegSet pushed_registers;
2596   if (!IS_A_TEMP(x12)) {
2597     pushed_registers += x12;
2598   }
2599   if (!IS_A_TEMP(x15)) {
2600     pushed_registers += x15;
2601   }
2602 
2603   if (super_klass != x10) {
2604     if (!IS_A_TEMP(x10)) {
2605       pushed_registers += x10;
2606     }
2607   }
2608 
2609   push_reg(pushed_registers, sp);
2610 
2611   // Get super_klass value into x10 (even if it was in x15 or x12)
2612   mv(x10, super_klass);
2613 
2614 #ifndef PRODUCT
2615   mv(t1, (address)&SharedRuntime::_partial_subtype_ctr);
2616   Address pst_counter_addr(t1);
2617   ld(t0, pst_counter_addr);
2618   add(t0, t0, 1);
2619   sd(t0, pst_counter_addr);
2620 #endif // PRODUCT
2621 
2622   // We will consult the secondary-super array.
2623   ld(x15, secondary_supers_addr);
2624   // Load the array length.
2625   lwu(x12, Address(x15, Array<Klass*>::length_offset_in_bytes()));
2626   // Skip to start of data.
2627   add(x15, x15, Array<Klass*>::base_offset_in_bytes());
2628 
2629   // Set t0 to an obvious invalid value, falling through by default
2630   mv(t0, -1);
2631   // Scan X12 words at [X15] for an occurrence of X10.
2632   repne_scan(x15, x10, x12, t0);
2633 
2634   // pop will restore x10, so we should use a temp register to keep its value
2635   mv(t1, x10);
2636 
2637   // Unspill the temp registers:
2638   pop_reg(pushed_registers, sp);
2639 
2640   bne(t1, t0, *L_failure);
2641 
2642   // Success. Cache the super we found an proceed in triumph.
2643   sd(super_klass, super_cache_addr);
2644 
2645   if (L_success != &L_fallthrough) {
2646     j(*L_success);
2647   }
2648 
2649 #undef IS_A_TEMP
2650 
2651   bind(L_fallthrough);
2652 }
2653 
2654 // Defines obj, preserves var_size_in_bytes, okay for tmp2 == var_size_in_bytes.
2655 void MacroAssembler::tlab_allocate(Register obj,
2656                                    Register var_size_in_bytes,
2657                                    int con_size_in_bytes,
2658                                    Register tmp1,
2659                                    Register tmp2,
2660                                    Label& slow_case,
2661                                    bool is_far) {
2662   BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
2663   bs->tlab_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, tmp1, tmp2, slow_case, is_far);
2664 }
2665 
2666 // get_thread() can be called anywhere inside generated code so we
2667 // need to save whatever non-callee save context might get clobbered
2668 // by the call to Thread::current() or, indeed, the call setup code.
2669 void MacroAssembler::get_thread(Register thread) {
2670   // save all call-clobbered regs except thread
2671   RegSet saved_regs = RegSet::range(x5, x7) + RegSet::range(x10, x17) +
2672                       RegSet::range(x28, x31) + ra - thread;
2673   push_reg(saved_regs, sp);
2674 
2675   mv(ra, CAST_FROM_FN_PTR(address, Thread::current));
2676   jalr(ra);
2677   if (thread != c_rarg0) {
2678     mv(thread, c_rarg0);
2679   }
2680 
2681   // restore pushed registers
2682   pop_reg(saved_regs, sp);
2683 }
2684 
2685 void MacroAssembler::load_byte_map_base(Register reg) {
2686   CardTable::CardValue* byte_map_base =
2687     ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base();
2688   mv(reg, (uint64_t)byte_map_base);
2689 }
2690 
2691 void MacroAssembler::la_patchable(Register reg1, const Address &dest, int32_t &offset) {
2692   unsigned long low_address = (uintptr_t)CodeCache::low_bound();
2693   unsigned long high_address = (uintptr_t)CodeCache::high_bound();
2694   unsigned long dest_address = (uintptr_t)dest.target();
2695   long offset_low = dest_address - low_address;
2696   long offset_high = dest_address - high_address;
2697 
2698   assert(is_valid_riscv64_address(dest.target()), "bad address");
2699   assert(dest.getMode() == Address::literal, "la_patchable must be applied to a literal address");
2700 
2701   relocate(dest.rspec());
2702   // RISC-V doesn't compute a page-aligned address, in order to partially
2703   // compensate for the use of *signed* offsets in its base+disp12
2704   // addressing mode (RISC-V's PC-relative reach remains asymmetric
2705   // [-(2G + 2K), 2G - 2K).
2706   if (offset_high >= -((1L << 31) + (1L << 11)) && offset_low < (1L << 31) - (1L << 11)) {
2707     int64_t distance = dest.target() - pc();
2708     auipc(reg1, (int32_t)distance + 0x800);
2709     offset = ((int32_t)distance << 20) >> 20;
2710   } else {
2711     movptr(reg1, dest.target(), offset);
2712   }
2713 }
2714 
2715 void MacroAssembler::build_frame(int framesize) {
2716   assert(framesize >= 2, "framesize must include space for FP/RA");
2717   assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment");
2718   sub(sp, sp, framesize);
2719   sd(fp, Address(sp, framesize - 2 * wordSize));
2720   sd(ra, Address(sp, framesize - wordSize));
2721   if (PreserveFramePointer) { add(fp, sp, framesize); }
2722 }
2723 
2724 void MacroAssembler::remove_frame(int framesize) {
2725   assert(framesize >= 2, "framesize must include space for FP/RA");
2726   assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment");
2727   ld(fp, Address(sp, framesize - 2 * wordSize));
2728   ld(ra, Address(sp, framesize - wordSize));
2729   add(sp, sp, framesize);
2730 }
2731 
2732 void MacroAssembler::reserved_stack_check() {
2733     // testing if reserved zone needs to be enabled
2734     Label no_reserved_zone_enabling;
2735 
2736     ld(t0, Address(xthread, JavaThread::reserved_stack_activation_offset()));
2737     bltu(sp, t0, no_reserved_zone_enabling);
2738 
2739     enter();   // RA and FP are live.
2740     mv(c_rarg0, xthread);
2741     int32_t offset = 0;
2742     la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone)), offset);
2743     jalr(x1, t0, offset);
2744     leave();
2745 
2746     // We have already removed our own frame.
2747     // throw_delayed_StackOverflowError will think that it's been
2748     // called by our caller.
2749     offset = 0;
2750     la_patchable(t0, RuntimeAddress(StubRoutines::throw_delayed_StackOverflowError_entry()), offset);
2751     jalr(x0, t0, offset);
2752     should_not_reach_here();
2753 
2754     bind(no_reserved_zone_enabling);
2755 }
2756 
2757 // Move the address of the polling page into dest.
2758 void MacroAssembler::get_polling_page(Register dest, relocInfo::relocType rtype) {
2759   ld(dest, Address(xthread, JavaThread::polling_page_offset()));
2760 }
2761 
2762 // Read the polling page.  The address of the polling page must
2763 // already be in r.
2764 void MacroAssembler::read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype) {
2765   relocate(rtype);
2766   lwu(zr, Address(r, offset));
2767 }
2768 
2769 void  MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
2770 #ifdef ASSERT
2771   {
2772     ThreadInVMfromUnknown tiv;
2773     assert (UseCompressedOops, "should only be used for compressed oops");
2774     assert (Universe::heap() != NULL, "java heap should be initialized");
2775     assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
2776     assert(Universe::heap()->is_in(JNIHandles::resolve(obj)), "should be real oop");
2777   }
2778 #endif
2779   int oop_index = oop_recorder()->find_index(obj);
2780   relocate(oop_Relocation::spec(oop_index));
2781   li32(dst, 0xDEADBEEF);
2782   zero_extend(dst, dst, 32);
2783 }
2784 
2785 void  MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
2786   assert (UseCompressedClassPointers, "should only be used for compressed headers");
2787   assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
2788   int index = oop_recorder()->find_index(k);
2789   assert(!Universe::heap()->is_in(k), "should not be an oop");
2790 
2791   narrowKlass nk = CompressedKlassPointers::encode(k);
2792   relocate(metadata_Relocation::spec(index));
2793   li32(dst, nk);
2794   zero_extend(dst, dst, 32);
2795 }
2796 
2797 // Maybe emit a call via a trampoline.  If the code cache is small
2798 // trampolines won't be emitted.
2799 address MacroAssembler::trampoline_call(Address entry) {
2800   assert(JavaThread::current()->is_Compiler_thread(), "just checking");
2801   assert(entry.rspec().type() == relocInfo::runtime_call_type ||
2802          entry.rspec().type() == relocInfo::opt_virtual_call_type ||
2803          entry.rspec().type() == relocInfo::static_call_type ||
2804          entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type");
2805 
2806   // We need a trampoline if branches are far.
2807   if (far_branches()) {
2808     bool in_scratch_emit_size = false;
2809 #ifdef COMPILER2
2810     // We don't want to emit a trampoline if C2 is generating dummy
2811     // code during its branch shortening phase.
2812     CompileTask* task = ciEnv::current()->task();
2813     in_scratch_emit_size =
2814       (task != NULL && is_c2_compile(task->comp_level()) &&
2815        Compile::current()->output()->in_scratch_emit_size());
2816 #endif
2817     if (!in_scratch_emit_size) {
2818       address stub = emit_trampoline_stub(offset(), entry.target());
2819       if (stub == NULL) {
2820         postcond(pc() == badAddress);
2821         return NULL; // CodeCache is full
2822       }
2823     }
2824   }
2825 
2826   address call_pc = pc();
2827 #ifdef ASSERT
2828   if (entry.rspec().type() != relocInfo::runtime_call_type) {
2829     assert_alignment(call_pc);
2830   }
2831 #endif
2832   relocate(entry.rspec());
2833   if (!far_branches()) {
2834     jal(entry.target());
2835   } else {
2836     jal(pc());
2837   }
2838 
2839   postcond(pc() != badAddress);
2840   return call_pc;
2841 }
2842 
2843 address MacroAssembler::ic_call(address entry, jint method_index) {
2844   RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index);
2845   movptr(t1, (address)Universe::non_oop_word());
2846   assert_cond(entry != NULL);
2847   return trampoline_call(Address(entry, rh));
2848 }
2849 
2850 // Emit a trampoline stub for a call to a target which is too far away.
2851 //
2852 // code sequences:
2853 //
2854 // call-site:
2855 //   branch-and-link to <destination> or <trampoline stub>
2856 //
2857 // Related trampoline stub for this call site in the stub section:
2858 //   load the call target from the constant pool
2859 //   branch (RA still points to the call site above)
2860 
2861 address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset,
2862                                              address dest) {
2863   address stub = start_a_stub(NativeInstruction::instruction_size
2864                             + NativeCallTrampolineStub::instruction_size);
2865   if (stub == NULL) {
2866     return NULL;  // CodeBuffer::expand failed
2867   }
2868 
2869   // Create a trampoline stub relocation which relates this trampoline stub
2870   // with the call instruction at insts_call_instruction_offset in the
2871   // instructions code-section.
2872 
2873   // Make sure the address of destination 8-byte aligned after 3 instructions.
2874   align(wordSize, NativeCallTrampolineStub::data_offset);
2875 
2876   relocate(trampoline_stub_Relocation::spec(code()->insts()->start() +
2877                                             insts_call_instruction_offset));
2878   const int stub_start_offset = offset();
2879 
2880   // Now, create the trampoline stub's code:
2881   // - load the call
2882   // - call
2883   Label target;
2884   ld(t0, target);  // auipc + ld
2885   jr(t0);          // jalr
2886   bind(target);
2887   assert(offset() - stub_start_offset == NativeCallTrampolineStub::data_offset,
2888          "should be");
2889   assert(offset() % wordSize == 0, "bad alignment");
2890   emit_int64((intptr_t)dest);
2891 
2892   const address stub_start_addr = addr_at(stub_start_offset);
2893 
2894   assert(is_NativeCallTrampolineStub_at(stub_start_addr), "doesn't look like a trampoline");
2895 
2896   end_a_stub();
2897   return stub_start_addr;
2898 }
2899 
2900 Address MacroAssembler::add_memory_helper(const Address dst) {
2901   switch (dst.getMode()) {
2902     case Address::base_plus_offset:
2903       // This is the expected mode, although we allow all the other
2904       // forms below.
2905       return form_address(t1, dst.base(), dst.offset());
2906     default:
2907       la(t1, dst);
2908       return Address(t1);
2909   }
2910 }
2911 
2912 void MacroAssembler::increment(const Address dst, int64_t value) {
2913   assert(((dst.getMode() == Address::base_plus_offset &&
2914            is_offset_in_range(dst.offset(), 12)) || is_imm_in_range(value, 12, 0)),
2915           "invalid value and address mode combination");
2916   Address adr = add_memory_helper(dst);
2917   assert(!adr.uses(t0), "invalid dst for address increment");
2918   ld(t0, adr);
2919   add(t0, t0, value, t1);
2920   sd(t0, adr);
2921 }
2922 
2923 void MacroAssembler::incrementw(const Address dst, int32_t value) {
2924   assert(((dst.getMode() == Address::base_plus_offset &&
2925            is_offset_in_range(dst.offset(), 12)) || is_imm_in_range(value, 12, 0)),
2926           "invalid value and address mode combination");
2927   Address adr = add_memory_helper(dst);
2928   assert(!adr.uses(t0), "invalid dst for address increment");
2929   lwu(t0, adr);
2930   addw(t0, t0, value, t1);
2931   sw(t0, adr);
2932 }
2933 
2934 void MacroAssembler::decrement(const Address dst, int64_t value) {
2935   assert(((dst.getMode() == Address::base_plus_offset &&
2936            is_offset_in_range(dst.offset(), 12)) || is_imm_in_range(value, 12, 0)),
2937           "invalid value and address mode combination");
2938   Address adr = add_memory_helper(dst);
2939   assert(!adr.uses(t0), "invalid dst for address decrement");
2940   ld(t0, adr);
2941   sub(t0, t0, value, t1);
2942   sd(t0, adr);
2943 }
2944 
2945 void MacroAssembler::decrementw(const Address dst, int32_t value) {
2946   assert(((dst.getMode() == Address::base_plus_offset &&
2947            is_offset_in_range(dst.offset(), 12)) || is_imm_in_range(value, 12, 0)),
2948           "invalid value and address mode combination");
2949   Address adr = add_memory_helper(dst);
2950   assert(!adr.uses(t0), "invalid dst for address decrement");
2951   lwu(t0, adr);
2952   subw(t0, t0, value, t1);
2953   sw(t0, adr);
2954 }
2955 
2956 void MacroAssembler::cmpptr(Register src1, Address src2, Label& equal) {
2957   assert_different_registers(src1, t0);
2958   int32_t offset;
2959   la_patchable(t0, src2, offset);
2960   ld(t0, Address(t0, offset));
2961   beq(src1, t0, equal);
2962 }
2963 
2964 void MacroAssembler::load_method_holder_cld(Register result, Register method) {
2965   load_method_holder(result, method);
2966   ld(result, Address(result, InstanceKlass::class_loader_data_offset()));
2967 }
2968 
2969 void MacroAssembler::load_method_holder(Register holder, Register method) {
2970   ld(holder, Address(method, Method::const_offset()));                      // ConstMethod*
2971   ld(holder, Address(holder, ConstMethod::constants_offset()));             // ConstantPool*
2972   ld(holder, Address(holder, ConstantPool::pool_holder_offset_in_bytes())); // InstanceKlass*
2973 }
2974 
2975 // string indexof
2976 // compute index by trailing zeros
2977 void MacroAssembler::compute_index(Register haystack, Register trailing_zeros,
2978                                    Register match_mask, Register result,
2979                                    Register ch2, Register tmp,
2980                                    bool haystack_isL)
2981 {
2982   int haystack_chr_shift = haystack_isL ? 0 : 1;
2983   srl(match_mask, match_mask, trailing_zeros);
2984   srli(match_mask, match_mask, 1);
2985   srli(tmp, trailing_zeros, LogBitsPerByte);
2986   if (!haystack_isL) andi(tmp, tmp, 0xE);
2987   add(haystack, haystack, tmp);
2988   ld(ch2, Address(haystack));
2989   if (!haystack_isL) srli(tmp, tmp, haystack_chr_shift);
2990   add(result, result, tmp);
2991 }
2992 
2993 // string indexof
2994 // Find pattern element in src, compute match mask,
2995 // only the first occurrence of 0x80/0x8000 at low bits is the valid match index
2996 // match mask patterns and corresponding indices would be like:
2997 // - 0x8080808080808080 (Latin1)
2998 // -   7 6 5 4 3 2 1 0  (match index)
2999 // - 0x8000800080008000 (UTF16)
3000 // -   3   2   1   0    (match index)
3001 void MacroAssembler::compute_match_mask(Register src, Register pattern, Register match_mask,
3002                                         Register mask1, Register mask2)
3003 {
3004   xorr(src, pattern, src);
3005   sub(match_mask, src, mask1);
3006   orr(src, src, mask2);
3007   notr(src, src);
3008   andr(match_mask, match_mask, src);
3009 }
3010 
3011 #ifdef COMPILER2
3012 // Code for BigInteger::mulAdd intrinsic
3013 // out     = x10
3014 // in      = x11
3015 // offset  = x12  (already out.length-offset)
3016 // len     = x13
3017 // k       = x14
3018 // tmp     = x28
3019 //
3020 // pseudo code from java implementation:
3021 // long kLong = k & LONG_MASK;
3022 // carry = 0;
3023 // offset = out.length-offset - 1;
3024 // for (int j = len - 1; j >= 0; j--) {
3025 //     product = (in[j] & LONG_MASK) * kLong + (out[offset] & LONG_MASK) + carry;
3026 //     out[offset--] = (int)product;
3027 //     carry = product >>> 32;
3028 // }
3029 // return (int)carry;
3030 void MacroAssembler::mul_add(Register out, Register in, Register offset,
3031                              Register len, Register k, Register tmp) {
3032   Label L_tail_loop, L_unroll, L_end;
3033   mv(tmp, out);
3034   mv(out, zr);
3035   blez(len, L_end);
3036   zero_extend(k, k, 32);
3037   slliw(t0, offset, LogBytesPerInt);
3038   add(offset, tmp, t0);
3039   slliw(t0, len, LogBytesPerInt);
3040   add(in, in, t0);
3041 
3042   const int unroll = 8;
3043   mv(tmp, unroll);
3044   blt(len, tmp, L_tail_loop);
3045   bind(L_unroll);
3046   for (int i = 0; i < unroll; i++) {
3047     sub(in, in, BytesPerInt);
3048     lwu(t0, Address(in, 0));
3049     mul(t1, t0, k);
3050     add(t0, t1, out);
3051     sub(offset, offset, BytesPerInt);
3052     lwu(t1, Address(offset, 0));
3053     add(t0, t0, t1);
3054     sw(t0, Address(offset, 0));
3055     srli(out, t0, 32);
3056   }
3057   subw(len, len, tmp);
3058   bge(len, tmp, L_unroll);
3059 
3060   bind(L_tail_loop);
3061   blez(len, L_end);
3062   sub(in, in, BytesPerInt);
3063   lwu(t0, Address(in, 0));
3064   mul(t1, t0, k);
3065   add(t0, t1, out);
3066   sub(offset, offset, BytesPerInt);
3067   lwu(t1, Address(offset, 0));
3068   add(t0, t0, t1);
3069   sw(t0, Address(offset, 0));
3070   srli(out, t0, 32);
3071   subw(len, len, 1);
3072   j(L_tail_loop);
3073 
3074   bind(L_end);
3075 }
3076 
3077 // add two unsigned input and output carry
3078 void MacroAssembler::cad(Register dst, Register src1, Register src2, Register carry)
3079 {
3080   assert_different_registers(dst, carry);
3081   assert_different_registers(dst, src2);
3082   add(dst, src1, src2);
3083   sltu(carry, dst, src2);
3084 }
3085 
3086 // add two input with carry
3087 void MacroAssembler::adc(Register dst, Register src1, Register src2, Register carry)
3088 {
3089   assert_different_registers(dst, carry);
3090   add(dst, src1, src2);
3091   add(dst, dst, carry);
3092 }
3093 
3094 // add two unsigned input with carry and output carry
3095 void MacroAssembler::cadc(Register dst, Register src1, Register src2, Register carry)
3096 {
3097   assert_different_registers(dst, src2);
3098   adc(dst, src1, src2, carry);
3099   sltu(carry, dst, src2);
3100 }
3101 
3102 void MacroAssembler::add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo,
3103                                      Register src1, Register src2, Register carry)
3104 {
3105   cad(dest_lo, dest_lo, src1, carry);
3106   add(dest_hi, dest_hi, carry);
3107   cad(dest_lo, dest_lo, src2, carry);
3108   add(final_dest_hi, dest_hi, carry);
3109 }
3110 
3111 /**
3112  * Multiply 32 bit by 32 bit first loop.
3113  */
3114 void MacroAssembler::multiply_32_x_32_loop(Register x, Register xstart, Register x_xstart,
3115                                            Register y, Register y_idx, Register z,
3116                                            Register carry, Register product,
3117                                            Register idx, Register kdx)
3118 {
3119   // jlong carry, x[], y[], z[];
3120   // for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) {
3121   //     long product = y[idx] * x[xstart] + carry;
3122   //     z[kdx] = (int)product;
3123   //     carry = product >>> 32;
3124   // }
3125   // z[xstart] = (int)carry;
3126 
3127   Label L_first_loop, L_first_loop_exit;
3128   blez(idx, L_first_loop_exit);
3129 
3130   shadd(t0, xstart, x, t0, LogBytesPerInt);
3131   lwu(x_xstart, Address(t0, 0));
3132 
3133   bind(L_first_loop);
3134   subw(idx, idx, 1);
3135   shadd(t0, idx, y, t0, LogBytesPerInt);
3136   lwu(y_idx, Address(t0, 0));
3137   mul(product, x_xstart, y_idx);
3138   add(product, product, carry);
3139   srli(carry, product, 32);
3140   subw(kdx, kdx, 1);
3141   shadd(t0, kdx, z, t0, LogBytesPerInt);
3142   sw(product, Address(t0, 0));
3143   bgtz(idx, L_first_loop);
3144 
3145   bind(L_first_loop_exit);
3146 }
3147 
3148 /**
3149  * Multiply 64 bit by 64 bit first loop.
3150  */
3151 void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart,
3152                                            Register y, Register y_idx, Register z,
3153                                            Register carry, Register product,
3154                                            Register idx, Register kdx)
3155 {
3156   //
3157   //  jlong carry, x[], y[], z[];
3158   //  for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) {
3159   //    huge_128 product = y[idx] * x[xstart] + carry;
3160   //    z[kdx] = (jlong)product;
3161   //    carry  = (jlong)(product >>> 64);
3162   //  }
3163   //  z[xstart] = carry;
3164   //
3165 
3166   Label L_first_loop, L_first_loop_exit;
3167   Label L_one_x, L_one_y, L_multiply;
3168 
3169   subw(xstart, xstart, 1);
3170   bltz(xstart, L_one_x);
3171 
3172   shadd(t0, xstart, x, t0, LogBytesPerInt);
3173   ld(x_xstart, Address(t0, 0));
3174   ror_imm(x_xstart, x_xstart, 32); // convert big-endian to little-endian
3175 
3176   bind(L_first_loop);
3177   subw(idx, idx, 1);
3178   bltz(idx, L_first_loop_exit);
3179   subw(idx, idx, 1);
3180   bltz(idx, L_one_y);
3181 
3182   shadd(t0, idx, y, t0, LogBytesPerInt);
3183   ld(y_idx, Address(t0, 0));
3184   ror_imm(y_idx, y_idx, 32); // convert big-endian to little-endian
3185   bind(L_multiply);
3186 
3187   mulhu(t0, x_xstart, y_idx);
3188   mul(product, x_xstart, y_idx);
3189   cad(product, product, carry, t1);
3190   adc(carry, t0, zr, t1);
3191 
3192   subw(kdx, kdx, 2);
3193   ror_imm(product, product, 32); // back to big-endian
3194   shadd(t0, kdx, z, t0, LogBytesPerInt);
3195   sd(product, Address(t0, 0));
3196 
3197   j(L_first_loop);
3198 
3199   bind(L_one_y);
3200   lwu(y_idx, Address(y, 0));
3201   j(L_multiply);
3202 
3203   bind(L_one_x);
3204   lwu(x_xstart, Address(x, 0));
3205   j(L_first_loop);
3206 
3207   bind(L_first_loop_exit);
3208 }
3209 
3210 /**
3211  * Multiply 128 bit by 128 bit. Unrolled inner loop.
3212  *
3213  */
3214 void MacroAssembler::multiply_128_x_128_loop(Register y, Register z,
3215                                              Register carry, Register carry2,
3216                                              Register idx, Register jdx,
3217                                              Register yz_idx1, Register yz_idx2,
3218                                              Register tmp, Register tmp3, Register tmp4,
3219                                              Register tmp6, Register product_hi)
3220 {
3221   //   jlong carry, x[], y[], z[];
3222   //   int kdx = xstart+1;
3223   //   for (int idx=ystart-2; idx >= 0; idx -= 2) { // Third loop
3224   //     huge_128 tmp3 = (y[idx+1] * product_hi) + z[kdx+idx+1] + carry;
3225   //     jlong carry2  = (jlong)(tmp3 >>> 64);
3226   //     huge_128 tmp4 = (y[idx]   * product_hi) + z[kdx+idx] + carry2;
3227   //     carry  = (jlong)(tmp4 >>> 64);
3228   //     z[kdx+idx+1] = (jlong)tmp3;
3229   //     z[kdx+idx] = (jlong)tmp4;
3230   //   }
3231   //   idx += 2;
3232   //   if (idx > 0) {
3233   //     yz_idx1 = (y[idx] * product_hi) + z[kdx+idx] + carry;
3234   //     z[kdx+idx] = (jlong)yz_idx1;
3235   //     carry  = (jlong)(yz_idx1 >>> 64);
3236   //   }
3237   //
3238 
3239   Label L_third_loop, L_third_loop_exit, L_post_third_loop_done;
3240 
3241   srliw(jdx, idx, 2);
3242 
3243   bind(L_third_loop);
3244 
3245   subw(jdx, jdx, 1);
3246   bltz(jdx, L_third_loop_exit);
3247   subw(idx, idx, 4);
3248 
3249   shadd(t0, idx, y, t0, LogBytesPerInt);
3250   ld(yz_idx2, Address(t0, 0));
3251   ld(yz_idx1, Address(t0, wordSize));
3252 
3253   shadd(tmp6, idx, z, t0, LogBytesPerInt);
3254 
3255   ror_imm(yz_idx1, yz_idx1, 32); // convert big-endian to little-endian
3256   ror_imm(yz_idx2, yz_idx2, 32);
3257 
3258   ld(t1, Address(tmp6, 0));
3259   ld(t0, Address(tmp6, wordSize));
3260 
3261   mul(tmp3, product_hi, yz_idx1); //  yz_idx1 * product_hi -> tmp4:tmp3
3262   mulhu(tmp4, product_hi, yz_idx1);
3263 
3264   ror_imm(t0, t0, 32, tmp); // convert big-endian to little-endian
3265   ror_imm(t1, t1, 32, tmp);
3266 
3267   mul(tmp, product_hi, yz_idx2); //  yz_idx2 * product_hi -> carry2:tmp
3268   mulhu(carry2, product_hi, yz_idx2);
3269 
3270   cad(tmp3, tmp3, carry, carry);
3271   adc(tmp4, tmp4, zr, carry);
3272   cad(tmp3, tmp3, t0, t0);
3273   cadc(tmp4, tmp4, tmp, t0);
3274   adc(carry, carry2, zr, t0);
3275   cad(tmp4, tmp4, t1, carry2);
3276   adc(carry, carry, zr, carry2);
3277 
3278   ror_imm(tmp3, tmp3, 32); // convert little-endian to big-endian
3279   ror_imm(tmp4, tmp4, 32);
3280   sd(tmp4, Address(tmp6, 0));
3281   sd(tmp3, Address(tmp6, wordSize));
3282 
3283   j(L_third_loop);
3284 
3285   bind(L_third_loop_exit);
3286 
3287   andi(idx, idx, 0x3);
3288   beqz(idx, L_post_third_loop_done);
3289 
3290   Label L_check_1;
3291   subw(idx, idx, 2);
3292   bltz(idx, L_check_1);
3293 
3294   shadd(t0, idx, y, t0, LogBytesPerInt);
3295   ld(yz_idx1, Address(t0, 0));
3296   ror_imm(yz_idx1, yz_idx1, 32);
3297 
3298   mul(tmp3, product_hi, yz_idx1); //  yz_idx1 * product_hi -> tmp4:tmp3
3299   mulhu(tmp4, product_hi, yz_idx1);
3300 
3301   shadd(t0, idx, z, t0, LogBytesPerInt);
3302   ld(yz_idx2, Address(t0, 0));
3303   ror_imm(yz_idx2, yz_idx2, 32, tmp);
3304 
3305   add2_with_carry(carry, tmp4, tmp3, carry, yz_idx2, tmp);
3306 
3307   ror_imm(tmp3, tmp3, 32, tmp);
3308   sd(tmp3, Address(t0, 0));
3309 
3310   bind(L_check_1);
3311 
3312   andi(idx, idx, 0x1);
3313   subw(idx, idx, 1);
3314   bltz(idx, L_post_third_loop_done);
3315   shadd(t0, idx, y, t0, LogBytesPerInt);
3316   lwu(tmp4, Address(t0, 0));
3317   mul(tmp3, tmp4, product_hi); //  tmp4 * product_hi -> carry2:tmp3
3318   mulhu(carry2, tmp4, product_hi);
3319 
3320   shadd(t0, idx, z, t0, LogBytesPerInt);
3321   lwu(tmp4, Address(t0, 0));
3322 
3323   add2_with_carry(carry2, carry2, tmp3, tmp4, carry, t0);
3324 
3325   shadd(t0, idx, z, t0, LogBytesPerInt);
3326   sw(tmp3, Address(t0, 0));
3327 
3328   slli(t0, carry2, 32);
3329   srli(carry, tmp3, 32);
3330   orr(carry, carry, t0);
3331 
3332   bind(L_post_third_loop_done);
3333 }
3334 
3335 /**
3336  * Code for BigInteger::multiplyToLen() intrinsic.
3337  *
3338  * x10: x
3339  * x11: xlen
3340  * x12: y
3341  * x13: ylen
3342  * x14: z
3343  * x15: zlen
3344  * x16: tmp1
3345  * x17: tmp2
3346  * x7:  tmp3
3347  * x28: tmp4
3348  * x29: tmp5
3349  * x30: tmp6
3350  * x31: tmp7
3351  */
3352 void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Register ylen,
3353                                      Register z, Register zlen,
3354                                      Register tmp1, Register tmp2, Register tmp3, Register tmp4,
3355                                      Register tmp5, Register tmp6, Register product_hi)
3356 {
3357   assert_different_registers(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6);
3358 
3359   const Register idx = tmp1;
3360   const Register kdx = tmp2;
3361   const Register xstart = tmp3;
3362 
3363   const Register y_idx = tmp4;
3364   const Register carry = tmp5;
3365   const Register product = xlen;
3366   const Register x_xstart = zlen; // reuse register
3367 
3368   mv(idx, ylen); // idx = ylen;
3369   mv(kdx, zlen); // kdx = xlen+ylen;
3370   mv(carry, zr); // carry = 0;
3371 
3372   Label L_multiply_64_x_64_loop, L_done;
3373 
3374   subw(xstart, xlen, 1);
3375   bltz(xstart, L_done);
3376 
3377   const Register jdx = tmp1;
3378 
3379   if (AvoidUnalignedAccesses) {
3380     // Check if x and y are both 8-byte aligned.
3381     orr(t0, xlen, ylen);
3382     andi(t0, t0, 0x1);
3383     beqz(t0, L_multiply_64_x_64_loop);
3384 
3385     multiply_32_x_32_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx);
3386     shadd(t0, xstart, z, t0, LogBytesPerInt);
3387     sw(carry, Address(t0, 0));
3388 
3389     Label L_second_loop_unaligned;
3390     bind(L_second_loop_unaligned);
3391     mv(carry, zr);
3392     mv(jdx, ylen);
3393     subw(xstart, xstart, 1);
3394     bltz(xstart, L_done);
3395     sub(sp, sp, 2 * wordSize);
3396     sd(z, Address(sp, 0));
3397     sd(zr, Address(sp, wordSize));
3398     shadd(t0, xstart, z, t0, LogBytesPerInt);
3399     addi(z, t0, 4);
3400     shadd(t0, xstart, x, t0, LogBytesPerInt);
3401     lwu(product, Address(t0, 0));
3402     Label L_third_loop, L_third_loop_exit;
3403 
3404     blez(jdx, L_third_loop_exit);
3405 
3406     bind(L_third_loop);
3407     subw(jdx, jdx, 1);
3408     shadd(t0, jdx, y, t0, LogBytesPerInt);
3409     lwu(t0, Address(t0, 0));
3410     mul(t1, t0, product);
3411     add(t0, t1, carry);
3412     shadd(tmp6, jdx, z, t1, LogBytesPerInt);
3413     lwu(t1, Address(tmp6, 0));
3414     add(t0, t0, t1);
3415     sw(t0, Address(tmp6, 0));
3416     srli(carry, t0, 32);
3417     bgtz(jdx, L_third_loop);
3418 
3419     bind(L_third_loop_exit);
3420     ld(z, Address(sp, 0));
3421     addi(sp, sp, 2 * wordSize);
3422     shadd(t0, xstart, z, t0, LogBytesPerInt);
3423     sw(carry, Address(t0, 0));
3424 
3425     j(L_second_loop_unaligned);
3426   }
3427 
3428   bind(L_multiply_64_x_64_loop);
3429   multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx);
3430 
3431   Label L_second_loop_aligned;
3432   beqz(kdx, L_second_loop_aligned);
3433 
3434   Label L_carry;
3435   subw(kdx, kdx, 1);
3436   beqz(kdx, L_carry);
3437 
3438   shadd(t0, kdx, z, t0, LogBytesPerInt);
3439   sw(carry, Address(t0, 0));
3440   srli(carry, carry, 32);
3441   subw(kdx, kdx, 1);
3442 
3443   bind(L_carry);
3444   shadd(t0, kdx, z, t0, LogBytesPerInt);
3445   sw(carry, Address(t0, 0));
3446 
3447   // Second and third (nested) loops.
3448   //
3449   // for (int i = xstart-1; i >= 0; i--) { // Second loop
3450   //   carry = 0;
3451   //   for (int jdx=ystart, k=ystart+1+i; jdx >= 0; jdx--, k--) { // Third loop
3452   //     long product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) +
3453   //                    (z[k] & LONG_MASK) + carry;
3454   //     z[k] = (int)product;
3455   //     carry = product >>> 32;
3456   //   }
3457   //   z[i] = (int)carry;
3458   // }
3459   //
3460   // i = xlen, j = tmp1, k = tmp2, carry = tmp5, x[i] = product_hi
3461 
3462   bind(L_second_loop_aligned);
3463   mv(carry, zr); // carry = 0;
3464   mv(jdx, ylen); // j = ystart+1
3465 
3466   subw(xstart, xstart, 1); // i = xstart-1;
3467   bltz(xstart, L_done);
3468 
3469   sub(sp, sp, 4 * wordSize);
3470   sd(z, Address(sp, 0));
3471 
3472   Label L_last_x;
3473   shadd(t0, xstart, z, t0, LogBytesPerInt);
3474   addi(z, t0, 4);
3475   subw(xstart, xstart, 1); // i = xstart-1;
3476   bltz(xstart, L_last_x);
3477 
3478   shadd(t0, xstart, x, t0, LogBytesPerInt);
3479   ld(product_hi, Address(t0, 0));
3480   ror_imm(product_hi, product_hi, 32); // convert big-endian to little-endian
3481 
3482   Label L_third_loop_prologue;
3483   bind(L_third_loop_prologue);
3484 
3485   sd(ylen, Address(sp, wordSize));
3486   sd(x, Address(sp, 2 * wordSize));
3487   sd(xstart, Address(sp, 3 * wordSize));
3488   multiply_128_x_128_loop(y, z, carry, x, jdx, ylen, product,
3489                           tmp2, x_xstart, tmp3, tmp4, tmp6, product_hi);
3490   ld(z, Address(sp, 0));
3491   ld(ylen, Address(sp, wordSize));
3492   ld(x, Address(sp, 2 * wordSize));
3493   ld(xlen, Address(sp, 3 * wordSize)); // copy old xstart -> xlen
3494   addi(sp, sp, 4 * wordSize);
3495 
3496   addiw(tmp3, xlen, 1);
3497   shadd(t0, tmp3, z, t0, LogBytesPerInt);
3498   sw(carry, Address(t0, 0));
3499 
3500   subw(tmp3, tmp3, 1);
3501   bltz(tmp3, L_done);
3502 
3503   srli(carry, carry, 32);
3504   shadd(t0, tmp3, z, t0, LogBytesPerInt);
3505   sw(carry, Address(t0, 0));
3506   j(L_second_loop_aligned);
3507 
3508   // Next infrequent code is moved outside loops.
3509   bind(L_last_x);
3510   lwu(product_hi, Address(x, 0));
3511   j(L_third_loop_prologue);
3512 
3513   bind(L_done);
3514 }
3515 #endif
3516 
3517 // Count bits of trailing zero chars from lsb to msb until first non-zero element.
3518 // For LL case, one byte for one element, so shift 8 bits once, and for other case,
3519 // shift 16 bits once.
3520 void MacroAssembler::ctzc_bit(Register Rd, Register Rs, bool isLL, Register tmp1, Register tmp2)
3521 {
3522   if (UseZbb) {
3523     assert_different_registers(Rd, Rs, tmp1);
3524     int step = isLL ? 8 : 16;
3525     ctz(Rd, Rs);
3526     andi(tmp1, Rd, step - 1);
3527     sub(Rd, Rd, tmp1);
3528     return;
3529   }
3530   assert_different_registers(Rd, Rs, tmp1, tmp2);
3531   Label Loop;
3532   int step = isLL ? 8 : 16;
3533   mv(Rd, -step);
3534   mv(tmp2, Rs);
3535 
3536   bind(Loop);
3537   addi(Rd, Rd, step);
3538   andi(tmp1, tmp2, ((1 << step) - 1));
3539   srli(tmp2, tmp2, step);
3540   beqz(tmp1, Loop);
3541 }
3542 
3543 // This instruction reads adjacent 4 bytes from the lower half of source register,
3544 // inflate into a register, for example:
3545 // Rs: A7A6A5A4A3A2A1A0
3546 // Rd: 00A300A200A100A0
3547 void MacroAssembler::inflate_lo32(Register Rd, Register Rs, Register tmp1, Register tmp2)
3548 {
3549   assert_different_registers(Rd, Rs, tmp1, tmp2);
3550   mv(tmp1, 0xFF);
3551   mv(Rd, zr);
3552   for (int i = 0; i <= 3; i++)
3553   {
3554     andr(tmp2, Rs, tmp1);
3555     if (i) {
3556       slli(tmp2, tmp2, i * 8);
3557     }
3558     orr(Rd, Rd, tmp2);
3559     if (i != 3) {
3560       slli(tmp1, tmp1, 8);
3561     }
3562   }
3563 }
3564 
3565 // This instruction reads adjacent 4 bytes from the upper half of source register,
3566 // inflate into a register, for example:
3567 // Rs: A7A6A5A4A3A2A1A0
3568 // Rd: 00A700A600A500A4
3569 void MacroAssembler::inflate_hi32(Register Rd, Register Rs, Register tmp1, Register tmp2)
3570 {
3571   assert_different_registers(Rd, Rs, tmp1, tmp2);
3572   mv(tmp1, 0xFF00000000);
3573   mv(Rd, zr);
3574   for (int i = 0; i <= 3; i++)
3575   {
3576     andr(tmp2, Rs, tmp1);
3577     orr(Rd, Rd, tmp2);
3578     srli(Rd, Rd, 8);
3579     if (i != 3) {
3580       slli(tmp1, tmp1, 8);
3581     }
3582   }
3583 }
3584 
3585 // The size of the blocks erased by the zero_blocks stub.  We must
3586 // handle anything smaller than this ourselves in zero_words().
3587 const int MacroAssembler::zero_words_block_size = 8;
3588 
3589 // zero_words() is used by C2 ClearArray patterns.  It is as small as
3590 // possible, handling small word counts locally and delegating
3591 // anything larger to the zero_blocks stub.  It is expanded many times
3592 // in compiled code, so it is important to keep it short.
3593 
3594 // ptr:   Address of a buffer to be zeroed.
3595 // cnt:   Count in HeapWords.
3596 //
3597 // ptr, cnt, and t0 are clobbered.
3598 address MacroAssembler::zero_words(Register ptr, Register cnt)
3599 {
3600   assert(is_power_of_2(zero_words_block_size), "adjust this");
3601   assert(ptr == x28 && cnt == x29, "mismatch in register usage");
3602   assert_different_registers(cnt, t0);
3603 
3604   BLOCK_COMMENT("zero_words {");
3605   mv(t0, zero_words_block_size);
3606   Label around, done, done16;
3607   bltu(cnt, t0, around);
3608   {
3609     RuntimeAddress zero_blocks = RuntimeAddress(StubRoutines::riscv::zero_blocks());
3610     assert(zero_blocks.target() != NULL, "zero_blocks stub has not been generated");
3611     if (StubRoutines::riscv::complete()) {
3612       address tpc = trampoline_call(zero_blocks);
3613       if (tpc == NULL) {
3614         DEBUG_ONLY(reset_labels(around));
3615         postcond(pc() == badAddress);
3616         return NULL;
3617       }
3618     } else {
3619       jal(zero_blocks);
3620     }
3621   }
3622   bind(around);
3623   for (int i = zero_words_block_size >> 1; i > 1; i >>= 1) {
3624     Label l;
3625     andi(t0, cnt, i);
3626     beqz(t0, l);
3627     for (int j = 0; j < i; j++) {
3628       sd(zr, Address(ptr, 0));
3629       addi(ptr, ptr, 8);
3630     }
3631     bind(l);
3632   }
3633   {
3634     Label l;
3635     andi(t0, cnt, 1);
3636     beqz(t0, l);
3637     sd(zr, Address(ptr, 0));
3638     bind(l);
3639   }
3640   BLOCK_COMMENT("} zero_words");
3641   postcond(pc() != badAddress);
3642   return pc();
3643 }
3644 
3645 #define SmallArraySize (18 * BytesPerLong)
3646 
3647 // base:  Address of a buffer to be zeroed, 8 bytes aligned.
3648 // cnt:   Immediate count in HeapWords.
3649 void MacroAssembler::zero_words(Register base, u_int64_t cnt)
3650 {
3651   assert_different_registers(base, t0, t1);
3652 
3653   BLOCK_COMMENT("zero_words {");
3654 
3655   if (cnt <= SmallArraySize / BytesPerLong) {
3656     for (int i = 0; i < (int)cnt; i++) {
3657       sd(zr, Address(base, i * wordSize));
3658     }
3659   } else {
3660     const int unroll = 8; // Number of sd(zr, adr), instructions we'll unroll
3661     int remainder = cnt % unroll;
3662     for (int i = 0; i < remainder; i++) {
3663       sd(zr, Address(base, i * wordSize));
3664     }
3665 
3666     Label loop;
3667     Register cnt_reg = t0;
3668     Register loop_base = t1;
3669     cnt = cnt - remainder;
3670     mv(cnt_reg, cnt);
3671     add(loop_base, base, remainder * wordSize);
3672     bind(loop);
3673     sub(cnt_reg, cnt_reg, unroll);
3674     for (int i = 0; i < unroll; i++) {
3675       sd(zr, Address(loop_base, i * wordSize));
3676     }
3677     add(loop_base, loop_base, unroll * wordSize);
3678     bnez(cnt_reg, loop);
3679   }
3680 
3681   BLOCK_COMMENT("} zero_words");
3682 }
3683 
3684 // base:   Address of a buffer to be filled, 8 bytes aligned.
3685 // cnt:    Count in 8-byte unit.
3686 // value:  Value to be filled with.
3687 // base will point to the end of the buffer after filling.
3688 void MacroAssembler::fill_words(Register base, Register cnt, Register value)
3689 {
3690 //  Algorithm:
3691 //
3692 //    t0 = cnt & 7
3693 //    cnt -= t0
3694 //    p += t0
3695 //    switch (t0):
3696 //      switch start:
3697 //      do while cnt
3698 //        cnt -= 8
3699 //          p[-8] = value
3700 //        case 7:
3701 //          p[-7] = value
3702 //        case 6:
3703 //          p[-6] = value
3704 //          // ...
3705 //        case 1:
3706 //          p[-1] = value
3707 //        case 0:
3708 //          p += 8
3709 //      do-while end
3710 //    switch end
3711 
3712   assert_different_registers(base, cnt, value, t0, t1);
3713 
3714   Label fini, skip, entry, loop;
3715   const int unroll = 8; // Number of sd instructions we'll unroll
3716 
3717   beqz(cnt, fini);
3718 
3719   andi(t0, cnt, unroll - 1);
3720   sub(cnt, cnt, t0);
3721   // align 8, so first sd n % 8 = mod, next loop sd 8 * n.
3722   shadd(base, t0, base, t1, 3);
3723   la(t1, entry);
3724   slli(t0, t0, 2); // sd_inst_nums * 4; t0 is cnt % 8, so t1 = t1 - sd_inst_nums * 4, 4 is sizeof(inst)
3725   sub(t1, t1, t0);
3726   jr(t1);
3727 
3728   bind(loop);
3729   add(base, base, unroll * 8);
3730   for (int i = -unroll; i < 0; i++) {
3731     sd(value, Address(base, i * 8));
3732   }
3733   bind(entry);
3734   sub(cnt, cnt, unroll);
3735   bgez(cnt, loop);
3736 
3737   bind(fini);
3738 }
3739 
3740 #define FCVT_SAFE(FLOATCVT, FLOATEQ)                                                             \
3741 void MacroAssembler:: FLOATCVT##_safe(Register dst, FloatRegister src, Register tmp) {           \
3742   Label L_Okay;                                                                                  \
3743   fscsr(zr);                                                                                     \
3744   FLOATCVT(dst, src);                                                                            \
3745   frcsr(tmp);                                                                                    \
3746   andi(tmp, tmp, 0x1E);                                                                          \
3747   beqz(tmp, L_Okay);                                                                             \
3748   FLOATEQ(tmp, src, src);                                                                        \
3749   bnez(tmp, L_Okay);                                                                             \
3750   mv(dst, zr);                                                                                   \
3751   bind(L_Okay);                                                                                  \
3752 }
3753 
3754 FCVT_SAFE(fcvt_w_s, feq_s)
3755 FCVT_SAFE(fcvt_l_s, feq_s)
3756 FCVT_SAFE(fcvt_w_d, feq_d)
3757 FCVT_SAFE(fcvt_l_d, feq_d)
3758 
3759 #undef FCVT_SAFE
3760 
3761 #define FCMP(FLOATTYPE, FLOATSIG)                                                       \
3762 void MacroAssembler::FLOATTYPE##_compare(Register result, FloatRegister Rs1,            \
3763                                          FloatRegister Rs2, int unordered_result) {     \
3764   Label Ldone;                                                                          \
3765   if (unordered_result < 0) {                                                           \
3766     /* we want -1 for unordered or less than, 0 for equal and 1 for greater than. */    \
3767     /* installs 1 if gt else 0 */                                                       \
3768     flt_##FLOATSIG(result, Rs2, Rs1);                                                   \
3769     /* Rs1 > Rs2, install 1 */                                                          \
3770     bgtz(result, Ldone);                                                                \
3771     feq_##FLOATSIG(result, Rs1, Rs2);                                                   \
3772     addi(result, result, -1);                                                           \
3773     /* Rs1 = Rs2, install 0 */                                                          \
3774     /* NaN or Rs1 < Rs2, install -1 */                                                  \
3775     bind(Ldone);                                                                        \
3776   } else {                                                                              \
3777     /* we want -1 for less than, 0 for equal and 1 for unordered or greater than. */    \
3778     /* installs 1 if gt or unordered else 0 */                                          \
3779     flt_##FLOATSIG(result, Rs1, Rs2);                                                   \
3780     /* Rs1 < Rs2, install -1 */                                                         \
3781     bgtz(result, Ldone);                                                                \
3782     feq_##FLOATSIG(result, Rs1, Rs2);                                                   \
3783     addi(result, result, -1);                                                           \
3784     /* Rs1 = Rs2, install 0 */                                                          \
3785     /* NaN or Rs1 > Rs2, install 1 */                                                   \
3786     bind(Ldone);                                                                        \
3787     neg(result, result);                                                                \
3788   }                                                                                     \
3789 }
3790 
3791 FCMP(float, s);
3792 FCMP(double, d);
3793 
3794 #undef FCMP
3795 
3796 // Zero words; len is in bytes
3797 // Destroys all registers except addr
3798 // len must be a nonzero multiple of wordSize
3799 void MacroAssembler::zero_memory(Register addr, Register len, Register tmp) {
3800   assert_different_registers(addr, len, tmp, t0, t1);
3801 
3802 #ifdef ASSERT
3803   {
3804     Label L;
3805     andi(t0, len, BytesPerWord - 1);
3806     beqz(t0, L);
3807     stop("len is not a multiple of BytesPerWord");
3808     bind(L);
3809   }
3810 #endif // ASSERT
3811 
3812 #ifndef PRODUCT
3813   block_comment("zero memory");
3814 #endif // PRODUCT
3815 
3816   Label loop;
3817   Label entry;
3818 
3819   // Algorithm:
3820   //
3821   //  t0 = cnt & 7
3822   //  cnt -= t0
3823   //  p += t0
3824   //  switch (t0) {
3825   //    do {
3826   //      cnt -= 8
3827   //        p[-8] = 0
3828   //      case 7:
3829   //        p[-7] = 0
3830   //      case 6:
3831   //        p[-6] = 0
3832   //        ...
3833   //      case 1:
3834   //        p[-1] = 0
3835   //      case 0:
3836   //        p += 8
3837   //     } while (cnt)
3838   //  }
3839 
3840   const int unroll = 8;   // Number of sd(zr) instructions we'll unroll
3841 
3842   srli(len, len, LogBytesPerWord);
3843   andi(t0, len, unroll - 1);  // t0 = cnt % unroll
3844   sub(len, len, t0);          // cnt -= unroll
3845   // tmp always points to the end of the region we're about to zero
3846   shadd(tmp, t0, addr, t1, LogBytesPerWord);
3847   la(t1, entry);
3848   slli(t0, t0, 2);
3849   sub(t1, t1, t0);
3850   jr(t1);
3851   bind(loop);
3852   sub(len, len, unroll);
3853   for (int i = -unroll; i < 0; i++) {
3854     Assembler::sd(zr, Address(tmp, i * wordSize));
3855   }
3856   bind(entry);
3857   add(tmp, tmp, unroll * wordSize);
3858   bnez(len, loop);
3859 }
3860 
3861 // shift left by shamt and add
3862 // Rd = (Rs1 << shamt) + Rs2
3863 void MacroAssembler::shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt) {
3864   if (UseZba) {
3865     if (shamt == 1) {
3866       sh1add(Rd, Rs1, Rs2);
3867       return;
3868     } else if (shamt == 2) {
3869       sh2add(Rd, Rs1, Rs2);
3870       return;
3871     } else if (shamt == 3) {
3872       sh3add(Rd, Rs1, Rs2);
3873       return;
3874     }
3875   }
3876 
3877   if (shamt != 0) {
3878     slli(tmp, Rs1, shamt);
3879     add(Rd, Rs2, tmp);
3880   } else {
3881     add(Rd, Rs1, Rs2);
3882   }
3883 }
3884 
3885 void MacroAssembler::zero_extend(Register dst, Register src, int bits) {
3886   if (UseZba && bits == 32) {
3887     zext_w(dst, src);
3888     return;
3889   }
3890 
3891   if (UseZbb && bits == 16) {
3892     zext_h(dst, src);
3893     return;
3894   }
3895 
3896   if (bits == 8) {
3897     zext_b(dst, src);
3898   } else {
3899     slli(dst, src, XLEN - bits);
3900     srli(dst, dst, XLEN - bits);
3901   }
3902 }
3903 
3904 void MacroAssembler::sign_extend(Register dst, Register src, int bits) {
3905   if (UseZbb) {
3906     if (bits == 8) {
3907       sext_b(dst, src);
3908       return;
3909     } else if (bits == 16) {
3910       sext_h(dst, src);
3911       return;
3912     }
3913   }
3914 
3915   if (bits == 32) {
3916     sext_w(dst, src);
3917   } else {
3918     slli(dst, src, XLEN - bits);
3919     srai(dst, dst, XLEN - bits);
3920   }
3921 }
3922 
3923 void MacroAssembler::cmp_l2i(Register dst, Register src1, Register src2, Register tmp)
3924 {
3925   if (src1 == src2) {
3926     mv(dst, zr);
3927     return;
3928   }
3929   Label done;
3930   Register left = src1;
3931   Register right = src2;
3932   if (dst == src1) {
3933     assert_different_registers(dst, src2, tmp);
3934     mv(tmp, src1);
3935     left = tmp;
3936   } else if (dst == src2) {
3937     assert_different_registers(dst, src1, tmp);
3938     mv(tmp, src2);
3939     right = tmp;
3940   }
3941 
3942   // installs 1 if gt else 0
3943   slt(dst, right, left);
3944   bnez(dst, done);
3945   slt(dst, left, right);
3946   // dst = -1 if lt; else if eq , dst = 0
3947   neg(dst, dst);
3948   bind(done);
3949 }
3950 
3951 // The java_calling_convention describes stack locations as ideal slots on
3952 // a frame with no abi restrictions. Since we must observe abi restrictions
3953 // (like the placement of the register window) the slots must be biased by
3954 // the following value.
3955 static int reg2offset_in(VMReg r) {
3956   // Account for saved fp and ra
3957   // This should really be in_preserve_stack_slots
3958   return r->reg2stack() * VMRegImpl::stack_slot_size;
3959 }
3960 
3961 static int reg2offset_out(VMReg r) {
3962   return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
3963 }
3964 
3965 // On 64 bit we will store integer like items to the stack as
3966 // 64 bits items (riscv64 abi) even though java would only store
3967 // 32bits for a parameter. On 32bit it will simply be 32 bits
3968 // So this routine will do 32->32 on 32bit and 32->64 on 64bit
3969 void MacroAssembler::move32_64(VMRegPair src, VMRegPair dst, Register tmp) {
3970   if (src.first()->is_stack()) {
3971     if (dst.first()->is_stack()) {
3972       // stack to stack
3973       ld(tmp, Address(fp, reg2offset_in(src.first())));
3974       sd(tmp, Address(sp, reg2offset_out(dst.first())));
3975     } else {
3976       // stack to reg
3977       lw(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first())));
3978     }
3979   } else if (dst.first()->is_stack()) {
3980     // reg to stack
3981     sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first())));
3982   } else {
3983     if (dst.first() != src.first()) {
3984       // 32bits extend sign
3985       addw(dst.first()->as_Register(), src.first()->as_Register(), zr);
3986     }
3987   }
3988 }
3989 
3990 // An oop arg. Must pass a handle not the oop itself
3991 void MacroAssembler::object_move(OopMap* map,
3992                                  int oop_handle_offset,
3993                                  int framesize_in_slots,
3994                                  VMRegPair src,
3995                                  VMRegPair dst,
3996                                  bool is_receiver,
3997                                  int* receiver_offset) {
3998   assert_cond(map != NULL && receiver_offset != NULL);
3999   // must pass a handle. First figure out the location we use as a handle
4000   Register rHandle = dst.first()->is_stack() ? t1 : dst.first()->as_Register();
4001 
4002   // See if oop is NULL if it is we need no handle
4003 
4004   if (src.first()->is_stack()) {
4005     // Oop is already on the stack as an argument
4006     int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots();
4007     map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots));
4008     if (is_receiver) {
4009       *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size;
4010     }
4011 
4012     ld(t0, Address(fp, reg2offset_in(src.first())));
4013     la(rHandle, Address(fp, reg2offset_in(src.first())));
4014     // conditionally move a NULL
4015     Label notZero1;
4016     bnez(t0, notZero1);
4017     mv(rHandle, zr);
4018     bind(notZero1);
4019   } else {
4020 
4021     // Oop is in a register we must store it to the space we reserve
4022     // on the stack for oop_handles and pass a handle if oop is non-NULL
4023 
4024     const Register rOop = src.first()->as_Register();
4025     int oop_slot = -1;
4026     if (rOop == j_rarg0) {
4027       oop_slot = 0;
4028     } else if (rOop == j_rarg1) {
4029       oop_slot = 1;
4030     } else if (rOop == j_rarg2) {
4031       oop_slot = 2;
4032     } else if (rOop == j_rarg3) {
4033       oop_slot = 3;
4034     } else if (rOop == j_rarg4) {
4035       oop_slot = 4;
4036     } else if (rOop == j_rarg5) {
4037       oop_slot = 5;
4038     } else if (rOop == j_rarg6) {
4039       oop_slot = 6;
4040     } else {
4041       assert(rOop == j_rarg7, "wrong register");
4042       oop_slot = 7;
4043     }
4044 
4045     oop_slot = oop_slot * VMRegImpl::slots_per_word + oop_handle_offset;
4046     int offset = oop_slot * VMRegImpl::stack_slot_size;
4047 
4048     map->set_oop(VMRegImpl::stack2reg(oop_slot));
4049     // Store oop in handle area, may be NULL
4050     sd(rOop, Address(sp, offset));
4051     if (is_receiver) {
4052       *receiver_offset = offset;
4053     }
4054 
4055     //rOop maybe the same as rHandle
4056     if (rOop == rHandle) {
4057       Label isZero;
4058       beqz(rOop, isZero);
4059       la(rHandle, Address(sp, offset));
4060       bind(isZero);
4061     } else {
4062       Label notZero2;
4063       la(rHandle, Address(sp, offset));
4064       bnez(rOop, notZero2);
4065       mv(rHandle, zr);
4066       bind(notZero2);
4067     }
4068   }
4069 
4070   // If arg is on the stack then place it otherwise it is already in correct reg.
4071   if (dst.first()->is_stack()) {
4072     sd(rHandle, Address(sp, reg2offset_out(dst.first())));
4073   }
4074 }
4075 
4076 // A float arg may have to do float reg int reg conversion
4077 void MacroAssembler::float_move(VMRegPair src, VMRegPair dst, Register tmp) {
4078   assert(src.first()->is_stack() && dst.first()->is_stack() ||
4079          src.first()->is_reg() && dst.first()->is_reg() ||
4080          src.first()->is_stack() && dst.first()->is_reg(), "Unexpected error");
4081   if (src.first()->is_stack()) {
4082     if (dst.first()->is_stack()) {
4083       lwu(tmp, Address(fp, reg2offset_in(src.first())));
4084       sw(tmp, Address(sp, reg2offset_out(dst.first())));
4085     } else if (dst.first()->is_Register()) {
4086       lwu(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first())));
4087     } else {
4088       ShouldNotReachHere();
4089     }
4090   } else if (src.first() != dst.first()) {
4091     if (src.is_single_phys_reg() && dst.is_single_phys_reg()) {
4092       fmv_s(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister());
4093     } else {
4094       ShouldNotReachHere();
4095     }
4096   }
4097 }
4098 
4099 // A long move
4100 void MacroAssembler::long_move(VMRegPair src, VMRegPair dst, Register tmp) {
4101   if (src.first()->is_stack()) {
4102     if (dst.first()->is_stack()) {
4103       // stack to stack
4104       ld(tmp, Address(fp, reg2offset_in(src.first())));
4105       sd(tmp, Address(sp, reg2offset_out(dst.first())));
4106     } else {
4107       // stack to reg
4108       ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first())));
4109     }
4110   } else if (dst.first()->is_stack()) {
4111     // reg to stack
4112     sd(src.first()->as_Register(), Address(sp, reg2offset_out(dst.first())));
4113   } else {
4114     if (dst.first() != src.first()) {
4115       mv(dst.first()->as_Register(), src.first()->as_Register());
4116     }
4117   }
4118 }
4119 
4120 // A double move
4121 void MacroAssembler::double_move(VMRegPair src, VMRegPair dst, Register tmp) {
4122   assert(src.first()->is_stack() && dst.first()->is_stack() ||
4123          src.first()->is_reg() && dst.first()->is_reg() ||
4124          src.first()->is_stack() && dst.first()->is_reg(), "Unexpected error");
4125   if (src.first()->is_stack()) {
4126     if (dst.first()->is_stack()) {
4127       ld(tmp, Address(fp, reg2offset_in(src.first())));
4128       sd(tmp, Address(sp, reg2offset_out(dst.first())));
4129     } else if (dst.first()-> is_Register()) {
4130       ld(dst.first()->as_Register(), Address(fp, reg2offset_in(src.first())));
4131     } else {
4132       ShouldNotReachHere();
4133     }
4134   } else if (src.first() != dst.first()) {
4135     if (src.is_single_phys_reg() && dst.is_single_phys_reg()) {
4136       fmv_d(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister());
4137     } else {
4138       ShouldNotReachHere();
4139     }
4140   }
4141 }
4142 
4143 void MacroAssembler::rt_call(address dest, Register tmp) {
4144   CodeBlob *cb = CodeCache::find_blob(dest);
4145   if (cb) {
4146     far_call(RuntimeAddress(dest));
4147   } else {
4148     int32_t offset = 0;
4149     la_patchable(tmp, RuntimeAddress(dest), offset);
4150     jalr(x1, tmp, offset);
4151   }
4152 }