/*
 * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
 * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/assembler.hpp"
#include "asm/assembler.inline.hpp"
#include "compiler/disassembler.hpp"
#include "gc/shared/barrierSet.hpp"
#include "gc/shared/barrierSetAssembler.hpp"
#include "gc/shared/cardTable.hpp"
#include "gc/shared/cardTableBarrierSet.hpp"
#include "interpreter/bytecodeHistogram.hpp"
#include "interpreter/interpreter.hpp"
#include "memory/resourceArea.hpp"
#include "memory/universe.hpp"
#include "nativeInst_riscv.hpp"
#include "oops/accessDecorators.hpp"
#include "oops/compressedOops.inline.hpp"
#include "oops/klass.inline.hpp"
#include "oops/oop.hpp"
#include "runtime/interfaceSupport.inline.hpp"
#include "runtime/jniHandles.inline.hpp"
#include "runtime/sharedRuntime.hpp"
#include "runtime/stubRoutines.hpp"
#include "runtime/thread.hpp"
#include "utilities/powerOfTwo.hpp"
#ifdef COMPILER2
#include "opto/compile.hpp"
#include "opto/node.hpp"
#include "opto/output.hpp"
#endif

#ifdef PRODUCT
#define BLOCK_COMMENT(str) /* nothing */
#else
#define BLOCK_COMMENT(str) block_comment(str)
#endif
#define BIND(label) bind(label); __ BLOCK_COMMENT(#label ":")

static void pass_arg0(MacroAssembler* masm, Register arg) {
  if (c_rarg0 != arg) {
    assert_cond(masm != NULL);
    masm->mv(c_rarg0, arg);
  }
}

static void pass_arg1(MacroAssembler* masm, Register arg) {
  if (c_rarg1 != arg) {
    assert_cond(masm != NULL);
    masm->mv(c_rarg1, arg);
  }
}

static void pass_arg2(MacroAssembler* masm, Register arg) {
  if (c_rarg2 != arg) {
    assert_cond(masm != NULL);
    masm->mv(c_rarg2, arg);
  }
}

static void pass_arg3(MacroAssembler* masm, Register arg) {
  if (c_rarg3 != arg) {
    assert_cond(masm != NULL);
    masm->mv(c_rarg3, arg);
  }
}

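// Emit 4-byte nops until the current code offset is a multiple of modulus.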
void MacroAssembler::align(int modulus) {
  while (offset() % modulus != 0) { nop(); }
}

void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
  call_VM_base(oop_result, noreg, noreg, entry_point, number_of_arguments, check_exceptions);
}

// Implementation of call_VM versions

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             bool check_exceptions) {
  call_VM_helper(oop_result, entry_point, 0, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  pass_arg1(this, arg_1);
  call_VM_helper(oop_result, entry_point, 1, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {
  assert(arg_1 != c_rarg2, "smashed arg");
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM_helper(oop_result, entry_point, 2, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  assert(arg_1 != c_rarg3, "smashed arg");
  assert(arg_2 != c_rarg3, "smashed arg");
  pass_arg3(this, arg_3);

  assert(arg_1 != c_rarg2, "smashed arg");
  pass_arg2(this, arg_2);

  pass_arg1(this, arg_1);
  call_VM_helper(oop_result, entry_point, 3, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             int number_of_arguments,
                             bool check_exceptions) {
  call_VM_base(oop_result, xthread, last_java_sp, entry_point, number_of_arguments, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             bool check_exceptions) {
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             bool check_exceptions) {

  assert(arg_1 != c_rarg2, "smashed arg");
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions);
}

void MacroAssembler::call_VM(Register oop_result,
                             Register last_java_sp,
                             address entry_point,
                             Register arg_1,
                             Register arg_2,
                             Register arg_3,
                             bool check_exceptions) {
  assert(arg_1 != c_rarg3, "smashed arg");
  assert(arg_2 != c_rarg3, "smashed arg");
  pass_arg3(this, arg_3);
  assert(arg_1 != c_rarg2, "smashed arg");
  pass_arg2(this, arg_2);
  pass_arg1(this, arg_1);
  call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions);
}

// these are no-ops overridden by InterpreterMacroAssembler
void MacroAssembler::check_and_handle_earlyret(Register java_thread) {}
void MacroAssembler::check_and_handle_popframe(Register java_thread) {}

// Calls to C land
//
// When entering C land, the fp & esp of the last Java frame have to be recorded
// in the (thread-local) JavaThread object. When leaving C land, the last Java fp
// has to be reset to 0. This is required to allow proper stack traversal.
void MacroAssembler::set_last_Java_frame(Register last_java_sp,
                                         Register last_java_fp,
                                         Register last_java_pc,
                                         Register temp) {

  if (last_java_pc->is_valid()) {
    sd(last_java_pc, Address(xthread,
                             JavaThread::frame_anchor_offset() +
                             JavaFrameAnchor::last_Java_pc_offset()));
  }

  // determine last_java_sp register
  if (last_java_sp == sp) {
    mv(temp, sp);
    last_java_sp = temp;
  } else if (!last_java_sp->is_valid()) {
    last_java_sp = esp;
  }

  sd(last_java_sp, Address(xthread, JavaThread::last_Java_sp_offset()));

  // last_java_fp is optional
  if (last_java_fp->is_valid()) {
    sd(last_java_fp, Address(xthread, JavaThread::last_Java_fp_offset()));
  }
}

void MacroAssembler::set_last_Java_frame(Register last_java_sp,
                                         Register last_java_fp,
                                         address  last_java_pc,
                                         Register temp) {
  assert(last_java_pc != NULL, "must provide a valid PC");

  la(temp, last_java_pc);
  sd(temp, Address(xthread, JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()));

  set_last_Java_frame(last_java_sp, last_java_fp, noreg, temp);
}

void MacroAssembler::set_last_Java_frame(Register last_java_sp,
                                         Register last_java_fp,
                                         Label &L,
                                         Register temp) {
  if (L.is_bound()) {
    set_last_Java_frame(last_java_sp, last_java_fp, target(L), temp);
  } else {
    InstructionMark im(this);
    L.add_patch_at(code(), locator());
    set_last_Java_frame(last_java_sp, last_java_fp, pc() /* Patched later */, temp);
  }
}

void MacroAssembler::reset_last_Java_frame(bool clear_fp) {
  // we must set sp to zero to clear frame
  sd(zr, Address(xthread, JavaThread::last_Java_sp_offset()));

  // must clear fp, so that compiled frames are not confused; it is
  // possible that we need it only for debugging
  if (clear_fp) {
    sd(zr, Address(xthread, JavaThread::last_Java_fp_offset()));
  }

  // Always clear the pc because it could have been set by make_walkable()
  sd(zr, Address(xthread, JavaThread::last_Java_pc_offset()));
}

void MacroAssembler::call_VM_base(Register oop_result,
                                  Register java_thread,
                                  Register last_java_sp,
                                  address  entry_point,
                                  int      number_of_arguments,
                                  bool     check_exceptions) {
  // determine java_thread register
  if (!java_thread->is_valid()) {
    java_thread = xthread;
  }
  // determine last_java_sp register
  if (!last_java_sp->is_valid()) {
    last_java_sp = esp;
  }

  // debugging support
  assert(number_of_arguments >= 0   , "cannot have negative number of arguments");
  assert(java_thread == xthread, "unexpected register");

  assert(java_thread != oop_result  , "cannot use the same register for java_thread & oop_result");
  assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp");

  // push java thread (becomes first argument of C function)
  mv(c_rarg0, java_thread);

  // set last Java frame before call
  assert(last_java_sp != fp, "can't use fp");

  Label l;
  set_last_Java_frame(last_java_sp, fp, l, t0);

  // do the call, remove parameters
  MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments, &l);

  // reset last Java frame
  // Only interpreter should have to clear fp
  reset_last_Java_frame(true);

  // C++ interp handles this in the interpreter
  check_and_handle_popframe(java_thread);
  check_and_handle_earlyret(java_thread);

  if (check_exceptions) {
    // check for pending exceptions (java_thread is set upon return)
    ld(t0, Address(java_thread, in_bytes(Thread::pending_exception_offset())));
    Label ok;
    beqz(t0, ok);
    int32_t offset = 0;
    la_patchable(t0, RuntimeAddress(StubRoutines::forward_exception_entry()), offset);
    jalr(x0, t0, offset);
    bind(ok);
  }

  // get oop result if there is one and reset the value in the thread
  if (oop_result->is_valid()) {
    get_vm_result(oop_result, java_thread);
  }
}

void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) {
  ld(oop_result, Address(java_thread, JavaThread::vm_result_offset()));
  sd(zr, Address(java_thread, JavaThread::vm_result_offset()));
  verify_oop(oop_result, "broken oop in call_VM_base");
}

void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) {
  ld(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset()));
  sd(zr, Address(java_thread, JavaThread::vm_result_2_offset()));
}

void MacroAssembler::clinit_barrier(Register klass, Register tmp, Label* L_fast_path, Label* L_slow_path) {
  assert(L_fast_path != NULL || L_slow_path != NULL, "at least one is required");
  assert_different_registers(klass, xthread, tmp);

  Label L_fallthrough;
  if (L_fast_path == NULL) {
    L_fast_path = &L_fallthrough;
  } else if (L_slow_path == NULL) {
    L_slow_path = &L_fallthrough;
  }

  // Fast path check: class is fully initialized
  lbu(tmp, Address(klass, InstanceKlass::init_state_offset()));
  sub(tmp, tmp, InstanceKlass::fully_initialized);
  beqz(tmp, *L_fast_path);

  // Fast path check: current thread is initializer thread
  ld(tmp, Address(klass, InstanceKlass::init_thread_offset()));

  if (L_slow_path == &L_fallthrough) {
    beq(xthread, tmp, *L_fast_path);
    bind(*L_slow_path);
  } else if (L_fast_path == &L_fallthrough) {
    bne(xthread, tmp, *L_slow_path);
    bind(*L_fast_path);
  } else {
    Unimplemented();
  }
}

void MacroAssembler::verify_oop(Register reg, const char* s) {
  if (!VerifyOops) { return; }

  // Pass register number to verify_oop_subroutine
  const char* b = NULL;
  {
    ResourceMark rm;
    stringStream ss;
    ss.print("verify_oop: %s: %s", reg->name(), s);
    b = code_string(ss.as_string());
  }
  BLOCK_COMMENT("verify_oop {");

  push_reg(RegSet::of(lr, t0, t1, c_rarg0), sp);

  mv(c_rarg0, reg); // c_rarg0 : x10
  if (b != NULL) {
    li(t0, (uintptr_t)(address)b);
  } else {
    ShouldNotReachHere();
  }

  // call indirectly to solve generation ordering problem
  int32_t offset = 0;
  la_patchable(t1, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()), offset);
  ld(t1, Address(t1, offset));
  jalr(t1);

  pop_reg(RegSet::of(lr, t0, t1, c_rarg0), sp);

  BLOCK_COMMENT("} verify_oop");
}

void MacroAssembler::verify_oop_addr(Address addr, const char* s) {
  if (!VerifyOops) {
    return;
  }

  const char* b = NULL;
  {
    ResourceMark rm;
    stringStream ss;
    ss.print("verify_oop_addr: %s", s);
    b = code_string(ss.as_string());
  }
  BLOCK_COMMENT("verify_oop_addr {");

  push_reg(RegSet::of(lr, t0, t1, c_rarg0), sp);

  if (addr.uses(sp)) {
    la(x10, addr);
    ld(x10, Address(x10, 4 * wordSize));
  } else {
    ld(x10, addr);
  }
  if (b != NULL) {
    li(t0, (uintptr_t)(address)b);
  } else {
    ShouldNotReachHere();
  }

  // call indirectly to solve generation ordering problem
  int32_t offset = 0;
  la_patchable(t1, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()), offset);
  ld(t1, Address(t1, offset));
  jalr(t1);

  pop_reg(RegSet::of(lr, t0, t1, c_rarg0), sp);

  BLOCK_COMMENT("} verify_oop_addr");
}

Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
                                         int extra_slot_offset) {
  // cf. TemplateTable::prepare_invoke(), if (load_receiver).
  int stackElementSize = Interpreter::stackElementSize;
  int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0);
#ifdef ASSERT
  int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1);
  assert(offset1 - offset == stackElementSize, "correct arithmetic");
#endif
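  // For example, assuming an 8-byte stack element, a constant arg_slot of 2
  // with extra_slot_offset == 0 resolves to Address(esp, 2 * 8 + offset),
  // i.e. the third expression-stack slot above esp.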
  if (arg_slot.is_constant()) {
    return Address(esp, arg_slot.as_constant() * stackElementSize + offset);
  } else {
    assert_different_registers(t0, arg_slot.as_register());
    slli(t0, arg_slot.as_register(), log2i_exact(stackElementSize));
    add(t0, esp, t0);
    return Address(t0, offset);
  }
}

#ifndef PRODUCT
extern "C" void findpc(intptr_t x);
#endif

void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[])
{
  // In order to get locks to work, we need to fake an in_VM state
  if (ShowMessageBoxOnError) {
    JavaThread* thread = JavaThread::current();
    JavaThreadState saved_state = thread->thread_state();
    thread->set_thread_state(_thread_in_vm);
#ifndef PRODUCT
    if (CountBytecodes || TraceBytecodes || StopInterpreterAt) {
      ttyLocker ttyl;
      BytecodeCounter::print();
    }
#endif
    if (os::message_box(msg, "Execution stopped, print registers?")) {
      ttyLocker ttyl;
      tty->print_cr(" pc = 0x%016lx", pc);
#ifndef PRODUCT
      tty->cr();
      findpc(pc);
      tty->cr();
#endif
      tty->print_cr(" x0 = 0x%016lx", regs[0]);
      tty->print_cr(" x1 = 0x%016lx", regs[1]);
      tty->print_cr(" x2 = 0x%016lx", regs[2]);
      tty->print_cr(" x3 = 0x%016lx", regs[3]);
      tty->print_cr(" x4 = 0x%016lx", regs[4]);
      tty->print_cr(" x5 = 0x%016lx", regs[5]);
      tty->print_cr(" x6 = 0x%016lx", regs[6]);
      tty->print_cr(" x7 = 0x%016lx", regs[7]);
      tty->print_cr(" x8 = 0x%016lx", regs[8]);
      tty->print_cr(" x9 = 0x%016lx", regs[9]);
      tty->print_cr("x10 = 0x%016lx", regs[10]);
      tty->print_cr("x11 = 0x%016lx", regs[11]);
      tty->print_cr("x12 = 0x%016lx", regs[12]);
      tty->print_cr("x13 = 0x%016lx", regs[13]);
      tty->print_cr("x14 = 0x%016lx", regs[14]);
      tty->print_cr("x15 = 0x%016lx", regs[15]);
      tty->print_cr("x16 = 0x%016lx", regs[16]);
      tty->print_cr("x17 = 0x%016lx", regs[17]);
      tty->print_cr("x18 = 0x%016lx", regs[18]);
      tty->print_cr("x19 = 0x%016lx", regs[19]);
      tty->print_cr("x20 = 0x%016lx", regs[20]);
      tty->print_cr("x21 = 0x%016lx", regs[21]);
      tty->print_cr("x22 = 0x%016lx", regs[22]);
      tty->print_cr("x23 = 0x%016lx", regs[23]);
      tty->print_cr("x24 = 0x%016lx", regs[24]);
      tty->print_cr("x25 = 0x%016lx", regs[25]);
      tty->print_cr("x26 = 0x%016lx", regs[26]);
      tty->print_cr("x27 = 0x%016lx", regs[27]);
      tty->print_cr("x28 = 0x%016lx", regs[28]);
 509       tty->print_cr("x30 = 0x%016lx", regs[30]);
 510       tty->print_cr("x31 = 0x%016lx", regs[31]);
 511       BREAKPOINT;
 512     }
 513   }
 514   fatal("DEBUG MESSAGE: %s", msg);
 515 }
 516 
 517 void MacroAssembler::resolve_jobject(Register value, Register thread, Register tmp) {
 518   Label done, not_weak;
 519   beqz(value, done);           // Use NULL as-is.
 520 
 521   // Test for jweak tag.
 522   andi(t0, value, JNIHandles::weak_tag_mask);
 523   beqz(t0, not_weak);
 524 
 525   // Resolve jweak.
 526   access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, value,
 527                  Address(value, -JNIHandles::weak_tag_value), tmp, thread);
 528   verify_oop(value);
 529   j(done);
 530 
 531   bind(not_weak);
 532   // Resolve (untagged) jobject.
 533   access_load_at(T_OBJECT, IN_NATIVE, value, Address(value, 0), tmp, thread);
 534   verify_oop(value);
 535   bind(done);
 536 }
 537 
 538 void MacroAssembler::stop(const char* msg) {
 539   address ip = pc();
 540   pusha();
 541   if(msg != NULL && ip != NULL) {
 542     li(c_rarg0, (uintptr_t)(address)msg);
 543     li(c_rarg1, (uintptr_t)(address)ip);
 544   } else {
 545     ShouldNotReachHere();
 546   }
 547   mv(c_rarg2, sp);
 548   mv(c_rarg3, CAST_FROM_FN_PTR(address, MacroAssembler::debug64));
 549   jalr(c_rarg3);
 550   ebreak();
 551 }
 552 
 553 void MacroAssembler::unimplemented(const char* what) {
 554   const char* buf = NULL;
 555   {
 556     ResourceMark rm;
 557     stringStream ss;
 558     ss.print("unimplemented: %s", what);
 559     buf = code_string(ss.as_string());
 560   }
 561   stop(buf);
 562 }
 563 
 564 void MacroAssembler::emit_static_call_stub() {
 565   // CompiledDirectStaticCall::set_to_interpreted knows the
 566   // exact layout of this stub.
 567 
 568   ifence();
 569 
 570   mov_metadata(xmethod, (Metadata*)NULL);
 571 
 572   // Jump to the entry point of the i2c stub.
 573   int32_t offset = 0;
 574   movptr_with_offset(t0, 0, offset);
 575   jalr(x0, t0, offset);
 576 }
 577 void MacroAssembler::call_VM_leaf_base(address entry_point,
                                       int number_of_arguments,
                                       Label *retaddr) {
  call_native_base(entry_point, retaddr);
}

void MacroAssembler::call_native(address entry_point, Register arg_0) {
  pass_arg0(this, arg_0);
  call_native_base(entry_point);
}

void MacroAssembler::call_native_base(address entry_point, Label *retaddr) {
  int32_t offset = 0;
  push_reg(0x80000040, sp);   // push << t0 & xmethod >> to sp
  movptr_with_offset(t0, entry_point, offset);
  jalr(x1, t0, offset);
  if (retaddr != NULL) {
    bind(*retaddr);
  }
  pop_reg(0x80000040, sp);   // pop << t0 & xmethod >> from sp
}

void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
  call_VM_leaf_base(entry_point, number_of_arguments);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) {
  pass_arg0(this, arg_0);
  call_VM_leaf_base(entry_point, 1);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {
  pass_arg0(this, arg_0);
  pass_arg1(this, arg_1);
  call_VM_leaf_base(entry_point, 2);
}

void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0,
                                  Register arg_1, Register arg_2) {
  pass_arg0(this, arg_0);
  pass_arg1(this, arg_1);
  pass_arg2(this, arg_2);
  call_VM_leaf_base(entry_point, 3);
}

void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) {
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 1);
}

void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) {

  assert(arg_0 != c_rarg1, "smashed arg");
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 2);
}

void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) {
  assert(arg_0 != c_rarg2, "smashed arg");
  assert(arg_1 != c_rarg2, "smashed arg");
  pass_arg2(this, arg_2);
  assert(arg_0 != c_rarg1, "smashed arg");
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 3);
}

void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) {
  assert(arg_0 != c_rarg3, "smashed arg");
  assert(arg_1 != c_rarg3, "smashed arg");
  assert(arg_2 != c_rarg3, "smashed arg");
  pass_arg3(this, arg_3);
  assert(arg_0 != c_rarg2, "smashed arg");
  assert(arg_1 != c_rarg2, "smashed arg");
  pass_arg2(this, arg_2);
  assert(arg_0 != c_rarg1, "smashed arg");
  pass_arg1(this, arg_1);
  pass_arg0(this, arg_0);
  MacroAssembler::call_VM_leaf_base(entry_point, 4);
}

void MacroAssembler::nop() {
  addi(x0, x0, 0);
}

void MacroAssembler::mv(Register Rd, Register Rs) {
  if (Rd != Rs) {
    addi(Rd, Rs, 0);
  }
}

void MacroAssembler::notr(Register Rd, Register Rs) {
  xori(Rd, Rs, -1);
}

void MacroAssembler::neg(Register Rd, Register Rs) {
  sub(Rd, x0, Rs);
}

void MacroAssembler::negw(Register Rd, Register Rs) {
  subw(Rd, x0, Rs);
}

void MacroAssembler::sext_w(Register Rd, Register Rs) {
  addiw(Rd, Rs, 0);
}

void MacroAssembler::seqz(Register Rd, Register Rs) {
  sltiu(Rd, Rs, 1);
}

void MacroAssembler::snez(Register Rd, Register Rs) {
  sltu(Rd, x0, Rs);
}

void MacroAssembler::sltz(Register Rd, Register Rs) {
  slt(Rd, Rs, x0);
}

void MacroAssembler::sgtz(Register Rd, Register Rs) {
  slt(Rd, x0, Rs);
}

void MacroAssembler::fmv_s(FloatRegister Rd, FloatRegister Rs) {
  if (Rd != Rs) {
    fsgnj_s(Rd, Rs, Rs);
  }
}

void MacroAssembler::fabs_s(FloatRegister Rd, FloatRegister Rs) {
  fsgnjx_s(Rd, Rs, Rs);
}

void MacroAssembler::fneg_s(FloatRegister Rd, FloatRegister Rs) {
  fsgnjn_s(Rd, Rs, Rs);
}

void MacroAssembler::fmv_d(FloatRegister Rd, FloatRegister Rs) {
  if (Rd != Rs) {
    fsgnj_d(Rd, Rs, Rs);
  }
}

void MacroAssembler::fabs_d(FloatRegister Rd, FloatRegister Rs) {
  fsgnjx_d(Rd, Rs, Rs);
}

void MacroAssembler::fneg_d(FloatRegister Rd, FloatRegister Rs) {
  fsgnjn_d(Rd, Rs, Rs);
}

void MacroAssembler::vmnot_m(VectorRegister vd, VectorRegister vs) {
  vmnand_mm(vd, vs, vs);
}

void MacroAssembler::vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm) {
  vnsrl_wx(vd, vs, x0, vm);
}

void MacroAssembler::vfneg_v(VectorRegister vd, VectorRegister vs) {
  vfsgnjn_vv(vd, vs, vs);
}

void MacroAssembler::la(Register Rd, const address &dest) {
  int64_t offset = dest - pc();
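  // Worked example of the auipc/addi pair emitted below: for offset = 0xfff,
  // the auipc immediate becomes 0xfff + 0x800 = 0x17ff, whose upper 20 bits
  // materialize pc + 0x1000; the addi then adds the sign-extended low 12 bits
  // of the offset (0xfff -> -1), giving pc + 0xfff.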
  if (is_offset_in_range(offset, 32)) {
    auipc(Rd, (int32_t)offset + 0x800);  // 0x800 compensates for the sign extension of the low 12 bits added by the addi below
    addi(Rd, Rd, ((int64_t)offset << 52) >> 52);
  } else {
    movptr(Rd, dest);
  }
}

void MacroAssembler::la(Register Rd, const Address &adr) {
  InstructionMark im(this);
  code_section()->relocate(inst_mark(), adr.rspec());
  relocInfo::relocType rtype = adr.rspec().reloc()->type();

  switch (adr.getMode()) {
    case Address::literal: {
      if (rtype == relocInfo::none) {
        li(Rd, (intptr_t)(adr.target()));
      } else {
        movptr(Rd, adr.target());
      }
      break;
    }
    case Address::base_plus_offset: {
      int32_t offset = 0;
      baseOffset(Rd, adr, offset);
      addi(Rd, Rd, offset);
      break;
    }
    default:
      ShouldNotReachHere();
  }
}

void MacroAssembler::la(Register Rd, Label &label) {
  la(Rd, target(label));
}

#define INSN(NAME)                                                                \
  void MacroAssembler::NAME##z(Register Rs, const address &dest) {                \
    NAME(Rs, zr, dest);                                                           \
  }                                                                               \
  void MacroAssembler::NAME##z(Register Rs, Label &l, bool is_far) {              \
    NAME(Rs, zr, l, is_far);                                                      \
  }                                                                               \

  INSN(beq);
  INSN(bne);
  INSN(blt);
  INSN(ble);
  INSN(bge);
  INSN(bgt);

#undef INSN

// Float compare branch instructions
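// Note: feq/flt/fle write 0 whenever either operand is NaN, which is what the
// is_unordered variants below rely on.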

#define INSN(NAME, FLOATCMP, BRANCH)                                                                                   \
  void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far, bool is_unordered) {  \
    FLOATCMP##_s(t0, Rs1, Rs2);                                                                                        \
    BRANCH(t0, l, is_far);                                                                                             \
  }                                                                                                                    \
  void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far, bool is_unordered) { \
    FLOATCMP##_d(t0, Rs1, Rs2);                                                                                        \
    BRANCH(t0, l, is_far);                                                                                             \
  }

  INSN(beq, feq, bnez);
  INSN(bne, feq, beqz);
#undef INSN


#define INSN(NAME, FLOATCMP1, FLOATCMP2)                                              \
  void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l,   \
                                    bool is_far, bool is_unordered) {                 \
    if (is_unordered) {                                                               \
      /* jump if either source is NaN or condition is expected */                     \
      FLOATCMP2##_s(t0, Rs2, Rs1);                                                    \
      beqz(t0, l, is_far);                                                            \
    } else {                                                                          \
      /* jump if no NaN in source and condition is expected */                        \
      FLOATCMP1##_s(t0, Rs1, Rs2);                                                    \
      bnez(t0, l, is_far);                                                            \
    }                                                                                 \
  }                                                                                   \
  void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l,  \
                                     bool is_far, bool is_unordered) {                \
    if (is_unordered) {                                                               \
      /* jump if either source is NaN or condition is expected */                     \
      FLOATCMP2##_d(t0, Rs2, Rs1);                                                    \
      beqz(t0, l, is_far);                                                            \
    } else {                                                                          \
      /* jump if no NaN in source and condition is expected */                        \
      FLOATCMP1##_d(t0, Rs1, Rs2);                                                    \
      bnez(t0, l, is_far);                                                            \
    }                                                                                 \
  }

  INSN(ble, fle, flt);
  INSN(blt, flt, fle);

#undef INSN

#define INSN(NAME, CMP)                                                              \
  void MacroAssembler::float_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l,  \
                                    bool is_far, bool is_unordered) {                \
    float_##CMP(Rs2, Rs1, l, is_far, is_unordered);                                  \
  }                                                                                  \
  void MacroAssembler::double_##NAME(FloatRegister Rs1, FloatRegister Rs2, Label &l, \
                                     bool is_far, bool is_unordered) {               \
    double_##CMP(Rs2, Rs1, l, is_far, is_unordered);                                 \
  }

  INSN(bgt, blt);
  INSN(bge, ble);

#undef INSN


#define INSN(NAME, CSR)                       \
  void MacroAssembler::NAME(Register Rd) {    \
    csrr(Rd, CSR);                            \
  }

  INSN(rdinstret,  CSR_INSTERT);
  INSN(rdcycle,    CSR_CYCLE);
  INSN(rdtime,     CSR_TIME);
  INSN(frcsr,      CSR_FCSR);
  INSN(frrm,       CSR_FRM);
  INSN(frflags,    CSR_FFLAGS);

#undef INSN

void MacroAssembler::csrr(Register Rd, unsigned csr) {
  csrrs(Rd, csr, x0);
}

#define INSN(NAME, OPFUN)                                      \
  void MacroAssembler::NAME(unsigned csr, Register Rs) {       \
    OPFUN(x0, csr, Rs);                                        \
  }

  INSN(csrw, csrrw);
  INSN(csrs, csrrs);
  INSN(csrc, csrrc);

#undef INSN

#define INSN(NAME, OPFUN)                                      \
  void MacroAssembler::NAME(unsigned csr, unsigned imm) {      \
    OPFUN(x0, csr, imm);                                       \
  }

  INSN(csrwi, csrrwi);
  INSN(csrsi, csrrsi);
  INSN(csrci, csrrci);

#undef INSN

#define INSN(NAME, CSR)                                      \
  void MacroAssembler::NAME(Register Rd, Register Rs) {      \
    csrrw(Rd, CSR, Rs);                                      \
  }

  INSN(fscsr,   CSR_FCSR);
  INSN(fsrm,    CSR_FRM);
  INSN(fsflags, CSR_FFLAGS);

#undef INSN

#define INSN(NAME)                              \
  void MacroAssembler::NAME(Register Rs) {      \
    NAME(x0, Rs);                               \
  }

  INSN(fscsr);
  INSN(fsrm);
  INSN(fsflags);

#undef INSN

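// The five RISC-V rounding modes are 0 (RNE), 1 (RTZ), 2 (RDN), 3 (RUP) and
// 4 (RMM); hence the guarantee below.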
void MacroAssembler::fsrmi(Register Rd, unsigned imm) {
  guarantee(imm < 5, "Rounding Mode is invalid in Rounding Mode register");
  csrrwi(Rd, CSR_FRM, imm);
}

void MacroAssembler::fsflagsi(Register Rd, unsigned imm) {
  csrrwi(Rd, CSR_FFLAGS, imm);
}

#define INSN(NAME)                             \
  void MacroAssembler::NAME(unsigned imm) {    \
    NAME(x0, imm);                             \
  }

  INSN(fsrmi);
  INSN(fsflagsi);

#undef INSN

void MacroAssembler::push_reg(Register Rs)
{
  addi(esp, esp, 0 - wordSize);
  sd(Rs, Address(esp, 0));
}

void MacroAssembler::pop_reg(Register Rd)
{
  ld(Rd, esp, 0);
  addi(esp, esp, wordSize);
}

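// Example: bitset 0x000000a2 requests x1, x5 and x7. With zr forced in, the
// scan yields regs = {7, 5, 1, 0} and count = 4, so zr pads the group to an
// even number of registers; when the request is already even, the trailing
// zr is dropped again by count &= ~1.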
int MacroAssembler::bitset_to_regs(unsigned int bitset, unsigned char* regs) {
  DEBUG_ONLY(int words_pushed = 0;)

  int count = 0;
  // sp is x2 and zr is x0; sp should not be pushed.
  // If the number of registers is odd, zr is used for stack alignment. Otherwise, it is ignored.
 961   bitset &= ~ (1U << 2);
 962   bitset |= 0x1;
 963 
 964   // Scan bitset to accumulate register pairs
 965   for (int reg = 31; reg >= 0; reg --) {
 966     if ((1U << 31) & bitset) {
 967       regs[count++] = reg;
 968     }
 969     bitset <<= 1;
 970   }
 971   count &= ~1;  // Only push an even number of regs
 972   return count;
 973 }
 974 
 975 // Push lots of registers in the bit set supplied.  Don't push sp.
 976 // Return the number of words pushed
 977 int MacroAssembler::push_reg(unsigned int bitset, Register stack) {
 978   DEBUG_ONLY(int words_pushed = 0;)
 979 
 980   unsigned char regs[32];
 981   int count = bitset_to_regs(bitset, regs);
 982 
 983   if (count) {
 984     addi(stack, stack, - count * wordSize);
 985   }
 986   for (int i = count - 1; i >= 0; i--) {
 987     sd(as_Register(regs[i]), Address(stack, (count -1 - i) * wordSize));
 988     DEBUG_ONLY(words_pushed ++;)
 989   }
 990 
 991   assert(words_pushed == count, "oops, pushed != count");
 992 
 993   return count;
 994 }
 995 
 996 int MacroAssembler::pop_reg(unsigned int bitset, Register stack) {
 997   DEBUG_ONLY(int words_popped = 0;)
 998 
 999   unsigned char regs[32];
1000   int count = bitset_to_regs(bitset, regs);
1001 
1002   for (int i = count - 1; i >= 0; i--) {
1003     ld(as_Register(regs[i]), Address(stack, (count -1 - i) * wordSize));
1004     DEBUG_ONLY(words_popped ++;)
1005   }
1006 
1007   if (count) {
1008     addi(stack, stack, count * wordSize);
1009   }
1010   assert(words_popped == count, "oops, popped != count");
1011 
1012   return count;
1013 }
1014 
1015 int MacroAssembler::bitset_to_fregs(unsigned int bitset, unsigned char* regs) {
1016   int count = 0;
1017   // Scan bitset to accumulate register pairs
1018   for (int reg = 31; reg >= 0; reg--) {
1019     if ((1U << 31) & bitset) {
1020       regs[count++] = reg;
1021     }
1022     bitset <<= 1;
1023   }
1024 
1025   return count;
1026 }
1027 
1028 // Push float registers in the bitset, except sp.
1029 // Return the number of heapwords pushed.
1030 int MacroAssembler::push_fp(unsigned int bitset, Register stack) {
1031   int words_pushed = 0;
1032   unsigned char regs[32];
1033   int count = bitset_to_fregs(bitset, regs);
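  // Round the slot count up to an even number of slots so that sp stays
  // 16-byte aligned, as the RISC-V ABI requires.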
  int push_slots = count + (count & 1);

  if (count) {
    addi(stack, stack, -push_slots * wordSize);
  }

  for (int i = count - 1; i >= 0; i--) {
    fsd(as_FloatRegister(regs[i]), Address(stack, (push_slots - 1 - i) * wordSize));
    words_pushed++;
  }

  assert(words_pushed == count, "oops, pushed(%d) != count(%d)", words_pushed, count);
  return count;
}

int MacroAssembler::pop_fp(unsigned int bitset, Register stack) {
  int words_popped = 0;
  unsigned char regs[32];
  int count = bitset_to_fregs(bitset, regs);
  int pop_slots = count + (count & 1);

  for (int i = count - 1; i >= 0; i--) {
    fld(as_FloatRegister(regs[i]), Address(stack, (pop_slots - 1 - i) * wordSize));
    words_popped++;
  }

  if (count) {
    addi(stack, stack, pop_slots * wordSize);
  }

  assert(words_popped == count, "oops, popped(%d) != count(%d)", words_popped, count);
  return count;
}

#ifdef COMPILER2
int MacroAssembler::push_vp(unsigned int bitset, Register stack) {
  int vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE);

  // Scan bitset to accumulate register pairs
  unsigned char regs[32];
  int count = 0;
  for (int reg = 31; reg >= 0; reg--) {
    if ((1U << 31) & bitset) {
      regs[count++] = reg;
    }
    bitset <<= 1;
  }

  for (int i = 0; i < count; i++) {
    sub(stack, stack, vector_size_in_bytes);
    vs1r_v(as_VectorRegister(regs[i]), stack);
  }

  return count * vector_size_in_bytes / wordSize;
}

int MacroAssembler::pop_vp(unsigned int bitset, Register stack) {
  int vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE);

  // Scan bitset to accumulate register pairs
  unsigned char regs[32];
  int count = 0;
  for (int reg = 31; reg >= 0; reg--) {
    if ((1U << 31) & bitset) {
      regs[count++] = reg;
    }
    bitset <<= 1;
  }

  for (int i = count - 1; i >= 0; i--) {
    vl1r_v(as_VectorRegister(regs[i]), stack);
    add(stack, stack, vector_size_in_bytes);
  }

  return count * vector_size_in_bytes / wordSize;
}
#endif // COMPILER2

void MacroAssembler::push_call_clobbered_registers_except(RegSet exclude) {
  // Push integer registers x7, x10-x17, x28-x31.
  push_reg(RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31) - exclude, sp);

  // Push float registers f0-f7, f10-f17, f28-f31.
  addi(sp, sp, - wordSize * 20);
  int offset = 0;
  for (int i = 0; i < 32; i++) {
    if (i <= f7->encoding() || i >= f28->encoding() || (i >= f10->encoding() && i <= f17->encoding())) {
      fsd(as_FloatRegister(i), Address(sp, wordSize * (offset++)));
    }
  }
}

void MacroAssembler::pop_call_clobbered_registers_except(RegSet exclude) {
  int offset = 0;
  for (int i = 0; i < 32; i++) {
    if (i <= f7->encoding() || i >= f28->encoding() || (i >= f10->encoding() && i <= f17->encoding())) {
      fld(as_FloatRegister(i), Address(sp, wordSize * (offset++)));
    }
  }
  addi(sp, sp, wordSize * 20);

  pop_reg(RegSet::of(x7) + RegSet::range(x10, x17) + RegSet::range(x28, x31) - exclude, sp);
}

// Push all the integer registers, except zr(x0) & sp(x2).
void MacroAssembler::pusha() {
  push_reg(0xfffffffa, sp);
}

void MacroAssembler::popa() {
  pop_reg(0xfffffffa, sp);
}

void MacroAssembler::push_CPU_state(bool save_vectors, int vector_size_in_bytes) {
  // integer registers, except zr(x0) & ra(x1) & sp(x2)
  push_reg(0xfffffff8, sp);

  // float registers
  addi(sp, sp, - 32 * wordSize);
  for (int i = 0; i < 32; i++) {
    fsd(as_FloatRegister(i), Address(sp, i * wordSize));
  }

  // vector registers
  if (save_vectors) {
    sub(sp, sp, vector_size_in_bytes * VectorRegisterImpl::number_of_registers);
    vsetvli(t0, x0, Assembler::e64, Assembler::m8);
    for (int i = 0; i < VectorRegisterImpl::number_of_registers; i += 8) {
      add(t0, sp, vector_size_in_bytes * i);
      vse64_v(as_VectorRegister(i), t0);
    }
  }
}

void MacroAssembler::pop_CPU_state(bool restore_vectors, int vector_size_in_bytes) {
  // vector registers
  if (restore_vectors) {
    vsetvli(t0, x0, Assembler::e64, Assembler::m8);
    for (int i = 0; i < VectorRegisterImpl::number_of_registers; i += 8) {
      vle64_v(as_VectorRegister(i), sp);
      add(sp, sp, vector_size_in_bytes * 8);
    }
  }

  // float registers
  for (int i = 0; i < 32; i++) {
    fld(as_FloatRegister(i), Address(sp, i * wordSize));
  }
  addi(sp, sp, 32 * wordSize);

  // integer registers, except zr(x0) & ra(x1) & sp(x2)
  pop_reg(0xfffffff8, sp);
}

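// The patch_* helpers below rewrite instruction immediates in place. The bit
// scatter follows the RISC-V instruction formats; e.g. the J-type immediate
// of jal, imm[20|10:1|11|19:12], lives in instruction bits [31|30:21|20|19:12].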
static int patch_offset_in_jal(address branch, int64_t offset) {
  assert(is_imm_in_range(offset, 20, 1), "offset is too large to be patched in one jal instruction!\n");
  Assembler::patch(branch, 31, 31, (offset >> 20) & 0x1);                       // offset[20]    ==> branch[31]
  Assembler::patch(branch, 30, 21, (offset >> 1)  & 0x3ff);                     // offset[10:1]  ==> branch[30:21]
  Assembler::patch(branch, 20, 20, (offset >> 11) & 0x1);                       // offset[11]    ==> branch[20]
  Assembler::patch(branch, 19, 12, (offset >> 12) & 0xff);                      // offset[19:12] ==> branch[19:12]
  return NativeInstruction::instruction_size;                                   // only one instruction
}

static int patch_offset_in_conditional_branch(address branch, int64_t offset) {
  assert(is_imm_in_range(offset, 12, 1), "offset is too large to be patched in one beq/bge/bgeu/blt/bltu/bne instruction!\n");
  Assembler::patch(branch, 31, 31, (offset >> 12) & 0x1);                       // offset[12]    ==> branch[31]
  Assembler::patch(branch, 30, 25, (offset >> 5)  & 0x3f);                      // offset[10:5]  ==> branch[30:25]
  Assembler::patch(branch, 7,  7,  (offset >> 11) & 0x1);                       // offset[11]    ==> branch[7]
  Assembler::patch(branch, 11, 8,  (offset >> 1)  & 0xf);                       // offset[4:1]   ==> branch[11:8]
  return NativeInstruction::instruction_size;                                   // only one instruction
}

static int patch_offset_in_pc_relative(address branch, int64_t offset) {
  const int PC_RELATIVE_INSTRUCTION_NUM = 2;                                    // auipc, addi/jalr/load
  Assembler::patch(branch, 31, 12, ((offset + 0x800) >> 12) & 0xfffff);         // Auipc.          offset[31:12]  ==> branch[31:12]
  Assembler::patch(branch + 4, 31, 20, offset & 0xfff);                         // Addi/Jalr/Load. offset[11:0]   ==> branch[31:20]
  return PC_RELATIVE_INSTRUCTION_NUM * NativeInstruction::instruction_size;
}

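// patch_addr_in_movptr below rewrites the 6-instruction movptr sequence
// (lui + addi + slli(11) + addi + slli(5) + addi/jalr/load) that materializes
// a 48-bit address. The slli instructions carry no immediate bits, which is
// why only words 0, 1, 3 and 5 of the sequence are patched.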
static int patch_addr_in_movptr(address branch, address target) {
  const int MOVPTR_INSTRUCTIONS_NUM = 6;                                        // lui + addi + slli + addi + slli + addi/jalr/load
  int32_t lower = ((intptr_t)target << 36) >> 36;
  int64_t upper = ((intptr_t)target - lower) >> 28;
  Assembler::patch(branch + 0,  31, 12, upper & 0xfffff);                       // Lui.             target[47:28] + target[27] ==> branch[31:12]
  Assembler::patch(branch + 4,  31, 20, (lower >> 16) & 0xfff);                 // Addi.            target[27:16] ==> branch[31:20]
  Assembler::patch(branch + 12, 31, 20, (lower >> 5) & 0x7ff);                  // Addi.            target[15: 5] ==> branch[31:20]
  Assembler::patch(branch + 20, 31, 20, lower & 0x1f);                          // Addi/Jalr/Load.  target[ 4: 0] ==> branch[31:20]
  return MOVPTR_INSTRUCTIONS_NUM * NativeInstruction::instruction_size;
}

static int patch_imm_in_li64(address branch, address target) {
  const int LI64_INSTRUCTIONS_NUM = 8;                                          // lui + addi + slli + addi + slli + addi + slli + addi
  int64_t lower = (intptr_t)target & 0xffffffff;
  lower = lower - ((lower << 44) >> 44);
  int64_t tmp_imm = ((uint64_t)((intptr_t)target & 0xffffffff00000000)) + (uint64_t)lower;
  int32_t upper =  (tmp_imm - (int32_t)lower) >> 32;
  int64_t tmp_upper = upper, tmp_lower = upper;
  tmp_lower = (tmp_lower << 52) >> 52;
  tmp_upper -= tmp_lower;
  tmp_upper >>= 12;
  // Load upper 32 bits. Upper = target[63:32], but if target[31] = 1 or (target[31:28] == 0x7ff && target[19] == 1),
  // upper = target[63:32] + 1.
  Assembler::patch(branch + 0,  31, 12, tmp_upper & 0xfffff);                       // Lui.
  Assembler::patch(branch + 4,  31, 20, tmp_lower & 0xfff);                         // Addi.
  // Load the rest 32 bits.
  Assembler::patch(branch + 12, 31, 20, ((int32_t)lower >> 20) & 0xfff);            // Addi.
  Assembler::patch(branch + 20, 31, 20, (((intptr_t)target << 44) >> 52) & 0xfff);  // Addi.
  Assembler::patch(branch + 28, 31, 20, (intptr_t)target & 0xff);                   // Addi.
  return LI64_INSTRUCTIONS_NUM * NativeInstruction::instruction_size;
}

static int patch_imm_in_li32(address branch, int32_t target) {
  const int LI32_INSTRUCTIONS_NUM = 2;                                          // lui + addiw
  int64_t upper = (intptr_t)target;
  int32_t lower = (((int32_t)target) << 20) >> 20;
  upper -= lower;
  upper = (int32_t)upper;
  Assembler::patch(branch + 0,  31, 12, (upper >> 12) & 0xfffff);               // Lui.
  Assembler::patch(branch + 4,  31, 20, lower & 0xfff);                         // Addiw.
  return LI32_INSTRUCTIONS_NUM * NativeInstruction::instruction_size;
}

static long get_offset_of_jal(address insn_addr) {
  assert_cond(insn_addr != NULL);
  long offset = 0;
  unsigned insn = *(unsigned*)insn_addr;
  long val = (long)Assembler::sextract(insn, 31, 12);
  offset |= ((val >> 19) & 0x1) << 20;
  offset |= (val & 0xff) << 12;
  offset |= ((val >> 8) & 0x1) << 11;
  offset |= ((val >> 9) & 0x3ff) << 1;
  offset = (offset << 43) >> 43;
  return offset;
}

static long get_offset_of_conditional_branch(address insn_addr) {
  long offset = 0;
  assert_cond(insn_addr != NULL);
  unsigned insn = *(unsigned*)insn_addr;
  offset = (long)Assembler::sextract(insn, 31, 31);
  offset = (offset << 12) | (((long)(Assembler::sextract(insn, 7, 7) & 0x1)) << 11);
  offset = offset | (((long)(Assembler::sextract(insn, 30, 25) & 0x3f)) << 5);
  offset = offset | (((long)(Assembler::sextract(insn, 11, 8) & 0xf)) << 1);
  offset = (offset << 41) >> 41;
  return offset;
}

static long get_offset_of_pc_relative(address insn_addr) {
  long offset = 0;
  assert_cond(insn_addr != NULL);
  offset = ((long)(Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12))) << 12;                                  // Auipc.
  offset += ((long)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20));                                         // Addi/Jalr/Load.
  offset = (offset << 32) >> 32;
  return offset;
}

static address get_target_of_movptr(address insn_addr) {
  assert_cond(insn_addr != NULL);
  intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 28;    // Lui.
  target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)) << 16;                        // Addi.
  target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[3], 31, 20)) << 5;                         // Addi.
  target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[5], 31, 20));                              // Addi/Jalr/Load.
  return (address) target_address;
}

static address get_target_of_li64(address insn_addr) {
  assert_cond(insn_addr != NULL);
  intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 44;    // Lui.
  target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20)) << 32;                        // Addi.
  target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[3], 31, 20)) << 20;                        // Addi.
  target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[5], 31, 20)) << 8;                         // Addi.
  target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[7], 31, 20));                              // Addi.
  return (address)target_address;
}

static address get_target_of_li32(address insn_addr) {
  assert_cond(insn_addr != NULL);
  intptr_t target_address = (((int64_t)Assembler::sextract(((unsigned*)insn_addr)[0], 31, 12)) & 0xfffff) << 12;    // Lui.
  target_address += ((int64_t)Assembler::sextract(((unsigned*)insn_addr)[1], 31, 20));                              // Addiw.
  return (address)target_address;
}

// Patch any kind of instruction; there may be several instructions.
// Return the total length (in bytes) of the instructions.
int MacroAssembler::pd_patch_instruction_size(address branch, address target) {
  assert_cond(branch != NULL);
  int64_t offset = target - branch;
  if (NativeInstruction::is_jal_at(branch)) {                         // jal
    return patch_offset_in_jal(branch, offset);
  } else if (NativeInstruction::is_branch_at(branch)) {               // beq/bge/bgeu/blt/bltu/bne
    return patch_offset_in_conditional_branch(branch, offset);
  } else if (NativeInstruction::is_pc_relative_at(branch)) {          // auipc, addi/jalr/load
    return patch_offset_in_pc_relative(branch, offset);
  } else if (NativeInstruction::is_movptr_at(branch)) {               // movptr
    return patch_addr_in_movptr(branch, target);
  } else if (NativeInstruction::is_li64_at(branch)) {                 // li64
    return patch_imm_in_li64(branch, target);
  } else if (NativeInstruction::is_li32_at(branch)) {                 // li32
    int64_t imm = (intptr_t)target;
    return patch_imm_in_li32(branch, (int32_t)imm);
  } else {
    tty->print_cr("pd_patch_instruction_size: instruction 0x%x could not be patched!\n", *(unsigned*)branch);
    ShouldNotReachHere();
  }
  return -1;
}

address MacroAssembler::target_addr_for_insn(address insn_addr) {
  long offset = 0;
  assert_cond(insn_addr != NULL);
  if (NativeInstruction::is_jal_at(insn_addr)) {                     // jal
    offset = get_offset_of_jal(insn_addr);
  } else if (NativeInstruction::is_branch_at(insn_addr)) {           // beq/bge/bgeu/blt/bltu/bne
    offset = get_offset_of_conditional_branch(insn_addr);
  } else if (NativeInstruction::is_pc_relative_at(insn_addr)) {      // auipc, addi/jalr/load
    offset = get_offset_of_pc_relative(insn_addr);
  } else if (NativeInstruction::is_movptr_at(insn_addr)) {           // movptr
    return get_target_of_movptr(insn_addr);
  } else if (NativeInstruction::is_li64_at(insn_addr)) {             // li64
    return get_target_of_li64(insn_addr);
  } else if (NativeInstruction::is_li32_at(insn_addr)) {             // li32
    return get_target_of_li32(insn_addr);
  } else {
    ShouldNotReachHere();
  }
  return address(((uintptr_t)insn_addr + offset));
}

int MacroAssembler::patch_oop(address insn_addr, address o) {
  // OOPs are either narrow (32 bits) or wide (48 bits).  We encode
  // narrow OOPs by setting the upper 16 bits in the first
  // instruction.
  if (NativeInstruction::is_li32_at(insn_addr)) {
    // Move narrow OOP
    uint32_t n = CompressedOops::narrow_oop_value(cast_to_oop(o));
    return patch_imm_in_li32(insn_addr, (int32_t)n);
  } else if (NativeInstruction::is_movptr_at(insn_addr)) {
    // Move wide OOP
    return patch_addr_in_movptr(insn_addr, o);
  }
  ShouldNotReachHere();
  return -1;
}

void MacroAssembler::reinit_heapbase() {
  if (UseCompressedOops) {
    if (Universe::is_fully_initialized()) {
      mv(xheapbase, CompressedOops::ptrs_base());
    } else {
      int32_t offset = 0;
      la_patchable(xheapbase, ExternalAddress((address)CompressedOops::ptrs_base_addr()), offset);
      ld(xheapbase, Address(xheapbase, offset));
    }
  }
}

void MacroAssembler::mv(Register Rd, int64_t imm64) {
  li(Rd, imm64);
}

void MacroAssembler::mv(Register Rd, int imm) {
  mv(Rd, (int64_t)imm);
}

void MacroAssembler::mvw(Register Rd, int32_t imm32) {
  mv(Rd, imm32);
}

void MacroAssembler::mv(Register Rd, Address dest) {
  assert(dest.getMode() == Address::literal, "Address mode should be Address::literal");
  code_section()->relocate(pc(), dest.rspec());
  movptr(Rd, dest.target());
}

void MacroAssembler::mv(Register Rd, address addr) {
  // In case this is used with relocation, use the fixed-length
  // instruction movptr instead of li
1411   movptr(Rd, addr);
1412 }
1413 
1414 void MacroAssembler::mv(Register Rd, RegisterOrConstant src) {
1415   if (src.is_register()) {
1416     mv(Rd, src.as_register());
1417   } else {
1418     mv(Rd, src.as_constant());
1419   }
1420 }
1421 
1422 void MacroAssembler::andrw(Register Rd, Register Rs1, Register Rs2) {
1423   andr(Rd, Rs1, Rs2);
  // addw: the result is truncated to 32 bits, sign-extended,
  // and stored in Rd
1426   addw(Rd, Rd, zr);
1427 }
1428 
1429 void MacroAssembler::orrw(Register Rd, Register Rs1, Register Rs2) {
1430   orr(Rd, Rs1, Rs2);
  // addw: the result is truncated to 32 bits, sign-extended,
  // and stored in Rd
1433   addw(Rd, Rd, zr);
1434 }
1435 
1436 void MacroAssembler::xorrw(Register Rd, Register Rs1, Register Rs2) {
1437   xorr(Rd, Rs1, Rs2);
  // addw: the result is truncated to 32 bits, sign-extended,
  // and stored in Rd
1440   addw(Rd, Rd, zr);
1441 }
1442 
1443 // Note: load_unsigned_short used to be called load_unsigned_word.
1444 int MacroAssembler::load_unsigned_short(Register dst, Address src) {
1445   int off = offset();
1446   lhu(dst, src);
1447   return off;
1448 }
1449 
1450 int MacroAssembler::load_unsigned_byte(Register dst, Address src) {
1451   int off = offset();
1452   lbu(dst, src);
1453   return off;
1454 }
1455 
1456 int MacroAssembler::load_signed_short(Register dst, Address src) {
1457   int off = offset();
1458   lh(dst, src);
1459   return off;
1460 }
1461 
1462 int MacroAssembler::load_signed_byte(Register dst, Address src) {
1463   int off = offset();
1464   lb(dst, src);
1465   return off;
1466 }
1467 
1468 void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) {
1469   switch (size_in_bytes) {
1470     case  8:  ld(dst, src); break;
1471     case  4:  is_signed ? lw(dst, src) : lwu(dst, src); break;
1472     case  2:  is_signed ? load_signed_short(dst, src) : load_unsigned_short(dst, src); break;
1473     case  1:  is_signed ? load_signed_byte( dst, src) : load_unsigned_byte( dst, src); break;
1474     default:  ShouldNotReachHere();
1475   }
1476 }
1477 
1478 void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) {
1479   switch (size_in_bytes) {
1480     case  8:  sd(src, dst); break;
1481     case  4:  sw(src, dst); break;
1482     case  2:  sh(src, dst); break;
1483     case  1:  sb(src, dst); break;
1484     default:  ShouldNotReachHere();
1485   }
1486 }
1487 
1488 void MacroAssembler::reverseb16(Register Rd, Register Rs, Register Rtmp1, Register Rtmp2) {
1489   // This method is only used for grev16
  // Rd[63:0] = Rs[47:0] Rs[55:48] Rs[63:56]
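  // e.g. Rs == 0x0102030405060708 -> Rd == 0x0304050607080201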
1491   assert_different_registers(Rs, Rtmp1, Rtmp2);
1492   assert_different_registers(Rd, Rtmp1);
1493   srli(Rtmp1, Rs, 48);
1494   andi(Rtmp2, Rtmp1, 0xff);
1495   slli(Rtmp2, Rtmp2, 8);
1496   srli(Rtmp1, Rtmp1, 8);
1497   orr(Rtmp1, Rtmp1, Rtmp2);
1498   slli(Rd, Rs, 16);
1499   orr(Rd, Rd, Rtmp1);
1500 }
1501 
1502 void MacroAssembler::reverseh32(Register Rd, Register Rs, Register Rtmp1, Register Rtmp2) {
1503   // This method is only used for grev32
1504   // Rd[63:0] = Rs[31:0] Rs[47:32] Rs[63:48]
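  // e.g. Rs == 0x0102030405060708 -> Rd == 0x0506070803040102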
1505   assert_different_registers(Rs, Rtmp1, Rtmp2);
1506   assert_different_registers(Rd, Rtmp1);
1507   srli(Rtmp1, Rs, 32);
1508   slli(Rtmp2, Rtmp1, 48);
1509   srli(Rtmp2, Rtmp2, 32);
1510   srli(Rtmp1, Rtmp1, 16);
1511   orr(Rtmp1, Rtmp1, Rtmp2);
1512   slli(Rd, Rs, 32);
1513   orr(Rd, Rd, Rtmp1);
1514 }
1515 
1516 void MacroAssembler::grevh(Register Rd, Register Rs, Register Rtmp) {
1517   // Reverse bytes in half-word
1518   // Rd[15:0] = Rs[7:0] Rs[15:8] (sign-extend to 64 bits)
1519   assert_different_registers(Rs, Rtmp);
1520   assert_different_registers(Rd, Rtmp);
1521   srli(Rtmp, Rs, 8);
1522   andi(Rtmp, Rtmp, 0xFF);
1523   slli(Rd, Rs, 56);
1524   srai(Rd, Rd, 48); // sign-extend
1525   orr(Rd, Rd, Rtmp);
1526 }
1527 
1528 void MacroAssembler::grevhu(Register Rd, Register Rs, Register Rtmp) {
1529   // Reverse bytes in half-word
1530   // Rd[15:0] = Rs[7:0] Rs[15:8] (zero-extend to 64 bits)
1531   assert_different_registers(Rs, Rtmp);
1532   assert_different_registers(Rd, Rtmp);
1533   srli(Rtmp, Rs, 8);
1534   andi(Rtmp, Rtmp, 0xFF);
1535   andi(Rd, Rs, 0xFF);
1536   slli(Rd, Rd, 8);
1537   orr(Rd, Rd, Rtmp);
1538 }
1539 
1540 void MacroAssembler::grev16w(Register Rd, Register Rs, Register Rtmp1, Register Rtmp2) {
1541   // Reverse bytes in half-word (32bit)
1542   // Rd[31:0] = Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8] (sign-extend to 64 bits)
1543   assert_different_registers(Rs, Rtmp1, Rtmp2);
1544   assert_different_registers(Rd, Rtmp1, Rtmp2);
1545   srli(Rtmp2, Rs, 16);
1546   grevh(Rtmp2, Rtmp2, Rtmp1);
1547   grevhu(Rd, Rs, Rtmp1);
1548   slli(Rtmp2, Rtmp2, 16);
1549   orr(Rd, Rd, Rtmp2);
1550 }
1551 
1552 void MacroAssembler::grev16wu(Register Rd, Register Rs, Register Rtmp1, Register Rtmp2) {
1553   // Reverse bytes in half-word (32bit)
1554   // Rd[31:0] = Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8] (zero-extend to 64 bits)
1555   assert_different_registers(Rs, Rtmp1, Rtmp2);
1556   assert_different_registers(Rd, Rtmp1, Rtmp2);
1557   srli(Rtmp2, Rs, 16);
1558   grevhu(Rtmp2, Rtmp2, Rtmp1);
1559   grevhu(Rd, Rs, Rtmp1);
1560   slli(Rtmp2, Rtmp2, 16);
1561   orr(Rd, Rd, Rtmp2);
1562 }
1563 
1564 void MacroAssembler::grevw(Register Rd, Register Rs, Register Rtmp1, Register Rtmp2) {
1565   // Reverse bytes in word (32bit)
1566   // Rd[31:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] (sign-extend to 64 bits)
1567   assert_different_registers(Rs, Rtmp1, Rtmp2);
1568   assert_different_registers(Rd, Rtmp1, Rtmp2);
1569   grev16wu(Rd, Rs, Rtmp1, Rtmp2);
1570   slli(Rtmp2, Rd, 48);
1571   srai(Rtmp2, Rtmp2, 32); // sign-extend
1572   srli(Rd, Rd, 16);
1573   orr(Rd, Rd, Rtmp2);
1574 }
1575 
1576 void MacroAssembler::grevwu(Register Rd, Register Rs, Register Rtmp1, Register Rtmp2) {
1577   // Reverse bytes in word (32bit)
1578   // Rd[31:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] (zero-extend to 64 bits)
1579   assert_different_registers(Rs, Rtmp1, Rtmp2);
1580   assert_different_registers(Rd, Rtmp1, Rtmp2);
1581   grev16wu(Rd, Rs, Rtmp1, Rtmp2);
1582   slli(Rtmp2, Rd, 48);
1583   srli(Rtmp2, Rtmp2, 32);
1584   srli(Rd, Rd, 16);
1585   orr(Rd, Rd, Rtmp2);
1586 }
1587 
1588 void MacroAssembler::grev16(Register Rd, Register Rs, Register Rtmp1, Register Rtmp2) {
1589   // Reverse bytes in half-word (64bit)
1590   // Rd[63:0] = Rs[55:48] Rs[63:56] Rs[39:32] Rs[47:40] Rs[23:16] Rs[31:24] Rs[7:0] Rs[15:8]
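  // e.g. Rs == 0x0102030405060708 -> Rd == 0x0201040306050807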
1591   assert_different_registers(Rs, Rtmp1, Rtmp2);
1592   assert_different_registers(Rd, Rtmp1, Rtmp2);
1593   reverseb16(Rd, Rs, Rtmp1, Rtmp2);
1594   for (int i = 0; i < 3; ++i) {
1595     reverseb16(Rd, Rd, Rtmp1, Rtmp2);
1596   }
1597 }
1598 
1599 void MacroAssembler::grev32(Register Rd, Register Rs, Register Rtmp1, Register Rtmp2) {
1600   // Reverse bytes in word (64bit)
1601   // Rd[63:0] = Rs[39:32] Rs[47:40] Rs[55:48] Rs[63:56] Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24]
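  // e.g. Rs == 0x0102030405060708 -> Rd == 0x0403020108070605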
1602   assert_different_registers(Rs, Rtmp1, Rtmp2);
1603   assert_different_registers(Rd, Rtmp1, Rtmp2);
1604   grev16(Rd, Rs, Rtmp1, Rtmp2);
1605   reverseh32(Rd, Rd, Rtmp1, Rtmp2);
1606   reverseh32(Rd, Rd, Rtmp1, Rtmp2);
1607 }
1608 
1609 void MacroAssembler::grev(Register Rd, Register Rs, Register Rtmp1, Register Rtmp2) {
1610   // Reverse bytes in double-word (64bit)
  // Rd[63:0] = Rs[7:0] Rs[15:8] Rs[23:16] Rs[31:24] Rs[39:32] Rs[47:40] Rs[55:48] Rs[63:56]
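  // e.g. Rs == 0x0102030405060708 -> Rd == 0x0807060504030201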
1612   assert_different_registers(Rs, Rtmp1, Rtmp2);
1613   assert_different_registers(Rd, Rtmp1, Rtmp2);
1614   grev32(Rd, Rs, Rtmp1, Rtmp2);
1615   slli(Rtmp2, Rd, 32);
1616   srli(Rd, Rd, 32);
1617   orr(Rd, Rd, Rtmp2);
1618 }
1619 
1620 void MacroAssembler::andi(Register Rd, Register Rn, int64_t imm, Register tmp) {
1621   if (is_imm_in_range(imm, 12, 0)) {
1622     and_imm12(Rd, Rn, imm);
1623   } else {
1624     assert_different_registers(Rn, tmp);
1625     li(tmp, imm);
1626     andr(Rd, Rn, tmp);
1627   }
1628 }
1629 
1630 void MacroAssembler::orptr(Address adr, RegisterOrConstant src, Register tmp1, Register tmp2) {
1631   ld(tmp1, adr);
1632   if (src.is_register()) {
1633     orr(tmp1, tmp1, src.as_register());
1634   } else {
1635     if(is_imm_in_range(src.as_constant(), 12, 0)) {
1636       ori(tmp1, tmp1, src.as_constant());
1637     } else {
1638       assert_different_registers(tmp1, tmp2);
1639       li(tmp2, src.as_constant());
1640       orr(tmp1, tmp1, tmp2);
1641     }
1642   }
1643   sd(tmp1, adr);
1644 }
1645 
1646 void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp, Label &L) {
1647   if (UseCompressedClassPointers) {
    lwu(tmp, Address(oop, oopDesc::klass_offset_in_bytes()));
1649     if (CompressedKlassPointers::base() == NULL) {
1650       slli(tmp, tmp, CompressedKlassPointers::shift());
1651       beq(trial_klass, tmp, L);
1652       return;
1653     }
1654     decode_klass_not_null(tmp);
1655   } else {
1656     ld(tmp, Address(oop, oopDesc::klass_offset_in_bytes()));
1657   }
1658   beq(trial_klass, tmp, L);
1659 }
1660 
// Move an oop into a register. immediate is true if we want immediate
// instructions and nmethod entry barriers are not enabled, i.e. we are
// not going to patch this instruction while the code is being executed
// by another thread.
1665 void MacroAssembler::movoop(Register dst, jobject obj, bool immediate) {
1666   int oop_index;
1667   if (obj == NULL) {
1668     oop_index = oop_recorder()->allocate_oop_index(obj);
1669   } else {
1670 #ifdef ASSERT
1671     {
1672       ThreadInVMfromUnknown tiv;
1673       assert(Universe::heap()->is_in(JNIHandles::resolve(obj)), "should be real oop");
1674     }
1675 #endif
1676     oop_index = oop_recorder()->find_index(obj);
1677   }
1678   RelocationHolder rspec = oop_Relocation::spec(oop_index);
1679 
  // nmethod entry barriers necessitate using the constant pool. They have to
  // be ordered with respect to oop accesses.
  // Using immediate literals would necessitate fence.i.
1683   if (BarrierSet::barrier_set()->barrier_set_nmethod() != NULL || !immediate) {
1684     address dummy = address(uintptr_t(pc()) & -wordSize); // A nearby aligned address
1685     ld_constant(dst, Address(dummy, rspec));
  } else {
    mv(dst, Address((address)obj, rspec));
  }
1688 }
1689 
1690 // Move a metadata address into a register.
1691 void MacroAssembler::mov_metadata(Register dst, Metadata* obj) {
1692   int oop_index;
1693   if (obj == NULL) {
1694     oop_index = oop_recorder()->allocate_metadata_index(obj);
1695   } else {
1696     oop_index = oop_recorder()->find_index(obj);
1697   }
1698   RelocationHolder rspec = metadata_Relocation::spec(oop_index);
1699   mv(dst, Address((address)obj, rspec));
1700 }
1701 
1702 // Writes to stack successive pages until offset reached to check for
1703 // stack overflow + shadow pages.  This clobbers tmp.
1704 void MacroAssembler::bang_stack_size(Register size, Register tmp) {
1705   assert_different_registers(tmp, size, t0);
1706   // Bang stack for total size given plus shadow page size.
1707   // Bang one page at a time because large size can bang beyond yellow and
1708   // red zones.
1709   mv(t0, os::vm_page_size());
1710   Label loop;
1711   bind(loop);
1712   sub(tmp, sp, t0);
1713   subw(size, size, t0);
1714   sd(size, Address(tmp));
1715   bgtz(size, loop);
1716 
1717   // Bang down shadow pages too.
1718   // At this point, (tmp-0) is the last address touched, so don't
1719   // touch it again.  (It was touched as (tmp-pagesize) but then tmp
1720   // was post-decremented.)  Skip this address by starting at i=1, and
1721   // touch a few more pages below.  N.B.  It is important to touch all
1722   // the way down to and including i=StackShadowPages.
1723   for (int i = 0; i < (int)(StackOverflow::stack_shadow_zone_size() / os::vm_page_size()) - 1; i++) {
    // this could be any sized move but this can be a debugging crumb,
    // so the bigger the better.
1726     sub(tmp, tmp, os::vm_page_size());
1727     sd(size, Address(tmp, 0));
1728   }
1729 }
1730 
1731 SkipIfEqual::SkipIfEqual(MacroAssembler* masm, const bool* flag_addr, bool value) {
1732   assert_cond(masm != NULL);
1733   int32_t offset = 0;
1734   _masm = masm;
1735   _masm->la_patchable(t0, ExternalAddress((address)flag_addr), offset);
1736   _masm->lbu(t0, Address(t0, offset));
1737   _masm->beqz(t0, _label);
1738 }
1739 
1740 SkipIfEqual::~SkipIfEqual() {
1741   assert_cond(_masm != NULL);
1742   _masm->bind(_label);
1743   _masm = NULL;
1744 }
1745 
1746 void MacroAssembler::load_mirror(Register dst, Register method, Register tmp) {
1747   const int mirror_offset = in_bytes(Klass::java_mirror_offset());
  ld(dst, Address(method, Method::const_offset()));
1749   ld(dst, Address(dst, ConstMethod::constants_offset()));
1750   ld(dst, Address(dst, ConstantPool::pool_holder_offset_in_bytes()));
1751   ld(dst, Address(dst, mirror_offset));
1752   resolve_oop_handle(dst, tmp);
1753 }
1754 
1755 void MacroAssembler::resolve_oop_handle(Register result, Register tmp) {
1756   // OopHandle::resolve is an indirection.
1757   assert_different_registers(result, tmp);
1758   access_load_at(T_OBJECT, IN_NATIVE, result, Address(result, 0), tmp, noreg);
1759 }
1760 
1761 // ((WeakHandle)result).resolve()
1762 void MacroAssembler::resolve_weak_handle(Register result, Register tmp) {
1763   assert_different_registers(result, tmp);
1764   Label resolved;
1765 
1766   // A null weak handle resolves to null.
1767   beqz(result, resolved);
1768 
1769   // Only 64 bit platforms support GCs that require a tmp register
1770   // Only IN_HEAP loads require a thread_tmp register
1771   // WeakHandle::resolve is an indirection like jweak.
1772   access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF,
1773                  result, Address(result), tmp, noreg /* tmp_thread */);
1774   bind(resolved);
1775 }
1776 
1777 void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators,
1778                                     Register dst, Address src,
1779                                     Register tmp1, Register thread_tmp) {
1780   BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
1781   decorators = AccessInternal::decorator_fixup(decorators);
1782   bool as_raw = (decorators & AS_RAW) != 0;
1783   if (as_raw) {
1784     bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, thread_tmp);
1785   } else {
1786     bs->load_at(this, decorators, type, dst, src, tmp1, thread_tmp);
1787   }
1788 }
1789 
1790 void MacroAssembler::null_check(Register reg, int offset) {
1791   if (needs_explicit_null_check(offset)) {
    // provoke an OS NULL exception if reg == NULL by
1793     // accessing M[reg] w/o changing any registers
1794     // NOTE: this is plenty to provoke a segv
1795     ld(zr, Address(reg, 0));
1796   } else {
1797     // nothing to do, (later) access of M[reg + offset]
    // will provoke an OS NULL exception if reg == NULL
1799   }
1800 }
1801 
1802 void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators,
1803                                      Address dst, Register src,
1804                                      Register tmp1, Register thread_tmp) {
1805   BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
1806   decorators = AccessInternal::decorator_fixup(decorators);
1807   bool as_raw = (decorators & AS_RAW) != 0;
1808   if (as_raw) {
1809     bs->BarrierSetAssembler::store_at(this, decorators, type, dst, src, tmp1, thread_tmp);
1810   } else {
1811     bs->store_at(this, decorators, type, dst, src, tmp1, thread_tmp);
1812   }
1813 }
1814 
1815 // Algorithm must match CompressedOops::encode.
1816 void MacroAssembler::encode_heap_oop(Register d, Register s) {
1817   verify_oop(s, "broken oop in encode_heap_oop");
1818   if (CompressedOops::base() == NULL) {
1819     if (CompressedOops::shift() != 0) {
1820       assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong");
1821       srli(d, s, LogMinObjAlignmentInBytes);
1822     } else {
1823       mv(d, s);
1824     }
1825   } else {
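    // A null oop (s == 0) makes s - base negative; clamp the difference to
    // zero so that null encodes to narrow oop 0.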
1826     Label notNull;
1827     sub(d, s, xheapbase);
1828     bgez(d, notNull);
1829     mv(d, zr);
1830     bind(notNull);
1831     if (CompressedOops::shift() != 0) {
1832       assert (LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong");
1833       srli(d, d, CompressedOops::shift());
1834     }
1835   }
1836 }
1837 
1838 void MacroAssembler::load_klass(Register dst, Register src) {
1839   if (UseCompressedClassPointers) {
1840     lwu(dst, Address(src, oopDesc::klass_offset_in_bytes()));
1841     decode_klass_not_null(dst);
1842   } else {
1843     ld(dst, Address(src, oopDesc::klass_offset_in_bytes()));
1844   }
1845 }
1846 
1847 void MacroAssembler::store_klass(Register dst, Register src) {
  // FIXME: Should this be a store release? Concurrent GCs assume
  // klass length is valid if klass field is not null.
1850   if (UseCompressedClassPointers) {
1851     encode_klass_not_null(src);
1852     sw(src, Address(dst, oopDesc::klass_offset_in_bytes()));
1853   } else {
1854     sd(src, Address(dst, oopDesc::klass_offset_in_bytes()));
1855   }
1856 }
1857 
1858 void MacroAssembler::store_klass_gap(Register dst, Register src) {
1859   if (UseCompressedClassPointers) {
1860     // Store to klass gap in destination
1861     sw(src, Address(dst, oopDesc::klass_gap_offset_in_bytes()));
1862   }
1863 }
1864 
1865 void  MacroAssembler::decode_klass_not_null(Register r) {
1866   decode_klass_not_null(r, r);
1867 }
1868 
1869 void MacroAssembler::decode_klass_not_null(Register dst, Register src, Register tmp) {
1870   assert(UseCompressedClassPointers, "should only be used for compressed headers");
1871 
1872   if (CompressedKlassPointers::base() == NULL) {
1873     if (CompressedKlassPointers::shift() != 0) {
1874       assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong");
1875       slli(dst, src, LogKlassAlignmentInBytes);
1876     } else {
1877       mv(dst, src);
1878     }
1879     return;
1880   }
1881 
1882   Register xbase = dst;
1883   if (dst == src) {
1884     xbase = tmp;
1885   }
1886 
1887   assert_different_registers(src, xbase);
1888   li(xbase, (uintptr_t)CompressedKlassPointers::base());
1889   if (CompressedKlassPointers::shift() != 0) {
1890     assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong");
1891     assert_different_registers(t0, xbase);
1892     slli(t0, src, LogKlassAlignmentInBytes);
1893     add(dst, xbase, t0);
1894   } else {
1895     add(dst, xbase, src);
1896   }
  if (xbase == xheapbase) { reinit_heapbase(); }
1899 }
1900 
1901 void MacroAssembler::encode_klass_not_null(Register r) {
1902   encode_klass_not_null(r, r);
1903 }
1904 
1905 void MacroAssembler::encode_klass_not_null(Register dst, Register src, Register tmp) {
1906   assert(UseCompressedClassPointers, "should only be used for compressed headers");
1907 
1908   if (CompressedKlassPointers::base() == NULL) {
1909     if (CompressedKlassPointers::shift() != 0) {
1910       assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong");
1911       srli(dst, src, LogKlassAlignmentInBytes);
1912     } else {
1913       mv(dst, src);
1914     }
1915     return;
1916   }
1917 
1918   if (((uint64_t)(uintptr_t)CompressedKlassPointers::base() & 0xffffffff) == 0 &&
1919       CompressedKlassPointers::shift() == 0) {
1920     zero_ext(dst, src, 32); // clear upper 32 bits
1921     return;
1922   }
1923 
1924   Register xbase = dst;
1925   if (dst == src) {
1926     xbase = tmp;
1927   }
1928 
1929   assert_different_registers(src, xbase);
1930   li(xbase, (intptr_t)CompressedKlassPointers::base());
1931   sub(dst, src, xbase);
1932   if (CompressedKlassPointers::shift() != 0) {
1933     assert(LogKlassAlignmentInBytes == CompressedKlassPointers::shift(), "decode alg wrong");
1934     srli(dst, dst, LogKlassAlignmentInBytes);
1935   }
1936   if (xbase == xheapbase) {
1937     reinit_heapbase();
1938   }
1939 }
1940 
1941 void  MacroAssembler::decode_heap_oop_not_null(Register r) {
1942   decode_heap_oop_not_null(r, r);
1943 }
1944 
1945 void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) {
1946   assert(UseCompressedOops, "should only be used for compressed headers");
1947   assert(Universe::heap() != NULL, "java heap should be initialized");
1948   // Cannot assert, unverified entry point counts instructions (see .ad file)
1949   // vtableStubs also counts instructions in pd_code_size_limit.
1950   // Also do not verify_oop as this is called by verify_oop.
1951   if (CompressedOops::shift() != 0) {
1952     assert(LogMinObjAlignmentInBytes == CompressedOops::shift(), "decode alg wrong");
1953     slli(dst, src, LogMinObjAlignmentInBytes);
1954     if (CompressedOops::base() != NULL) {
1955       add(dst, xheapbase, dst);
1956     }
1957   } else {
1958     assert(CompressedOops::base() == NULL, "sanity");
1959     mv(dst, src);
1960   }
1961 }
1962 
1963 void  MacroAssembler::decode_heap_oop(Register d, Register s) {
1964   if (CompressedOops::base() == NULL) {
1965     if (CompressedOops::shift() != 0 || d != s) {
1966       slli(d, s, CompressedOops::shift());
1967     }
1968   } else {
1969     Label done;
1970     mv(d, s);
1971     beqz(s, done);
1972     slli(d, s, LogMinObjAlignmentInBytes);
1973     add(d, xheapbase, d);
1974     bind(done);
1975   }
1976   verify_oop(d, "broken oop in decode_heap_oop");
1977 }
1978 
1979 void MacroAssembler::store_heap_oop(Address dst, Register src, Register tmp1,
1980                                     Register thread_tmp, DecoratorSet decorators) {
1981   access_store_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp);
1982 }
1983 
1984 void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1,
1985                                    Register thread_tmp, DecoratorSet decorators) {
1986   access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp);
1987 }
1988 
1989 void MacroAssembler::load_heap_oop_not_null(Register dst, Address src, Register tmp1,
1990                                             Register thread_tmp, DecoratorSet decorators) {
1991   access_load_at(T_OBJECT, IN_HEAP | IS_NOT_NULL, dst, src, tmp1, thread_tmp);
1992 }
1993 
1994 // Used for storing NULLs.
1995 void MacroAssembler::store_heap_oop_null(Address dst) {
1996   access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg);
1997 }
1998 
1999 int MacroAssembler::corrected_idivl(Register result, Register rs1, Register rs2,
2000                                     bool want_remainder)
2001 {
2002   // Full implementation of Java idiv and irem.  The function
2003   // returns the (pc) offset of the div instruction - may be needed
2004   // for implicit exceptions.
2005   //
2006   // input : rs1: dividend
2007   //         rs2: divisor
2008   //
2009   // result: either
2010   //         quotient  (= rs1 idiv rs2)
2011   //         remainder (= rs1 irem rs2)
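  //
  // Unlike x86 there is no correction sequence to emit: RISC-V's divw/remw
  // already produce the Java-mandated result for the min_jint / -1 overflow
  // case (quotient == min_jint, remainder == 0).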
2014   int idivl_offset = offset();
2015   if (!want_remainder) {
2016     divw(result, rs1, rs2);
2017   } else {
2018     remw(result, rs1, rs2); // result = rs1 % rs2;
2019   }
2020   return idivl_offset;
2021 }
2022 
2023 int MacroAssembler::corrected_idivq(Register result, Register rs1, Register rs2,
2024                                     bool want_remainder)
2025 {
2026   // Full implementation of Java ldiv and lrem.  The function
2027   // returns the (pc) offset of the div instruction - may be needed
2028   // for implicit exceptions.
2029   //
2030   // input : rs1: dividend
2031   //         rs2: divisor
2032   //
2033   // result: either
  //         quotient  (= rs1 ldiv rs2)
  //         remainder (= rs1 lrem rs2)
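  //
  // As in corrected_idivl, no correction sequence is needed: div/rem follow
  // the same overflow convention (min_jlong / -1 yields min_jlong, remainder 0).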
2036 
2037   int idivq_offset = offset();
2038   if (!want_remainder) {
2039     div(result, rs1, rs2);
2040   } else {
2041     rem(result, rs1, rs2); // result = rs1 % rs2;
2042   }
2043   return idivq_offset;
2044 }
2045 
// Look up the method for a megamorphic invokeinterface call.
2047 // The target method is determined by <intf_klass, itable_index>.
2048 // The receiver klass is in recv_klass.
2049 // On success, the result will be in method_result, and execution falls through.
2050 // On failure, execution transfers to the given label.
2051 void MacroAssembler::lookup_interface_method(Register recv_klass,
2052                                              Register intf_klass,
2053                                              RegisterOrConstant itable_index,
2054                                              Register method_result,
2055                                              Register scan_temp,
2056                                              Label& L_no_such_interface,
2057                                              bool return_method) {
2058   assert_different_registers(recv_klass, intf_klass, scan_temp);
2059   assert_different_registers(method_result, intf_klass, scan_temp);
2060   assert(recv_klass != method_result || !return_method,
         "recv_klass can be destroyed when method isn't needed");
2062   assert(itable_index.is_constant() || itable_index.as_register() == method_result,
2063          "caller must be same register for non-constant itable index as for method");
2064 
2065   // Compute start of first itableOffsetEntry (which is at the end of the vtable).
2066   int vtable_base = in_bytes(Klass::vtable_start_offset());
2067   int itentry_off = itableMethodEntry::method_offset_in_bytes();
2068   int scan_step   = itableOffsetEntry::size() * wordSize;
2069   int vte_size    = vtableEntry::size_in_bytes();
2070   assert(vte_size == wordSize, "else adjust times_vte_scale");
2071 
2072   lwu(scan_temp, Address(recv_klass, Klass::vtable_length_offset()));
2073 
2074   // %%% Could store the aligned, prescaled offset in the klassoop.
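  // Scale the vtable length by wordSize (8 bytes, hence the shift by 3) to
  // get a byte offset; vte_size == wordSize was asserted above.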
2075   slli(scan_temp, scan_temp, 3);
2076   add(scan_temp, recv_klass, scan_temp);
2077   add(scan_temp, scan_temp, vtable_base);
2078 
2079   if (return_method) {
2080     // Adjust recv_klass by scaled itable_index, so we can free itable_index.
2081     assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
2082     if (itable_index.is_register()) {
2083       slli(t0, itable_index.as_register(), 3);
2084     } else {
2085       li(t0, itable_index.as_constant() << 3);
2086     }
2087     add(recv_klass, recv_klass, t0);
2088     if (itentry_off) {
2089       add(recv_klass, recv_klass, itentry_off);
2090     }
2091   }
2092 
2093   Label search, found_method;
2094 
2095   ld(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes()));
2096   beq(intf_klass, method_result, found_method);
2097   bind(search);
2098   // Check that the previous entry is non-null. A null entry means that
  // the receiver class doesn't implement the interface, and wasn't the
2100   // same as when the caller was compiled.
2101   beqz(method_result, L_no_such_interface, /* is_far */ true);
2102   addi(scan_temp, scan_temp, scan_step);
2103   ld(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes()));
2104   bne(intf_klass, method_result, search);
2105 
2106   bind(found_method);
2107 
2108   // Got a hit.
2109   if (return_method) {
2110     lwu(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes()));
2111     add(method_result, recv_klass, scan_temp);
2112     ld(method_result, Address(method_result));
2113   }
2114 }
2115 
2116 // virtual method calling
2117 void MacroAssembler::lookup_virtual_method(Register recv_klass,
2118                                            RegisterOrConstant vtable_index,
2119                                            Register method_result) {
2120   const int base = in_bytes(Klass::vtable_start_offset());
2121   assert(vtableEntry::size() * wordSize == 8,
2122          "adjust the scaling in the code below");
2123   int vtable_offset_in_bytes = base + vtableEntry::method_offset_in_bytes();
2124 
2125   if (vtable_index.is_register()) {
2126     slli(method_result, vtable_index.as_register(), LogBytesPerWord);
2127     add(method_result, recv_klass, method_result);
2128     ld(method_result, Address(method_result, vtable_offset_in_bytes));
2129   } else {
2130     vtable_offset_in_bytes += vtable_index.as_constant() * wordSize;
2131     ld(method_result, form_address(method_result, recv_klass, vtable_offset_in_bytes));
2132   }
2133 }
2134 
2135 void MacroAssembler::membar(uint32_t order_constraint) {
2136   address prev = pc() - NativeMembar::instruction_size;
2137   address last = code()->last_insn();
2138 
2139   if (last != NULL && nativeInstruction_at(last)->is_membar() && prev == last) {
2140     NativeMembar *bar = NativeMembar_at(prev);
    // We are merging two memory barrier instructions.  On RISC-V we
    // can do this simply by ORing them together.
2143     bar->set_kind(bar->get_kind() | order_constraint);
2144     BLOCK_COMMENT("merged membar");
2145   } else {
2146     code()->set_last_insn(pc());
2147 
2148     uint32_t predecessor = 0;
2149     uint32_t successor = 0;
2150 
2151     membar_mask_to_pred_succ(order_constraint, predecessor, successor);
2152     fence(predecessor, successor);
2153   }
2154 }
2155 
// Form an address from base + offset in Rd. Rd may or may not
// actually be used: you must use the Address that is returned. It
// is up to you to ensure that the offset provided matches the size
// of your data.
2160 Address MacroAssembler::form_address(Register Rd, Register base, long byte_offset) {
2161   if (is_offset_in_range(byte_offset, 12)) { // 12: imm in range 2^12
2162     return Address(base, byte_offset);
2163   }
2164 
2165   // Do it the hard way
2166   mv(Rd, byte_offset);
2167   add(Rd, base, Rd);
2168   return Address(Rd);
2169 }
2170 
2171 void MacroAssembler::check_klass_subtype(Register sub_klass,
2172                                          Register super_klass,
2173                                          Register temp_reg,
2174                                          Label& L_success) {
2175   Label L_failure;
2176   check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL);
2177   check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL);
2178   bind(L_failure);
2179 }
2180 
2181 void MacroAssembler::safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod) {
2182   ld(t0, Address(xthread, JavaThread::polling_word_offset()));
2183   if (acquire) {
2184     membar(MacroAssembler::LoadLoad | MacroAssembler::LoadStore);
2185   }
2186   if (at_return) {
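    // At return points the polling word also serves as the stack watermark:
    // branch to the slow path if SP (in nmethods) or FP (elsewhere) is
    // above it.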
2187     bgtu(in_nmethod ? sp : fp, t0, slow_path, true /* is_far */);
2188   } else {
2189     andi(t0, t0, SafepointMechanism::poll_bit());
2190     bnez(t0, slow_path, true /* is_far */);
2191   }
2192 }
2193 
2194 void MacroAssembler::cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp,
2195                                 Label &succeed, Label *fail) {
2196   // oldv holds comparison value
2197   // newv holds value to write in exchange
2198   // addr identifies memory word to compare against/update
2199   Label retry_load, nope;
2200   bind(retry_load);
2201   // flush and load exclusive from the memory location
2202   // and fail if it is not what we expect
2203   lr_d(tmp, addr, Assembler::aqrl);
2204   bne(tmp, oldv, nope);
  // if we store+flush with no intervening write tmp will be zero
2206   sc_d(tmp, newv, addr, Assembler::rl);
2207   beqz(tmp, succeed);
2208   // retry so we only ever return after a load fails to compare
2209   // ensures we don't return a stale value after a failed write.
2210   j(retry_load);
2211   // if the memory word differs we return it in oldv and signal a fail
2212   bind(nope);
2213   membar(AnyAny);
2214   mv(oldv, tmp);
2215   if (fail != NULL) {
2216     j(*fail);
2217   }
2218 }
2219 
2220 void MacroAssembler::cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp,
2221                                         Label &succeed, Label *fail) {
2222   assert(oopDesc::mark_offset_in_bytes() == 0, "assumption");
2223   cmpxchgptr(oldv, newv, obj, tmp, succeed, fail);
2224 }
2225 
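// Both helpers below communicate through t0: load_reserved leaves the loaded
// value in t0, and store_conditional leaves the SC status (0 on success) in t0.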
2226 void MacroAssembler::load_reserved(Register addr,
2227                                    enum operand_size size,
2228                                    Assembler::Aqrl acquire) {
2229   switch (size) {
2230     case int64:
2231       lr_d(t0, addr, acquire);
2232       break;
2233     case int32:
2234       lr_w(t0, addr, acquire);
2235       break;
2236     case uint32:
2237       lr_w(t0, addr, acquire);
2238       clear_upper_bits(t0, 32);
2239       break;
2240     default:
2241       ShouldNotReachHere();
2242   }
2243 }
2244 
2245 void MacroAssembler::store_conditional(Register addr,
2246                                        Register new_val,
2247                                        enum operand_size size,
2248                                        Assembler::Aqrl release) {
2249   switch (size) {
2250     case int64:
2251       sc_d(t0, new_val, addr, release);
2252       break;
2253     case int32:
2254     case uint32:
2255       sc_w(t0, new_val, addr, release);
2256       break;
2257     default:
2258       ShouldNotReachHere();
2259   }
2260 }
2263 void MacroAssembler::cmpxchg_narrow_value_helper(Register addr, Register expected,
2264                                                  Register new_val,
2265                                                  enum operand_size size,
2266                                                  Register tmp1, Register tmp2, Register tmp3) {
2267   assert(size == int8 || size == int16, "unsupported operand size");
2268 
2269   Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3;
2270 
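  // Compute the bit offset of the narrow value within its aligned 32-bit
  // word: shift = (addr & 0x3) * 8, e.g. addr & 0x3 == 2 gives shift == 16.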
2271   andi(shift, addr, 3);
2272   slli(shift, shift, 3);
2273 
2274   andi(aligned_addr, addr, ~3);
2275 
2276   if (size == int8) {
2277     addi(mask, zr, 0xff);
2278   } else {
2279     addi(mask, zr, -1);
2280     zero_ext(mask, mask, registerSize - 16);
2281   }
2282   sll(mask, mask, shift);
2283 
2284   xori(not_mask, mask, -1);
2285 
2286   sll(expected, expected, shift);
2287   andr(expected, expected, mask);
2288 
2289   sll(new_val, new_val, shift);
2290   andr(new_val, new_val, mask);
2291 }
2292 
// cmpxchg_narrow_value will kill t0, t1, expected, new_val and tmps.
// It's designed to implement compare-and-swap of byte/boolean/char/short
// values using lr.w/sc.w, which can only operate on 4-byte aligned addresses.
2296 void MacroAssembler::cmpxchg_narrow_value(Register addr, Register expected,
2297                                           Register new_val,
2298                                           enum operand_size size,
2299                                           Assembler::Aqrl acquire, Assembler::Aqrl release,
2300                                           Register result, bool result_as_bool,
2301                                           Register tmp1, Register tmp2, Register tmp3) {
2302   Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3, old = result, tmp = t0;
2303   assert_different_registers(addr, old, mask, not_mask, new_val, expected, shift, tmp);
2304   cmpxchg_narrow_value_helper(addr, expected, new_val, size, tmp1, tmp2, tmp3);
2305 
2306   Label retry, fail, done;
2307 
2308   bind(retry);
2309   lr_w(old, aligned_addr, acquire);
2310   andr(tmp, old, mask);
2311   bne(tmp, expected, fail);
2312 
2313   andr(tmp, old, not_mask);
2314   orr(tmp, tmp, new_val);
2315   sc_w(tmp, tmp, aligned_addr, release);
2316   bnez(tmp, retry);
2317 
2318   if (result_as_bool) {
2319     addi(result, zr, 1);
2320     j(done);
2321 
2322     bind(fail);
2323     mv(result, zr);
2324 
2325     bind(done);
2326   } else {
2327     andr(tmp, old, mask);
2328 
2329     bind(fail);
2330     srl(result, tmp, shift);
2331   }
2332 }
2333 
// weak_cmpxchg_narrow_value is a weak version of cmpxchg_narrow_value, used to
// implement weak CAS operations. The major difference is that it simply fails
// when the store-conditional fails, instead of retrying. Note that in this
// implementation result is set to 0 on success and 1 on failure.
2337 void MacroAssembler::weak_cmpxchg_narrow_value(Register addr, Register expected,
2338                                                Register new_val,
2339                                                enum operand_size size,
2340                                                Assembler::Aqrl acquire, Assembler::Aqrl release,
2341                                                Register result,
2342                                                Register tmp1, Register tmp2, Register tmp3) {
2343   Register aligned_addr = t1, shift = tmp1, mask = tmp2, not_mask = tmp3, old = result, tmp = t0;
2344   assert_different_registers(addr, old, mask, not_mask, new_val, expected, shift, tmp);
2345   cmpxchg_narrow_value_helper(addr, expected, new_val, size, tmp1, tmp2, tmp3);
2346 
2347   Label succ, fail, done;
2348 
2349   lr_w(old, aligned_addr, acquire);
2350   andr(tmp, old, mask);
2351   bne(tmp, expected, fail);
2352 
2353   andr(tmp, old, not_mask);
2354   orr(tmp, tmp, new_val);
2355   sc_w(tmp, tmp, aligned_addr, release);
2356   beqz(tmp, succ);
2357 
2358   bind(fail);
2359   addi(result, zr, 1);
2360   j(done);
2361 
2362   bind(succ);
2363   mv(result, zr);
2364 
2365   bind(done);
2366 }
2367 
2368 void MacroAssembler::cmpxchg(Register addr, Register expected,
2369                              Register new_val,
2370                              enum operand_size size,
2371                              Assembler::Aqrl acquire, Assembler::Aqrl release,
2372                              Register result, bool result_as_bool) {
2373   assert(size != int8 && size != int16, "unsupported operand size");
2374 
2375   Label retry_load, done, ne_done;
2376   bind(retry_load);
2377   load_reserved(addr, size, acquire);
2378   bne(t0, expected, ne_done);
2379   store_conditional(addr, new_val, size, release);
2380   bnez(t0, retry_load);
2381 
2382   // equal, succeed
2383   if (result_as_bool) {
2384     li(result, 1);
2385   } else {
2386     mv(result, expected);
2387   }
2388   j(done);
2389 
2390   // not equal, failed
2391   bind(ne_done);
2392   if (result_as_bool) {
2393     mv(result, zr);
2394   } else {
2395     mv(result, t0);
2396   }
2397 
2398   bind(done);
2399 }
2400 
2401 void MacroAssembler::cmpxchg_weak(Register addr, Register expected,
2402                                   Register new_val,
2403                                   enum operand_size size,
2404                                   Assembler::Aqrl acquire, Assembler::Aqrl release,
2405                                   Register result) {
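  // Note: result is set to 0 on success and 1 on failure here, the inverse
  // of cmpxchg's result_as_bool convention.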
2406   Label fail, done, sc_done;
2407   load_reserved(addr, size, acquire);
2408   bne(t0, expected, fail);
2409   store_conditional(addr, new_val, size, release);
2410   beqz(t0, sc_done);
2411 
2412   // fail
2413   bind(fail);
2414   li(result, 1);
2415   j(done);
2416 
2417   // sc_done
2418   bind(sc_done);
2419   mv(result, 0);
2420   bind(done);
2421 }
2422 
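// Generates atomic_add/addw/addal/addalw. prev receives the value previously
// held at addr; pass an invalid register (e.g. noreg) to discard it, in which
// case it is mapped to zr below.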
2423 #define ATOMIC_OP(NAME, AOP, ACQUIRE, RELEASE)                                              \
2424 void MacroAssembler::atomic_##NAME(Register prev, RegisterOrConstant incr, Register addr) { \
2425   prev = prev->is_valid() ? prev : zr;                                                      \
2426   if (incr.is_register()) {                                                                 \
2427     AOP(prev, addr, incr.as_register(), (Assembler::Aqrl)(ACQUIRE | RELEASE));              \
2428   } else {                                                                                  \
2429     mv(t0, incr.as_constant());                                                             \
2430     AOP(prev, addr, t0, (Assembler::Aqrl)(ACQUIRE | RELEASE));                              \
2431   }                                                                                         \
2432   return;                                                                                   \
2433 }
2434 
2435 ATOMIC_OP(add, amoadd_d, Assembler::relaxed, Assembler::relaxed)
2436 ATOMIC_OP(addw, amoadd_w, Assembler::relaxed, Assembler::relaxed)
2437 ATOMIC_OP(addal, amoadd_d, Assembler::aq, Assembler::rl)
2438 ATOMIC_OP(addalw, amoadd_w, Assembler::aq, Assembler::rl)
2439 
2440 #undef ATOMIC_OP
2441 
2442 #define ATOMIC_XCHG(OP, AOP, ACQUIRE, RELEASE)                                       \
2443 void MacroAssembler::atomic_##OP(Register prev, Register newv, Register addr) {      \
2444   prev = prev->is_valid() ? prev : zr;                                               \
2445   AOP(prev, addr, newv, (Assembler::Aqrl)(ACQUIRE | RELEASE));                       \
2446   return;                                                                            \
2447 }
2448 
2449 ATOMIC_XCHG(xchg, amoswap_d, Assembler::relaxed, Assembler::relaxed)
2450 ATOMIC_XCHG(xchgw, amoswap_w, Assembler::relaxed, Assembler::relaxed)
2451 ATOMIC_XCHG(xchgal, amoswap_d, Assembler::aq, Assembler::rl)
2452 ATOMIC_XCHG(xchgalw, amoswap_w, Assembler::aq, Assembler::rl)
2453 
2454 #undef ATOMIC_XCHG
2455 
2456 #define ATOMIC_XCHGU(OP1, OP2)                                                       \
2457 void MacroAssembler::atomic_##OP1(Register prev, Register newv, Register addr) {     \
2458   atomic_##OP2(prev, newv, addr);                                                    \
2459   clear_upper_bits(prev, 32);                                                        \
2460   return;                                                                            \
2461 }
2462 
2463 ATOMIC_XCHGU(xchgwu, xchgw)
2464 ATOMIC_XCHGU(xchgalwu, xchgalw)
2465 
2466 #undef ATOMIC_XCHGU
2467 
2468 void MacroAssembler::atomic_incw(Register counter_addr, Register tmp) {
2469   Label retry_load;
2470   bind(retry_load);
2471   // flush and load exclusive from the memory location
2472   lr_w(tmp, counter_addr);
2473   addw(tmp, tmp, 1);
  // if we store+flush with no intervening write tmp will be zero
  sc_w(tmp, tmp, counter_addr);
2476   bnez(tmp, retry_load);
2477 }
2478 
2479 void MacroAssembler::far_jump(Address entry, CodeBuffer *cbuf, Register tmp) {
2480   assert(ReservedCodeCacheSize < 4*G, "branch out of range");
2481   assert(CodeCache::find_blob(entry.target()) != NULL,
         "destination of far jump not found in code cache");
2483   int32_t offset = 0;
2484   if (far_branches()) {
2485     // We can use auipc + jalr here because we know that the total size of
2486     // the code cache cannot exceed 2Gb.
2487     la_patchable(tmp, entry, offset);
2488     if (cbuf != NULL) { cbuf->set_insts_mark(); }
2489     jalr(x0, tmp, offset);
2490   } else {
2491     if (cbuf != NULL) { cbuf->set_insts_mark(); }
2492     j(entry);
2493   }
2494 }
2495 
2496 void MacroAssembler::far_call(Address entry, CodeBuffer *cbuf, Register tmp) {
2497   assert(ReservedCodeCacheSize < 4*G, "branch out of range");
2498   assert(CodeCache::find_blob(entry.target()) != NULL,
2499          "destination of far call not found in code cache");
2500   int32_t offset = 0;
2501   if (far_branches()) {
2502     // We can use auipc + jalr here because we know that the total size of
2503     // the code cache cannot exceed 2Gb.
2504     la_patchable(tmp, entry, offset);
2505     if (cbuf != NULL) { cbuf->set_insts_mark(); }
2506     jalr(x1, tmp, offset); // link
2507   } else {
2508     if (cbuf != NULL) { cbuf->set_insts_mark(); }
2509     jal(entry); // link
2510   }
2511 }
2512 
2513 void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass,
2514                                                    Register super_klass,
2515                                                    Register temp_reg,
2516                                                    Label* L_success,
2517                                                    Label* L_failure,
2518                                                    Label* L_slow_path,
2519                                                    Register super_check_offset) {
2520   assert_different_registers(sub_klass, super_klass, temp_reg);
2521   bool must_load_sco = (super_check_offset == noreg);
2522   if (must_load_sco) {
2523     assert(temp_reg != noreg, "supply either a temp or a register offset");
2524   } else {
2525     assert_different_registers(sub_klass, super_klass, super_check_offset);
2526   }
2527 
2528   Label L_fallthrough;
2529   int label_nulls = 0;
2530   if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
2531   if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
2532   if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; }
2533   assert(label_nulls <= 1, "at most one NULL in batch");
2534 
2535   int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
2536   int sco_offset = in_bytes(Klass::super_check_offset_offset());
2537   Address super_check_offset_addr(super_klass, sco_offset);
2538 
2539   // Hacked jmp, which may only be used just before L_fallthrough.
2540 #define final_jmp(label)                                                \
2541   if (&(label) == &L_fallthrough) { /*do nothing*/ }                    \
2542   else                            j(label)             /*omit semi*/
2543 
2544   // If the pointers are equal, we are done (e.g., String[] elements).
2545   // This self-check enables sharing of secondary supertype arrays among
2546   // non-primary types such as array-of-interface. Otherwise, each such
2547   // type would need its own customized SSA.
  // We move this check to the front of the fast path because many
2549   // type checks are in fact trivially successful in this manner,
2550   // so we get a nicely predicted branch right at the start of the check.
2551   beq(sub_klass, super_klass, *L_success);
2552 
2553   // Check the supertype display:
2554   if (must_load_sco) {
2555     lwu(temp_reg, super_check_offset_addr);
2556     super_check_offset = temp_reg;
2557   }
2558   add(t0, sub_klass, super_check_offset);
2559   Address super_check_addr(t0);
2560   ld(t0, super_check_addr); // load displayed supertype
2561 
  // This check has worked decisively for primary supers.
  // Secondary supers are sought in the super_cache ('super_cache_addr').
  // (Secondary supers are interfaces and very deeply nested subtypes.)
  // This works in the same check above because of a tricky aliasing
  // between the super_cache and the primary super display elements.
2567   // (The 'super_check_addr' can address either, as the case requires.)
2568   // Note that the cache is updated below if it does not help us find
2569   // what we need immediately.
2570   // So if it was a primary super, we can just fail immediately.
2571   // Otherwise, it's the slow path for us (no success at this point).
2572 
2573   beq(super_klass, t0, *L_success);
2574   mv(t1, sc_offset);
2575   if (L_failure == &L_fallthrough) {
2576     beq(super_check_offset, t1, *L_slow_path);
2577   } else {
2578     bne(super_check_offset, t1, *L_failure, /* is_far */ true);
2579     final_jmp(*L_slow_path);
2580   }
2581 
2582   bind(L_fallthrough);
2583 
2584 #undef final_jmp
2585 }
2586 
// scans count pointer sized words at [addr] for occurrence of value,
2588 // generic
2589 void MacroAssembler::repne_scan(Register addr, Register value, Register count,
2590                                 Register temp) {
2591   Label Lloop, Lexit;
2592   beqz(count, Lexit);
2593   bind(Lloop);
2594   ld(temp, addr);
2595   beq(value, temp, Lexit);
2596   add(addr, addr, wordSize);
2597   sub(count, count, 1);
2598   bnez(count, Lloop);
2599   bind(Lexit);
2600 }
2601 
2602 void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass,
2603                                                    Register super_klass,
2604                                                    Register temp_reg,
2605                                                    Register temp2_reg,
2606                                                    Label* L_success,
2607                                                    Label* L_failure) {
2608   assert_different_registers(sub_klass, super_klass, temp_reg);
2609   if (temp2_reg != noreg) {
2610     assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg, t0);
2611   }
2612 #define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg)
2613 
2614   Label L_fallthrough;
2615   int label_nulls = 0;
2616   if (L_success == NULL)   { L_success   = &L_fallthrough; label_nulls++; }
2617   if (L_failure == NULL)   { L_failure   = &L_fallthrough; label_nulls++; }
2618 
2619   assert(label_nulls <= 1, "at most one NULL in the batch");
2620 
  // a couple of useful fields in sub_klass:
2622   int ss_offset = in_bytes(Klass::secondary_supers_offset());
2623   int sc_offset = in_bytes(Klass::secondary_super_cache_offset());
2624   Address secondary_supers_addr(sub_klass, ss_offset);
2625   Address super_cache_addr(     sub_klass, sc_offset);
2626 
2627   BLOCK_COMMENT("check_klass_subtype_slow_path");
2628 
2629   // Do a linear scan of the secondary super-klass chain.
2630   // This code is rarely used, so simplicity is a virtue here.
2631   // The repne_scan instruction uses fixed registers, which we must spill.
  // Don't worry too much about pre-existing connections with the input regs.
2633 
2634   assert(sub_klass != x10, "killed reg"); // killed by mv(x10, super)
  assert(sub_klass != x12, "killed reg"); // killed by lwu(x12, length)
2636 
2637   RegSet pushed_registers;
2638   if (!IS_A_TEMP(x12)) {
2639     pushed_registers += x12;
2640   }
2641   if (!IS_A_TEMP(x15)) {
2642     pushed_registers += x15;
2643   }
2644 
2645   if (super_klass != x10 || UseCompressedOops) {
2646     if (!IS_A_TEMP(x10)) {
2647       pushed_registers += x10;
2648     }
2649   }
2650 
2651   push_reg(pushed_registers, sp);
2652 
2653   // Get super_klass value into x10 (even if it was in x15 or x12)
2654   mv(x10, super_klass);
2655 
2656 #ifndef PRODUCT
2657   mv(t1, (address)&SharedRuntime::_partial_subtype_ctr);
2658   Address pst_counter_addr(t1);
2659   ld(t0, pst_counter_addr);
2660   add(t0, t0, 1);
2661   sd(t0, pst_counter_addr);
2662 #endif // PRODUCT
2663 
2664   // We will consult the secondary-super array.
2665   ld(x15, secondary_supers_addr);
2666   // Load the array length.
2667   lwu(x12, Address(x15, Array<Klass*>::length_offset_in_bytes()));
2668   // Skip to start of data.
2669   add(x15, x15, Array<Klass*>::base_offset_in_bytes());
2670 
2671   // Set t0 to an obvious invalid value, falling through by default
2672   li(t0, -1);
2673   // Scan X12 words at [X15] for an occurrence of X10.
2674   repne_scan(x15, x10, x12, t0);
2675 
2676   // pop will restore x10, so we should use a temp register to keep its value
2677   mv(t1, x10);
2678 
2679   // Unspill the temp. registers:
2680   pop_reg(pushed_registers, sp);
2681 
2682   bne(t1, t0, *L_failure);
2683 
  // Success.  Cache the super we found and proceed in triumph.
2685   sd(super_klass, super_cache_addr);
2686 
2687   if (L_success != &L_fallthrough) {
2688     j(*L_success);
2689   }
2690 
2691 #undef IS_A_TEMP
2692 
2693   bind(L_fallthrough);
2694 }
2695 
2696 // Defines obj, preserves var_size_in_bytes, okay for tmp2 == var_size_in_bytes.
2697 void MacroAssembler::tlab_allocate(Register obj,
2698                                    Register var_size_in_bytes,
2699                                    int con_size_in_bytes,
2700                                    Register tmp1,
2701                                    Register tmp2,
2702                                    Label& slow_case,
2703                                    bool is_far) {
2704   BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
2705   bs->tlab_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, tmp1, tmp2, slow_case, is_far);
2706 }
2707 
2708 // Defines obj, preserves var_size_in_bytes
2709 void MacroAssembler::eden_allocate(Register obj,
2710                                    Register var_size_in_bytes,
2711                                    int con_size_in_bytes,
2712                                    Register tmp1,
2713                                    Label& slow_case,
2714                                    bool is_far) {
2715   BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
2716   bs->eden_allocate(this, obj, var_size_in_bytes, con_size_in_bytes, tmp1, slow_case, is_far);
2717 }
2720 // get_thread() can be called anywhere inside generated code so we
2721 // need to save whatever non-callee save context might get clobbered
2722 // by the call to Thread::current() or, indeed, the call setup code.
2723 //
// FIXME: RISC-V does not yet support TLSDESC (Thread-Local Storage
// Descriptors). Once supported, we should replace Thread::current
// with JavaThread::riscv64_get_thread_helper() to reduce the clobbering
// of non-callee-save context.
2728 void MacroAssembler::get_thread(Register thread) {
2729   // save all call-clobbered regs except thread
2730   RegSet saved_regs = RegSet::range(x5, x7) + RegSet::range(x10, x17) +
2731                       RegSet::range(x28, x31) + lr - thread;
2732   push_reg(saved_regs, sp);
2733 
2734   call_VM_leaf_base(CAST_FROM_FN_PTR(address, Thread::current), 0);
  mv(thread, x10); // x10 holds the return value of call_VM_leaf_base, i.e. the current JavaThread.
2736 
2737   // restore pushed registers
2738   pop_reg(saved_regs, sp);
2739 }
2740 
2741 void MacroAssembler::load_byte_map_base(Register reg) {
2742   CardTable::CardValue* byte_map_base =
2743     ((CardTableBarrierSet*)(BarrierSet::barrier_set()))->card_table()->byte_map_base();
2744   li(reg, (uint64_t)byte_map_base);
2745 }
2746 
2747 void MacroAssembler::la_patchable(Register reg1, const Address &dest, int32_t &offset) {
2748   relocInfo::relocType rtype = dest.rspec().reloc()->type();
2749   unsigned long low_address = (uintptr_t)CodeCache::low_bound();
2750   unsigned long high_address = (uintptr_t)CodeCache::high_bound();
2751   unsigned long dest_address = (uintptr_t)dest.target();
2752   long offset_low = dest_address - low_address;
2753   long offset_high = dest_address - high_address;
2754 
2755   assert(is_valid_riscv64_address(dest.target()), "bad address");
2756   assert(dest.getMode() == Address::literal, "la_patchable must be applied to a literal address");
2757 
2758   InstructionMark im(this);
2759   code_section()->relocate(inst_mark(), dest.rspec());
  // RISC-V doesn't compute a page-aligned address here. The +0x800 rounding
  // below partially compensates for the use of *signed* offsets in its
  // base+disp12 addressing mode (RISC-V's PC-relative reach remains
  // asymmetric: [-(2G + 2K), 2G - 2K)).
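  // e.g. for distance == 0x12345FFF the rounding term selects hi20 == 0x12346
  // and the sign-extended low part gives offset == -1, since
  // pc + 0x12346000 - 1 == pc + 0x12345FFF.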
2764   if (offset_high >= -((1L << 31) + (1L << 11)) && offset_low < (1L << 31) - (1L << 11)) {
2765     int64_t distance = dest.target() - pc();
2766     auipc(reg1, (int32_t)distance + 0x800);
2767     offset = ((int32_t)distance << 20) >> 20;
2768   } else {
2769     movptr_with_offset(reg1, dest.target(), offset);
2770   }
2771 }
2772 
2773 void MacroAssembler::build_frame(int framesize) {
2774   assert(framesize >= 2, "framesize must include space for FP/LR");
2775   assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment");
2776   sub(sp, sp, framesize);
2777   sd(fp, Address(sp, framesize - 2 * wordSize));
2778   sd(lr, Address(sp, framesize - wordSize));
2779   if (PreserveFramePointer) { add(fp, sp, framesize - 2 * wordSize); }
2780   verify_cross_modify_fence_not_required();
2781 }
2782 
2783 void MacroAssembler::remove_frame(int framesize) {
2784   assert(framesize >= 2, "framesize must include space for FP/LR");
2785   assert(framesize % (2*wordSize) == 0, "must preserve 2*wordSize alignment");
2786   ld(fp, Address(sp, framesize - 2 * wordSize));
2787   ld(lr, Address(sp, framesize - wordSize));
2788   add(sp, sp, framesize);
2789 }
2790 
2791 void MacroAssembler::reserved_stack_check() {
  // testing if reserved zone needs to be enabled
  Label no_reserved_zone_enabling;

  ld(t0, Address(xthread, JavaThread::reserved_stack_activation_offset()));
  bltu(sp, t0, no_reserved_zone_enabling);

  enter();   // LR and FP are live.
  mv(c_rarg0, xthread);
  int32_t offset = 0;
  la_patchable(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone)), offset);
  jalr(x1, t0, offset);
  leave();

  // We have already removed our own frame.
  // throw_delayed_StackOverflowError will think that it's been
  // called by our caller.
  offset = 0;
  la_patchable(t0, RuntimeAddress(StubRoutines::throw_delayed_StackOverflowError_entry()), offset);
  jalr(x0, t0, offset);
  should_not_reach_here();

  bind(no_reserved_zone_enabling);
2814 }
2815 
2816 // Move the address of the polling page into dest.
2817 void MacroAssembler::get_polling_page(Register dest, relocInfo::relocType rtype) {
2818   ld(dest, Address(xthread, JavaThread::polling_page_offset()));
2819 }
2820 
2821 // Read the polling page.  The address of the polling page must
2822 // already be in r.
2823 address MacroAssembler::read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype) {
2824   address mark;
2825   {
2826     InstructionMark im(this);
2827     code_section()->relocate(inst_mark(), rtype);
2828     lwu(zr, Address(r, offset));
2829     mark = inst_mark();
2830   }
2831   verify_cross_modify_fence_not_required();
2832   return mark;
2833 }
2834 
2835 void  MacroAssembler::set_narrow_oop(Register dst, jobject obj) {
2836 #ifdef ASSERT
2837   {
2838     ThreadInVMfromUnknown tiv;
2839     assert (UseCompressedOops, "should only be used for compressed oops");
2840     assert (Universe::heap() != NULL, "java heap should be initialized");
2841     assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
2842     assert(Universe::heap()->is_in(JNIHandles::resolve(obj)), "should be real oop");
2843   }
2844 #endif
2845   int oop_index = oop_recorder()->find_index(obj);
2846   InstructionMark im(this);
2847   RelocationHolder rspec = oop_Relocation::spec(oop_index);
2848   code_section()->relocate(inst_mark(), rspec);
2849   li32(dst, 0xDEADBEEF);
2850   clear_upper_bits(dst, 32); // clear upper 32bit, do not sign extend.
2851 }
2852 
2853 void  MacroAssembler::set_narrow_klass(Register dst, Klass* k) {
2854   assert (UseCompressedClassPointers, "should only be used for compressed headers");
2855   assert (oop_recorder() != NULL, "this assembler needs an OopRecorder");
2856   int index = oop_recorder()->find_index(k);
2857   assert(!Universe::heap()->is_in(k), "should not be an oop");
2858 
2859   InstructionMark im(this);
2860   RelocationHolder rspec = metadata_Relocation::spec(index);
2861   code_section()->relocate(inst_mark(), rspec);
2862   narrowKlass nk = CompressedKlassPointers::encode(k);
2863   li32(dst, nk);
2864   clear_upper_bits(dst, 32); // clear upper 32bit, do not sign extend.
2865 }
2866 
2867 // Maybe emit a call via a trampoline.  If the code cache is small
2868 // trampolines won't be emitted.
2869 address MacroAssembler::trampoline_call(Address entry, CodeBuffer* cbuf) {
2870   assert(JavaThread::current()->is_Compiler_thread(), "just checking");
2871   assert(entry.rspec().type() == relocInfo::runtime_call_type ||
2872          entry.rspec().type() == relocInfo::opt_virtual_call_type ||
2873          entry.rspec().type() == relocInfo::static_call_type ||
2874          entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type");
2875 
2876   // We need a trampoline if branches are far.
2877   if (far_branches()) {
2878     bool in_scratch_emit_size = false;
2879 #ifdef COMPILER2
2880     // We don't want to emit a trampoline if C2 is generating dummy
2881     // code during its branch shortening phase.
2882     CompileTask* task = ciEnv::current()->task();
2883     in_scratch_emit_size =
2884       (task != NULL && is_c2_compile(task->comp_level()) &&
2885        Compile::current()->output()->in_scratch_emit_size());
2886 #endif
2887     if (!in_scratch_emit_size) {
2888       address stub = emit_trampoline_stub(offset(), entry.target());
2889       if (stub == NULL) {
2890         postcond(pc() == badAddress);
2891         return NULL; // CodeCache is full
2892       }
2893     }
2894   }
2895 
2896   if (cbuf != NULL) { cbuf->set_insts_mark(); }
2897   relocate(entry.rspec());
2898   if (!far_branches()) {
2899     jal(entry.target());
2900   } else {
2901     jal(pc());
2902   }
2903   // just need to return a non-null address
2904   postcond(pc() != badAddress);
2905   return pc();
2906 }
2907 
2908 address MacroAssembler::ic_call(address entry, jint method_index) {
2909   RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index);
2910   movptr(t1, (address)Universe::non_oop_word());
2911   assert_cond(entry != NULL);
2912   return trampoline_call(Address(entry, rh));
2913 }
2914 
2915 // Emit a trampoline stub for a call to a target which is too far away.
2916 //
2917 // code sequences:
2918 //
2919 // call-site:
2920 //   branch-and-link to <destination> or <trampoline stub>
2921 //
2922 // Related trampoline stub for this call site in the stub section:
2923 //   load the call target from the constant pool
2924 //   branch (LR still points to the call site above)
2925 
2926 address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset,
2927                                              address dest) {
2928   address stub = start_a_stub(NativeInstruction::instruction_size
2929                             + NativeCallTrampolineStub::instruction_size);
2930   if (stub == NULL) {
2931     return NULL;  // CodeBuffer::expand failed
2932   }
2933 
2934   // Create a trampoline stub relocation which relates this trampoline stub
2935   // with the call instruction at insts_call_instruction_offset in the
2936   // instructions code-section.
2937 
2938   // make sure 4 byte aligned here, so that the destination address would be
2939   // 8 byte aligned after 3 intructions
2940   while (offset() % wordSize == 0) { nop(); }
2941 
2942   relocate(trampoline_stub_Relocation::spec(code()->insts()->start() +
2943                                             insts_call_instruction_offset));
2944   const int stub_start_offset = offset();
2945 
2946   // Now, create the trampoline stub's code:
2947   // - load the call
2948   // - call
2949   Label target;
2950   ld(t0, target);  // auipc + ld
2951   jr(t0);          // jalr
2952   bind(target);
2953   assert(offset() - stub_start_offset == NativeCallTrampolineStub::data_offset,
2954          "should be");
2955   emit_int64((intptr_t)dest);
2956 
2957   const address stub_start_addr = addr_at(stub_start_offset);
2958 
2959   assert(is_NativeCallTrampolineStub_at(stub_start_addr), "doesn't look like a trampoline");
2960 
2961   end_a_stub();
2962   return stub_start_addr;
2963 }
2964 
2965 Address MacroAssembler::add_memory_helper(const Address dst) {
2966   switch (dst.getMode()) {
2967     case Address::base_plus_offset:
2968       // This is the expected mode, although we allow all the other
2969       // forms below.
2970       return form_address(t1, dst.base(), dst.offset());
2971     default:
2972       la(t1, dst);
2973       return Address(t1);
2974   }
2975 }
2976 
2977 void MacroAssembler::add_memory_int64(const Address dst, int64_t imm) {
2978   Address adr = add_memory_helper(dst);
2979   assert_different_registers(adr.base(), t0);
2980   ld(t0, adr);
2981   addi(t0, t0, imm);
2982   sd(t0, adr);
2983 }
2984 
2985 void MacroAssembler::add_memory_int32(const Address dst, int32_t imm) {
2986   Address adr = add_memory_helper(dst);
2987   assert_different_registers(adr.base(), t0);
2988   lwu(t0, adr);
2989   addiw(t0, t0, imm);
2990   sw(t0, adr);
2991 }
2992 
2993 void MacroAssembler::cmpptr(Register src1, Address src2, Label& equal) {
2994   assert_different_registers(src1, t0);
2995   int32_t offset;
2996   la_patchable(t0, src2, offset);
2997   ld(t0, Address(t0, offset));
2998   beq(src1, t0, equal);
2999 }
3000 
3001 void MacroAssembler::load_method_holder_cld(Register result, Register method) {
3002   load_method_holder(result, method);
3003   ld(result, Address(result, InstanceKlass::class_loader_data_offset()));
3004 }
3005 
3006 void MacroAssembler::load_method_holder(Register holder, Register method) {
3007   ld(holder, Address(method, Method::const_offset()));                      // ConstMethod*
3008   ld(holder, Address(holder, ConstMethod::constants_offset()));             // ConstantPool*
3009   ld(holder, Address(holder, ConstantPool::pool_holder_offset_in_bytes())); // InstanceKlass*
3010 }
3011 
3012 // string indexof
3013 // compute index by trailing zeros
3014 void MacroAssembler::compute_index(Register haystack, Register trailing_zeros,
3015                                    Register match_mask, Register result,
3016                                    Register ch2, Register tmp,
3017                                    bool haystack_isL)
3018 {
3019   int haystack_chr_shift = haystack_isL ? 0 : 1;
3020   srl(match_mask, match_mask, trailing_zeros);
3021   srli(match_mask, match_mask, 1);
3022   srli(tmp, trailing_zeros, LogBitsPerByte);
3023   if (!haystack_isL) andi(tmp, tmp, 0xE);
3024   add(haystack, haystack, tmp);
3025   ld(ch2, Address(haystack));
3026   if (!haystack_isL) srli(tmp, tmp, haystack_chr_shift);
3027   add(result, result, tmp);
3028 }
3029 
3030 // string indexof
3031 // find pattern element in src, compute match mask,
3032 // only the first occurrence of 0x80/0x8000 at low bits is the valid match index
3033 // match mask patterns and corresponding indices would be like:
3034 // - 0x8080808080808080 (Latin1)
3035 // -   7 6 5 4 3 2 1 0  (match index)
3036 // - 0x8000800080008000 (UTF16)
3037 // -   3   2   1   0    (match index)
3038 void MacroAssembler::compute_match_mask(Register src, Register pattern, Register match_mask,
3039                                         Register mask1, Register mask2)
3040 {
3041   xorr(src, pattern, src);
3042   sub(match_mask, src, mask1);
3043   orr(src, src, mask2);
3044   notr(src, src);
3045   andr(match_mask, match_mask, src);
3046 }
3047 
3048 // count bits of trailing zero chars from lsb to msb until first non-zero element.
3049 // For LL case, one byte for one element, so shift 8 bits once, and for other case,
3050 // shift 16 bits once.
3051 void MacroAssembler::ctzc_bit(Register Rd, Register Rs, bool isLL, Register Rtmp1, Register Rtmp2)
3052 {
3053   assert_different_registers(Rd, Rs, Rtmp1, Rtmp2);
3054   Label Loop;
3055   int step = isLL ? 8 : 16;
3056   li(Rd, -step);
3057   mv(Rtmp2, Rs);
3058 
3059   bind(Loop);
3060   addi(Rd, Rd, step);
3061   andi(Rtmp1, Rtmp2, ((1 << step) - 1));
3062   srli(Rtmp2, Rtmp2, step);
3063   beqz(Rtmp1, Loop);
3064 }
3065 
3066 // This instruction reads adjacent 4 bytes from the lower half of source register,
3067 // inflate into a register, for example:
3068 // Rs: A7A6A5A4A3A2A1A0
3069 // Rd: 00A300A200A100A0
3070 void MacroAssembler::inflate_lo32(Register Rd, Register Rs, Register Rtmp1, Register Rtmp2)
3071 {
3072   assert_different_registers(Rd, Rs, Rtmp1, Rtmp2);
3073   li(Rtmp1, 0xFF);
3074   mv(Rd, zr);
3075   for (int i = 0; i <= 3; i++)
3076   {
3077     andr(Rtmp2, Rs, Rtmp1);
3078     if (i) {
3079       slli(Rtmp2, Rtmp2, i * 8);
3080     }
3081     orr(Rd, Rd, Rtmp2);
3082     if (i != 3) {
3083       slli(Rtmp1, Rtmp1, 8);
3084     }
3085   }
3086 }
3087 
3088 // This instruction reads adjacent 4 bytes from the upper half of source register,
3089 // inflate into a register, for example:
3090 // Rs: A7A6A5A4A3A2A1A0
3091 // Rd: 00A700A600A500A4
3092 void MacroAssembler::inflate_hi32(Register Rd, Register Rs, Register Rtmp1, Register Rtmp2)
3093 {
3094   assert_different_registers(Rd, Rs, Rtmp1, Rtmp2);
3095   li(Rtmp1, 0xFF00000000);
3096   mv(Rd, zr);
3097   for (int i = 0; i <= 3; i++)
3098   {
3099     andr(Rtmp2, Rs, Rtmp1);
3100     orr(Rd, Rd, Rtmp2);
3101     srli(Rd, Rd, 8);
3102     if (i != 3) {
3103       slli(Rtmp1, Rtmp1, 8);
3104     }
3105   }
3106 }
3107 
3108 // The size of the blocks erased by the zero_blocks stub.  We must
3109 // handle anything smaller than this ourselves in zero_words().
3110 const int MacroAssembler::zero_words_block_size = 8;
3111 
3112 // zero_words() is used by C2 ClearArray patterns.  It is as small as
3113 // possible, handling small word counts locally and delegating
3114 // anything larger to the zero_blocks stub.  It is expanded many times
3115 // in compiled code, so it is important to keep it short.
3116 
3117 // ptr:   Address of a buffer to be zeroed.
3118 // cnt:   Count in HeapWords.
3119 //
3120 // ptr, cnt, and t0 are clobbered.
3121 address MacroAssembler::zero_words(Register ptr, Register cnt)
3122 {
3123   assert(is_power_of_2(zero_words_block_size), "adjust this");
3124   assert(ptr == x28 && cnt == x29, "mismatch in register usage");
3125   assert_different_registers(cnt, t0);
3126 
3127   BLOCK_COMMENT("zero_words {");
3128   mv(t0, zero_words_block_size);
3129   Label around, done, done16;
3130   bltu(cnt, t0, around);
3131   {
3132     RuntimeAddress zero_blocks = RuntimeAddress(StubRoutines::riscv64::zero_blocks());
3133     assert(zero_blocks.target() != NULL, "zero_blocks stub has not been generated");
3134     if (StubRoutines::riscv64::complete()) {
3135       address tpc = trampoline_call(zero_blocks);
3136       if (tpc == NULL) {
3137         DEBUG_ONLY(reset_labels(around));
3138         postcond(pc() == badAddress);
3139         return NULL;
3140       }
3141     } else {
3142       jal(zero_blocks);
3143     }
3144   }
3145   bind(around);
3146   for (int i = zero_words_block_size >> 1; i > 1; i >>= 1) {
3147     Label l;
3148     andi(t0, cnt, i);
3149     beqz(t0, l);
3150     for (int j = 0; j < i; j++) {
3151       sd(zr, Address(ptr, 0));
3152       addi(ptr, ptr, 8);
3153     }
3154     bind(l);
3155   }
3156   {
3157     Label l;
3158     andi(t0, cnt, 1);
3159     beqz(t0, l);
3160     sd(zr, Address(ptr, 0));
3161     bind(l);
3162   }
3163   BLOCK_COMMENT("} zero_words");
3164   postcond(pc() != badAddress);
3165   return pc();
3166 }
3167 
3168 // base:         Address of a buffer to be zeroed, 8 bytes aligned.
3169 // cnt:          Immediate count in HeapWords.
3170 #define SmallArraySize (18 * BytesPerLong)
3171 void MacroAssembler::zero_words(Register base, u_int64_t cnt)
3172 {
3173   assert_different_registers(base, t0, t1);
3174 
3175   BLOCK_COMMENT("zero_words {");
3176 
3177   if (cnt <= SmallArraySize / BytesPerLong) {
3178     for (int i = 0; i < (int)cnt; i++) {
3179       sd(zr, Address(base, i * wordSize));
3180     }
3181   } else {
3182     const int unroll = 8; // Number of sd(zr, adr), instructions we'll unroll
3183     int remainder = cnt %  unroll;
3184     for (int i = 0; i < remainder; i++) {
3185       sd(zr, Address(base, i * wordSize));
3186     }
3187 
3188     Label loop;
3189     Register cnt_reg = t0;
3190     Register loop_base = t1;
3191     cnt = cnt - remainder;
3192     li(cnt_reg, cnt);
3193     add(loop_base, base, remainder * wordSize);
3194     bind(loop);
3195     sub(cnt_reg, cnt_reg, unroll);
3196     for (int i = 0; i < unroll; i++) {
3197       sd(zr, Address(loop_base, i * wordSize));
3198     }
3199     add(loop_base, loop_base, unroll * wordSize);
3200     bnez(cnt_reg, loop);
3201   }
3202   BLOCK_COMMENT("} zero_words");
3203 }
3204 
3205 // base:   Address of a buffer to be filled, 8 bytes aligned.
3206 // cnt:    Count in 8-byte unit.
3207 // value:  Value to be filled with.
3208 // base will point to the end of the buffer after filling.
3209 void MacroAssembler::fill_words(Register base, Register cnt, Register value)
3210 {
3211 //  Algorithm:
3212 //
3213 //    t0 = cnt & 7
3214 //    cnt -= t0
3215 //    p += t0
3216 //    switch (t0):
3217 //      switch start:
3218 //      do while cnt
3219 //        cnt -= 8
3220 //          p[-8] = value
3221 //        case 7:
3222 //          p[-7] = value
3223 //        case 6:
3224 //          p[-6] = value
3225 //          // ...
3226 //        case 1:
3227 //          p[-1] = value
3228 //        case 0:
3229 //          p += 8
3230 //      do-while end
3231 //    switch end
3232 
3233   assert_different_registers(base, cnt, value, t0, t1);
3234 
3235   Label fini, skip, entry, loop;
3236   const int unroll = 8; // Number of sd instructions we'll unroll
3237 
3238   beqz(cnt, fini);
3239 
3240   andi(t0, cnt, unroll - 1);
3241   sub(cnt, cnt, t0);
3242   slli(t1, t0, 3);
3243   add(base, base, t1); // align 8, so first sd n % 8 = mod, next loop sd 8 * n.
3244   la(t1, entry);
3245   slli(t0, t0, 2); // sd_inst_nums * 4; t0 is cnt % 8, so t1 = t1 - sd_inst_nums * 4, 4 is sizeof(inst)
3246   sub(t1, t1, t0);
3247   jr(t1);
3248 
3249   bind(loop);
3250   add(base, base, unroll * 8);
3251   for (int i = -unroll; i < 0; i++) {
3252     sd(value, Address(base, i * 8));
3253   }
3254   bind(entry);
3255   sub(cnt, cnt, unroll);
3256   bgez(cnt, loop);
3257 
3258   bind(fini);
3259 }
3260 
3261 #define FCVT_SAFE(FLOATCVT, FLOATEQ)                                                             \
3262 void MacroAssembler:: FLOATCVT##_safe(Register dst, FloatRegister src, Register temp) {          \
3263   Label L_Okay;                                                                                  \
3264   fscsr(zr);                                                                                     \
3265   FLOATCVT(dst, src);                                                                            \
3266   frcsr(temp);                                                                                   \
3267   andi(temp, temp, 0x1E);                                                                        \
3268   beqz(temp, L_Okay);                                                                            \
3269   FLOATEQ(temp, src, src);                                                                       \
3270   bnez(temp, L_Okay);                                                                            \
3271   mv(dst, zr);                                                                                   \
3272   bind(L_Okay);                                                                                  \
3273 }
3274 
3275 FCVT_SAFE(fcvt_w_s, feq_s)
3276 FCVT_SAFE(fcvt_l_s, feq_s)
3277 FCVT_SAFE(fcvt_w_d, feq_d)
3278 FCVT_SAFE(fcvt_l_d, feq_d)
3279 
3280 #undef FCVT_SAFE
3281 
3282 #define FCMP(FLOATTYPE, FLOATSIG)                                                       \
3283 void MacroAssembler::FLOATTYPE##_compare(Register result, FloatRegister Rs1,            \
3284                                          FloatRegister Rs2, int unordered_result) {     \
3285   Label Ldone;                                                                          \
3286   if (unordered_result < 0) {                                                           \
3287     /* we want -1 for unordered or less than, 0 for equal and 1 for greater than. */    \
3288     /* installs 1 if gt else 0 */                                                       \
3289     flt_##FLOATSIG(result, Rs2, Rs1);                                                   \
3290     /* Rs1 > Rs2, install 1 */                                                          \
3291     bgtz(result, Ldone);                                                                \
3292     feq_##FLOATSIG(result, Rs1, Rs2);                                                   \
3293     addi(result, result, -1);                                                           \
3294     /* Rs1 = Rs2, install 0 */                                                          \
3295     /* NaN or Rs1 < Rs2, install -1 */                                                  \
3296     bind(Ldone);                                                                        \
3297   } else {                                                                              \
3298     /* we want -1 for less than, 0 for equal and 1 for unordered or greater than. */    \
3299     /* installs 1 if gt or unordered else 0 */                                          \
3300     flt_##FLOATSIG(result, Rs1, Rs2);                                                   \
3301     /* Rs1 < Rs2, install -1 */                                                         \
3302     bgtz(result, Ldone);                                                                \
3303     feq_##FLOATSIG(result, Rs1, Rs2);                                                   \
3304     addi(result, result, -1);                                                           \
3305     /* Rs1 = Rs2, install 0 */                                                          \
3306     /* NaN or Rs1 > Rs2, install 1 */                                                   \
3307     bind(Ldone);                                                                        \
3308     neg(result, result);                                                                \
3309   }                                                                                     \
3310 }
3311 
3312 FCMP(float, s);
3313 FCMP(double, d);
3314 
3315 #undef FCMP
3316 
3317 // Zero words; len is in bytes
3318 // Destroys all registers except addr
3319 // len must be a nonzero multiple of wordSize
3320 void MacroAssembler::zero_memory(Register addr, Register len, Register tmp1) {
3321   assert_different_registers(addr, len, tmp1, t0, t1);
3322 
3323 #ifdef ASSERT
3324   {
3325     Label L;
3326     andi(t0, len, BytesPerWord - 1);
3327     beqz(t0, L);
3328     stop("len is not a multiple of BytesPerWord");
3329     bind(L);
3330   }
3331 #endif // ASSERT
3332 
3333 #ifndef PRODUCT
3334   block_comment("zero memory");
3335 #endif // PRODUCT
3336 
3337   Label loop;
3338   Label entry;
3339 
3340   // Algorithm:
3341   //
3342   //  t0 = cnt & 7
3343   //  cnt -= t0
3344   //  p += t0
3345   //  switch (t0) {
3346   //    do {
3347   //      cnt -= 8
3348   //        p[-8] = 0
3349   //      case 7:
3350   //        p[-7] = 0
3351   //      case 6:
3352   //        p[-6] = 0
3353   //        ...
3354   //      case 1:
3355   //        p[-1] = 0
3356   //      case 0:
3357   //        p += 8
3358   //     } while (cnt)
3359   //  }
3360 
3361   const int unroll = 8;   // Number of sd(zr) instructions we'll unroll
3362 
3363   srli(len, len, LogBytesPerWord);
3364   andi(t0, len, unroll - 1);  // t0 = cnt % unroll
3365   sub(len, len, t0);          // cnt -= unroll
3366   // tmp1 always points to the end of the region we're about to zero
3367   slli(t1, t0, LogBytesPerWord);
3368   add(tmp1, addr, t1);
3369   la(t1, entry);
3370   slli(t0, t0, 2);
3371   sub(t1, t1, t0);
3372   jr(t1);
3373   bind(loop);
3374   sub(len, len, unroll);
3375   for (int i = -unroll; i < 0; i++) {
3376     Assembler::sd(zr, Address(tmp1, i * wordSize));
3377   }
3378   bind(entry);
3379   add(tmp1, tmp1, unroll * wordSize);
3380   bnez(len, loop);
3381 }
3382 
3383 void MacroAssembler::zero_ext(Register dst, Register src, int clear_bits) {
3384   slli(dst, src, clear_bits);
3385   srli(dst, dst, clear_bits);
3386 }
3387 
3388 void MacroAssembler::sign_ext(Register dst, Register src, int clear_bits) {
3389   slli(dst, src, clear_bits);
3390   srai(dst, dst, clear_bits);
3391 }
3392 
3393 void MacroAssembler::cmp_l2i(Register dst, Register src1, Register src2, Register tmp)
3394 {
3395   if (src1 == src2) {
3396     mv(dst, zr);
3397     return;
3398   }
3399   Label done;
3400   Register left = src1;
3401   Register right = src2;
3402   if (dst == src1) {
3403     assert_different_registers(dst, src2, tmp);
3404     mv(tmp, src1);
3405     left = tmp;
3406   } else if (dst == src2) {
3407     assert_different_registers(dst, src1, tmp);
3408     mv(tmp, src2);
3409     right = tmp;
3410   }
3411 
3412   // installs 1 if gt else 0
3413   slt(dst, right, left);
3414   bnez(dst, done);
3415   slt(dst, left, right);
3416   // dst = -1 if lt; else if eq , dst = 0
3417   neg(dst, dst);
3418   bind(done);
3419 }
3420 
3421 void MacroAssembler::safepoint_ifence() {
3422   ifence();
3423 #ifndef PRODUCT
3424   if (VerifyCrossModifyFence) {
3425     // Clear the thread state.
3426     sb(zr, Address(xthread, in_bytes(JavaThread::requires_cross_modify_fence_offset())));
3427   }
3428 #endif
3429 }
3430 
3431 #ifndef PRODUCT
3432 void MacroAssembler::verify_cross_modify_fence_not_required() {
3433   if (VerifyCrossModifyFence) {
3434     // Check if thread needs a cross modify fence.
3435     lbu(t0, Address(xthread, in_bytes(JavaThread::requires_cross_modify_fence_offset())));
3436     Label fence_not_required;
3437     beqz(t0, fence_not_required);
3438     // If it does then fail.
3439     la(t0, RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::verify_cross_modify_fence_failure)));
3440     mv(c_rarg0, xthread);
3441     jalr(t0);
3442     bind(fence_not_required);
3443   }
3444 }
3445 #endif