New src/hotspot/cpu/riscv/macroAssembler

   1 /*
   2  * Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved.
   3  * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
   4  * Copyright (c) 2020, 2024, Huawei Technologies Co., Ltd. All rights reserved.
   5  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   6  *
   7  * This code is free software; you can redistribute it and/or modify it
   8  * under the terms of the GNU General Public License version 2 only, as
   9  * published by the Free Software Foundation.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  *
  25  */
  26 
  27 #ifndef CPU_RISCV_MACROASSEMBLER_RISCV_HPP
  28 #define CPU_RISCV_MACROASSEMBLER_RISCV_HPP
  29 
  30 #include "asm/assembler.inline.hpp"
  31 #include "code/vmreg.hpp"
  32 #include "metaprogramming/enableIf.hpp"
  33 #include "oops/compressedOops.hpp"
  34 #include "utilities/powerOfTwo.hpp"
  35 
  36 class ciInlineKlass;
  37 class SigEntry;
  38 class VMRegPair;
  39 
  40 // MacroAssembler extends Assembler by frequently used macros.
  41 //
  42 // Instructions for which a 'better' code sequence exists depending
  43 // on arguments should also go in here.
  44 
  45 class MacroAssembler: public Assembler {
  46 
  47  public:
  48 
  49   MacroAssembler(CodeBuffer* code) : Assembler(code) {}
  50 
  51   void safepoint_poll(Label& slow_path, bool at_return, bool in_nmethod, Register tmp_reg = t0);
  52 
  53   // Alignment
  54   int align(int modulus, int extra_offset = 0);
  55 
  56   static inline void assert_alignment(address pc, int alignment = MacroAssembler::instruction_size) {
  57     assert(is_aligned(pc, alignment), "bad alignment");  // pc must be aligned to 'alignment' (default: one instruction)
  58   }
  59 
  60   // nop
  61   void post_call_nop();
  62 
  63   // Stack frame creation/removal
  64   // Note that SP must be updated to the right place before saving/restoring RA and FP
  65   // because signal based thread suspend/resume could happen asynchronously.
  66   void enter() {                     // push a two-word record: [sp] = fp, [sp + wordSize] = ra
  67     subi(sp, sp, 2 * wordSize);      // move SP down before either slot is written
  68     sd(ra, Address(sp, wordSize));   // return address goes in the upper slot
  69     sd(fp, Address(sp));             // caller's frame pointer goes in the lower slot
  70     addi(fp, sp, 2 * wordSize);      // new FP = value SP had on entry
  71   }
  72 
  73   void leave() {                     // reload fp/ra from the two words below FP and pop them
  74     subi(sp, fp, 2 * wordSize);      // point SP at the record stored just below FP
  75     ld(fp, Address(sp));             // caller's frame pointer from the lower slot
  76     ld(ra, Address(sp, wordSize));   // return address from the upper slot
  77     addi(sp, sp, 2 * wordSize);      // release the two words
  78   }
  79 
  80 
  81   // Support for getting the JavaThread pointer (i.e., a reference to thread-local information)
  82   // The pointer will be loaded into the thread register.
  83   void get_thread(Register thread);
  84 
  85   // Support for VM calls
  86   //
  87   // It is imperative that all calls into the VM are handled via the call_VM macros.
  88   // They make sure that the stack linkage is setup correctly. call_VM's correspond
  89   // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points.
  90 
  91   void call_VM(Register oop_result,
  92                address entry_point,
  93                bool check_exceptions = true);
  94   void call_VM(Register oop_result,
  95                address entry_point,
  96                Register arg_1,
  97                bool check_exceptions = true);
  98   void call_VM(Register oop_result,
  99                address entry_point,
 100                Register arg_1, Register arg_2,
 101                bool check_exceptions = true);
 102   void call_VM(Register oop_result,
 103                address entry_point,
 104                Register arg_1, Register arg_2, Register arg_3,
 105                bool check_exceptions = true);
 106 
 107   // Overloadings with last_Java_sp
 108   void call_VM(Register oop_result,
 109                Register last_java_sp,
 110                address entry_point,
 111                int number_of_arguments = 0,
 112                bool check_exceptions = true);
 113   void call_VM(Register oop_result,
 114                Register last_java_sp,
 115                address entry_point,
 116                Register arg_1,
 117                bool check_exceptions = true);
 118   void call_VM(Register oop_result,
 119                Register last_java_sp,
 120                address entry_point,
 121                Register arg_1, Register arg_2,
 122                bool check_exceptions = true);
 123   void call_VM(Register oop_result,
 124                Register last_java_sp,
 125                address entry_point,
 126                Register arg_1, Register arg_2, Register arg_3,
 127                bool check_exceptions = true);
 128 
 129   void get_vm_result_oop(Register oop_result, Register java_thread);
 130   void get_vm_result_metadata(Register metadata_result, Register java_thread);
 131 
 132   // These always tightly bind to MacroAssembler::call_VM_leaf_base
 133   // bypassing the virtual implementation
 134   void call_VM_leaf(address entry_point,
 135                     int number_of_arguments = 0);
 136   void call_VM_leaf(address entry_point,
 137                     Register arg_0);
 138   void call_VM_leaf(address entry_point,
 139                     Register arg_0, Register arg_1);
 140   void call_VM_leaf(address entry_point,
 141                     Register arg_0, Register arg_1, Register arg_2);
 142 
 143   // These always tightly bind to MacroAssembler::call_VM_base
 144   // bypassing the virtual implementation
 145   void super_call_VM_leaf(address entry_point, Register arg_0);
 146   void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1);
 147   void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2);
 148   void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3);
 149 
 150   // last Java Frame (fills frame anchor)
 151   void set_last_Java_frame(Register last_java_sp, Register last_java_fp, address last_java_pc, Register tmp);
 152   void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Label &last_java_pc, Register tmp);
 153   void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Register last_java_pc);
 154 
 155   // thread in the default location (xthread)
 156   void reset_last_Java_frame(bool clear_fp);
 157 
 158   virtual void call_VM_leaf_base(
 159     address entry_point,                // the entry point
 160     int     number_of_arguments,        // the number of arguments to pop after the call
 161     Label*  retaddr = nullptr
 162   );
 163 
 164   virtual void call_VM_leaf_base(     // convenience overload taking the return label by reference
 165     address entry_point,                // the entry point
 166     int     number_of_arguments,        // the number of arguments to pop after the call
 167     Label&  retaddr) {                  // passed on by address to the Label* overload
 168     call_VM_leaf_base(entry_point, number_of_arguments, &retaddr);
 169   }
 170 
 171   virtual void call_VM_base(           // returns the register containing the thread upon return
 172     Register oop_result,               // where an oop-result ends up if any; use noreg otherwise
 173     Register java_thread,              // the thread if computed before     ; use noreg otherwise
 174     Register last_java_sp,             // to set up last_Java_frame in stubs; use noreg otherwise
 175     Label*   return_pc,                // to set up last_Java_frame; use nullptr otherwise
 176     address  entry_point,              // the entry point
 177     int      number_of_arguments,      // the number of arguments (w/o thread) to pop after the call
 178     bool     check_exceptions          // whether to check for pending exceptions after return
 179   );
 180 
 181   void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions);
 182 
 183   virtual void check_and_handle_earlyret(Register java_thread);
 184   virtual void check_and_handle_popframe(Register java_thread);
 185 
 186   void resolve_weak_handle(Register result, Register tmp1, Register tmp2);
 187   void resolve_oop_handle(Register result, Register tmp1, Register tmp2);
 188   void resolve_jobject(Register value, Register tmp1, Register tmp2);
 189   void resolve_global_jobject(Register value, Register tmp1, Register tmp2);
 190 
 191   void movoop(Register dst, jobject obj);
 192   void mov_metadata(Register dst, Metadata* obj);
 193   void bang_stack_size(Register size, Register tmp);
 194   void set_narrow_oop(Register dst, jobject obj);
 195   void set_narrow_klass(Register dst, Klass* k);
 196 
 197   void load_mirror(Register dst, Register method, Register tmp1, Register tmp2);
 198   void access_load_at(BasicType type, DecoratorSet decorators, Register dst,
 199                       Address src, Register tmp1, Register tmp2);
 200   void access_store_at(BasicType type, DecoratorSet decorators, Address dst,
 201                        Register val, Register tmp1, Register tmp2, Register tmp3);
 202   void load_klass(Register dst, Register src, Register tmp = t0);
 203   void load_narrow_klass_compact(Register dst, Register src);
 204   void store_klass(Register dst, Register src, Register tmp = t0);
 205   void cmp_klass_beq(Register obj, Register klass,
 206                      Register tmp1, Register tmp2,
 207                      Label &L, bool is_far = false);
 208   void cmp_klass_bne(Register obj, Register klass,
 209                      Register tmp1, Register tmp2,
 210                      Label &L, bool is_far = false);
 211 
 212   void encode_klass_not_null(Register r, Register tmp = t0);
 213   void decode_klass_not_null(Register r, Register tmp = t0);
 214   void encode_klass_not_null(Register dst, Register src, Register tmp);
 215   void decode_klass_not_null(Register dst, Register src, Register tmp);
 216   void decode_heap_oop_not_null(Register r);
 217   void decode_heap_oop_not_null(Register dst, Register src);
 218   void decode_heap_oop(Register d, Register s);
 219   void decode_heap_oop(Register r) { decode_heap_oop(r, r); }  // in-place form: r is both destination and source
 220   void encode_heap_oop_not_null(Register r);
 221   void encode_heap_oop_not_null(Register dst, Register src);
 222   void encode_heap_oop(Register d, Register s);
 223   void encode_heap_oop(Register r) { encode_heap_oop(r, r); }  // in-place form: r is both destination and source
 224   void load_heap_oop(Register dst, Address src, Register tmp1,
 225                      Register tmp2, DecoratorSet decorators = 0);
 226   void load_heap_oop_not_null(Register dst, Address src, Register tmp1,
 227                               Register tmp2, DecoratorSet decorators = 0);
 228   void store_heap_oop(Address dst, Register val, Register tmp1,
 229                       Register tmp2, Register tmp3, DecoratorSet decorators = 0);
 230 
 231   void store_klass_gap(Register dst, Register src);
 232 
 233   // currently unimplemented
 234   // Used for storing null. All other oop constants should be
 235   // stored using routines that take a jobject.
 236   void store_heap_oop_null(Address dst);
 237 
 238   // This dummy is to prevent a call to store_heap_oop from
 239   // converting a zero (like null) into a Register by giving
 240   // the compiler two choices it can't resolve
 241 
 242   void store_heap_oop(Address dst, void* dummy);
 243 
 244   // Support for null-checks
 245   //
 246   // Generates code that causes a null OS exception if the content of reg is null.
 247   // If the accessed location is M[reg + offset] and the offset is known, provide the
 248   // offset. No explicit code generation is needed if the offset is within a certain
 249   // range (0 <= offset <= page_size).
 250 
 251   virtual void null_check(Register reg, int offset = -1);
 252   static bool needs_explicit_null_check(intptr_t offset);
 253   static bool uses_implicit_null_check(void* address);
 254 
 255   // interface method calling
 256   void lookup_interface_method(Register recv_klass,
 257                                Register intf_klass,
 258                                RegisterOrConstant itable_index,
 259                                Register method_result,
 260                                Register scan_tmp,
 261                                Label& no_such_interface,
 262                                bool return_method = true);
 263 
 264   void lookup_interface_method_stub(Register recv_klass,
 265                                     Register holder_klass,
 266                                     Register resolved_klass,
 267                                     Register method_result,
 268                                     Register temp_reg,
 269                                     Register temp_reg2,
 270                                     int itable_index,
 271                                     Label& L_no_such_interface);
 272 
 273   // virtual method calling
 274   // n.b. x86 allows RegisterOrConstant for vtable_index
 275   void lookup_virtual_method(Register recv_klass,
 276                              RegisterOrConstant vtable_index,
 277                              Register method_result);
 278 
 279   // Form an address from base + offset in Rd. Rd may or may not
 280   // actually be used: you must use the Address that is returned. It
 281   // is up to you to ensure that the shift provided matches the size
 282   // of your data.
 283   Address form_address(Register Rd, Register base, int64_t byte_offset);
 284 
 285   // Sometimes we get misaligned loads and stores, usually from Unsafe
 286   // accesses, and these can exceed the offset range.
 287   Address legitimize_address(Register Rd, const Address &adr) {
 288     const bool has_offset = (adr.getMode() == Address::base_plus_offset);
 289     if (has_offset && !is_simm12(adr.offset())) {
 290       // Offset is not a simm12: hand base and offset to form_address, with Rd available to it.
 291       return form_address(Rd, adr.base(), adr.offset());
 292     }
 293     return adr;
 294   }
 295 
 296   // allocation
 297   void tlab_allocate(
 298     Register obj,                   // result: pointer to object after successful allocation
 299     Register var_size_in_bytes,     // object size in bytes if unknown at compile time; invalid otherwise
 300     int      con_size_in_bytes,     // object size in bytes if   known at compile time
 301     Register tmp1,                  // temp register
 302     Register tmp2,                  // temp register
 303     Label&   slow_case,             // continuation point if fast allocation fails
 304     bool     is_far = false         // NOTE(review): presumably requests a far branch to slow_case - confirm in the definition
 305   );
 306 
 307   // Test sub_klass against super_klass, with fast and slow paths.
 308 
 309   // The fast path produces a tri-state answer: yes / no / maybe-slow.
 310   // One of the three labels can be null, meaning take the fall-through.
 311   // If super_check_offset is noreg (the default), the value is loaded up from super_klass.
 312   // No registers are killed, except tmp_reg
 313   void check_klass_subtype_fast_path(Register sub_klass,
 314                                      Register super_klass,
 315                                      Register tmp_reg,
 316                                      Label* L_success,
 317                                      Label* L_failure,
 318                                      Label* L_slow_path,
 319                                      Register super_check_offset = noreg);
 320 
 321   // The rest of the type check; must be wired to a corresponding fast path.
 322   // It does not repeat the fast path logic, so don't use it standalone.
 323   // The tmp1_reg and tmp2_reg can be noreg, if no temps are available.
 324   // Updates the sub's secondary super cache as necessary.
 325   void check_klass_subtype_slow_path(Register sub_klass,
 326                                      Register super_klass,
 327                                      Register tmp1_reg,
 328                                      Register tmp2_reg,
 329                                      Label* L_success,
 330                                      Label* L_failure,
 331                                      bool set_cond_codes = false);
 332 
 333   void check_klass_subtype_slow_path_linear(Register sub_klass,
 334                                             Register super_klass,
 335                                             Register tmp1_reg,
 336                                             Register tmp2_reg,
 337                                             Label* L_success,
 338                                             Label* L_failure,
 339                                             bool set_cond_codes = false);
 340 
 341   void check_klass_subtype_slow_path_table(Register sub_klass,
 342                                            Register super_klass,
 343                                            Register tmp1_reg,
 344                                            Register tmp2_reg,
 345                                            Label* L_success,
 346                                            Label* L_failure,
 347                                            bool set_cond_codes = false);
 348 
 349   // If r is valid, return r.
 350   // If r is invalid, remove a register r2 from available_regs, add r2
 351   // to regs_to_push, then return r2.
 352   Register allocate_if_noreg(const Register r,
 353                              RegSetIterator<Register> &available_regs,
 354                              RegSet &regs_to_push);
 355 
 356   // Secondary subtype checking
 357   void lookup_secondary_supers_table_var(Register sub_klass,
 358                                          Register r_super_klass,
 359                                          Register result,
 360                                          Register tmp1,
 361                                          Register tmp2,
 362                                          Register tmp3,
 363                                          Register tmp4,
 364                                          Label *L_success);
 365 
 366   void population_count(Register dst, Register src, Register tmp1, Register tmp2);
 367 
 368   // As above, but with a constant super_klass.
 369   // The result is in Register result, not the condition codes.
 370   bool lookup_secondary_supers_table_const(Register r_sub_klass,
 371                                            Register r_super_klass,
 372                                            Register result,
 373                                            Register tmp1,
 374                                            Register tmp2,
 375                                            Register tmp3,
 376                                            Register tmp4,
 377                                            u1 super_klass_slot,
 378                                            bool stub_is_near = false);
 379 
 380   void verify_secondary_supers_table(Register r_sub_klass,
 381                                      Register r_super_klass,
 382                                      Register result,
 383                                      Register tmp1,
 384                                      Register tmp2,
 385                                      Register tmp3);
 386 
 387   void lookup_secondary_supers_table_slow_path(Register r_super_klass,
 388                                                Register r_array_base,
 389                                                Register r_array_index,
 390                                                Register r_bitmap,
 391                                                Register result,
 392                                                Register tmp,
 393                                                bool is_stub = true);
 394 
 395   void check_klass_subtype(Register sub_klass,
 396                            Register super_klass,
 397                            Register tmp_reg,
 398                            Label& L_success);
 399 
 400   Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0);
 401 
 402   void profile_receiver_type(Register recv, Register mdp, int mdp_offset);
 403 
 404   // only if +VerifyOops
 405   void _verify_oop(Register reg, const char* s, const char* file, int line);
 406   void _verify_oop_addr(Address addr, const char* s, const char* file, int line);
 407 
 408   void _verify_oop_checked(Register reg, const char* s, const char* file, int line) {
 409     if (VerifyOops) {                    // forward to _verify_oop only when VerifyOops is set
 410       _verify_oop(reg, s, file, line);
 411     }
 412   }
 413   void _verify_oop_addr_checked(Address reg, const char* s, const char* file, int line) {
 414     if (VerifyOops) {                         // forward to _verify_oop_addr only when VerifyOops is set
 415       _verify_oop_addr(reg, s, file, line);   // 'reg' is an Address here despite its name
 416     }
 417   }
 418 
 419   void _verify_method_ptr(Register reg, const char* msg, const char* file, int line) {}  // empty body: no check is performed
 420   void _verify_klass_ptr(Register reg, const char* msg, const char* file, int line) {}  // empty body: no check is performed
 421 
 422 #define verify_oop(reg) _verify_oop_checked(reg, "broken oop " #reg, __FILE__, __LINE__)
 423 #define verify_oop_msg(reg, msg) _verify_oop_checked(reg, "broken oop " #reg ", " #msg, __FILE__, __LINE__)
 424 #define verify_oop_addr(addr) _verify_oop_addr_checked(addr, "broken oop addr " #addr, __FILE__, __LINE__)
 425 #define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__)
 426 #define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__)  // klass check uses its own hook, not the method one
 427 
 428   // A more convenient access to fence for our purposes
 429   // Four bits are used: the upper two give the predecessor set and the lower two the successor set
 430   // (10 = r, 01 = w); r is extended with i and w with o if UseConservativeFence is enabled.
 431   enum Membar_mask_bits {
 432     StoreStore = 0b0101,               // (pred = w   + succ = w)
 433     LoadStore  = 0b1001,               // (pred = r   + succ = w)
 434     StoreLoad  = 0b0110,               // (pred = w   + succ = r)
 435     LoadLoad   = 0b1010,               // (pred = r   + succ = r)
 436     AnyAny     = LoadStore | StoreLoad // (pred = rw  + succ = rw)
 437   };
 438 
 439   void membar(uint32_t order_constraint);
 440 
 441  private:
 442 
 443   static void membar_mask_to_pred_succ(uint32_t order_constraint,
 444                                        uint32_t& predecessor, uint32_t& successor) {  // both results are written through the references
 445     predecessor = (order_constraint >> 2) & 0x3;  // bits 3..2 of the mask
 446     successor = order_constraint & 0x3;           // bits 1..0 of the mask
 447 
 448     // extend rw -> iorw:
 449     // 01(w) -> 0101(ow)
 450     // 10(r) -> 1010(ir)
 451     // 11(rw)-> 1111(iorw)
 452     if (UseConservativeFence) {
 453       predecessor |= predecessor << 2;  // replicate the two rw bits into the next two bit positions
 454       successor   |= successor << 2;
 455     }
 456   }
 457 
 458   static int pred_succ_to_membar_mask(uint32_t predecessor, uint32_t successor) {
 459     return ((predecessor & 0x3) << 2) | (successor & 0x3);  // keep only the low two bits of each; predecessor lands in bits 3..2
 460   }
 461 
 462  public:
 463 
 464   void cmodx_fence();
 465 
 466   void pause() {
 467     // Zihintpause
 468     // PAUSE is encoded as a FENCE instruction with pred=W, succ=0, fm=0, rd=x0, and rs1=x0.
 469     Assembler::fence(w, 0);  // pred = w, succ = 0; explicitly the Assembler-level fence
 470   }
 471 
 472   // prints msg, dumps registers and stops execution
 473   void stop(const char* msg);
 474 
 475   static void debug64(char* msg, int64_t pc, int64_t regs[]);
 476 
 477   void unimplemented(const char* what = "");
 478 
 479   void should_not_reach_here() { stop("should not reach here"); }  // shorthand for stop() with a fixed message
 480 
 481   static address target_addr_for_insn(address insn_addr);
 482 
 483   // Required platform-specific helpers for Label::patch_instructions.
 484   // They _shadow_ the declarations in AbstractAssembler, which are undefined.
 485   static int pd_patch_instruction_size(address branch, address target);
 486   static void pd_patch_instruction(address branch, address target, const char* file = nullptr, int line = 0) {
 487     pd_patch_instruction_size(branch, target);  // file/line are not used here; the returned int is discarded
 488   }
 489   static address pd_call_destination(address branch) {
 490     return target_addr_for_insn(branch);  // thin alias for target_addr_for_insn
 491   }
 492 
 493   static int patch_oop(address insn_addr, address o);
 494 
 495   static address get_target_of_li32(address insn_addr);
 496   static int patch_imm_in_li32(address branch, int32_t target);
 497 
 498   // Return whether code is emitted to a scratch blob.
 499   virtual bool in_scratch_emit_size() {
 500     return false;  // this implementation always answers false; the method is virtual
 501   }
 502 
 503   address emit_reloc_call_address_stub(int insts_call_instruction_offset, address target);
 504   static int max_reloc_call_address_stub_size();
 505 
 506   void emit_static_call_stub();
 507   static int static_call_stub_size();
 508 
 509   // The following 4 methods return the offset of the appropriate move instruction
 510 
 511   // Support for fast byte/short loading with zero extension (depending on particular CPU)
 512   int load_unsigned_byte(Register dst, Address src);
 513   int load_unsigned_short(Register dst, Address src);
 514 
 515   // Support for fast byte/short loading with sign extension (depending on particular CPU)
 516   int load_signed_byte(Register dst, Address src);
 517   int load_signed_short(Register dst, Address src);
 518 
 519   // Load and store values by size and signed-ness
 520   void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed);
 521   void store_sized_value(Address dst, Register src, size_t size_in_bytes);
 522 
 523   // Misaligned loads, will use the best way, according to the AvoidUnalignedAccess flag
 524   void load_short_misaligned(Register dst, Address src, Register tmp, bool is_signed, int granularity = 1);
 525   void load_int_misaligned(Register dst, Address src, Register tmp, bool is_signed, int granularity = 1);
 526   void load_long_misaligned(Register dst, Address src, Register tmp, int granularity = 1);
 527 
 528  public:
 529   // Standard pseudo instructions
 530   inline void nop() {
 531     addi(x0, x0, 0);  // nop is emitted as addi x0, x0, 0
 532   }
 533 
 534   inline void mv(Register Rd, Register Rs) {
 535     if (Rd != Rs) {        // a move onto the same register emits nothing
 536       addi(Rd, Rs, 0);     // Rd = Rs + 0
 537     }
 538   }
 539 
 540   inline void notr(Register Rd, Register Rs) {
 541     if (do_compress_zcb(Rd, Rs) && (Rd == Rs)) {  // c_not takes a single register, so it needs Rd == Rs
 542       c_not(Rd);
 543     } else {
 544       xori(Rd, Rs, -1);                           // Rd = Rs ^ -1
 545     }
 546   }
 547 
 548   inline void neg(Register Rd, Register Rs) {
 549     sub(Rd, x0, Rs);  // Rd = 0 - Rs
 550   }
 551 
 552   inline void negw(Register Rd, Register Rs) {
 553     subw(Rd, x0, Rs);  // same as neg but using subw
 554   }
 555 
 556   inline void sext_w(Register Rd, Register Rs) {
 557     addiw(Rd, Rs, 0);  // sign-extend word: addiw with a zero immediate
 558   }
 559 
 560   inline void zext_b(Register Rd, Register Rs) {
 561     if (do_compress_zcb(Rd, Rs) && (Rd == Rs)) {  // c_zext_b takes a single register, so it needs Rd == Rs
 562       c_zext_b(Rd);
 563     } else {
 564       andi(Rd, Rs, 0xFF);                         // keep only the low 8 bits
 565     }
 566   }
 567 
 568   inline void seqz(Register Rd, Register Rs) {
 569     sltiu(Rd, Rs, 1);  // Rd = (Rs <u 1), i.e. Rs == 0
 570   }
 571 
 572   inline void snez(Register Rd, Register Rs) {
 573     sltu(Rd, x0, Rs);  // Rd = (0 <u Rs), i.e. Rs != 0
 574   }
 575 
 576   inline void sltz(Register Rd, Register Rs) {
 577     slt(Rd, Rs, x0);  // Rd = (Rs < 0), signed compare
 578   }
 579 
 580   inline void sgtz(Register Rd, Register Rs) {
 581     slt(Rd, x0, Rs);  // Rd = (0 < Rs), signed compare
 582   }
 583 
 584   // Bit-manipulation extension pseudo instructions
 585   // zero extend word
 586   inline void zext_w(Register Rd, Register Rs) {
 587     assert(UseZba, "must be");                    // callers must have UseZba enabled
 588     if (do_compress_zcb(Rd, Rs) && (Rd == Rs)) {  // c_zext_w takes a single register, so it needs Rd == Rs
 589       c_zext_w(Rd);
 590     } else {
 591       add_uw(Rd, Rs, zr);                         // add_uw with zr as the last operand
 592     }
 593   }
 594 
 595   // Floating-point data-processing pseudo instructions
 596   inline void fmv_s(FloatRegister Rd, FloatRegister Rs) {
 597     if (Rd != Rs) {            // a move onto the same register emits nothing
 598       fsgnj_s(Rd, Rs, Rs);     // single-precision move via fsgnj with both sources Rs
 599     }
 600   }
 601 
 602   inline void fabs_s(FloatRegister Rd, FloatRegister Rs) {
 603     fsgnjx_s(Rd, Rs, Rs);  // single-precision abs via fsgnjx with both sources Rs
 604   }
 605 
 606   inline void fneg_s(FloatRegister Rd, FloatRegister Rs) {
 607     fsgnjn_s(Rd, Rs, Rs);  // single-precision negate via fsgnjn with both sources Rs
 608   }
 609 
 610   inline void fmv_d(FloatRegister Rd, FloatRegister Rs) {
 611     if (Rd != Rs) {            // a move onto the same register emits nothing
 612       fsgnj_d(Rd, Rs, Rs);     // double-precision move via fsgnj with both sources Rs
 613     }
 614   }
 615 
 616   inline void fabs_d(FloatRegister Rd, FloatRegister Rs) {
 617     fsgnjx_d(Rd, Rs, Rs);  // double-precision abs via fsgnjx with both sources Rs
 618   }
 619 
 620   inline void fneg_d(FloatRegister Rd, FloatRegister Rs) {
 621     fsgnjn_d(Rd, Rs, Rs);  // double-precision negate via fsgnjn with both sources Rs
 622   }
 623 
 624   // Control and status pseudo instructions
 625   void csrr(Register Rd, unsigned csr);         // read csr
 626   void csrw(unsigned csr, Register Rs);         // write csr
 627   void csrs(unsigned csr, Register Rs);         // set bits in csr
 628   void csrc(unsigned csr, Register Rs);         // clear bits in csr
 629   void csrwi(unsigned csr, unsigned imm);
 630   void csrsi(unsigned csr, unsigned imm);
 631   void csrci(unsigned csr, unsigned imm);
 632   void frcsr(Register Rd) { csrr(Rd, CSR_FCSR); }  // read floating-point csr
 633   void fscsr(Register Rd, Register Rs);            // swap floating-point csr
 634   void fscsr(Register Rs);                         // write floating-point csr
 635   void frrm(Register Rd) { csrr(Rd, CSR_FRM); }    // read floating-point rounding mode
 636   void fsrm(Register Rd, Register Rs);             // swap floating-point rounding mode
 637   void fsrm(Register Rs);                          // write floating-point rounding mode
 638   void fsrmi(Register Rd, unsigned imm);
 639   void fsrmi(unsigned imm);
 640   void frflags(Register Rd) { csrr(Rd, CSR_FFLAGS); }  // read floating-point exception flags
 641   void fsflags(Register Rd, Register Rs);              // swap floating-point exception flags
 642   void fsflags(Register Rs);                           // write floating-point exception flags
 643   void fsflagsi(Register Rd, unsigned imm);
 644   void fsflagsi(unsigned imm);
 645   // Requires Zicntr
 646   void rdinstret(Register Rd) { csrr(Rd, CSR_INSTRET); }  // read instruction-retired counter
 647   void rdcycle(Register Rd)   { csrr(Rd, CSR_CYCLE); }    // read cycle counter
 648   void rdtime(Register Rd)    { csrr(Rd, CSR_TIME); }     // read time
 649 
 650   // Restore cpu control state after JNI call
 651   void restore_cpu_control_state_after_jni(Register tmp);
 652 
 653   // Control transfer pseudo instructions
 654   void beqz(Register Rs, const address dest);
 655   void bnez(Register Rs, const address dest);
 656   void blez(Register Rs, const address dest);
 657   void bgez(Register Rs, const address dest);
 658   void bltz(Register Rs, const address dest);
 659   void bgtz(Register Rs, const address dest);
 660 
 661   void cmov_eq(Register cmp1, Register cmp2, Register dst, Register src);
 662   void cmov_ne(Register cmp1, Register cmp2, Register dst, Register src);
 663   void cmov_le(Register cmp1, Register cmp2, Register dst, Register src);
 664   void cmov_leu(Register cmp1, Register cmp2, Register dst, Register src);
 665   void cmov_ge(Register cmp1, Register cmp2, Register dst, Register src);
 666   void cmov_geu(Register cmp1, Register cmp2, Register dst, Register src);
 667   void cmov_lt(Register cmp1, Register cmp2, Register dst, Register src);
 668   void cmov_ltu(Register cmp1, Register cmp2, Register dst, Register src);
 669   void cmov_gt(Register cmp1, Register cmp2, Register dst, Register src);
 670   void cmov_gtu(Register cmp1, Register cmp2, Register dst, Register src);
 671 
       // Conditional moves between integer registers (dst, src), selected by
       // comparing the floating-point registers cmp1 and cmp2.
       // NOTE(review): is_single presumably selects a single-precision (true) or
       // double-precision (false) comparison - confirm against the implementation.
 672   void cmov_cmp_fp_eq(FloatRegister cmp1, FloatRegister cmp2, Register dst, Register src, bool is_single);
 673   void cmov_cmp_fp_ne(FloatRegister cmp1, FloatRegister cmp2, Register dst, Register src, bool is_single);
 674   void cmov_cmp_fp_le(FloatRegister cmp1, FloatRegister cmp2, Register dst, Register src, bool is_single);
 675   void cmov_cmp_fp_ge(FloatRegister cmp1, FloatRegister cmp2, Register dst, Register src, bool is_single);
 676   void cmov_cmp_fp_lt(FloatRegister cmp1, FloatRegister cmp2, Register dst, Register src, bool is_single);
 677   void cmov_cmp_fp_gt(FloatRegister cmp1, FloatRegister cmp2, Register dst, Register src, bool is_single);
 678 
       // Conditional moves between floating-point registers (dst, src), selected
       // by comparing the integer registers cmp1 and cmp2; the names ending in 'u'
       // are presumably the unsigned comparisons.
       // NOTE(review): is_single presumably selects the width of the value being
       // moved (single vs double precision) - confirm against the implementation.
 679   void cmov_fp_eq(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single);
 680   void cmov_fp_ne(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single);
 681   void cmov_fp_le(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single);
 682   void cmov_fp_leu(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single);
 683   void cmov_fp_ge(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single);
 684   void cmov_fp_geu(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single);
 685   void cmov_fp_lt(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single);
 686   void cmov_fp_ltu(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single);
 687   void cmov_fp_gt(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single);
 688   void cmov_fp_gtu(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single);
 689 
       // Conditional moves between floating-point registers (dst, src), selected
       // by comparing the floating-point registers cmp1 and cmp2. Two independent
       // width flags are taken: cmp_single for the comparison and cmov_single for
       // the move.
       // NOTE(review): true presumably means single precision for each flag - confirm.
 690   void cmov_fp_cmp_fp_eq(FloatRegister cmp1, FloatRegister cmp2, FloatRegister dst, FloatRegister src, bool cmp_single, bool cmov_single);
 691   void cmov_fp_cmp_fp_ne(FloatRegister cmp1, FloatRegister cmp2, FloatRegister dst, FloatRegister src, bool cmp_single, bool cmov_single);
 692   void cmov_fp_cmp_fp_le(FloatRegister cmp1, FloatRegister cmp2, FloatRegister dst, FloatRegister src, bool cmp_single, bool cmov_single);
 693   void cmov_fp_cmp_fp_ge(FloatRegister cmp1, FloatRegister cmp2, FloatRegister dst, FloatRegister src, bool cmp_single, bool cmov_single);
 694   void cmov_fp_cmp_fp_lt(FloatRegister cmp1, FloatRegister cmp2, FloatRegister dst, FloatRegister src, bool cmp_single, bool cmov_single);
 695   void cmov_fp_cmp_fp_gt(FloatRegister cmp1, FloatRegister cmp2, FloatRegister dst, FloatRegister src, bool cmp_single, bool cmov_single);
 696 
 697  public:
 698   // We try to follow RISC-V asm mnemonics.
 699   // But as we don't lay out a reachable GOT,
 700   // we often need to resort to movptr, li <48imm>.
 701   // https://github.com/riscv-non-isa/riscv-asm-manual/blob/main/src/asm-manual.adoc
 702 
 703   // Hotspot only uses the standard calling convention using x1/ra.
 704   // The alternative calling convention using x5/t0 is not used.
 705   // Using x5 as a temp causes the CPU to mispredict returns.
 706 
 707   // JALR, return address stack updates:
 708   // | rd is x1/x5 | rs1 is x1/x5 | rd=rs1 | RAS action
 709   // | ----------- | ------------ | ------ |-------------
 710   // |     No      |      No      |   -    | None
 711   // |     No      |      Yes     |   -    | Pop
 712   // |     Yes     |      No      |   -    | Push
 713   // |     Yes     |      Yes     |   No   | Pop, then push
 714   // |     Yes     |      Yes     |   Yes  | Push
 715   //
 716   // JAL, return address stack updates:
 717   // | rd is x1/x5 | RAS action
 718   // | ----------- | ----------
 719   // |     Yes     | Push
 720   // |     No      | None
 721   //
 722   // JUMPS   use Rd = x0/zero and Rs = x6/t1 or imm
 723   // CALLS   use Rd = x1/ra   and Rs = x6/t1 or imm (or x1/ra*)
 724   // RETURNS use Rd = x0/zero and Rs = x1/ra
 725   // *x1/ra should not normally be used as Rs for calls; special case only.
 726 
 727   // jump: jal x0, offset
 728   // For long reach uses temp register for:
 729   // la + jr
       // The address-taking overloads default temp to t1; the Label overload
       // defaults it to noreg.
 730   void j(const address dest, Register temp = t1);
 731   void j(const Address &dest, Register temp = t1);
 732   void j(Label &l, Register temp = noreg);
 733 
 734   // jump register: jalr x0, offset(rs)
       // NOTE(review): the parameter is named Rd, but going by the encoding above
       // it is the base register (rs) of the jump, with x0 as the link destination.
 735   void jr(Register Rd, int32_t offset = 0);
 736 
 737   // call: la + jalr x1
       // temp (t1 by default) is the register handed to the la + jalr sequence.
 738   void call(const address dest, Register temp = t1);
 739 
 740   // jalr: jalr x1, offset(rs)
       // Two-operand convenience form: the link register is fixed to x1 and the
       // offset defaults to 0.
 741   void jalr(Register Rs, int32_t offset = 0);
 742 
 743   // Emit a runtime call. Only invalidates the tmp register which
 744   // is used to keep the entry address for jalr/movptr.
 745   // Uses call() for intra code cache, else movptr + jalr.
 746   // Clobbers t1
 747   void rt_call(address dest, Register tmp = t1);
 748 
 749   // ret: return through ra, encoded as jalr x0, 0(x1)
 750   inline void ret() {
         const Register no_link = x0;   // Rd = x0/zero, as for every return
         const Register ra_reg  = x1;   // Rs = x1/ra, the return address
 751     Assembler::jalr(no_link, ra_reg, 0);
 752   }
 753 
 754   // Label-taking forms of the compare-with-zero branches.
       // is_far defaults to false.
       // NOTE(review): is_far presumably requests a longer-reach sequence when the
       // label may be out of conditional-branch range - confirm in the .cpp.
 755   void beqz(Register Rs, Label &l, bool is_far = false);
 756   void bnez(Register Rs, Label &l, bool is_far = false);
 757   void blez(Register Rs, Label &l, bool is_far = false);
 758   void bgez(Register Rs, Label &l, bool is_far = false);
 759   void bltz(Register Rs, Label &l, bool is_far = false);
 760   void bgtz(Register Rs, Label &l, bool is_far = false);
 761 
       // Label-taking two-register compare-and-branch forms. The address-taking
       // overloads of the same six names are generated by the INSN macro below.
 762   void beq (Register Rs1, Register Rs2, Label &L, bool is_far = false);
 763   void bne (Register Rs1, Register Rs2, Label &L, bool is_far = false);
 764   void blt (Register Rs1, Register Rs2, Label &L, bool is_far = false);
 765   void bge (Register Rs1, Register Rs2, Label &L, bool is_far = false);
 766   void bltu(Register Rs1, Register Rs2, Label &L, bool is_far = false);
 767   void bgeu(Register Rs1, Register Rs2, Label &L, bool is_far = false);
 768 
       // bgt/ble/bgtu/bleu: the remaining two-register relations, first with a
       // code address target, then with a Label target (is_far defaults to false).
 769   void bgt (Register Rs, Register Rt, const address dest);
 770   void ble (Register Rs, Register Rt, const address dest);
 771   void bgtu(Register Rs, Register Rt, const address dest);
 772   void bleu(Register Rs, Register Rt, const address dest);
 773 
 774   void bgt (Register Rs, Register Rt, Label &l, bool is_far = false);
 775   void ble (Register Rs, Register Rt, Label &l, bool is_far = false);
 776   void bgtu(Register Rs, Register Rt, Label &l, bool is_far = false);
 777   void bleu(Register Rs, Register Rt, Label &l, bool is_far = false);
 778 
     // INSN_ENTRY_RELOC(result_type, header) opens - and deliberately does not
     // close - the body of a relocating overload. The prologue it emits:
     //   1. guarantees that rtype is relocInfo::internal_word_type,
     //   2. records a relocation for InternalAddress(dest),
     //   3. creates an IncompressibleScope that stays alive for the statements
     //      appended by the user of the macro.
     // The user must supply the remaining statements and the closing brace.
 779 #define INSN_ENTRY_RELOC(result_type, header)                               \
 780   result_type header {                                                      \
 781     guarantee(rtype == relocInfo::internal_word_type,                       \
 782               "only internal_word_type relocs make sense here");            \
 783     relocate(InternalAddress(dest).rspec());                                \
 784     IncompressibleScope scope(this);  /* relocations */
 785 
     // INSN(NAME) defines two address-taking overloads of a conditional branch:
     //   NAME(Rs1, Rs2, dest)
     //     dest must be non-null; offset = dest - pc() must pass is_simm13 and be
     //     even (enforced with guarantee, so also in product builds). Emits
     //     Assembler::NAME(Rs1, Rs2, offset).
     //   NAME(Rs1, Rs2, dest, rtype)
     //     Runs the INSN_ENTRY_RELOC prologue, then forwards to the overload above.
 786 #define INSN(NAME)                                                                                       \
 787   void NAME(Register Rs1, Register Rs2, const address dest) {                                            \
 788     assert_cond(dest != nullptr);                                                                        \
 789     int64_t offset = dest - pc();                                                                        \
 790     guarantee(is_simm13(offset) && is_even(offset),                                                      \
 791               "offset is invalid: is_simm_13: %s offset: " INT64_FORMAT,                                 \
 792               BOOL_TO_STR(is_simm13(offset)), offset);                                                   \
 793     Assembler::NAME(Rs1, Rs2, offset);                                                                   \
 794   }                                                                                                      \
 795   INSN_ENTRY_RELOC(void, NAME(Register Rs1, Register Rs2, address dest, relocInfo::relocType rtype))     \
 796     NAME(Rs1, Rs2, dest);                                                                                \
 797   }
 798 
       // Instantiate both overloads for each of the six conditional branches.
 799   INSN(beq);
 800   INSN(bne);
 801   INSN(bge);
 802   INSN(bgeu);
 803   INSN(blt);
 804   INSN(bltu);
 805 
 806 #undef INSN
 807 
 808 #undef INSN_ENTRY_RELOC
 809 
       // Compare-and-branch on two floating-point registers to a Label, in a
       // float_* and a double_* family with identical signatures.
       // Both is_far and is_unordered default to false.
       // NOTE(review): is_unordered presumably decides whether the branch is taken
       // when the operands compare unordered (NaN) - confirm in the .cpp.
 810   void float_beq(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
 811   void float_bne(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
 812   void float_ble(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
 813   void float_bge(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
 814   void float_blt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
 815   void float_bgt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
 816 
 817   void double_beq(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
 818   void double_bne(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
 819   void double_ble(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
 820   void double_bge(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
 821   void double_blt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
 822   void double_bgt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
 823 
 824 private:
 825   // Range check for an offset built from a signed 20-bit upper immediate plus a
 826   // signed 12-bit lower immediate. The upper part reaches down to
       // 0xF...F80000000 (minus two G); the lower part can subtract at most 0x800
       // (two K) from what the upper part loaded. Accepted range: [-2G - 2K, 2G - 2K).
 827   bool is_valid_32bit_offset(int64_t x) {
 828     constexpr int64_t two_gig = (2 * G);
 829     constexpr int64_t two_kib = (2 * K);
         constexpr int64_t lowest  = -two_gig - two_kib;  // inclusive
         constexpr int64_t limit   =  two_gig - two_kib;  // exclusive
 830     return lowest <= x && x < limit;
 831   }
 832 
 833   // Relocation-safe reach check: x must be a valid 32-bit offset when measured
 834   // from both bounds of the code cache, so that an auipc placed anywhere in the
       // code cache - including after the code is relocated - still reaches x.
 835   bool is_32bit_offset_from_codecache(int64_t x) {
 836     const int64_t cache_lo = (int64_t)CodeCache::low_bound();
 837     const int64_t cache_hi = (int64_t)CodeCache::high_bound();
         if (!is_valid_32bit_offset(x - cache_lo)) {
           return false;
         }
 838     return is_valid_32bit_offset(x - cache_hi);
 839   }
 840 
 841 public:
 842   // Stack push and pop individual 64 bit registers
 843   void push_reg(Register Rs);
 844   void pop_reg(Register Rd);
 845 
       // Register-set variants, addressed through the register passed as `stack`.
       // NOTE(review): the int result is presumably the number of words
       // pushed/popped - confirm against the implementation.
 846   int push_reg(RegSet regset, Register stack);
 847   int pop_reg(RegSet regset, Register stack);
 848 
       // Same for a set of floating-point registers.
 849   int push_fp(FloatRegSet regset, Register stack);
 850   int pop_fp(FloatRegSet regset, Register stack);
 851 
       // Vector register sets; only declared when building with COMPILER2.
 852 #ifdef COMPILER2
 853   int push_v(VectorRegSet regset, Register stack);
 854   int pop_v(VectorRegSet regset, Register stack);
 855 #endif // COMPILER2
 856 
 857   // Push and pop everything that might be clobbered by a native
 858   // runtime call except t0 and t1. (They are always
 859   // temporary registers, so we don't have to protect them.)
 860   // Additional registers can be excluded in a passed RegSet.
       // The argument-free wrappers below pass an empty RegSet.
 861   void push_call_clobbered_registers_except(RegSet exclude);
 862   void pop_call_clobbered_registers_except(RegSet exclude);
 863 
 864   void push_call_clobbered_registers() {
         // Convenience form of the _except variant with nothing excluded.
         RegSet nothing_excluded = RegSet();
 865     push_call_clobbered_registers_except(nothing_excluded);
 866   }
 867   void pop_call_clobbered_registers() {
         // Convenience form of the _except variant with nothing excluded.
         RegSet nothing_excluded = RegSet();
 868     pop_call_clobbered_registers_except(nothing_excluded);
 869   }
 870 
       // Save / restore CPU state. Vector state is opt-in: save_vectors and
       // restore_vectors default to false and vector_size_in_bytes to 0.
 871   void push_CPU_state(bool save_vectors = false, int vector_size_in_bytes = 0);
 872   void pop_CPU_state(bool restore_vectors = false, int vector_size_in_bytes = 0);
 873 
       // Continuation fast-path bookkeeping; java_thread defaults to xthread.
       // NOTE(review): exact semantics live in the .cpp - confirm there.
 874   void push_cont_fastpath(Register java_thread = xthread);
 875   void pop_cont_fastpath(Register java_thread = xthread);
 876 
 877   // If the heap base register is in use, reinitialize it with the correct value.
 878   void reinit_heapbase();
 879 
 880   void bind(Label& L) {
         // Bind through the base assembler first, then clear the code buffer's
         // last-instruction record.
 881     Assembler::bind(L);
 882     // fences across basic blocks should not be merged
         auto* const buffer = code();
 883     buffer->clear_last_insn();
 884   }
 885 
       // Pointer-to-member types used by wrap_label:
       //   compare_and_branch_insn       - (Rs1, Rs2, const address dest)
       //   compare_and_branch_label_insn - (Rs1, Rs2, Label &L, bool is_far)
       //   jal_jalr_insn                 - (Rt, address dest); the load INSN macro
       //                                   below also passes its (Rd, address)
       //                                   overload as this type.
 886   typedef void (MacroAssembler::* compare_and_branch_insn)(Register Rs1, Register Rs2, const address dest);
 887   typedef void (MacroAssembler::* compare_and_branch_label_insn)(Register Rs1, Register Rs2, Label &L, bool is_far);
 888   typedef void (MacroAssembler::* jal_jalr_insn)(Register Rt, address dest);
 889 
       // Adapt an address-taking emitter so that it can target a Label.
       // NOTE(review): neg_insn is presumably the inverted-condition branch used
       // for the is_far case - confirm in the .cpp.
 890   void wrap_label(Register r, Label &L, jal_jalr_insn insn);
 891   void wrap_label(Register r1, Register r2, Label &L,
 892                   compare_and_branch_insn insn,
 893                   compare_and_branch_label_insn neg_insn, bool is_far = false);
 894 
       // la: load an address into Rd from a Label, a raw address or an Address.
       // The overload with the int32_t& out-parameter reports an offset through it.
       // NOTE(review): presumably the low part of the address, left for the
       // caller to fold into a following instruction (compare mv below) - confirm.
 895   void la(Register Rd, Label &label);
 896   void la(Register Rd, const address addr);
 897   void la(Register Rd, const address addr, int32_t &offset);
 898   void la(Register Rd, const Address &adr);
 899 
       // li family: load an immediate of the stated width into Rd.
 900   void li16u(Register Rd, uint16_t imm);
 901   void li32(Register Rd, int32_t imm);
 902   void li  (Register Rd, int64_t imm);  // optimized load immediate
 903 
 904   // mv: load the raw value of addr into Rd via li
 905   void mv(Register Rd, address addr) {
         const int64_t imm = (int64_t)addr;
         li(Rd, imm);
       }
 906   void mv(Register Rd, address addr, int32_t &offset) {
 907     // Peel the low 12 bits off addr as a sign-extended value (the shift pair
 908     // performs the sign extension) so that they fit the immediate field of a
         // jalr or load/store instruction; Rd receives the remainder.
 909     const int32_t addr_lo32 = (int32_t)(int64_t)addr;
         offset = (addr_lo32 << 20) >> 20;
 910     li(Rd, (int64_t)addr - offset);
 911   }
 912 
 913   template<typename T, ENABLE_IF(std::is_integral<T>::value)>
 914   inline void mv(Register Rd, T o) {
         // Integral constants of any width are converted to int64_t and loaded with li.
         const int64_t imm = (int64_t)o;
         li(Rd, imm);
       }
 915 
 916   void mv(Register Rd, RegisterOrConstant src) {
         // Dispatch on what src holds: a constant goes to the constant mv,
         // anything else to the register-to-register mv.
 917     if (!src.is_register()) {
 918       mv(Rd, src.as_constant());
 919       return;
 920     }
 921     mv(Rd, src.as_register());
 922   }
 923 
 924   // Generates a load of a 48-bit constant which can be
 925   // patched to any 48-bit constant, i.e. address.
 926   // In the common case, supply an additional temp register
 927   // to shorten the instruction sequence (tmp defaults to noreg).
 928   void movptr(Register Rd, const Address &addr, Register tmp = noreg);
 929   void movptr(Register Rd, address addr, Register tmp = noreg);
 930   void movptr(Register Rd, address addr, int32_t &offset, Register tmp = noreg);
 931 
 932  private:
       // Private helpers behind movptr: movptr1 takes no temp register, movptr2
       // takes one.
       // NOTE(review): presumably the long and the temp-assisted short sequence
       // respectively - confirm in the .cpp.
 933   void movptr1(Register Rd, uintptr_t addr, int32_t &offset);
 934   void movptr2(Register Rd, uintptr_t addr, int32_t &offset, Register tmp);
 935  public:
 936   // float imm move
       // The static can_*_imm_load predicates are queries on a half / single /
       // double precision constant (the half value is passed as a short); the
       // fli_* members take the same kinds of constants and a destination register.
       // NOTE(review): presumably the predicate must hold before the matching
       // fli_* is used - confirm in the .cpp.
 937   static bool can_hf_imm_load(short imm);
 938   static bool can_fp_imm_load(float imm);
 939   static bool can_dp_imm_load(double imm);
 940   void fli_h(FloatRegister Rd, short imm);
 941   void fli_s(FloatRegister Rd, float imm);
 942   void fli_d(FloatRegister Rd, double imm);
 943 
 944   // arith
       // Register +/- 64-bit immediate forms; tmp defaults to t0.
       // NOTE(review): tmp is presumably used only when the immediate does not fit
       // a single instruction - confirm in the .cpp.
 945   void add (Register Rd, Register Rn, int64_t increment, Register tmp = t0);
 946   void sub (Register Rd, Register Rn, int64_t decrement, Register tmp = t0);
 947   void addw(Register Rd, Register Rn, int64_t increment, Register tmp = t0);
 948   void subw(Register Rd, Register Rn, int64_t decrement, Register tmp = t0);
 949 
 950   void subi(Register Rd, Register Rn, int64_t decrement) {
         // Subtract-immediate is emitted as addi with the negated amount, so the
         // negated amount has to fit a signed 12-bit immediate.
 951     assert(is_simm12(-decrement), "Must be");
         const int64_t negated = -decrement;
 952     addi(Rd, Rn, negated);
 953   }
 954 
 955   void subiw(Register Rd, Register Rn, int64_t decrement) {
         // Word variant of subi: emitted as addiw with the negated amount, which
         // has to fit a signed 12-bit immediate.
 956     assert(is_simm12(-decrement), "Must be");
         const int64_t negated = -decrement;
 957     addiw(Rd, Rn, negated);
 958   }
 959 
     // INSN(NAME) defines the three-register form NAME(Rd, Rs1, Rs2) as a plain
     // forwarder to Assembler::NAME.
     // NOTE(review): presumably needed because the immediate-taking add/sub/addw/
     // subw declared above hide the Assembler overloads of the same names.
 960 #define INSN(NAME)                                               \
 961   inline void NAME(Register Rd, Register Rs1, Register Rs2) {    \
 962     Assembler::NAME(Rd, Rs1, Rs2);                               \
 963   }
 964 
 965   INSN(add);
 966   INSN(addw);
 967   INSN(sub);
 968   INSN(subw);
 969 
 970 #undef INSN
 971 
 972   // logic
       // NOTE(review): the 'w' suffix presumably marks 32-bit (word) variants of
       // and/or/xor - confirm in the .cpp.
 973   void andrw(Register Rd, Register Rs1, Register Rs2);
 974   void orrw(Register Rd, Register Rs1, Register Rs2);
 975   void xorrw(Register Rd, Register Rs1, Register Rs2);
 976 
 977   // logic with negate
       // NOTE(review): presumably Rd = Rs1 op ~Rs2 - confirm in the .cpp.
 978   void andn(Register Rd, Register Rs1, Register Rs2);
 979   void orn(Register Rd, Register Rs1, Register Rs2);
 980 
 981   // reverse bytes
       // Both forms take two temporaries, defaulting to t0 and t1.
 982   void revbw(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1);  // reverse bytes in lower word, sign-extend
 983   void revb(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);  // reverse bytes in doubleword
 984 
       // Rotates by a register or an immediate amount; tmp defaults to t0.
 985   void ror(Register dst, Register src, Register shift, Register tmp = t0);
 986   void ror(Register dst, Register src, uint32_t shift, Register tmp = t0);
 987   void rolw(Register dst, Register src, uint32_t shift, Register tmp = t0);
 988 
       // NOTE(review): presumably ORs src into the memory word at adr using
       // tmp1/tmp2 (t0/t1 by default) as scratch - confirm in the .cpp.
 989   void orptr(Address adr, RegisterOrConstant src, Register tmp1 = t0, Register tmp2 = t1);
 990 
 991 // Load and Store Instructions
     //
     // INSN_ENTRY_RELOC(result_type, header) opens - and deliberately does not
     // close - the body of a relocating overload: it guarantees that rtype is
     // relocInfo::internal_word_type, records a relocation for
     // InternalAddress(dest) and creates an IncompressibleScope for the statements
     // that follow. The user supplies those statements and the closing brace.
 992 #define INSN_ENTRY_RELOC(result_type, header)                               \
 993   result_type header {                                                      \
 994     guarantee(rtype == relocInfo::internal_word_type,                       \
 995               "only internal_word_type relocs make sense here");            \
 996     relocate(InternalAddress(dest).rspec());                                \
 997     IncompressibleScope scope(this);  /* relocations */
 998 
     // INSN(NAME) defines four overloads of an integer load into Rd:
     //   NAME(Rd, dest)
     //     dest must be non-null. If CodeCache::contains(dest), the address is
     //     formed pc-relatively: auipc(Rd, distance + 0x800) followed by the load
     //     with the sign-extended low 12 bits of distance (distance is asserted to
     //     satisfy is_valid_32bit_offset). Otherwise movptr materializes dest and
     //     the load uses the offset movptr reports. Rd is the address register in
     //     both paths, so no temp is needed.
     //   NAME(Rd, dest, rtype)
     //     INSN_ENTRY_RELOC prologue, then NAME(Rd, dest).
     //   NAME(Rd, adr, temp = t0)
     //     Address::literal: calls NAME(Rd, adr.target()) under
     //     relocate(adr.rspec(), ...).
     //     Address::base_plus_offset: one load if the offset passes is_simm12;
     //     otherwise the offset is split into its sign-extended low 12 bits and
     //     the remainder, la computes base + remainder, and the load applies the
     //     low part. la targets Rd, except when Rd is also the base register, in
     //     which case it targets temp.
     //     Any other mode: ShouldNotReachHere().
     //   NAME(Rd, L)
     //     Label form, routed through wrap_label with the (Rd, address) overload.
 999 #define INSN(NAME)                                                                                 \
1000   void NAME(Register Rd, address dest) {                                                           \
1001     assert_cond(dest != nullptr);                                                                  \
1002     if (CodeCache::contains(dest)) {                                                               \
1003       int64_t distance = dest - pc();                                                              \
1004       assert(is_valid_32bit_offset(distance), "Must be");                                          \
1005       auipc(Rd, (int32_t)distance + 0x800);                                                        \
1006       Assembler::NAME(Rd, Rd, ((int32_t)distance << 20) >> 20);                                    \
1007     } else {                                                                                       \
1008       int32_t offset = 0;                                                                          \
1009       movptr(Rd, dest, offset);                                                                    \
1010       Assembler::NAME(Rd, Rd, offset);                                                             \
1011     }                                                                                              \
1012   }                                                                                                \
1013   INSN_ENTRY_RELOC(void, NAME(Register Rd, address dest, relocInfo::relocType rtype))              \
1014     NAME(Rd, dest);                                                                                \
1015   }                                                                                                \
1016   void NAME(Register Rd, const Address &adr, Register temp = t0) {                                 \
1017     switch (adr.getMode()) {                                                                       \
1018       case Address::literal: {                                                                     \
1019         relocate(adr.rspec(), [&] {                                                                \
1020           NAME(Rd, adr.target());                                                                  \
1021         });                                                                                        \
1022         break;                                                                                     \
1023       }                                                                                            \
1024       case Address::base_plus_offset: {                                                            \
1025         if (is_simm12(adr.offset())) {                                                             \
1026           Assembler::NAME(Rd, adr.base(), adr.offset());                                           \
1027         } else {                                                                                   \
1028           int32_t offset = ((int32_t)adr.offset() << 20) >> 20;                                    \
1029           if (Rd == adr.base()) {                                                                  \
1030             la(temp, Address(adr.base(), adr.offset() - offset));                                  \
1031             Assembler::NAME(Rd, temp, offset);                                                     \
1032           } else {                                                                                 \
1033             la(Rd, Address(adr.base(), adr.offset() - offset));                                    \
1034             Assembler::NAME(Rd, Rd, offset);                                                       \
1035           }                                                                                        \
1036         }                                                                                          \
1037         break;                                                                                     \
1038       }                                                                                            \
1039       default:                                                                                     \
1040         ShouldNotReachHere();                                                                      \
1041     }                                                                                              \
1042   }                                                                                                \
1043   void NAME(Register Rd, Label &L) {                                                               \
1044     wrap_label(Rd, L, &MacroAssembler::NAME);                                                      \
1045   }
1046 
       // Instantiate the four overloads for every integer load width.
1047   INSN(lb);
1048   INSN(lbu);
1049   INSN(lh);
1050   INSN(lhu);
1051   INSN(lw);
1052   INSN(lwu);
1053   INSN(ld);
1054 
1055 #undef INSN
1056 
     // INSN(NAME) defines three overloads of a floating-point load into Rd. The
     // destination is a FloatRegister and cannot hold an address, so every
     // address computation goes through the integer register temp (t0 by default).
     //   NAME(Rd, dest, temp)
     //     dest must be non-null. Inside the code cache: auipc(temp, distance +
     //     0x800) plus the load with the sign-extended low 12 bits of distance
     //     (distance asserted with is_valid_32bit_offset). Outside: movptr into
     //     temp, then the load with the offset movptr reports.
     //   NAME(Rd, dest, rtype, temp)
     //     INSN_ENTRY_RELOC prologue, then NAME(Rd, dest, temp).
     //   NAME(Rd, adr, temp)
     //     Address::literal: NAME(Rd, adr.target(), temp) under
     //     relocate(adr.rspec(), ...).
     //     Address::base_plus_offset: one load if the offset passes is_simm12;
     //     otherwise la(temp, base + remainder) and the load applies the
     //     sign-extended low 12 bits.
     //     Any other mode: ShouldNotReachHere().
1057 #define INSN(NAME)                                                                                 \
1058   void NAME(FloatRegister Rd, address dest, Register temp = t0) {                                  \
1059     assert_cond(dest != nullptr);                                                                  \
1060     if (CodeCache::contains(dest)) {                                                               \
1061       int64_t distance = dest - pc();                                                              \
1062       assert(is_valid_32bit_offset(distance), "Must be");                                          \
1063       auipc(temp, (int32_t)distance + 0x800);                                                      \
1064       Assembler::NAME(Rd, temp, ((int32_t)distance << 20) >> 20);                                  \
1065     } else {                                                                                       \
1066       int32_t offset = 0;                                                                          \
1067       movptr(temp, dest, offset);                                                                  \
1068       Assembler::NAME(Rd, temp, offset);                                                           \
1069     }                                                                                              \
1070   }                                                                                                \
1071   INSN_ENTRY_RELOC(void, NAME(FloatRegister Rd, address dest,                                      \
1072                               relocInfo::relocType rtype, Register temp = t0))                     \
1073     NAME(Rd, dest, temp);                                                                          \
1074   }                                                                                                \
1075   void NAME(FloatRegister Rd, const Address &adr, Register temp = t0) {                            \
1076     switch (adr.getMode()) {                                                                       \
1077       case Address::literal: {                                                                     \
1078         relocate(adr.rspec(), [&] {                                                                \
1079           NAME(Rd, adr.target(), temp);                                                            \
1080         });                                                                                        \
1081         break;                                                                                     \
1082       }                                                                                            \
1083       case Address::base_plus_offset: {                                                            \
1084         if (is_simm12(adr.offset())) {                                                             \
1085           Assembler::NAME(Rd, adr.base(), adr.offset());                                           \
1086         } else {                                                                                   \
1087           int32_t offset = ((int32_t)adr.offset() << 20) >> 20;                                    \
1088           la(temp, Address(adr.base(), adr.offset() - offset));                                    \
1089           Assembler::NAME(Rd, temp, offset);                                                       \
1090         }                                                                                          \
1091         break;                                                                                     \
1092       }                                                                                            \
1093       default:                                                                                     \
1094         ShouldNotReachHere();                                                                      \
1095     }                                                                                              \
1096   }
1097 
       // Instantiate for half, single and double precision loads.
1098   INSN(flh);
1099   INSN(flw);
1100   INSN(fld);
1101 
1102 #undef INSN
1103 
     // INSN(NAME, REGISTER) defines the relocating store overload
     // NAME(Rs, dest, rtype, temp = t0) for a source register of type REGISTER:
     // the INSN_ENTRY_RELOC prologue followed by a call to NAME(Rs, dest, temp).
     // The overloads being forwarded to are defined by the INSN macros further
     // down in this class.
1104 #define INSN(NAME, REGISTER)                                                                       \
1105   INSN_ENTRY_RELOC(void, NAME(REGISTER Rs, address dest,                                           \
1106                               relocInfo::relocType rtype, Register temp = t0))                     \
1107     NAME(Rs, dest, temp);                                                                          \
1108   }
1109 
       // Integer stores take a Register source, floating-point stores a FloatRegister.
1110   INSN(sb,  Register);
1111   INSN(sh,  Register);
1112   INSN(sw,  Register);
1113   INSN(sd,  Register);
1114   INSN(fsw, FloatRegister);
1115   INSN(fsd, FloatRegister);
1116 
1117 #undef INSN
1118 
     // INSN(NAME) defines two overloads of an integer store of Rs. Whenever temp
     // (t0 by default) is needed to hold the address it must differ from Rs,
     // which assert_different_registers checks.
     //   NAME(Rs, dest, temp)
     //     dest must be non-null. Inside the code cache: auipc(temp, distance +
     //     0x800) plus the store with the sign-extended low 12 bits of distance
     //     (distance asserted with is_valid_32bit_offset). Outside: movptr into
     //     temp, then the store with the offset movptr reports.
     //   NAME(Rs, adr, temp)
     //     Address::literal: NAME(Rs, adr.target(), temp) under
     //     relocate(adr.rspec(), ...).
     //     Address::base_plus_offset: one store if the offset passes is_simm12
     //     (temp is unused and not checked on this path); otherwise
     //     la(temp, base + remainder) and the store applies the sign-extended low
     //     12 bits.
     //     Any other mode: ShouldNotReachHere().
1119 #define INSN(NAME)                                                                                 \
1120   void NAME(Register Rs, address dest, Register temp = t0) {                                       \
1121     assert_cond(dest != nullptr);                                                                  \
1122     assert_different_registers(Rs, temp);                                                          \
1123     if (CodeCache::contains(dest)) {                                                               \
1124       int64_t distance = dest - pc();                                                              \
1125       assert(is_valid_32bit_offset(distance), "Must be");                                          \
1126       auipc(temp, (int32_t)distance + 0x800);                                                      \
1127       Assembler::NAME(Rs, temp, ((int32_t)distance << 20) >> 20);                                  \
1128     } else {                                                                                       \
1129       int32_t offset = 0;                                                                          \
1130       movptr(temp, dest, offset);                                                                  \
1131       Assembler::NAME(Rs, temp, offset);                                                           \
1132     }                                                                                              \
1133   }                                                                                                \
1134   void NAME(Register Rs, const Address &adr, Register temp = t0) {                                 \
1135     switch (adr.getMode()) {                                                                       \
1136       case Address::literal: {                                                                     \
1137         assert_different_registers(Rs, temp);                                                      \
1138         relocate(adr.rspec(), [&] {                                                                \
1139           NAME(Rs, adr.target(), temp);                                                            \
1140         });                                                                                        \
1141         break;                                                                                     \
1142       }                                                                                            \
1143       case Address::base_plus_offset: {                                                            \
1144         if (is_simm12(adr.offset())) {                                                             \
1145           Assembler::NAME(Rs, adr.base(), adr.offset());                                           \
1146         } else {                                                                                   \
1147           assert_different_registers(Rs, temp);                                                    \
1148           int32_t offset = ((int32_t)adr.offset() << 20) >> 20;                                    \
1149           la(temp, Address(adr.base(), adr.offset() - offset));                                    \
1150           Assembler::NAME(Rs, temp, offset);                                                       \
1151         }                                                                                          \
1152         break;                                                                                     \
1153       }                                                                                            \
1154       default:                                                                                     \
1155         ShouldNotReachHere();                                                                      \
1156     }                                                                                              \
1157   }
1158 
       // Instantiate for every integer store width.
1159   INSN(sb);
1160   INSN(sh);
1161   INSN(sw);
1162   INSN(sd);
1163 
1164 #undef INSN
1165 
// Float-register stores (instantiated for fsw and fsd below), taking either a
// raw target address or an Address operand. 'temp' holds the computed base
// whenever the target cannot be encoded directly.
#define INSN(NAME) \
  void NAME(FloatRegister Rs, address dest, Register temp = t0) { \
    assert_cond(dest != nullptr); \
    if (!CodeCache::contains(dest)) { \
      /* target is not in the code cache: build the address with movptr */ \
      int32_t low = 0; \
      movptr(temp, dest, low); \
      Assembler::NAME(Rs, temp, low); \
      return; \
    } \
    /* target is in the code cache: auipc + store, relative to pc() */ \
    int64_t distance = dest - pc(); \
    assert(is_valid_32bit_offset(distance), "Must be"); \
    auipc(temp, (int32_t)distance + 0x800); \
    Assembler::NAME(Rs, temp, ((int32_t)distance << 20) >> 20); \
  } \
  void NAME(FloatRegister Rs, const Address &adr, Register temp = t0) { \
    switch (adr.getMode()) { \
      case Address::literal: \
        relocate(adr.rspec(), [&] { \
          NAME(Rs, adr.target(), temp); \
        }); \
        return; \
      case Address::base_plus_offset: \
        if (!is_simm12(adr.offset())) { \
          /* keep the sign-extended low 12 bits as the store displacement */ \
          /* and fold the remainder of the offset into temp */ \
          int32_t low12 = ((int32_t)adr.offset() << 20) >> 20; \
          la(temp, Address(adr.base(), adr.offset() - low12)); \
          Assembler::NAME(Rs, temp, low12); \
        } else { \
          Assembler::NAME(Rs, adr.base(), adr.offset()); \
        } \
        return; \
      default: \
        ShouldNotReachHere(); \
    } \
  }
1202 
1203   INSN(fsw);
1204   INSN(fsd);
1205 
1206 #undef INSN
1207 
1208 #undef INSN_ENTRY_RELOC
1209 
1210   void cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp, Label &succeed, Label *fail);
1211   void cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, Label &succeed, Label *fail);
1212   void cmpxchg(Register addr, Register expected,
1213                Register new_val,
1214                Assembler::operand_size size,
1215                Assembler::Aqrl acquire, Assembler::Aqrl release,
1216                Register result, bool result_as_bool = false);
1217   void weak_cmpxchg(Register addr, Register expected,
1218                     Register new_val,
1219                     Assembler::operand_size size,
1220                     Assembler::Aqrl acquire, Assembler::Aqrl release,
1221                     Register result);
1222   void cmpxchg_narrow_value_helper(Register addr, Register expected, Register new_val,
1223                                    Assembler::operand_size size,
1224                                    Register shift, Register mask, Register aligned_addr);
1225   void cmpxchg_narrow_value(Register addr, Register expected,
1226                             Register new_val,
1227                             Assembler::operand_size size,
1228                             Assembler::Aqrl acquire, Assembler::Aqrl release,
1229                             Register result, bool result_as_bool,
1230                             Register tmp1, Register tmp2, Register tmp3);
1231   void weak_cmpxchg_narrow_value(Register addr, Register expected,
1232                                  Register new_val,
1233                                  Assembler::operand_size size,
1234                                  Assembler::Aqrl acquire, Assembler::Aqrl release,
1235                                  Register result,
1236                                  Register tmp1, Register tmp2, Register tmp3);
1237 
1238   void atomic_add(Register prev, RegisterOrConstant incr, Register addr);
1239   void atomic_addw(Register prev, RegisterOrConstant incr, Register addr);
1240   void atomic_addal(Register prev, RegisterOrConstant incr, Register addr);
1241   void atomic_addalw(Register prev, RegisterOrConstant incr, Register addr);
1242 
1243   void atomic_xchg(Register prev, Register newv, Register addr);
1244   void atomic_xchgw(Register prev, Register newv, Register addr);
1245   void atomic_xchgal(Register prev, Register newv, Register addr);
1246   void atomic_xchgalw(Register prev, Register newv, Register addr);
1247   void atomic_xchgwu(Register prev, Register newv, Register addr);
1248   void atomic_xchgalwu(Register prev, Register newv, Register addr);
1249 
1250   void atomic_cas(Register prev, Register newv, Register addr, Assembler::operand_size size,
1251               Assembler::Aqrl acquire = Assembler::relaxed, Assembler::Aqrl release = Assembler::relaxed);
1252 
1253   // Emit a far call/jump. Only invalidates the tmp register which
1254   // is used to keep the entry address for jalr.
1255   // The address must be inside the code cache.
1256   // Supported entry.rspec():
1257   // - relocInfo::external_word_type
1258   // - relocInfo::runtime_call_type
1259   // - relocInfo::none
  // Clobbers t1 by default.
1261   void far_call(const Address &entry, Register tmp = t1);
1262   void far_jump(const Address &entry, Register tmp = t1);
1263 
1264   static int far_branch_size() {
1265       return 2 * MacroAssembler::instruction_size;  // auipc + jalr, see far_call() & far_jump()
1266   }
1267 
1268   void load_byte_map_base(Register reg);
1269 
1270   void bang_stack_with_offset(int offset) {
1271     // stack grows down, caller passes positive offset
1272     assert(offset > 0, "must bang with negative offset");
1273     sub(t0, sp, offset);
1274     sd(zr, Address(t0));
1275   }
1276 
1277   virtual void _call_Unimplemented(address call_site) {
1278     mv(t1, call_site);
1279   }
1280 
1281   #define call_Unimplemented() _call_Unimplemented((address)__PRETTY_FUNCTION__)
1282 
1283   // Frame creation and destruction shared between JITs.
1284   void build_frame(int framesize);
1285   void remove_frame(int framesize);
1286 
1287   void reserved_stack_check();
1288 
1289   void get_polling_page(Register dest, relocInfo::relocType rtype);
1290   void read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype);
1291 
1292   // RISCV64 OpenJDK uses three different types of calls:
1293   //
1294   //   - far call: auipc reg, pc_relative_offset; jalr ra, reg, offset
  //     The offset has the range [-(2G + 2K), 2G - 2K). Addresses in the code
  //     cache that are out of this range require an indirect call.
1297   //     If a jump is needed rather than a call, a far jump 'jalr x0, reg, offset'
1298   //     can be used instead.
1299   //     All instructions are embedded at a call site.
1300   //
1301   //   - indirect call: movptr + jalr
1302   //     This can reach anywhere in the address space, but it cannot be patched
1303   //     while code is running, so it must only be modified at a safepoint.
1304   //     This form of call is most suitable for targets at fixed addresses,
1305   //     which will never be patched.
1306   //
1307   //   - reloc call:
1308   //     This too can reach anywhere in the address space but is only available
1309   //     in C1/C2-generated code (nmethod).
1310   //
1311   //     [Main code section]
1312   //       auipc
1313   //       ld <address_from_stub_section>
1314   //       jalr
1315   //
1316   //     [Stub section]
1317   //     address stub:
1318   //       <64-bit destination address>
1319   //
1320   //    To change the destination we simply atomically store the new
1321   //    address in the stub section.
1322   //    There is a benign race in that the other thread might observe the old
1323   //    64-bit destination address before it observes the new address. That does
1324   //    not matter because the destination method has been invalidated, so there
1325   //    will be a trap at its start.
1326 
1327   // Emit a reloc call and create a stub to hold the entry point address.
1328   // Supported entry.rspec():
1329   // - relocInfo::runtime_call_type
1330   // - relocInfo::opt_virtual_call_type
1331   // - relocInfo::static_call_type
1332   // - relocInfo::virtual_call_type
1333   //
1334   // Return: the call PC or nullptr if CodeCache is full.
1335   address reloc_call(Address entry, Register tmp = t1);
1336 
1337   address ic_call(address entry, jint method_index = 0);
1338   static int ic_check_size();
1339   int ic_check(int end_alignment = MacroAssembler::instruction_size);
1340 
1341   // Support for memory inc/dec
1342   // n.b. increment/decrement calls with an Address destination will
1343   // need to use a scratch register to load the value to be
1344   // incremented. increment/decrement calls which add or subtract a
1345   // constant value other than sign-extended 12-bit immediate will need
1346   // to use a 2nd scratch register to hold the constant. so, an address
1347   // increment/decrement may trash both t0 and t1.
1348 
1349   void increment(const Address dst, int64_t value = 1, Register tmp1 = t0, Register tmp2 = t1);
1350   void incrementw(const Address dst, int32_t value = 1, Register tmp1 = t0, Register tmp2 = t1);
1351 
1352   void decrement(const Address dst, int64_t value = 1, Register tmp1 = t0, Register tmp2 = t1);
1353   void decrementw(const Address dst, int32_t value = 1, Register tmp1 = t0, Register tmp2 = t1);
1354 
1355   void clinit_barrier(Register klass, Register tmp, Label* L_fast_path = nullptr, Label* L_slow_path = nullptr);
1356 
1357   void load_method_holder_cld(Register result, Register method);
1358   void load_method_holder(Register holder, Register method);
1359 
1360   void compute_index(Register str1, Register trailing_zeros, Register match_mask,
1361                      Register result, Register char_tmp, Register tmp,
1362                      bool haystack_isL);
1363   void compute_match_mask(Register src, Register pattern, Register match_mask,
1364                           Register mask1, Register mask2);
1365 
1366   // CRC32 code for java.util.zip.CRC32::updateBytes() intrinsic.
1367   void kernel_crc32(Register crc, Register buf, Register len,
1368         Register table0, Register table1, Register table2, Register table3,
1369         Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5, Register tmp6);
1370   void update_word_crc32(Register crc, Register v, Register tmp1, Register tmp2, Register tmp3,
1371         Register table0, Register table1, Register table2, Register table3,
1372         bool upper);
1373   void update_byte_crc32(Register crc, Register val, Register table);
1374 
1375 #ifdef COMPILER2
1376   void vector_update_crc32(Register crc, Register buf, Register len,
1377                            Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5,
1378                            Register table0, Register table3);
1379   void kernel_crc32_vclmul_fold(Register crc, Register buf, Register len,
1380               Register table0, Register table1, Register table2, Register table3,
1381               Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5);
1382   void crc32_vclmul_fold_to_16_bytes_vectorsize_32(VectorRegister vx, VectorRegister vy, VectorRegister vt,
1383                             VectorRegister vtmp1, VectorRegister vtmp2, VectorRegister vtmp3, VectorRegister vtmp4);
1384   void kernel_crc32_vclmul_fold_vectorsize_32(Register crc, Register buf, Register len,
1385                                               Register vclmul_table, Register tmp1, Register tmp2);
1386   void crc32_vclmul_fold_16_bytes_vectorsize_16(VectorRegister vx, VectorRegister vt,
1387                       VectorRegister vtmp1, VectorRegister vtmp2, VectorRegister vtmp3, VectorRegister vtmp4,
1388                       Register buf, Register tmp, const int STEP);
1389   void crc32_vclmul_fold_16_bytes_vectorsize_16_2(VectorRegister vx, VectorRegister vy, VectorRegister vt,
1390                       VectorRegister vtmp1, VectorRegister vtmp2, VectorRegister vtmp3, VectorRegister vtmp4,
1391                       Register tmp);
1392   void crc32_vclmul_fold_16_bytes_vectorsize_16_3(VectorRegister vx, VectorRegister vy, VectorRegister vt,
1393                       VectorRegister vtmp1, VectorRegister vtmp2, VectorRegister vtmp3, VectorRegister vtmp4,
1394                       Register tmp);
1395   void kernel_crc32_vclmul_fold_vectorsize_16(Register crc, Register buf, Register len,
1396                                               Register vclmul_table, Register tmp1, Register tmp2);
1397 
1398   void mul_add(Register out, Register in, Register offset,
1399                Register len, Register k, Register tmp);
1400   void wide_mul(Register prod_lo, Register prod_hi, Register n, Register m);
1401   void wide_madd(Register sum_lo, Register sum_hi, Register n,
1402                  Register m, Register tmp1, Register tmp2);
1403   void cad(Register dst, Register src1, Register src2, Register carry);
1404   void cadc(Register dst, Register src1, Register src2, Register carry);
1405   void adc(Register dst, Register src1, Register src2, Register carry);
1406   void add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo,
1407                        Register src1, Register src2, Register carry);
1408   void multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart,
1409                              Register y, Register y_idx, Register z,
1410                              Register carry, Register product,
1411                              Register idx, Register kdx);
1412   void multiply_128_x_128_loop(Register y, Register z,
1413                                Register carry, Register carry2,
1414                                Register idx, Register jdx,
1415                                Register yz_idx1, Register yz_idx2,
1416                                Register tmp, Register tmp3, Register tmp4,
1417                                Register tmp6, Register product_hi);
1418   void multiply_to_len(Register x, Register xlen, Register y, Register ylen,
1419                        Register z, Register tmp0,
1420                        Register tmp1, Register tmp2, Register tmp3, Register tmp4,
1421                        Register tmp5, Register tmp6, Register product_hi);
1422 
1423 #endif // COMPILER2
1424 
1425   void inflate_lo32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);
1426   void inflate_hi32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);
1427 
1428   void ctzc_bits(Register Rd, Register Rs, bool isLL = false,
1429                  Register tmp1 = t0, Register tmp2 = t1);
1430 
1431   void zero_words(Register base, uint64_t cnt);
1432   address zero_words(Register ptr, Register cnt);
1433   void fill_words(Register base, Register cnt, Register value);
1434   void zero_memory(Register addr, Register len, Register tmp);
1435   void zero_dcache_blocks(Register base, Register cnt, Register tmp1, Register tmp2);
1436 
1437   // shift left by shamt and add
1438   void shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt);
1439 
1440   // test single bit in Rs, result is set to Rd
1441   void test_bit(Register Rd, Register Rs, uint32_t bit_pos);
1442 
  // The "safe" variants of the float conversion instructions deal with
  // exceptional inputs, e.g. converting NaN, +Inf or -Inf to int, float or
  // double will trigger an exception, so these situations need to be handled
  // explicitly to get correct results.
1447   void fcvt_w_s_safe(Register dst, FloatRegister src, Register tmp = t0);
1448   void fcvt_l_s_safe(Register dst, FloatRegister src, Register tmp = t0);
1449   void fcvt_w_d_safe(Register dst, FloatRegister src, Register tmp = t0);
1450   void fcvt_l_d_safe(Register dst, FloatRegister src, Register tmp = t0);
1451 
1452   void java_round_float(Register dst, FloatRegister src, FloatRegister ftmp);
1453   void java_round_double(Register dst, FloatRegister src, FloatRegister ftmp);
1454 
1455   // Helper routine processing the slow path of NaN when converting float to float16
1456   void float_to_float16_NaN(Register dst, FloatRegister src, Register tmp1, Register tmp2);
1457 
1458   // vector load/store unit-stride instructions
1459   void vlex_v(VectorRegister vd, Register base, Assembler::SEW sew, VectorMask vm = unmasked) {
1460     switch (sew) {
1461       case Assembler::e64:
1462         vle64_v(vd, base, vm);
1463         break;
1464       case Assembler::e32:
1465         vle32_v(vd, base, vm);
1466         break;
1467       case Assembler::e16:
1468         vle16_v(vd, base, vm);
1469         break;
1470       case Assembler::e8: // fall through
1471       default:
1472         vle8_v(vd, base, vm);
1473         break;
1474     }
1475   }
1476 
1477   void vsex_v(VectorRegister store_data, Register base, Assembler::SEW sew, VectorMask vm = unmasked) {
1478     switch (sew) {
1479       case Assembler::e64:
1480         vse64_v(store_data, base, vm);
1481         break;
1482       case Assembler::e32:
1483         vse32_v(store_data, base, vm);
1484         break;
1485       case Assembler::e16:
1486         vse16_v(store_data, base, vm);
1487         break;
1488       case Assembler::e8: // fall through
1489       default:
1490         vse8_v(store_data, base, vm);
1491         break;
1492     }
1493   }
1494 
1495   // vector pseudo instructions
1496   // rotate vector register left with shift bits, 32-bit version
1497   inline void vrole32_vi(VectorRegister vd, uint32_t shift, VectorRegister tmp_vr) {
1498     vsrl_vi(tmp_vr, vd, 32 - shift);
1499     vsll_vi(vd, vd, shift);
1500     vor_vv(vd, vd, tmp_vr);
1501   }
1502 
1503   inline void vl1r_v(VectorRegister vd, Register rs) {
1504     vl1re8_v(vd, rs);
1505   }
1506 
1507   inline void vmnot_m(VectorRegister vd, VectorRegister vs) {
1508     vmnand_mm(vd, vs, vs);
1509   }
1510 
1511   inline void vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked) {
1512     vnsrl_wx(vd, vs, x0, vm);
1513   }
1514 
1515   inline void vneg_v(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked) {
1516     vrsub_vx(vd, vs, x0, vm);
1517   }
1518 
1519   inline void vfneg_v(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked) {
1520     vfsgnjn_vv(vd, vs, vs, vm);
1521   }
1522 
1523   inline void vfabs_v(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked) {
1524     vfsgnjx_vv(vd, vs, vs, vm);
1525   }
1526 
1527   inline void vmsgt_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
1528     vmslt_vv(vd, vs1, vs2, vm);
1529   }
1530 
1531   inline void vmsgtu_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
1532     vmsltu_vv(vd, vs1, vs2, vm);
1533   }
1534 
1535   inline void vmsge_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
1536     vmsle_vv(vd, vs1, vs2, vm);
1537   }
1538 
1539   inline void vmsgeu_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
1540     vmsleu_vv(vd, vs1, vs2, vm);
1541   }
1542 
1543   inline void vmfgt_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
1544     vmflt_vv(vd, vs1, vs2, vm);
1545   }
1546 
1547   inline void vmfge_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
1548     vmfle_vv(vd, vs1, vs2, vm);
1549   }
1550 
1551   inline void vmsltu_vi(VectorRegister Vd, VectorRegister Vs2, uint32_t imm, VectorMask vm = unmasked) {
1552     guarantee(imm >= 1 && imm <= 16, "imm is invalid");
1553     vmsleu_vi(Vd, Vs2, imm-1, vm);
1554   }
1555 
1556   inline void vmsgeu_vi(VectorRegister Vd, VectorRegister Vs2, uint32_t imm, VectorMask vm = unmasked) {
1557     guarantee(imm >= 1 && imm <= 16, "imm is invalid");
1558     vmsgtu_vi(Vd, Vs2, imm-1, vm);
1559   }
1560 
1561   // Copy mask register
1562   inline void vmmv_m(VectorRegister vd, VectorRegister vs) {
1563     vmand_mm(vd, vs, vs);
1564   }
1565 
1566   // Clear mask register
1567   inline void vmclr_m(VectorRegister vd) {
1568     vmxor_mm(vd, vd, vd);
1569   }
1570 
1571   // Set mask register
1572   inline void vmset_m(VectorRegister vd) {
1573     vmxnor_mm(vd, vd, vd);
1574   }
1575 
1576   inline void vnot_v(VectorRegister Vd, VectorRegister Vs, VectorMask vm = unmasked) {
1577     vxor_vi(Vd, Vs, -1, vm);
1578   }
1579 
1580   static const int zero_words_block_size;
1581 
1582   void cast_primitive_type(BasicType type, Register Rt) {
1583     switch (type) {
1584       case T_BOOLEAN:
1585         sltu(Rt, zr, Rt);
1586         break;
1587       case T_CHAR   :
1588         zext(Rt, Rt, 16);
1589         break;
1590       case T_BYTE   :
1591         sext(Rt, Rt, 8);
1592         break;
1593       case T_SHORT  :
1594         sext(Rt, Rt, 16);
1595         break;
1596       case T_INT    :
1597         sext(Rt, Rt, 32);
1598         break;
1599       case T_LONG   : /* nothing to do */        break;
1600       case T_VOID   : /* nothing to do */        break;
1601       case T_FLOAT  : /* nothing to do */        break;
1602       case T_DOUBLE : /* nothing to do */        break;
1603       default: ShouldNotReachHere();
1604     }
1605   }
1606 
1607   // float cmp with unordered_result
1608   void float_compare(Register result, FloatRegister Rs1, FloatRegister Rs2, int unordered_result);
1609   void double_compare(Register result, FloatRegister Rs1, FloatRegister Rs2, int unordered_result);
1610 
1611   // Zero/Sign-extend
1612   void zext(Register dst, Register src, int bits);
1613   void sext(Register dst, Register src, int bits);
1614 
1615 private:
1616   void cmp_x2i(Register dst, Register src1, Register src2, Register tmp, bool is_signed = true);
1617 
1618 public:
1619   // compare src1 and src2 and get -1/0/1 in dst.
1620   // if [src1 > src2], dst = 1;
1621   // if [src1 == src2], dst = 0;
1622   // if [src1 < src2], dst = -1;
1623   void cmp_l2i(Register dst, Register src1, Register src2, Register tmp = t0);
1624   void cmp_ul2i(Register dst, Register src1, Register src2, Register tmp = t0);
1625   void cmp_uw2i(Register dst, Register src1, Register src2, Register tmp = t0);
1626 
1627   // support for argument shuffling
1628   void move32_64(VMRegPair src, VMRegPair dst, Register tmp = t0);
1629   void float_move(VMRegPair src, VMRegPair dst, Register tmp = t0);
1630   void long_move(VMRegPair src, VMRegPair dst, Register tmp = t0);
1631   void double_move(VMRegPair src, VMRegPair dst, Register tmp = t0);
1632   void object_move(OopMap* map,
1633                    int oop_handle_offset,
1634                    int framesize_in_slots,
1635                    VMRegPair src,
1636                    VMRegPair dst,
1637                    bool is_receiver,
1638                    int* receiver_offset);
1639 
1640 #ifdef ASSERT
1641   // Template short-hand support to clean-up after a failed call to trampoline
1642   // call generation (see trampoline_call() below), when a set of Labels must
1643   // be reset (before returning).
1644   template<typename Label, typename... More>
1645   void reset_labels(Label& lbl, More&... more) {
1646     lbl.reset(); reset_labels(more...);
1647   }
1648   template<typename Label>
1649   void reset_labels(Label& lbl) {
1650     lbl.reset();
1651   }
1652 #endif
1653 
1654 private:
1655 
1656   void repne_scan(Register addr, Register value, Register count, Register tmp);
1657 
1658   int bitset_to_regs(unsigned int bitset, unsigned char* regs);
1659   Address add_memory_helper(const Address dst, Register tmp);
1660 
1661   void load_reserved(Register dst, Register addr, Assembler::operand_size size, Assembler::Aqrl acquire);
1662   void store_conditional(Register dst, Register new_val, Register addr, Assembler::operand_size size, Assembler::Aqrl release);
1663 
1664 public:
1665   void fast_lock(Register basic_lock, Register obj, Register tmp1, Register tmp2, Register tmp3, Label& slow);
1666   void fast_unlock(Register obj, Register tmp1, Register tmp2, Register tmp3, Label& slow);
1667 
1668 public:
1669   enum {
1670     // movptr
1671     movptr1_instruction_size = 6 * MacroAssembler::instruction_size, // lui, addi, slli, addi, slli, addi.  See movptr1().
1672     movptr2_instruction_size = 5 * MacroAssembler::instruction_size, // lui, lui, slli, add, addi.  See movptr2().
1673     load_pc_relative_instruction_size = 2 * MacroAssembler::instruction_size // auipc, ld
1674   };
1675 
1676   static bool is_load_pc_relative_at(address branch);
1677   static bool is_li16u_at(address instr);
1678 
1679   static bool is_jal_at(address instr)        { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b1101111; }
1680   static bool is_jalr_at(address instr)       { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b1100111 && extract_funct3(instr) == 0b000; }
1681   static bool is_branch_at(address instr)     { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b1100011; }
1682   static bool is_ld_at(address instr)         { assert_cond(instr != nullptr); return is_load_at(instr) && extract_funct3(instr) == 0b011; }
1683   static bool is_load_at(address instr)       { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b0000011; }
1684   static bool is_float_load_at(address instr) { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b0000111; }
1685   static bool is_auipc_at(address instr)      { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b0010111; }
1686   static bool is_jump_at(address instr)       { assert_cond(instr != nullptr); return is_branch_at(instr) || is_jal_at(instr) || is_jalr_at(instr); }
1687   static bool is_add_at(address instr)        { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b0110011 && extract_funct3(instr) == 0b000; }
1688   static bool is_addi_at(address instr)       { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b0010011 && extract_funct3(instr) == 0b000; }
1689   static bool is_addiw_at(address instr)      { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b0011011 && extract_funct3(instr) == 0b000; }
1690   static bool is_addiw_to_zr_at(address instr){ assert_cond(instr != nullptr); return is_addiw_at(instr) && extract_rd(instr) == zr; }
1691   static bool is_lui_at(address instr)        { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b0110111; }
1692   static bool is_lui_to_zr_at(address instr)  { assert_cond(instr != nullptr); return is_lui_at(instr) && extract_rd(instr) == zr; }
1693 
1694   static bool is_srli_at(address instr) {
1695     assert_cond(instr != nullptr);
1696     return extract_opcode(instr) == 0b0010011 &&
1697            extract_funct3(instr) == 0b101 &&
1698            Assembler::extract(((unsigned*)instr)[0], 31, 26) == 0b000000;
1699   }
1700 
1701   static bool is_slli_shift_at(address instr, uint32_t shift) {
1702     assert_cond(instr != nullptr);
1703     return (extract_opcode(instr) == 0b0010011 && // opcode field
1704             extract_funct3(instr) == 0b001 &&     // funct3 field, select the type of operation
1705             Assembler::extract(Assembler::ld_instr(instr), 25, 20) == shift);    // shamt field
1706   }
1707 
1708   static bool is_movptr1_at(address instr);
1709   static bool is_movptr2_at(address instr);
1710 
1711   static bool is_lwu_to_zr(address instr);
1712 
1713   static Register extract_rs1(address instr);
1714   static Register extract_rs2(address instr);
1715   static Register extract_rd(address instr);
1716   static uint32_t extract_opcode(address instr);
1717   static uint32_t extract_funct3(address instr);
1718 
1719   // the instruction sequence of movptr is as below:
1720   //     lui
1721   //     addi
1722   //     slli
1723   //     addi
1724   //     slli
1725   //     addi/jalr/load
1726   static bool check_movptr1_data_dependency(address instr) {
1727     address lui = instr;
1728     address addi1 = lui + MacroAssembler::instruction_size;
1729     address slli1 = addi1 + MacroAssembler::instruction_size;
1730     address addi2 = slli1 + MacroAssembler::instruction_size;
1731     address slli2 = addi2 + MacroAssembler::instruction_size;
1732     address last_instr = slli2 + MacroAssembler::instruction_size;
1733     return extract_rs1(addi1) == extract_rd(lui) &&
1734            extract_rs1(addi1) == extract_rd(addi1) &&
1735            extract_rs1(slli1) == extract_rd(addi1) &&
1736            extract_rs1(slli1) == extract_rd(slli1) &&
1737            extract_rs1(addi2) == extract_rd(slli1) &&
1738            extract_rs1(addi2) == extract_rd(addi2) &&
1739            extract_rs1(slli2) == extract_rd(addi2) &&
1740            extract_rs1(slli2) == extract_rd(slli2) &&
1741            extract_rs1(last_instr) == extract_rd(slli2);
1742   }
1743 
1744   // the instruction sequence of movptr2 is as below:
1745   //     lui
1746   //     lui
1747   //     slli
1748   //     add
1749   //     addi/jalr/load
1750   static bool check_movptr2_data_dependency(address instr) {
1751     address lui1 = instr;
1752     address lui2 = lui1 + MacroAssembler::instruction_size;
1753     address slli = lui2 + MacroAssembler::instruction_size;
1754     address add  = slli + MacroAssembler::instruction_size;
1755     address last_instr = add + MacroAssembler::instruction_size;
1756     return extract_rd(add) == extract_rd(lui2) &&
1757            extract_rs1(add) == extract_rd(lui2) &&
1758            extract_rs2(add) == extract_rd(slli) &&
1759            extract_rs1(slli) == extract_rd(lui1) &&
1760            extract_rd(slli) == extract_rd(lui1) &&
1761            extract_rs1(last_instr) == extract_rd(add);
1762   }
1763 
1764   // the instruction sequence of li16u is as below:
1765   //     lui
1766   //     srli
1767   static bool check_li16u_data_dependency(address instr) {
1768     address lui = instr;
1769     address srli = lui + MacroAssembler::instruction_size;
1770 
1771     return extract_rs1(srli) == extract_rd(lui) &&
1772            extract_rs1(srli) == extract_rd(srli);
1773   }
1774 
1775   // the instruction sequence of li32 is as below:
1776   //     lui
1777   //     addiw
1778   static bool check_li32_data_dependency(address instr) {
1779     address lui = instr;
1780     address addiw = lui + MacroAssembler::instruction_size;
1781 
1782     return extract_rs1(addiw) == extract_rd(lui) &&
1783            extract_rs1(addiw) == extract_rd(addiw);
1784   }
1785 
1786   // the instruction sequence of pc-relative is as below:
1787   //     auipc
1788   //     jalr/addi/load/float_load
1789   static bool check_pc_relative_data_dependency(address instr) {
1790     address auipc = instr;
1791     address last_instr = auipc + MacroAssembler::instruction_size;
1792 
1793     return extract_rs1(last_instr) == extract_rd(auipc);
1794   }
1795 
1796   // the instruction sequence of load_label is as below:
1797   //     auipc
1798   //     load
1799   static bool check_load_pc_relative_data_dependency(address instr) {
1800     address auipc = instr;
1801     address load = auipc + MacroAssembler::instruction_size;
1802 
1803     return extract_rd(load) == extract_rd(auipc) &&
1804            extract_rs1(load) == extract_rd(load);
1805   }
1806 
1807   static bool is_li32_at(address instr);
1808   static bool is_pc_relative_at(address branch);
1809 
1810   static bool is_membar(address addr) {
1811     return (Bytes::get_native_u4(addr) & 0x7f) == 0b1111 && extract_funct3(addr) == 0;
1812   }
1813   static uint32_t get_membar_kind(address addr);
1814   static void set_membar_kind(address addr, uint32_t order_kind);
1815 
1816  public:
1817   // Inline type specific methods
1818   #include "asm/macroAssembler_common.hpp"
1819 };
1820 
1821 #ifdef ASSERT
1822 inline bool AbstractAssembler::pd_check_instruction_mark() { return false; }
1823 #endif
1824 
1825 #endif // CPU_RISCV_MACROASSEMBLER_RISCV_HPP