New src/hotspot/cpu/riscv/macroAssembler

   1 /*
   2  * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
   3  * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
   4  * Copyright (c) 2020, 2024, Huawei Technologies Co., Ltd. All rights reserved.
   5  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   6  *
   7  * This code is free software; you can redistribute it and/or modify it
   8  * under the terms of the GNU General Public License version 2 only, as
   9  * published by the Free Software Foundation.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  *
  25  */
  26 
  27 #ifndef CPU_RISCV_MACROASSEMBLER_RISCV_HPP
  28 #define CPU_RISCV_MACROASSEMBLER_RISCV_HPP
  29 
  30 #include "asm/assembler.inline.hpp"
  31 #include "code/vmreg.hpp"
  32 #include "metaprogramming/enableIf.hpp"
  33 #include "oops/compressedOops.hpp"
  34 #include "utilities/powerOfTwo.hpp"
  35 
  36 // MacroAssembler extends Assembler by frequently used macros.
  37 //
  38 // Instructions for which a 'better' code sequence exists depending
  39 // on arguments should also go in here.
  40 
  41 class MacroAssembler: public Assembler {
  42 
  43  public:
  44 
  45   MacroAssembler(CodeBuffer* code) : Assembler(code) {}  // hands the code buffer to the Assembler base
  46 
  47   void safepoint_poll(Label& slow_path, bool at_return, bool in_nmethod, Register tmp_reg = t0);
  48 
  49   // Alignment
  50   int align(int modulus, int extra_offset = 0);
  51 
  52   static inline void assert_alignment(address pc, int alignment = MacroAssembler::instruction_size) {
  53     assert(is_aligned(pc, alignment), "bad alignment");  // alignment defaults to instruction_size
  54   }
  55 
  56   // nop
  57   void post_call_nop();
  58 
  59   // Stack frame creation/removal
  60   // Note that SP must be updated to the right place before saving/restoring RA and FP
  61   // because signal based thread suspend/resume could happen asynchronously.
  62   void enter() {
  63     subi(sp, sp, 2 * wordSize);      // reserve two words; SP moves before anything is stored
  64     sd(ra, Address(sp, wordSize));   // RA is stored at sp + wordSize
  65     sd(fp, Address(sp));             // current FP is stored at sp
  66     addi(fp, sp, 2 * wordSize);      // FP = sp + 2 words
  67   }
  68 
  69   void leave() {
  70     subi(sp, fp, 2 * wordSize);      // SP = fp - 2 words, set before the loads below
  71     ld(fp, Address(sp));             // reload FP from sp
  72     ld(ra, Address(sp, wordSize));   // reload RA from sp + wordSize
  73     addi(sp, sp, 2 * wordSize);      // release the two words
  74   }
  75 
  76 
  77   // Support for getting the JavaThread pointer (i.e., a reference to thread-local information)
  78   // The pointer will be loaded into the thread register.
  79   void get_thread(Register thread);
  80 
  81   // Support for VM calls
  82   //
  83   // It is imperative that all calls into the VM are handled via the call_VM macros.
  84   // They make sure that the stack linkage is setup correctly. call_VM's correspond
  85   // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points.
  86 
  87   void call_VM(Register oop_result,
  88                address entry_point,
  89                bool check_exceptions = true);
  90   void call_VM(Register oop_result,
  91                address entry_point,
  92                Register arg_1,
  93                bool check_exceptions = true);
  94   void call_VM(Register oop_result,
  95                address entry_point,
  96                Register arg_1, Register arg_2,
  97                bool check_exceptions = true);
  98   void call_VM(Register oop_result,
  99                address entry_point,
 100                Register arg_1, Register arg_2, Register arg_3,
 101                bool check_exceptions = true);
 102 
 103   // Overloadings with last_Java_sp
 104   void call_VM(Register oop_result,
 105                Register last_java_sp,
 106                address entry_point,
 107                int number_of_arguments = 0,
 108                bool check_exceptions = true);
 109   void call_VM(Register oop_result,
 110                Register last_java_sp,
 111                address entry_point,
 112                Register arg_1,
 113                bool check_exceptions = true);
 114   void call_VM(Register oop_result,
 115                Register last_java_sp,
 116                address entry_point,
 117                Register arg_1, Register arg_2,
 118                bool check_exceptions = true);
 119   void call_VM(Register oop_result,
 120                Register last_java_sp,
 121                address entry_point,
 122                Register arg_1, Register arg_2, Register arg_3,
 123                bool check_exceptions = true);
 124 
 125   void get_vm_result_oop(Register oop_result, Register java_thread);
 126   void get_vm_result_metadata(Register metadata_result, Register java_thread);
 127 
 128   // These always tightly bind to MacroAssembler::call_VM_leaf_base
 129   // bypassing the virtual implementation
 130   void call_VM_leaf(address entry_point,
 131                     int number_of_arguments = 0);
 132   void call_VM_leaf(address entry_point,
 133                     Register arg_0);
 134   void call_VM_leaf(address entry_point,
 135                     Register arg_0, Register arg_1);
 136   void call_VM_leaf(address entry_point,
 137                     Register arg_0, Register arg_1, Register arg_2);
 138 
 139   // These always tightly bind to MacroAssembler::call_VM_base
 140   // bypassing the virtual implementation
 141   void super_call_VM_leaf(address entry_point, Register arg_0);
 142   void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1);
 143   void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2);
 144   void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3);
 145 
 146   // last Java Frame (fills frame anchor)
 147   void set_last_Java_frame(Register last_java_sp, Register last_java_fp, address last_java_pc, Register tmp);
 148   void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Label &last_java_pc, Register tmp);
 149   void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Register last_java_pc);
 150 
 151   // thread in the default location (xthread)
 152   void reset_last_Java_frame(bool clear_fp);
 153 
 154   virtual void call_VM_leaf_base(
 155     address entry_point,                // the entry point
 156     int     number_of_arguments,        // the number of arguments to pop after the call
 157     Label*  retaddr = nullptr
 158   );
 159 
 160   virtual void call_VM_leaf_base(
 161     address entry_point,                // the entry point
 162     int     number_of_arguments,        // the number of arguments to pop after the call
 163     Label&  retaddr) {                  // handed to the Label* overload by address
 164     call_VM_leaf_base(entry_point, number_of_arguments, &retaddr);
 165   }
 166 
 167   virtual void call_VM_base(           // returns the register containing the thread upon return
 168     Register oop_result,               // where an oop-result ends up if any; use noreg otherwise
 169     Register java_thread,              // the thread if computed before     ; use noreg otherwise
 170     Register last_java_sp,             // to set up last_Java_frame in stubs; use noreg otherwise
 171     Label*   return_pc,                // to set up last_Java_frame; use nullptr otherwise
 172     address  entry_point,              // the entry point
 173     int      number_of_arguments,      // the number of arguments (w/o thread) to pop after the call
 174     bool     check_exceptions          // whether to check for pending exceptions after return
 175   );
 176 
 177   void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions);
 178 
 179   virtual void check_and_handle_earlyret(Register java_thread);
 180   virtual void check_and_handle_popframe(Register java_thread);
 181 
 182   void resolve_weak_handle(Register result, Register tmp1, Register tmp2);
 183   void resolve_oop_handle(Register result, Register tmp1, Register tmp2);
 184   void resolve_jobject(Register value, Register tmp1, Register tmp2);
 185   void resolve_global_jobject(Register value, Register tmp1, Register tmp2);
 186 
 187   void movoop(Register dst, jobject obj);
 188   void mov_metadata(Register dst, Metadata* obj);
 189   void bang_stack_size(Register size, Register tmp);
 190   void set_narrow_oop(Register dst, jobject obj);
 191   void set_narrow_klass(Register dst, Klass* k);
 192 
 193   void load_mirror(Register dst, Register method, Register tmp1, Register tmp2);
 194   void access_load_at(BasicType type, DecoratorSet decorators, Register dst,
 195                       Address src, Register tmp1, Register tmp2);
 196   void access_store_at(BasicType type, DecoratorSet decorators, Address dst,
 197                        Register val, Register tmp1, Register tmp2, Register tmp3);
 198   void load_klass(Register dst, Register src, Register tmp = t0);
 199   void load_narrow_klass_compact(Register dst, Register src);
 200   void store_klass(Register dst, Register src, Register tmp = t0);
 201   void cmp_klass_compressed(Register oop, Register trial_klass, Register tmp, Label &L, bool equal);
 202 
 203   void encode_klass_not_null(Register r, Register tmp = t0);
 204   void decode_klass_not_null(Register r, Register tmp = t0);
 205   void encode_klass_not_null(Register dst, Register src, Register tmp);
 206   void decode_klass_not_null(Register dst, Register src, Register tmp);
 207   void decode_heap_oop_not_null(Register r);
 208   void decode_heap_oop_not_null(Register dst, Register src);
 209   void decode_heap_oop(Register d, Register s);
 210   void decode_heap_oop(Register r) { decode_heap_oop(r, r); }  // passes r as both operands of the two-register overload
 211   void encode_heap_oop_not_null(Register r);
 212   void encode_heap_oop_not_null(Register dst, Register src);
 213   void encode_heap_oop(Register d, Register s);
 214   void encode_heap_oop(Register r) { encode_heap_oop(r, r); }  // passes r as both operands of the two-register overload
 215   void load_heap_oop(Register dst, Address src, Register tmp1,
 216                      Register tmp2, DecoratorSet decorators = 0);
 217   void load_heap_oop_not_null(Register dst, Address src, Register tmp1,
 218                               Register tmp2, DecoratorSet decorators = 0);
 219   void store_heap_oop(Address dst, Register val, Register tmp1,
 220                       Register tmp2, Register tmp3, DecoratorSet decorators = 0);
 221 
 222   void store_klass_gap(Register dst, Register src);
 223 
 224   // currently unimplemented
 225   // Used for storing null. All other oop constants should be
 226   // stored using routines that take a jobject.
 227   void store_heap_oop_null(Address dst);
 228 
 229   // This dummy is to prevent a call to store_heap_oop from
 230   // converting a zero (linked null) into a Register by giving
 231   // the compiler two choices it can't resolve
 232 
 233   void store_heap_oop(Address dst, void* dummy);
 234 
 235   // Support for null-checks
 236   //
 237   // Generates code that causes a null OS exception if the content of reg is null.
 238   // If the accessed location is M[reg + offset] and the offset is known, provide the
 239   // offset. No explicit code generation is needed if the offset is within a certain
 240   // range (0 <= offset <= page_size).
 241 
 242   virtual void null_check(Register reg, int offset = -1);
 243   static bool needs_explicit_null_check(intptr_t offset);
 244   static bool uses_implicit_null_check(void* address);
 245 
 246   // interface method calling
 247   void lookup_interface_method(Register recv_klass,
 248                                Register intf_klass,
 249                                RegisterOrConstant itable_index,
 250                                Register method_result,
 251                                Register scan_tmp,
 252                                Label& no_such_interface,
 253                                bool return_method = true);
 254 
 255   void lookup_interface_method_stub(Register recv_klass,
 256                                     Register holder_klass,
 257                                     Register resolved_klass,
 258                                     Register method_result,
 259                                     Register temp_reg,
 260                                     Register temp_reg2,
 261                                     int itable_index,
 262                                     Label& L_no_such_interface);
 263 
 264   // virtual method calling
 265   // n.b. x86 allows RegisterOrConstant for vtable_index
 266   void lookup_virtual_method(Register recv_klass,
 267                              RegisterOrConstant vtable_index,
 268                              Register method_result);
 269 
 270   // Form an address from base + offset in Rd. Rd may or may not
 271   // actually be used: you must use the Address that is returned. It
 272   // is up to you to ensure that the shift provided matches the size
 273   // of your data.
 274   Address form_address(Register Rd, Register base, int64_t byte_offset);
 275 
 276   // Sometimes we get misaligned loads and stores, usually from Unsafe
 277   // accesses, and these can exceed the offset range.
 278   Address legitimize_address(Register Rd, const Address &adr) {
 279     const bool usable_as_is = (adr.getMode() != Address::base_plus_offset) ||
 280                               is_simm12(adr.offset());
 281     if (usable_as_is) {
 282       return adr;  // not base+offset, or the offset already passes is_simm12
 283     }
 284     return form_address(Rd, adr.base(), adr.offset());
 285   }
 286 
 287   // allocation
 288   void tlab_allocate(
 289     Register obj,                   // result: pointer to object after successful allocation
 290     Register var_size_in_bytes,     // object size in bytes if unknown at compile time; invalid otherwise
 291     int      con_size_in_bytes,     // object size in bytes if   known at compile time
 292     Register tmp1,                  // temp register
 293     Register tmp2,                  // temp register
 294     Label&   slow_case,             // continuation point if fast allocation fails
 295     bool     is_far = false
 296   );
 297 
 298   // Test sub_klass against super_klass, with fast and slow paths.
 299 
 300   // The fast path produces a tri-state answer: yes / no / maybe-slow.
 301   // One of the three labels can be null, meaning take the fall-through.
 302   // If super_check_offset is -1, the value is loaded up from super_klass.
 303   // No registers are killed, except tmp_reg
 304   void check_klass_subtype_fast_path(Register sub_klass,
 305                                      Register super_klass,
 306                                      Register tmp_reg,
 307                                      Label* L_success,
 308                                      Label* L_failure,
 309                                      Label* L_slow_path,
 310                                      Register super_check_offset = noreg);
 311 
 312   // The rest of the type check; must be wired to a corresponding fast path.
 313   // It does not repeat the fast path logic, so don't use it standalone.
 314   // The tmp1_reg and tmp2_reg can be noreg, if no temps are available.
 315   // Updates the sub's secondary super cache as necessary.
 316   void check_klass_subtype_slow_path(Register sub_klass,
 317                                      Register super_klass,
 318                                      Register tmp1_reg,
 319                                      Register tmp2_reg,
 320                                      Label* L_success,
 321                                      Label* L_failure,
 322                                      bool set_cond_codes = false);
 323 
 324   void check_klass_subtype_slow_path_linear(Register sub_klass,
 325                                             Register super_klass,
 326                                             Register tmp1_reg,
 327                                             Register tmp2_reg,
 328                                             Label* L_success,
 329                                             Label* L_failure,
 330                                             bool set_cond_codes = false);
 331 
 332   void check_klass_subtype_slow_path_table(Register sub_klass,
 333                                            Register super_klass,
 334                                            Register tmp1_reg,
 335                                            Register tmp2_reg,
 336                                            Label* L_success,
 337                                            Label* L_failure,
 338                                            bool set_cond_codes = false);
 339 
 340   // If r is valid, return r.
 341   // If r is invalid, remove a register r2 from available_regs, add r2
 342   // to regs_to_push, then return r2.
 343   Register allocate_if_noreg(const Register r,
 344                              RegSetIterator<Register> &available_regs,
 345                              RegSet &regs_to_push);
 346 
 347   // Secondary subtype checking
 348   void lookup_secondary_supers_table_var(Register sub_klass,
 349                                          Register r_super_klass,
 350                                          Register result,
 351                                          Register tmp1,
 352                                          Register tmp2,
 353                                          Register tmp3,
 354                                          Register tmp4,
 355                                          Label *L_success);
 356 
 357   void population_count(Register dst, Register src, Register tmp1, Register tmp2);
 358 
 359   // As above, but with a constant super_klass.
 360   // The result is in Register result, not the condition codes.
 361   bool lookup_secondary_supers_table_const(Register r_sub_klass,
 362                                            Register r_super_klass,
 363                                            Register result,
 364                                            Register tmp1,
 365                                            Register tmp2,
 366                                            Register tmp3,
 367                                            Register tmp4,
 368                                            u1 super_klass_slot,
 369                                            bool stub_is_near = false);
 370 
 371   void verify_secondary_supers_table(Register r_sub_klass,
 372                                      Register r_super_klass,
 373                                      Register result,
 374                                      Register tmp1,
 375                                      Register tmp2,
 376                                      Register tmp3);
 377 
 378   void lookup_secondary_supers_table_slow_path(Register r_super_klass,
 379                                                Register r_array_base,
 380                                                Register r_array_index,
 381                                                Register r_bitmap,
 382                                                Register result,
 383                                                Register tmp,
 384                                                bool is_stub = true);
 385 
 386   void check_klass_subtype(Register sub_klass,
 387                            Register super_klass,
 388                            Register tmp_reg,
 389                            Label& L_success);
 390 
 391   Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0);
 392 
 393   // only if +VerifyOops
 394   void _verify_oop(Register reg, const char* s, const char* file, int line);
 395   void _verify_oop_addr(Address addr, const char* s, const char* file, int line);
 396 
 397   void _verify_oop_checked(Register reg, const char* s, const char* file, int line) {
 398     // _verify_oop is skipped unless VerifyOops is set.
 399     if (!VerifyOops) { return; }
 400     _verify_oop(reg, s, file, line);
 401   }
 402   void _verify_oop_addr_checked(Address reg, const char* s, const char* file, int line) {
 403     // _verify_oop_addr is skipped unless VerifyOops is set.
 404     if (!VerifyOops) { return; }
 405     _verify_oop_addr(reg, s, file, line);
 406   }
 407 
 408   void _verify_method_ptr(Register reg, const char* msg, const char* file, int line) {}  // empty body: emits nothing
 409   void _verify_klass_ptr(Register reg, const char* msg, const char* file, int line) {}  // empty body: emits nothing
 410 
 411 #define verify_oop(reg) _verify_oop_checked(reg, "broken oop " #reg, __FILE__, __LINE__)
 412 #define verify_oop_msg(reg, msg) _verify_oop_checked(reg, "broken oop " #reg ", " #msg, __FILE__, __LINE__)
 413 #define verify_oop_addr(addr) _verify_oop_addr_checked(addr, "broken oop addr " #addr, __FILE__, __LINE__)
 414 #define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__)
 415 #define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__)  // forwards to the klass checker, not the method one
 416 
 417   // A more convenient access to fence for our purposes
 418   // We use four bits to indicate the read and write bits in the predecessors and successors,
 419   // and extend them with i for r and o for w if UseConservativeFence is enabled.
 420   enum Membar_mask_bits {               // bits [3:2]: predecessor, bits [1:0]: successor; within a pair 10 = r, 01 = w
 421     StoreStore = 0b0101,               // (pred = w   + succ = w)
 422     LoadStore  = 0b1001,               // (pred = r   + succ = w)
 423     StoreLoad  = 0b0110,               // (pred = w   + succ = r)
 424     LoadLoad   = 0b1010,               // (pred = r   + succ = r)
 425     AnyAny     = LoadStore | StoreLoad // (pred = rw  + succ = rw), i.e. 0b1111
 426   };
 427 
 428   void membar(uint32_t order_constraint);
 429 
 430  private:
 431 
 432   static void membar_mask_to_pred_succ(uint32_t order_constraint,
 433                                        uint32_t& predecessor, uint32_t& successor) {
 434     // Bits [3:2] of the mask select the predecessor set, bits [1:0] the successor set.
 435     const uint32_t rw_mask = 0x3;
 436     predecessor = (order_constraint >> 2) & rw_mask;
 437     successor   = order_constraint & rw_mask;
 438     if (!UseConservativeFence) {
 439       return;
 440     }
 441     // Conservative mode mirrors each rw pair into the io positions:
 442     //   01(w) -> 0101(ow), 10(r) -> 1010(ir), 11(rw) -> 1111(iorw)
 443     predecessor = predecessor | (predecessor << 2);
 444     successor   = successor | (successor << 2);
 445   }
 446 
 447   static int pred_succ_to_membar_mask(uint32_t predecessor, uint32_t successor) {
 448     return (successor & 0x3) | ((predecessor & 0x3) << 2);  // predecessor -> bits [3:2], successor -> bits [1:0]
 449   }
 450 
 451  public:
 452 
 453   void cmodx_fence();
 454 
 455   void pause() {
 456     // Zihintpause
 457     // PAUSE is encoded as a FENCE instruction with pred=W, succ=0, fm=0, rd=x0, and rs1=x0.
 458     Assembler::fence(w, 0);  // pred = w, succ = 0, matching the encoding described above
 459   }
 460 
 461   // prints msg, dumps registers and stops execution
 462   void stop(const char* msg);
 463 
 464   static void debug64(char* msg, int64_t pc, int64_t regs[]);
 465 
 466   void unimplemented(const char* what = "");
 467 
 468   void should_not_reach_here() { stop("should not reach here"); }  // stop() with a fixed message
 469 
 470   static address target_addr_for_insn(address insn_addr);
 471 
 472   // Required platform-specific helpers for Label::patch_instructions.
 473   // They _shadow_ the declarations in AbstractAssembler, which are undefined.
 474   static int pd_patch_instruction_size(address branch, address target);
 475   static void pd_patch_instruction(address branch, address target, const char* file = nullptr, int line = 0) {
 476     pd_patch_instruction_size(branch, target);  // returned size is discarded; file and line are not used
 477   }
 478   static address pd_call_destination(address branch) {
 479     return target_addr_for_insn(branch);  // delegates to target_addr_for_insn
 480   }
 481 
 482   static int patch_oop(address insn_addr, address o);
 483 
 484   static address get_target_of_li32(address insn_addr);
 485   static int patch_imm_in_li32(address branch, int32_t target);
 486 
 487   // Return whether code is emitted to a scratch blob.
 488   virtual bool in_scratch_emit_size() {
 489     return false;  // this implementation always answers false
 490   }
 491 
 492   address emit_reloc_call_address_stub(int insts_call_instruction_offset, address target);
 493   static int max_reloc_call_address_stub_size();
 494 
 495   void emit_static_call_stub();
 496   static int static_call_stub_size();
 497 
 498   // The following 4 methods return the offset of the appropriate move instruction
 499 
 500   // Support for fast byte/short loading with zero extension (depending on particular CPU)
 501   int load_unsigned_byte(Register dst, Address src);
 502   int load_unsigned_short(Register dst, Address src);
 503 
 504   // Support for fast byte/short loading with sign extension (depending on particular CPU)
 505   int load_signed_byte(Register dst, Address src);
 506   int load_signed_short(Register dst, Address src);
 507 
 508   // Load and store values by size and signed-ness
 509   void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed);
 510   void store_sized_value(Address dst, Register src, size_t size_in_bytes);
 511 
 512   // Misaligned loads, will use the best way, according to the AvoidUnalignedAccess flag
 513   void load_short_misaligned(Register dst, Address src, Register tmp, bool is_signed, int granularity = 1);
 514   void load_int_misaligned(Register dst, Address src, Register tmp, bool is_signed, int granularity = 1);
 515   void load_long_misaligned(Register dst, Address src, Register tmp, int granularity = 1);
 516 
 517  public:
 518   // Standard pseudo instructions
 519   inline void nop() {
 520     addi(x0, x0, 0);  // expands to: addi x0, x0, 0
 521   }
 522 
 523   inline void mv(Register Rd, Register Rs) {
 524     // A move of a register onto itself emits no instruction.
 525     if (Rd == Rs) { return; }
 526     addi(Rd, Rs, 0);
 527   }
 528 
 529   inline void notr(Register Rd, Register Rs) {
 530     if (!do_compress_zcb(Rd, Rs) || (Rd != Rs)) {
 531       xori(Rd, Rs, -1);  // general form: xori Rd, Rs, -1
 532       return;
 533     }
 534     c_not(Rd);           // in-place form: single register operand
 535   }
 536 
 537   inline void neg(Register Rd, Register Rs) {
 538     sub(Rd, x0, Rs);  // expands to: sub Rd, x0, Rs
 539   }
 540 
 541   inline void negw(Register Rd, Register Rs) {
 542     subw(Rd, x0, Rs);  // expands to: subw Rd, x0, Rs
 543   }
 544 
 545   inline void sext_w(Register Rd, Register Rs) {
 546     addiw(Rd, Rs, 0);  // expands to: addiw Rd, Rs, 0
 547   }
 548 
 549   inline void zext_b(Register Rd, Register Rs) {
 550     if (!do_compress_zcb(Rd, Rs) || (Rd != Rs)) {
 551       andi(Rd, Rs, 0xFF);  // general form: andi Rd, Rs, 0xFF
 552       return;
 553     }
 554     c_zext_b(Rd);          // in-place form: single register operand
 555   }
 556 
 557   inline void seqz(Register Rd, Register Rs) {
 558     sltiu(Rd, Rs, 1);  // expands to: sltiu Rd, Rs, 1
 559   }
 560 
 561   inline void snez(Register Rd, Register Rs) {
 562     sltu(Rd, x0, Rs);  // expands to: sltu Rd, x0, Rs
 563   }
 564 
 565   inline void sltz(Register Rd, Register Rs) {
 566     slt(Rd, Rs, x0);  // expands to: slt Rd, Rs, x0
 567   }
 568 
 569   inline void sgtz(Register Rd, Register Rs) {
 570     slt(Rd, x0, Rs);  // expands to: slt Rd, x0, Rs
 571   }
 572 
 573   // Bit-manipulation extension pseudo instructions
 574   // zero extend word
 575   inline void zext_w(Register Rd, Register Rs) {
 576     assert(UseZba, "must be");
 577     if (!do_compress_zcb(Rd, Rs) || (Rd != Rs)) {
 578       add_uw(Rd, Rs, zr);  // general form: add.uw against zr
 579       return;
 580     }
 581     c_zext_w(Rd);          // in-place form: single register operand
 582   }
 583 
 584   // Floating-point data-processing pseudo instructions
 585   inline void fmv_s(FloatRegister Rd, FloatRegister Rs) {
 586     // A move of a register onto itself emits no instruction.
 587     if (Rd == Rs) { return; }
 588     fsgnj_s(Rd, Rs, Rs);
 589   }
 590 
 591   inline void fabs_s(FloatRegister Rd, FloatRegister Rs) {
 592     fsgnjx_s(Rd, Rs, Rs);  // expands to: fsgnjx.s Rd, Rs, Rs
 593   }
 594 
 595   inline void fneg_s(FloatRegister Rd, FloatRegister Rs) {
 596     fsgnjn_s(Rd, Rs, Rs);  // expands to: fsgnjn.s Rd, Rs, Rs
 597   }
 598 
 599   inline void fmv_d(FloatRegister Rd, FloatRegister Rs) {
 600     // A move of a register onto itself emits no instruction.
 601     if (Rd == Rs) { return; }
 602     fsgnj_d(Rd, Rs, Rs);
 603   }
 604 
 605   inline void fabs_d(FloatRegister Rd, FloatRegister Rs) {
 606     fsgnjx_d(Rd, Rs, Rs);  // expands to: fsgnjx.d Rd, Rs, Rs
 607   }
 608 
 609   inline void fneg_d(FloatRegister Rd, FloatRegister Rs) {
 610     fsgnjn_d(Rd, Rs, Rs);  // expands to: fsgnjn.d Rd, Rs, Rs
 611   }
 612 
 613   // Control and status pseudo instructions
 614   void csrr(Register Rd, unsigned csr);         // read csr
 615   void csrw(unsigned csr, Register Rs);         // write csr
 616   void csrs(unsigned csr, Register Rs);         // set bits in csr
 617   void csrc(unsigned csr, Register Rs);         // clear bits in csr
 618   void csrwi(unsigned csr, unsigned imm);
 619   void csrsi(unsigned csr, unsigned imm);
 620   void csrci(unsigned csr, unsigned imm);
 621   void frcsr(Register Rd) { csrr(Rd, CSR_FCSR); }  // read floating-point csr
 622   void fscsr(Register Rd, Register Rs);            // swap float-point csr
 623   void fscsr(Register Rs);                         // write float-point csr
 624   void frrm(Register Rd) { csrr(Rd, CSR_FRM); }    // read floating-point rounding mode
 625   void fsrm(Register Rd, Register Rs);             // swap float-point rounding mode
 626   void fsrm(Register Rs);                          // write float-point rounding mode
 627   void fsrmi(Register Rd, unsigned imm);
 628   void fsrmi(unsigned imm);
 629   void frflags(Register Rd) { csrr(Rd, CSR_FFLAGS); }  // read floating-point exception flags
 630   void fsflags(Register Rd, Register Rs);              // swap float-point exception flags
 631   void fsflags(Register Rs);                           // write float-point exception flags
 632   void fsflagsi(Register Rd, unsigned imm);
 633   void fsflagsi(unsigned imm);
 634   // Requires Zicntr
 635   void rdinstret(Register Rd) { csrr(Rd, CSR_INSTRET); }  // read instruction-retired counter
 636   void rdcycle(Register Rd)   { csrr(Rd, CSR_CYCLE); }    // read cycle counter
 637   void rdtime(Register Rd)    { csrr(Rd, CSR_TIME); }     // read time
 638 
 639   // Restore cpu control state after JNI call
 640   void restore_cpu_control_state_after_jni(Register tmp);
 641 
 642   // Control transfer pseudo instructions
 643   void beqz(Register Rs, const address dest);
 644   void bnez(Register Rs, const address dest);
 645   void blez(Register Rs, const address dest);
 646   void bgez(Register Rs, const address dest);
 647   void bltz(Register Rs, const address dest);
 648   void bgtz(Register Rs, const address dest);
 649 
 650   void cmov_eq(Register cmp1, Register cmp2, Register dst, Register src);
 651   void cmov_ne(Register cmp1, Register cmp2, Register dst, Register src);
 652   void cmov_le(Register cmp1, Register cmp2, Register dst, Register src);
 653   void cmov_leu(Register cmp1, Register cmp2, Register dst, Register src);
 654   void cmov_ge(Register cmp1, Register cmp2, Register dst, Register src);
 655   void cmov_geu(Register cmp1, Register cmp2, Register dst, Register src);
 656   void cmov_lt(Register cmp1, Register cmp2, Register dst, Register src);
 657   void cmov_ltu(Register cmp1, Register cmp2, Register dst, Register src);
 658   void cmov_gt(Register cmp1, Register cmp2, Register dst, Register src);
 659   void cmov_gtu(Register cmp1, Register cmp2, Register dst, Register src);
 660 
 661   void cmov_cmp_fp_eq(FloatRegister cmp1, FloatRegister cmp2, Register dst, Register src, bool is_single);
 662   void cmov_cmp_fp_ne(FloatRegister cmp1, FloatRegister cmp2, Register dst, Register src, bool is_single);
 663   void cmov_cmp_fp_le(FloatRegister cmp1, FloatRegister cmp2, Register dst, Register src, bool is_single);
 664   void cmov_cmp_fp_ge(FloatRegister cmp1, FloatRegister cmp2, Register dst, Register src, bool is_single);
 665   void cmov_cmp_fp_lt(FloatRegister cmp1, FloatRegister cmp2, Register dst, Register src, bool is_single);
 666   void cmov_cmp_fp_gt(FloatRegister cmp1, FloatRegister cmp2, Register dst, Register src, bool is_single);
 667 
 668  public:
 669   // We try to follow RISC-V asm mnemonics.
 670   // But as we don't lay out a reachable GOT,
 671   // we often need to resort to movptr, li <48imm>.
 672   // https://github.com/riscv-non-isa/riscv-asm-manual/blob/main/src/asm-manual.adoc
 673 
 674   // HotSpot only uses the standard calling convention using x1/ra.
 675   // The alternative calling convention using x5/t0 is not used.
 676   // Using x5 as a temp causes the CPU to mispredict returns.
 677 
 678   // JALR, return address stack updates:
 679   // | rd is x1/x5 | rs1 is x1/x5 | rd=rs1 | RAS action
 680   // | ----------- | ------------ | ------ |-------------
 681   // |     No      |      No      |   -    | None
 682   // |     No      |      Yes     |   -    | Pop
 683   // |     Yes     |      No      |   -    | Push
 684   // |     Yes     |      Yes     |   No   | Pop, then push
 685   // |     Yes     |      Yes     |   Yes  | Push
 686   //
 687   // JAL, return address stack updates:
 688   // | rd is x1/x5 | RAS action
 689   // | ----------- | ----------
 690   // |     Yes     | Push
 691   // |     No      | None
 692   //
 693   // JUMPs   use Rd = x0/zero and Rs = x6/t1 or imm
 694   // CALLs   use Rd = x1/ra   and Rs = x6/t1 or imm (or x1/ra*)
 695   // RETURNs use Rd = x0/zero and Rs = x1/ra
 696   // *x1/ra should not normally be used as Rs for calls; special case only.
 697 
 698   // jump: jal x0, offset
 699   // For long reach uses temp register for:
 700   // la + jr
       // The Label overload defaults temp to noreg, unlike the address overloads (t1).
 701   void j(const address dest, Register temp = t1);
 702   void j(const Address &dest, Register temp = t1);
 703   void j(Label &l, Register temp = noreg);
 704 
 705   // jump register: jalr x0, offset(rs)
       // NOTE(review): the parameter is named Rd, but per the encoding above it is
       // the base register (rs) of the jump target; the link register is x0.
 706   void jr(Register Rd, int32_t offset = 0);
 707 
 708   // call: la + jalr x1
 709   void call(const address dest, Register temp = t1);
 710 
 711   // jalr: jalr x1, offset(rs)
       // Two-operand form that links through x1; distinct from the three-operand
       // Assembler::jalr that ret() calls with an explicit destination register.
 712   void jalr(Register Rs, int32_t offset = 0);
 713 
 714   // Emit a runtime call. Only invalidates the tmp register which
 715   // is used to keep the entry address for jalr/movptr.
 716   // Uses call() for intra code cache, else movptr + jalr.
 717   // Clobbers t1
 718   void rt_call(address dest, Register tmp = t1);
 719 
 720   // ret: jalr x0, 0(x1)
       // Jumps to the address held in x1 with x0 as the destination register.
       // Assembler::jalr is qualified explicitly so that the three-operand
       // base-class form is used rather than MacroAssembler::jalr(Register, int32_t).
 721   inline void ret() {
 722     Assembler::jalr(x0, x1, 0);
 723   }
 724 
 725   //label
       // Label-target branches. The zero-compare group takes one register; the
       // beq..bgeu group takes two.
       // NOTE(review): is_far presumably selects a longer-reach code sequence for
       // labels that may lie outside conditional-branch range - confirm in the .cpp.
 726   void beqz(Register Rs, Label &l, bool is_far = false);
 727   void bnez(Register Rs, Label &l, bool is_far = false);
 728   void blez(Register Rs, Label &l, bool is_far = false);
 729   void bgez(Register Rs, Label &l, bool is_far = false);
 730   void bltz(Register Rs, Label &l, bool is_far = false);
 731   void bgtz(Register Rs, Label &l, bool is_far = false);
 732 
 733   void beq (Register Rs1, Register Rs2, Label &L, bool is_far = false);
 734   void bne (Register Rs1, Register Rs2, Label &L, bool is_far = false);
 735   void blt (Register Rs1, Register Rs2, Label &L, bool is_far = false);
 736   void bge (Register Rs1, Register Rs2, Label &L, bool is_far = false);
 737   void bltu(Register Rs1, Register Rs2, Label &L, bool is_far = false);
 738   void bgeu(Register Rs1, Register Rs2, Label &L, bool is_far = false);
 739 
       // bgt/ble/bgtu/bleu, each with an address-target and a Label-target overload.
       // NOTE(review): presumably pseudo-instructions built on blt/bge/bltu/bgeu
       // with the operands swapped - the bodies are not in this header; confirm.
 740   void bgt (Register Rs, Register Rt, const address dest);
 741   void ble (Register Rs, Register Rt, const address dest);
 742   void bgtu(Register Rs, Register Rt, const address dest);
 743   void bleu(Register Rs, Register Rt, const address dest);
 744 
 745   void bgt (Register Rs, Register Rt, Label &l, bool is_far = false);
 746   void ble (Register Rs, Register Rt, Label &l, bool is_far = false);
 747   void bgtu(Register Rs, Register Rt, Label &l, bool is_far = false);
 748   void bleu(Register Rs, Register Rt, Label &l, bool is_far = false);
 749 
     // INSN_ENTRY_RELOC opens a member function whose signature is `header` and
     // deliberately leaves the body unclosed: the macro user appends the remaining
     // statements and the closing brace. The prologue it emits
     //  - guarantees rtype == relocInfo::internal_word_type,
     //  - records a relocation for InternalAddress(dest), and
     //  - opens an IncompressibleScope that lasts for the rest of the body.
     // `header` must therefore declare parameters named `dest` and `rtype`.
 750 #define INSN_ENTRY_RELOC(result_type, header)                               \
 751   result_type header {                                                      \
 752     guarantee(rtype == relocInfo::internal_word_type,                       \
 753               "only internal_word_type relocs make sense here");            \
 754     relocate(InternalAddress(dest).rspec());                                \
 755     IncompressibleScope scope(this);  /* relocations */
 756 
     // INSN(NAME) defines two address-target overloads of the two-register branch NAME:
     //  - NAME(Rs1, Rs2, dest): computes dest - pc(), guarantees that the offset is
     //    even and satisfies is_simm13, and emits Assembler::NAME with that offset.
     //  - NAME(Rs1, Rs2, dest, rtype): the INSN_ENTRY_RELOC prologue followed by a
     //    call to the plain overload.
 757 #define INSN(NAME)                                                                                       \
 758   void NAME(Register Rs1, Register Rs2, const address dest) {                                            \
 759     assert_cond(dest != nullptr);                                                                        \
 760     int64_t offset = dest - pc();                                                                        \
 761     guarantee(is_simm13(offset) && is_even(offset),                                                      \
 762               "offset is invalid: is_simm_13: %s offset: " INT64_FORMAT,                                 \
 763               BOOL_TO_STR(is_simm13(offset)), offset);                                                   \
 764     Assembler::NAME(Rs1, Rs2, offset);                                                                   \
 765   }                                                                                                      \
 766   INSN_ENTRY_RELOC(void, NAME(Register Rs1, Register Rs2, address dest, relocInfo::relocType rtype))     \
 767     NAME(Rs1, Rs2, dest);                                                                                \
 768   }
 769 
 770   INSN(beq);
 771   INSN(bne);
 772   INSN(bge);
 773   INSN(bgeu);
 774   INSN(blt);
 775   INSN(bltu);
 776 
 777 #undef INSN
 778 
 779 #undef INSN_ENTRY_RELOC
 780 
       // Floating-point compare-and-branch to a Label; the float_* and double_*
       // families offer the same six comparisons.
       // NOTE(review): presumably float_* compare single and double_* compare double
       // precision values, and is_unordered selects whether the branch is also
       // taken for unordered (NaN) operands - confirm in the .cpp.
 781   void float_beq(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
 782   void float_bne(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
 783   void float_ble(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
 784   void float_bge(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
 785   void float_blt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
 786   void float_bgt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
 787 
 788   void double_beq(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
 789   void double_bne(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
 790   void double_ble(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
 791   void double_bge(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
 792   void double_blt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
 793   void double_bgt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
 794 
 795 private:
       // Bitset-taking workers; the public RegSet / FloatRegSet / VectorRegSet
       // overloads declared below forward regs.bits() to these.
       // NOTE(review): the int return value is ignored by those wrappers; its meaning
       // (presumably a count of registers handled) is defined in the .cpp - confirm.
 796   int push_reg(unsigned int bitset, Register stack);
 797   int pop_reg(unsigned int bitset, Register stack);
 798   int push_fp(unsigned int bitset, Register stack);
 799   int pop_fp(unsigned int bitset, Register stack);
 800 #ifdef COMPILER2
 801   int push_v(unsigned int bitset, Register stack);
 802   int pop_v(unsigned int bitset, Register stack);
 803 #endif // COMPILER2
 804 
 805   // The signed 20-bit upper imm can materialize at most negative 0xF...F80000000, two G.
 806   // The following signed 12-bit imm can at max subtract 0x800, two K, from that previously loaded two G.
       // Returns true when x lies in the half-open range [-2G - 2K, 2G - 2K).
 807   bool is_valid_32bit_offset(int64_t x) {
 808     constexpr int64_t twoG = (2 * G);
 809     constexpr int64_t twoK = (2 * K);
 810     return x < (twoG - twoK) && x >= (-twoG - twoK);
 811   }
 812 
 813   // Ensure that the auipc can reach the destination at x from anywhere within
 814   // the code cache so that if it is relocated we know it will still reach.
       // x is treated as an absolute address: its displacement from both
       // CodeCache::low_bound() and CodeCache::high_bound() must pass
       // is_valid_32bit_offset().
 815   bool is_32bit_offset_from_codecache(int64_t x) {
 816     int64_t low  = (int64_t)CodeCache::low_bound();
 817     int64_t high = (int64_t)CodeCache::high_bound();
 818     return is_valid_32bit_offset(x - low) && is_valid_32bit_offset(x - high);
 819   }
 820 
 821 public:
       // Single-register push/pop (bodies are not in this header).
 822   void push_reg(Register Rs);
 823   void pop_reg(Register Rd);
       // Register-set wrappers: emit nothing for an empty set, otherwise forward the
       // set's bit mask to the private bitset worker and discard its int result.
 824   void push_reg(RegSet regs, Register stack) { if (regs.bits()) push_reg(regs.bits(), stack); }
 825   void pop_reg(RegSet regs, Register stack)  { if (regs.bits()) pop_reg(regs.bits(), stack); }
 826   void push_fp(FloatRegSet regs, Register stack) { if (regs.bits()) push_fp(regs.bits(), stack); }
 827   void pop_fp(FloatRegSet regs, Register stack)  { if (regs.bits()) pop_fp(regs.bits(), stack); }
 828 #ifdef COMPILER2
 829   void push_v(VectorRegSet regs, Register stack) { if (regs.bits()) push_v(regs.bits(), stack); }
 830   void pop_v(VectorRegSet regs, Register stack)  { if (regs.bits()) pop_v(regs.bits(), stack); }
 831 #endif // COMPILER2
 832 
 833   // Push and pop everything that might be clobbered by a native
 834   // runtime call except t0 and t1. (They are always
 835   // temporary registers, so we don't have to protect them.)
 836   // Additional registers can be excluded in a passed RegSet.
 837   void push_call_clobbered_registers_except(RegSet exclude);
 838   void pop_call_clobbered_registers_except(RegSet exclude);
 839 
       // Convenience forms that pass an empty exclusion set.
 840   void push_call_clobbered_registers() {
 841     push_call_clobbered_registers_except(RegSet());
 842   }
 843   void pop_call_clobbered_registers() {
 844     pop_call_clobbered_registers_except(RegSet());
 845   }
 846 
       // NOTE(review): presumably saves/restores the CPU register state, including
       // the vector registers only when save_vectors/restore_vectors is true -
       // the bodies are not in this header; confirm in the .cpp.
 847   void push_CPU_state(bool save_vectors = false, int vector_size_in_bytes = 0);
 848   void pop_CPU_state(bool restore_vectors = false, int vector_size_in_bytes = 0);
 849 
       // Both default the thread register argument to xthread.
 850   void push_cont_fastpath(Register java_thread = xthread);
 851   void pop_cont_fastpath(Register java_thread = xthread);
 852 
 853   // if heap base register is used - reinit it with the correct value
 854   void reinit_heapbase();
 855 
       // Binds L through Assembler::bind, then calls code()->clear_last_insn() so
       // that no instruction emitted before the label is remembered as "last".
 856   void bind(Label& L) {
 857     Assembler::bind(L);
 858     // fences across basic blocks should not be merged
 859     code()->clear_last_insn();
 860   }
 861 
       // Pointer-to-member-function types for the emitters handed to wrap_label():
       // address-target compare-and-branch, Label-target compare-and-branch, and
       // register + address forms. The load macros further down pass
       // &MacroAssembler::NAME as a jal_jalr_insn.
 862   typedef void (MacroAssembler::* compare_and_branch_insn)(Register Rs1, Register Rs2, const address dest);
 863   typedef void (MacroAssembler::* compare_and_branch_label_insn)(Register Rs1, Register Rs2, Label &L, bool is_far);
 864   typedef void (MacroAssembler::* jal_jalr_insn)(Register Rt, address dest);
 865 
       // NOTE(review): presumably resolves L to an address and invokes insn on it,
       // using neg_insn when is_far is set - confirm in the .cpp.
 866   void wrap_label(Register r, Label &L, jal_jalr_insn insn);
 867   void wrap_label(Register r1, Register r2, Label &L,
 868                   compare_and_branch_insn insn,
 869                   compare_and_branch_label_insn neg_insn, bool is_far = false);
 870 
       // Load-address forms for a Label, a raw address, and an Address operand.
       // NOTE(review): in the int32_t &offset overload, offset is presumably an
       // out-parameter receiving the part of addr not materialized in Rd (compare
       // mv(Register, address, int32_t &) below) - confirm in the .cpp.
 871   void la(Register Rd, Label &label);
 872   void la(Register Rd, const address addr);
 873   void la(Register Rd, const address addr, int32_t &offset);
 874   void la(Register Rd, const Address &adr);
 875 
       // Load-immediate forms for 16-bit unsigned, 32-bit and 64-bit immediates.
 876   void li16u(Register Rd, uint16_t imm);
 877   void li32(Register Rd, int32_t imm);
 878   void li  (Register Rd, int64_t imm);  // optimized load immediate
 879 
 880   // mv
       // Loads the address value into Rd as a 64-bit immediate via li().
 881   void mv(Register Rd, address addr)                  { li(Rd, (int64_t)addr); }
       // offset is an out-parameter: it receives the low 12 bits of addr,
       // sign-extended, and Rd is loaded with addr - offset, so Rd + offset == addr.
 882   void mv(Register Rd, address addr, int32_t &offset) {
 883     // Split address into a lower 12-bit sign-extended offset and the remainder,
 884     // so that the offset could be encoded in jalr or load/store instruction.
 885     offset = ((int32_t)(int64_t)addr << 20) >> 20;
 886     li(Rd, (int64_t)addr - offset);
 887   }
 888 
       // Any integral value is converted to int64_t and loaded with li().
 889   template<typename T, ENABLE_IF(std::is_integral<T>::value)>
 890   inline void mv(Register Rd, T o)                    { li(Rd, (int64_t)o); }
 891 
       // Dispatches to the register or the constant overload of mv depending on
       // what src holds.
 892   void mv(Register Rd, RegisterOrConstant src) {
 893     if (src.is_register()) {
 894       mv(Rd, src.as_register());
 895     } else {
 896       mv(Rd, src.as_constant());
 897     }
 898   }
 899 
 900   // Generates a load of a 48-bit constant which can be
 901   // patched to any 48-bit constant, i.e. address.
 902   // In the common case, supply an additional temp register
 903   // to shorten the instruction sequence.
 904   void movptr(Register Rd, const Address &addr, Register tmp = noreg);
 905   void movptr(Register Rd, address addr, Register tmp = noreg);
 906   void movptr(Register Rd, address addr, int32_t &offset, Register tmp = noreg);
 907 
 908  private:
       // NOTE(review): presumably the two alternative sequences behind the public
       // movptr overloads, movptr2 being the variant that uses tmp - confirm in the .cpp.
 909   void movptr1(Register Rd, uintptr_t addr, int32_t &offset);
 910   void movptr2(Register Rd, uintptr_t addr, int32_t &offset, Register tmp);
 911  public:
 912   // float imm move
       // NOTE(review): presumably each can_*_imm_load predicate reports whether the
       // matching fli_* can encode imm directly - confirm in the .cpp.
 913   static bool can_hf_imm_load(short imm);
 914   static bool can_fp_imm_load(float imm);
 915   static bool can_dp_imm_load(double imm);
 916   void fli_h(FloatRegister Rd, short imm);
 917   void fli_s(FloatRegister Rd, float imm);
 918   void fli_d(FloatRegister Rd, double imm);
 919 
 920   // arith
       // Immediate forms of add/sub. NOTE(review): tmp (default t0) is presumably
       // used to materialize immediates that do not fit the instruction encoding -
       // confirm in the .cpp.
 921   void add (Register Rd, Register Rn, int64_t increment, Register tmp = t0);
 922   void sub (Register Rd, Register Rn, int64_t decrement, Register tmp = t0);
 923   void addw(Register Rd, Register Rn, int64_t increment, Register tmp = t0);
 924   void subw(Register Rd, Register Rn, int64_t decrement, Register tmp = t0);
 925 
       // Subtracts an immediate by emitting addi with the negated value; asserts
       // that -decrement satisfies is_simm12.
 926   void subi(Register Rd, Register Rn, int64_t decrement) {
 927     assert(is_simm12(-decrement), "Must be");
 928     addi(Rd, Rn, -decrement);
 929   }
 930 
       // Same as subi but emits addiw.
 931   void subiw(Register Rd, Register Rn, int64_t decrement) {
 932     assert(is_simm12(-decrement), "Must be");
 933     addiw(Rd, Rn, -decrement);
 934   }
 935 
     // Declares three-register add/addw/sub/subw members that simply forward to the
     // Assembler instruction of the same name.
     // NOTE(review): presumably needed because the immediate overloads declared
     // above would otherwise hide the base-class names - confirm.
 936 #define INSN(NAME)                                               \
 937   inline void NAME(Register Rd, Register Rs1, Register Rs2) {    \
 938     Assembler::NAME(Rd, Rs1, Rs2);                               \
 939   }
 940 
 941   INSN(add);
 942   INSN(addw);
 943   INSN(sub);
 944   INSN(subw);
 945 
 946 #undef INSN
 947 
 948   // logic
 949   void andrw(Register Rd, Register Rs1, Register Rs2);
 950   void orrw(Register Rd, Register Rs1, Register Rs2);
 951   void xorrw(Register Rd, Register Rs1, Register Rs2);
 952 
 953   // logic with negate
 954   void andn(Register Rd, Register Rs1, Register Rs2);
 955   void orn(Register Rd, Register Rs1, Register Rs2);
 956 
 957   // reverse bytes
       // Both default their two temporaries to t0 and t1.
 958   void revbw(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1);  // reverse bytes in lower word, sign-extend
 959   void revb(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);  // reverse bytes in doubleword
 960 
       // Rotates by a register or an immediate shift amount; tmp defaults to t0.
 961   void ror(Register dst, Register src, Register shift, Register tmp = t0);
 962   void ror(Register dst, Register src, uint32_t shift, Register tmp = t0);
 963   void rolw(Register dst, Register src, uint32_t shift, Register tmp = t0);
 964 
       // NOTE(review): presumably ORs src into the memory word at adr using
       // tmp1/tmp2 (default t0/t1) as temporaries - confirm in the .cpp.
 965   void orptr(Address adr, RegisterOrConstant src, Register tmp1 = t0, Register tmp2 = t1);
 966 
 967 // Load and Store Instructions
     // INSN_ENTRY_RELOC opens a member function with signature `header` and leaves
     // the body unclosed for the macro user to finish. Its prologue guarantees
     // rtype == relocInfo::internal_word_type, records a relocation for
     // InternalAddress(dest) and opens an IncompressibleScope; `header` must
     // therefore declare parameters named `dest` and `rtype`.
 968 #define INSN_ENTRY_RELOC(result_type, header)                               \
 969   result_type header {                                                      \
 970     guarantee(rtype == relocInfo::internal_word_type,                       \
 971               "only internal_word_type relocs make sense here");            \
 972     relocate(InternalAddress(dest).rspec());                                \
 973     IncompressibleScope scope(this);  /* relocations */
 974 
     // INSN(NAME) defines four overloads of the integer load NAME:
     //  - NAME(Rd, dest): when CodeCache::contains(dest), emits auipc + NAME using
     //    the pc-relative distance (asserted by is_valid_32bit_offset); the 0x800
     //    added for auipc compensates for the sign-extended low 12 bits that the
     //    load adds back. Otherwise emits movptr + NAME. Rd doubles as the address
     //    register in both paths.
     //  - NAME(Rd, dest, rtype): the INSN_ENTRY_RELOC prologue, then the overload above.
     //  - NAME(Rd, adr, temp): a literal address is relocated with adr.rspec() and
     //    forwarded to NAME(Rd, adr.target()). A base+offset address is used
     //    directly when the offset satisfies is_simm12; otherwise la() forms the
     //    address minus its sign-extended low 12 bits, in temp when Rd == adr.base()
     //    and in Rd otherwise, and the load supplies the remaining 12 bits.
     //  - NAME(Rd, L): hands the Label to wrap_label() with &MacroAssembler::NAME.
 975 #define INSN(NAME)                                                                                 \
 976   void NAME(Register Rd, address dest) {                                                           \
 977     assert_cond(dest != nullptr);                                                                  \
 978     if (CodeCache::contains(dest)) {                                                               \
 979       int64_t distance = dest - pc();                                                              \
 980       assert(is_valid_32bit_offset(distance), "Must be");                                          \
 981       auipc(Rd, (int32_t)distance + 0x800);                                                        \
 982       Assembler::NAME(Rd, Rd, ((int32_t)distance << 20) >> 20);                                    \
 983     } else {                                                                                       \
 984       int32_t offset = 0;                                                                          \
 985       movptr(Rd, dest, offset);                                                                    \
 986       Assembler::NAME(Rd, Rd, offset);                                                             \
 987     }                                                                                              \
 988   }                                                                                                \
 989   INSN_ENTRY_RELOC(void, NAME(Register Rd, address dest, relocInfo::relocType rtype))              \
 990     NAME(Rd, dest);                                                                                \
 991   }                                                                                                \
 992   void NAME(Register Rd, const Address &adr, Register temp = t0) {                                 \
 993     switch (adr.getMode()) {                                                                       \
 994       case Address::literal: {                                                                     \
 995         relocate(adr.rspec(), [&] {                                                                \
 996           NAME(Rd, adr.target());                                                                  \
 997         });                                                                                        \
 998         break;                                                                                     \
 999       }                                                                                            \
1000       case Address::base_plus_offset: {                                                            \
1001         if (is_simm12(adr.offset())) {                                                             \
1002           Assembler::NAME(Rd, adr.base(), adr.offset());                                           \
1003         } else {                                                                                   \
1004           int32_t offset = ((int32_t)adr.offset() << 20) >> 20;                                    \
1005           if (Rd == adr.base()) {                                                                  \
1006             la(temp, Address(adr.base(), adr.offset() - offset));                                  \
1007             Assembler::NAME(Rd, temp, offset);                                                     \
1008           } else {                                                                                 \
1009             la(Rd, Address(adr.base(), adr.offset() - offset));                                    \
1010             Assembler::NAME(Rd, Rd, offset);                                                       \
1011           }                                                                                        \
1012         }                                                                                          \
1013         break;                                                                                     \
1014       }                                                                                            \
1015       default:                                                                                     \
1016         ShouldNotReachHere();                                                                      \
1017     }                                                                                              \
1018   }                                                                                                \
1019   void NAME(Register Rd, Label &L) {                                                               \
1020     wrap_label(Rd, L, &MacroAssembler::NAME);                                                      \
1021   }
1022 
1023   INSN(lb);
1024   INSN(lbu);
1025   INSN(lh);
1026   INSN(lhu);
1027   INSN(lw);
1028   INSN(lwu);
1029   INSN(ld);
1030 
1031 #undef INSN
1032 
     // INSN(NAME) defines three overloads of the floating-point load NAME. Because
     // Rd is a FloatRegister, the address is always formed in the integer register
     // temp (default t0):
     //  - NAME(Rd, dest, temp): auipc + NAME when CodeCache::contains(dest),
     //    otherwise movptr + NAME.
     //  - NAME(Rd, dest, rtype, temp): the INSN_ENTRY_RELOC prologue, then the
     //    overload above.
     //  - NAME(Rd, adr, temp): a literal address is relocated and forwarded to
     //    NAME(Rd, adr.target(), temp); a base+offset address is used directly when
     //    the offset satisfies is_simm12, otherwise la(temp, ...) forms the address
     //    minus its sign-extended low 12 bits and the load supplies the rest.
1033 #define INSN(NAME)                                                                                 \
1034   void NAME(FloatRegister Rd, address dest, Register temp = t0) {                                  \
1035     assert_cond(dest != nullptr);                                                                  \
1036     if (CodeCache::contains(dest)) {                                                               \
1037       int64_t distance = dest - pc();                                                              \
1038       assert(is_valid_32bit_offset(distance), "Must be");                                          \
1039       auipc(temp, (int32_t)distance + 0x800);                                                      \
1040       Assembler::NAME(Rd, temp, ((int32_t)distance << 20) >> 20);                                  \
1041     } else {                                                                                       \
1042       int32_t offset = 0;                                                                          \
1043       movptr(temp, dest, offset);                                                                  \
1044       Assembler::NAME(Rd, temp, offset);                                                           \
1045     }                                                                                              \
1046   }                                                                                                \
1047   INSN_ENTRY_RELOC(void, NAME(FloatRegister Rd, address dest,                                      \
1048                               relocInfo::relocType rtype, Register temp = t0))                     \
1049     NAME(Rd, dest, temp);                                                                          \
1050   }                                                                                                \
1051   void NAME(FloatRegister Rd, const Address &adr, Register temp = t0) {                            \
1052     switch (adr.getMode()) {                                                                       \
1053       case Address::literal: {                                                                     \
1054         relocate(adr.rspec(), [&] {                                                                \
1055           NAME(Rd, adr.target(), temp);                                                            \
1056         });                                                                                        \
1057         break;                                                                                     \
1058       }                                                                                            \
1059       case Address::base_plus_offset: {                                                            \
1060         if (is_simm12(adr.offset())) {                                                             \
1061           Assembler::NAME(Rd, adr.base(), adr.offset());                                           \
1062         } else {                                                                                   \
1063           int32_t offset = ((int32_t)adr.offset() << 20) >> 20;                                    \
1064           la(temp, Address(adr.base(), adr.offset() - offset));                                    \
1065           Assembler::NAME(Rd, temp, offset);                                                       \
1066         }                                                                                          \
1067         break;                                                                                     \
1068       }                                                                                            \
1069       default:                                                                                     \
1070         ShouldNotReachHere();                                                                      \
1071     }                                                                                              \
1072   }
1073 
1074   INSN(flh);
1075   INSN(flw);
1076   INSN(fld);
1077 
1078 #undef INSN
1079 
     // INSN(NAME, REGISTER) adds the relocating store overload
     // NAME(Rs, dest, rtype, temp): the INSN_ENTRY_RELOC prologue followed by a call
     // to NAME(Rs, dest, temp), which the store macros below define.
1080 #define INSN(NAME, REGISTER)                                                                       \
1081   INSN_ENTRY_RELOC(void, NAME(REGISTER Rs, address dest,                                           \
1082                               relocInfo::relocType rtype, Register temp = t0))                     \
1083     NAME(Rs, dest, temp);                                                                          \
1084   }
1085 
1086   INSN(sb,  Register);
1087   INSN(sh,  Register);
1088   INSN(sw,  Register);
1089   INSN(sd,  Register);
1090   INSN(fsw, FloatRegister);
1091   INSN(fsd, FloatRegister);
1092 
1093 #undef INSN
1094 
     // INSN(NAME) defines two overloads of the integer store NAME. The address is
     // formed in temp (default t0) while Rs still holds the value to store, so
     // every path that writes temp asserts that Rs and temp are different registers:
     //  - NAME(Rs, dest, temp): auipc + NAME when CodeCache::contains(dest),
     //    otherwise movptr + NAME.
     //  - NAME(Rs, adr, temp): a literal address is relocated and forwarded to the
     //    overload above; a base+offset address is used directly when the offset
     //    satisfies is_simm12, otherwise la(temp, ...) forms the address minus its
     //    sign-extended low 12 bits and the store supplies the rest.
1095 #define INSN(NAME)                                                                                 \
1096   void NAME(Register Rs, address dest, Register temp = t0) {                                       \
1097     assert_cond(dest != nullptr);                                                                  \
1098     assert_different_registers(Rs, temp);                                                          \
1099     if (CodeCache::contains(dest)) {                                                               \
1100       int64_t distance = dest - pc();                                                              \
1101       assert(is_valid_32bit_offset(distance), "Must be");                                          \
1102       auipc(temp, (int32_t)distance + 0x800);                                                      \
1103       Assembler::NAME(Rs, temp, ((int32_t)distance << 20) >> 20);                                  \
1104     } else {                                                                                       \
1105       int32_t offset = 0;                                                                          \
1106       movptr(temp, dest, offset);                                                                  \
1107       Assembler::NAME(Rs, temp, offset);                                                           \
1108     }                                                                                              \
1109   }                                                                                                \
1110   void NAME(Register Rs, const Address &adr, Register temp = t0) {                                 \
1111     switch (adr.getMode()) {                                                                       \
1112       case Address::literal: {                                                                     \
1113         assert_different_registers(Rs, temp);                                                      \
1114         relocate(adr.rspec(), [&] {                                                                \
1115           NAME(Rs, adr.target(), temp);                                                            \
1116         });                                                                                        \
1117         break;                                                                                     \
1118       }                                                                                            \
1119       case Address::base_plus_offset: {                                                            \
1120         if (is_simm12(adr.offset())) {                                                             \
1121           Assembler::NAME(Rs, adr.base(), adr.offset());                                           \
1122         } else {                                                                                   \
1123           assert_different_registers(Rs, temp);                                                    \
1124           int32_t offset = ((int32_t)adr.offset() << 20) >> 20;                                    \
1125           la(temp, Address(adr.base(), adr.offset() - offset));                                    \
1126           Assembler::NAME(Rs, temp, offset);                                                       \
1127         }                                                                                          \
1128         break;                                                                                     \
1129       }                                                                                            \
1130       default:                                                                                     \
1131         ShouldNotReachHere();                                                                      \
1132     }                                                                                              \
1133   }
1134 
1135   INSN(sb);
1136   INSN(sh);
1137   INSN(sw);
1138   INSN(sd);
1139 
1140 #undef INSN
1141 
1142 #define INSN(NAME)                                                                                 \
1143   void NAME(FloatRegister Rs, address dest, Register temp = t0) {                                  \
1144     assert_cond(dest != nullptr);                                                                  \
1145     if (CodeCache::contains(dest)) {                                                               \
1146       int64_t distance = dest - pc();                                                              \
1147       assert(is_valid_32bit_offset(distance), "Must be");                                          \
1148       auipc(temp, (int32_t)distance + 0x800);                                                      \
1149       Assembler::NAME(Rs, temp, ((int32_t)distance << 20) >> 20);                                  \
1150     } else {                                                                                       \
1151       int32_t offset = 0;                                                                          \
1152       movptr(temp, dest, offset);                                                                  \
1153       Assembler::NAME(Rs, temp, offset);                                                           \
1154     }                                                                                              \
1155   }                                                                                                \
1156   void NAME(FloatRegister Rs, const Address &adr, Register temp = t0) {                            \
1157     switch (adr.getMode()) {                                                                       \
1158       case Address::literal: {                                                                     \
1159         relocate(adr.rspec(), [&] {                                                                \
1160           NAME(Rs, adr.target(), temp);                                                            \
1161         });                                                                                        \
1162         break;                                                                                     \
1163       }                                                                                            \
1164       case Address::base_plus_offset: {                                                            \
1165         if (is_simm12(adr.offset())) {                                                             \
1166           Assembler::NAME(Rs, adr.base(), adr.offset());                                           \
1167         } else {                                                                                   \
1168           int32_t offset = ((int32_t)adr.offset() << 20) >> 20;                                    \
1169           la(temp, Address(adr.base(), adr.offset() - offset));                                    \
1170           Assembler::NAME(Rs, temp, offset);                                                       \
1171         }                                                                                          \
1172         break;                                                                                     \
1173       }                                                                                            \
1174       default:                                                                                     \
1175         ShouldNotReachHere();                                                                      \
1176     }                                                                                              \
1177   }
1178 
1179   INSN(fsw);  // instantiate the helper overloads above for the single-precision float store
1180   INSN(fsd);  // ... and for the double-precision float store
1181 
1182 #undef INSN
1183 
1184 #undef INSN_ENTRY_RELOC  // helper macros are retired here; nothing below uses them
1185 
       // Compare-and-exchange and atomic read-modify-write helpers.
       // Only the declarations are visible in this header; the notes below are
       // inferred from names and signatures -- confirm against the implementation.
       //
       // cmpxchg_obj_header / cmpxchgptr take a `succeed` label and a `fail`
       // label pointer (presumably the branch targets for the two outcomes).
1186   void cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp, Label &succeed, Label *fail);
1187   void cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, Label &succeed, Label *fail);
       // cmpxchg / weak_cmpxchg: `size` selects the operand width, `acquire` and
       // `release` carry the Aqrl ordering. result_as_bool presumably makes
       // `result` a success flag instead of the loaded value -- TODO confirm.
1188   void cmpxchg(Register addr, Register expected,
1189                Register new_val,
1190                Assembler::operand_size size,
1191                Assembler::Aqrl acquire, Assembler::Aqrl release,
1192                Register result, bool result_as_bool = false);
1193   void weak_cmpxchg(Register addr, Register expected,
1194                     Register new_val,
1195                     Assembler::operand_size size,
1196                     Assembler::Aqrl acquire, Assembler::Aqrl release,
1197                     Register result);
       // *_narrow_value variants take three extra temporaries; the helper's
       // shift / mask / aligned_addr parameters suggest sub-word operands are
       // handled inside an aligned wider word -- NOTE(review): confirm.
1198   void cmpxchg_narrow_value_helper(Register addr, Register expected, Register new_val,
1199                                    Assembler::operand_size size,
1200                                    Register shift, Register mask, Register aligned_addr);
1201   void cmpxchg_narrow_value(Register addr, Register expected,
1202                             Register new_val,
1203                             Assembler::operand_size size,
1204                             Assembler::Aqrl acquire, Assembler::Aqrl release,
1205                             Register result, bool result_as_bool,
1206                             Register tmp1, Register tmp2, Register tmp3);
1207   void weak_cmpxchg_narrow_value(Register addr, Register expected,
1208                                  Register new_val,
1209                                  Assembler::operand_size size,
1210                                  Assembler::Aqrl acquire, Assembler::Aqrl release,
1211                                  Register result,
1212                                  Register tmp1, Register tmp2, Register tmp3);
1213 
       // Atomic add: `incr` may be a register or a constant. Suffixes presumably
       // mean: w = 32-bit operand, al = acquire+release ordering -- TODO confirm.
1214   void atomic_add(Register prev, RegisterOrConstant incr, Register addr);
1215   void atomic_addw(Register prev, RegisterOrConstant incr, Register addr);
1216   void atomic_addal(Register prev, RegisterOrConstant incr, Register addr);
1217   void atomic_addalw(Register prev, RegisterOrConstant incr, Register addr);
1218 
       // Atomic exchange, same suffix scheme; the trailing u presumably denotes
       // an unsigned (zero-extended) 32-bit result -- TODO confirm.
1219   void atomic_xchg(Register prev, Register newv, Register addr);
1220   void atomic_xchgw(Register prev, Register newv, Register addr);
1221   void atomic_xchgal(Register prev, Register newv, Register addr);
1222   void atomic_xchgalw(Register prev, Register newv, Register addr);
1223   void atomic_xchgwu(Register prev, Register newv, Register addr);
1224   void atomic_xchgalwu(Register prev, Register newv, Register addr);
1225 
       // Compare-and-swap with explicit operand size; both orderings default
       // to Assembler::relaxed.
1226   void atomic_cas(Register prev, Register newv, Register addr, Assembler::operand_size size,
1227               Assembler::Aqrl acquire = Assembler::relaxed, Assembler::Aqrl release = Assembler::relaxed);
1228 
1229   // Emit a far call/jump. Only invalidates the tmp register which
1230   // is used to keep the entry address for jalr.
1231   // The address must be inside the code cache.
1232   // Supported entry.rspec():
1233   // - relocInfo::external_word_type
1234   // - relocInfo::runtime_call_type
1235   // - relocInfo::none
1236   // Clobbers t1 by default (t1 is the default value of tmp).
1237   void far_call(const Address &entry, Register tmp = t1);
1238   void far_jump(const Address &entry, Register tmp = t1);
1239 
1240   static int far_branch_size() {
         // Size in bytes of a far branch: two instructions (auipc + jalr),
         // see far_call() & far_jump().
         const int far_branch_instructions = 2;
         return far_branch_instructions * MacroAssembler::instruction_size;
1242   }
1243 
1244   void load_byte_map_base(Register reg);  // presumably loads the card table's byte_map_base into reg -- confirm in the implementation
1245 
1246   void bang_stack_with_offset(int offset) {
         // Touch the stack at `offset` bytes below sp by storing zero there.
         // The stack grows down, so the caller passes a positive offset which is
         // subtracted from sp. Uses t0 as the scratch address register.
1248     assert(offset > 0, "offset must be positive; it is subtracted from sp");
1249     sub(t0, sp, offset);
1250     sd(zr, Address(t0));
1251   }
1252 
1253   virtual void _call_Unimplemented(address call_site) {
         // Only moves call_site into t1; virtual, so subclasses may override.
1254     mv(t1, call_site);
1255   }
1256 
       // Convenience wrapper: passes the enclosing function's
       // __PRETTY_FUNCTION__ string (cast to address) as the call site.
1257   #define call_Unimplemented() _call_Unimplemented((address)__PRETTY_FUNCTION__)
1258 
1259   // Frame creation and destruction shared between JITs.
       // framesize is presumably in bytes -- TODO confirm against the callers.
1260   void build_frame(int framesize);
1261   void remove_frame(int framesize);
1262 
1263   void reserved_stack_check();
1264 
       // Safepoint polling page access; rtype is the relocation type recorded
       // for the access (semantics defined in the implementation).
1265   void get_polling_page(Register dest, relocInfo::relocType rtype);
1266   void read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype);
1267 
1268   // RISCV64 OpenJDK uses three different types of calls:
1269   //
1270   //   - far call: auipc reg, pc_relative_offset; jalr ra, reg, offset
1271   //     The offset has the range [-(2G + 2K), 2G - 2K). Addresses in the code
1272   //     cache that are out of this range require an indirect call.
1273   //     If a jump is needed rather than a call, a far jump 'jalr x0, reg, offset'
1274   //     can be used instead.
1275   //     All instructions are embedded at a call site.
1276   //
1277   //   - indirect call: movptr + jalr
1278   //     This can reach anywhere in the address space, but it cannot be patched
1279   //     while code is running, so it must only be modified at a safepoint.
1280   //     This form of call is most suitable for targets at fixed addresses,
1281   //     which will never be patched.
1282   //
1283   //   - reloc call:
1284   //     This too can reach anywhere in the address space but is only available
1285   //     in C1/C2-generated code (nmethod).
1286   //
1287   //     [Main code section]
1288   //       auipc
1289   //       ld <address_from_stub_section>
1290   //       jalr
1291   //
1292   //     [Stub section]
1293   //     address stub:
1294   //       <64-bit destination address>
1295   //
1296   //    To change the destination we simply atomically store the new
1297   //    address in the stub section.
1298   //    There is a benign race in that the other thread might observe the old
1299   //    64-bit destination address before it observes the new address. That does
1300   //    not matter because the destination method has been invalidated, so there
1301   //    will be a trap at its start.
1302 
1303   // Emit a reloc call and create a stub to hold the entry point address.
1304   // Supported entry.rspec():
1305   // - relocInfo::runtime_call_type
1306   // - relocInfo::opt_virtual_call_type
1307   // - relocInfo::static_call_type
1308   // - relocInfo::virtual_call_type
1309   //
1310   // Return: the call PC or nullptr if CodeCache is full.
1311   address reloc_call(Address entry, Register tmp = t1);
1312 
       // Inline-cache call and check support. Return values are defined by the
       // implementation; ic_call presumably mirrors reloc_call (call PC or
       // nullptr) and ic_check_size is presumably in bytes -- TODO confirm.
1313   address ic_call(address entry, jint method_index = 0);
1314   static int ic_check_size();
1315   int ic_check(int end_alignment = MacroAssembler::instruction_size);
1316 
1317   // Support for memory inc/dec
1318   // n.b. increment/decrement calls with an Address destination will
1319   // need to use a scratch register to load the value to be
1320   // incremented. increment/decrement calls which add or subtract a
1321   // constant value other than sign-extended 12-bit immediate will need
1322   // to use a 2nd scratch register to hold the constant. so, an address
1323   // increment/decrement may trash both t0 and t1.
       // The scratch registers default to t0 (tmp1) and t1 (tmp2); the *w
       // variants take a 32-bit value.
1324 
1325   void increment(const Address dst, int64_t value = 1, Register tmp1 = t0, Register tmp2 = t1);
1326   void incrementw(const Address dst, int32_t value = 1, Register tmp1 = t0, Register tmp2 = t1);
1327 
1328   void decrement(const Address dst, int64_t value = 1, Register tmp1 = t0, Register tmp2 = t1);
1329   void decrementw(const Address dst, int32_t value = 1, Register tmp1 = t0, Register tmp2 = t1);
1330 
       // Compares src1 with the value at src2 and presumably branches to
       // `equal` on a match; tmp (default t0) is the scratch register.
1331   void cmpptr(Register src1, const Address &src2, Label& equal, Register tmp = t0);
1332 
       // Class-initialization barrier; both labels are optional (default nullptr).
       // NOTE(review): fall-through behaviour when a label is omitted is defined
       // in the implementation.
1333   void clinit_barrier(Register klass, Register tmp, Label* L_fast_path = nullptr, Label* L_slow_path = nullptr);
1334   void load_method_holder_cld(Register result, Register method);
1335   void load_method_holder(Register holder, Register method);
1336 
       // Helpers whose parameters (haystack_isL, match_mask, pattern) suggest
       // string-search intrinsics -- presumably used by indexOf; TODO confirm.
1337   void compute_index(Register str1, Register trailing_zeros, Register match_mask,
1338                      Register result, Register char_tmp, Register tmp,
1339                      bool haystack_isL);
1340   void compute_match_mask(Register src, Register pattern, Register match_mask,
1341                           Register mask1, Register mask2);
1342 
1343   // CRC32 code for java.util.zip.CRC32::updateBytes() intrinsic.
       // kernel_crc32 is the entry point; table0..table3 and tmp1..tmp6 are
       // working registers it is handed by the caller.
1344   void kernel_crc32(Register crc, Register buf, Register len,
1345         Register table0, Register table1, Register table2, Register table3,
1346         Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5, Register tmp6);
       // Per-word and per-byte update steps (presumably called from
       // kernel_crc32 -- confirm in the implementation).
1347   void update_word_crc32(Register crc, Register v, Register tmp1, Register tmp2, Register tmp3,
1348         Register table0, Register table1, Register table2, Register table3,
1349         bool upper);
1350   void update_byte_crc32(Register crc, Register val, Register table);
1351 
1352 #ifdef COMPILER2
       // Everything up to the matching #endif is only declared when COMPILER2 is
       // defined. Descriptions are inferred from names -- confirm in the
       // implementation.
       //
       // Vector / carry-less-multiply (vclmul) CRC32 kernels. The _vectorsize_16
       // and _vectorsize_32 suffixes presumably select the vector register size
       // in bytes -- TODO confirm.
1353   void vector_update_crc32(Register crc, Register buf, Register len,
1354                            Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5,
1355                            Register table0, Register table3);
1356   void kernel_crc32_vclmul_fold(Register crc, Register buf, Register len,
1357               Register table0, Register table1, Register table2, Register table3,
1358               Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5);
1359   void crc32_vclmul_fold_to_16_bytes_vectorsize_32(VectorRegister vx, VectorRegister vy, VectorRegister vt,
1360                             VectorRegister vtmp1, VectorRegister vtmp2, VectorRegister vtmp3, VectorRegister vtmp4);
1361   void kernel_crc32_vclmul_fold_vectorsize_32(Register crc, Register buf, Register len,
1362                                               Register vclmul_table, Register tmp1, Register tmp2);
1363   void crc32_vclmul_fold_16_bytes_vectorsize_16(VectorRegister vx, VectorRegister vt,
1364                       VectorRegister vtmp1, VectorRegister vtmp2, VectorRegister vtmp3, VectorRegister vtmp4,
1365                       Register buf, Register tmp, const int STEP);
1366   void crc32_vclmul_fold_16_bytes_vectorsize_16_2(VectorRegister vx, VectorRegister vy, VectorRegister vt,
1367                       VectorRegister vtmp1, VectorRegister vtmp2, VectorRegister vtmp3, VectorRegister vtmp4,
1368                       Register tmp);
1369   void crc32_vclmul_fold_16_bytes_vectorsize_16_3(VectorRegister vx, VectorRegister vy, VectorRegister vt,
1370                       VectorRegister vtmp1, VectorRegister vtmp2, VectorRegister vtmp3, VectorRegister vtmp4,
1371                       Register tmp);
1372   void kernel_crc32_vclmul_fold_vectorsize_16(Register crc, Register buf, Register len,
1373                                               Register vclmul_table, Register tmp1, Register tmp2);
1374 
       // Multi-word multiplication helpers (presumably backing the BigInteger
       // multiply intrinsics -- TODO confirm). wide_mul / wide_madd produce a
       // lo/hi register pair; cad / cadc / adc take an explicit carry register.
1375   void mul_add(Register out, Register in, Register offset,
1376                Register len, Register k, Register tmp);
1377   void wide_mul(Register prod_lo, Register prod_hi, Register n, Register m);
1378   void wide_madd(Register sum_lo, Register sum_hi, Register n,
1379                  Register m, Register tmp1, Register tmp2);
1380   void cad(Register dst, Register src1, Register src2, Register carry);
1381   void cadc(Register dst, Register src1, Register src2, Register carry);
1382   void adc(Register dst, Register src1, Register src2, Register carry);
1383   void add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo,
1384                        Register src1, Register src2, Register carry);
1385   void multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart,
1386                              Register y, Register y_idx, Register z,
1387                              Register carry, Register product,
1388                              Register idx, Register kdx);
1389   void multiply_128_x_128_loop(Register y, Register z,
1390                                Register carry, Register carry2,
1391                                Register idx, Register jdx,
1392                                Register yz_idx1, Register yz_idx2,
1393                                Register tmp, Register tmp3, Register tmp4,
1394                                Register tmp6, Register product_hi);
1395   void multiply_to_len(Register x, Register xlen, Register y, Register ylen,
1396                        Register z, Register tmp0,
1397                        Register tmp1, Register tmp2, Register tmp3, Register tmp4,
1398                        Register tmp5, Register tmp6, Register product_hi);
1399 
1400 #endif // COMPILER2
1401 
       // Bit-manipulation helpers; scratch registers default to t0 / t1.
       // NOTE(review): exact semantics (e.g. what "inflate" expands, what isLL
       // selects) are defined in the implementation.
1402   void inflate_lo32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);
1403   void inflate_hi32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);
1404 
1405   void ctzc_bits(Register Rd, Register Rs, bool isLL = false,
1406                  Register tmp1 = t0, Register tmp2 = t1);
1407 
       // Memory zeroing / filling. zero_words has two overloads: a constant
       // word count (returns void) and a register count (returns an address;
       // presumably nullptr signals failure -- TODO confirm).
1408   void zero_words(Register base, uint64_t cnt);
1409   address zero_words(Register ptr, Register cnt);
1410   void fill_words(Register base, Register cnt, Register value);
1411   void zero_memory(Register addr, Register len, Register tmp);
1412   void zero_dcache_blocks(Register base, Register cnt, Register tmp1, Register tmp2);
1413 
1414   // shift left by shamt and add
1415   void shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt);
1416 
1417   // test single bit in Rs, result is set to Rd
1418   void test_bit(Register Rd, Register Rs, uint32_t bit_pos);
1419 
1420   // The *_safe float conversions below deal with exceptional inputs:
1421   // e.g. converting NaN, +Inf or -Inf to int, float or double
1422   // will trigger an exception, so these situations need special handling
1423   // to get correct results.
1424   void fcvt_w_s_safe(Register dst, FloatRegister src, Register tmp = t0);
1425   void fcvt_l_s_safe(Register dst, FloatRegister src, Register tmp = t0);
1426   void fcvt_w_d_safe(Register dst, FloatRegister src, Register tmp = t0);
1427   void fcvt_l_d_safe(Register dst, FloatRegister src, Register tmp = t0);
1428 
       // Rounding helpers; by name they implement Java's Math.round for float
       // and double -- TODO confirm. ftmp is a floating-point scratch register.
1429   void java_round_float(Register dst, FloatRegister src, FloatRegister ftmp);
1430   void java_round_double(Register dst, FloatRegister src, FloatRegister ftmp);
1431 
1432   // Helper routine processing the slow path of NaN when converting float to float16
1433   void float_to_float16_NaN(Register dst, FloatRegister src, Register tmp1, Register tmp2);
1434 
1435   // vector load/store unit-stride instructions
1436   void vlex_v(VectorRegister vd, Register base, Assembler::SEW sew, VectorMask vm = unmasked) {
         // Unit-stride vector load, picking the vle<N>_v form that matches the
         // element width; e8 and any other value use the byte form.
         if (sew == Assembler::e64) {
           vle64_v(vd, base, vm);
         } else if (sew == Assembler::e32) {
           vle32_v(vd, base, vm);
         } else if (sew == Assembler::e16) {
           vle16_v(vd, base, vm);
         } else {
           vle8_v(vd, base, vm);
         }
1452   }
1453 
1454   void vsex_v(VectorRegister store_data, Register base, Assembler::SEW sew, VectorMask vm = unmasked) {
         // Unit-stride vector store, picking the vse<N>_v form that matches the
         // element width; e8 and any other value use the byte form.
         if (sew == Assembler::e64) {
           vse64_v(store_data, base, vm);
         } else if (sew == Assembler::e32) {
           vse32_v(store_data, base, vm);
         } else if (sew == Assembler::e16) {
           vse16_v(store_data, base, vm);
         } else {
           vse8_v(store_data, base, vm);
         }
1470   }
1471 
1472   // vector pseudo instructions
1473   // rotate vector register left with shift bits, 32-bit version
       // Built from a right shift by (32 - shift) into tmp_vr, a left shift of vd
       // by shift, and an OR of the two; tmp_vr is clobbered.
       // NOTE(review): shift == 0 or shift >= 32 makes the (32 - shift) immediate
       // fall outside 0..31 -- presumably callers only pass 1..31; confirm.
1474   inline void vrole32_vi(VectorRegister vd, uint32_t shift, VectorRegister tmp_vr) {
1475     vsrl_vi(tmp_vr, vd, 32 - shift);
1476     vsll_vi(vd, vd, shift);
1477     vor_vv(vd, vd, tmp_vr);
1478   }
1479 
       // Register load alias: forwards to vl1re8_v.
1480   inline void vl1r_v(VectorRegister vd, Register rs) {
1481     vl1re8_v(vd, rs);
1482   }
1483 
       // Mask NOT: NAND of vs with itself.
1484   inline void vmnot_m(VectorRegister vd, VectorRegister vs) {
1485     vmnand_mm(vd, vs, vs);
1486   }
1487 
       // Narrowing convert: narrowing right shift with x0 as the shift amount.
1488   inline void vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked) {
1489     vnsrl_wx(vd, vs, x0, vm);
1490   }
1491 
       // Integer negate: reverse-subtract with x0 as the scalar operand.
1492   inline void vneg_v(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked) {
1493     vrsub_vx(vd, vs, x0, vm);
1494   }
1495 
       // Floating-point negate: vfsgnjn of vs with itself.
1496   inline void vfneg_v(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked) {
1497     vfsgnjn_vv(vd, vs, vs, vm);
1498   }
1499 
       // Floating-point absolute value: vfsgnjx of vs with itself.
1500   inline void vfabs_v(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked) {
1501     vfsgnjx_vv(vd, vs, vs, vm);
1502   }
1501     vfsgnjx_vv(vd, vs, vs, vm);
1502   }
1503 
1504   inline void vmsgt_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
1505     vmslt_vv(vd, vs1, vs2, vm);
1506   }
1507 
1508   inline void vmsgtu_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
1509     vmsltu_vv(vd, vs1, vs2, vm);
1510   }
1511 
1512   inline void vmsge_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
1513     vmsle_vv(vd, vs1, vs2, vm);
1514   }
1515 
1516   inline void vmsgeu_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
1517     vmsleu_vv(vd, vs1, vs2, vm);
1518   }
1519 
1520   inline void vmfgt_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
1521     vmflt_vv(vd, vs1, vs2, vm);
1522   }
1523 
1524   inline void vmfge_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
1525     vmfle_vv(vd, vs1, vs2, vm);
1526   }
1527 
1528   inline void vmsltu_vi(VectorRegister Vd, VectorRegister Vs2, uint32_t imm, VectorMask vm = unmasked) {
1529     guarantee(imm >= 1 && imm <= 16, "imm is invalid");
1530     vmsleu_vi(Vd, Vs2, imm-1, vm);
1531   }
1532 
1533   inline void vmsgeu_vi(VectorRegister Vd, VectorRegister Vs2, uint32_t imm, VectorMask vm = unmasked) {
1534     guarantee(imm >= 1 && imm <= 16, "imm is invalid");
1535     vmsgtu_vi(Vd, Vs2, imm-1, vm);
1536   }
1537 
1538   // Copy mask register (AND of vs with itself)
1539   inline void vmmv_m(VectorRegister vd, VectorRegister vs) {
1540     vmand_mm(vd, vs, vs);
1541   }
1542 
1543   // Clear mask register (XOR of vd with itself)
1544   inline void vmclr_m(VectorRegister vd) {
1545     vmxor_mm(vd, vd, vd);
1546   }
1547 
1548   // Set mask register (XNOR of vd with itself)
1549   inline void vmset_m(VectorRegister vd) {
1550     vmxnor_mm(vd, vd, vd);
1551   }
1552 
       // Bitwise NOT: XOR with the immediate -1.
1553   inline void vnot_v(VectorRegister Vd, VectorRegister Vs, VectorMask vm = unmasked) {
1554     vxor_vi(Vd, Vs, -1, vm);
1555   }
1556 
       // Defined out of line; presumably the block size (in words) used by
       // zero_words() -- TODO confirm.
1557   static const int zero_words_block_size;
1558 
1559   void cast_primitive_type(BasicType type, Register Rt) {
         // Normalise the value in Rt, in place, for the given primitive type.
         // Sub-word integer types are zero- or sign-extended from their width;
         // long, void, float and double need no adjustment.
         switch (type) {
           case T_BOOLEAN: sltu(Rt, zr, Rt); break;  // Rt = (Rt != 0)
           case T_CHAR:    zext(Rt, Rt, 16); break;
           case T_BYTE:    sext(Rt, Rt, 8);  break;
           case T_SHORT:   sext(Rt, Rt, 16); break;
           case T_INT:     sext(Rt, Rt, 32); break;
           case T_LONG:    // fall through
           case T_VOID:    // fall through
           case T_FLOAT:   // fall through
           case T_DOUBLE:
             break;        // nothing to do
           default:
             ShouldNotReachHere();
         }
1582   }
1583 
1584   // float cmp with unordered_result
       // unordered_result is presumably the value placed in `result` when
       // either operand is NaN -- TODO confirm.
1585   void float_compare(Register result, FloatRegister Rs1, FloatRegister Rs2, int unordered_result);
1586   void double_compare(Register result, FloatRegister Rs1, FloatRegister Rs2, int unordered_result);
1587 
1588   // Zero/Sign-extend
       // `bits` is the width of the source value being extended.
1589   void zext(Register dst, Register src, int bits);
1590   void sext(Register dst, Register src, int bits);
1591 
1592 private:
       // Shared worker behind the public cmp_* helpers below (presumably;
       // confirm in the implementation). is_signed selects the comparison kind.
1593   void cmp_x2i(Register dst, Register src1, Register src2, Register tmp, bool is_signed = true);
1594 
1595 public:
1596   // compare src1 and src2 and get -1/0/1 in dst.
1597   // if [src1 > src2], dst = 1;
1598   // if [src1 == src2], dst = 0;
1599   // if [src1 < src2], dst = -1;
1600   void cmp_l2i(Register dst, Register src1, Register src2, Register tmp = t0);
1601   void cmp_ul2i(Register dst, Register src1, Register src2, Register tmp = t0);
1602   void cmp_uw2i(Register dst, Register src1, Register src2, Register tmp = t0);
1603 
1604   // support for argument shuffling
       // Each helper moves one argument from the `src` location to the `dst`
       // location; tmp (default t0) is the scratch register.
1605   void move32_64(VMRegPair src, VMRegPair dst, Register tmp = t0);
1606   void float_move(VMRegPair src, VMRegPair dst, Register tmp = t0);
1607   void long_move(VMRegPair src, VMRegPair dst, Register tmp = t0);
1608   void double_move(VMRegPair src, VMRegPair dst, Register tmp = t0);
1609   void object_move(OopMap* map,
1610                    int oop_handle_offset,
1611                    int framesize_in_slots,
1612                    VMRegPair src,
1613                    VMRegPair dst,
1614                    bool is_receiver,
1615                    int* receiver_offset);
1616 
1617 #ifdef ASSERT
       // Debug-only short-hand for resetting a whole set of Labels with one call,
       // e.g. to clean up after a failed call generation before returning.
       // Base case: a single label.
       template<typename LabelT>
       void reset_labels(LabelT& lbl) {
         lbl.reset();
       }
       // Recursive case: reset the first label, then the remaining ones.
       template<typename LabelT, typename... Rest>
       void reset_labels(LabelT& lbl, Rest&... more) {
         lbl.reset();
         reset_labels(more...);
       }
1629 #endif
1630 
1631 private:
       // Internal helpers; only declarations are visible here, so the notes are
       // inferred from names -- confirm in the implementation.
1632 
1633   void repne_scan(Register addr, Register value, Register count, Register tmp);
1634 
       // Presumably expands a register bitset into the regs array and returns
       // the number of entries written -- TODO confirm.
1635   int bitset_to_regs(unsigned int bitset, unsigned char* regs);
1636   Address add_memory_helper(const Address dst, Register tmp);
1637 
       // Load-reserved / store-conditional pair with explicit operand size and
       // ordering (acquire on the load, release on the store).
1638   void load_reserved(Register dst, Register addr, Assembler::operand_size size, Assembler::Aqrl acquire);
1639   void store_conditional(Register dst, Register new_val, Register addr, Assembler::operand_size size, Assembler::Aqrl release);
1640 
1641 public:
       // Lightweight locking fast paths; `slow` is presumably the branch target
       // when the fast path cannot be taken -- TODO confirm.
1642   void lightweight_lock(Register basic_lock, Register obj, Register tmp1, Register tmp2, Register tmp3, Label& slow);
1643   void lightweight_unlock(Register obj, Register tmp1, Register tmp2, Register tmp3, Label& slow);
1644 
1645 public:
       // Byte sizes of fixed instruction sequences, expressed as a multiple of
       // MacroAssembler::instruction_size.
1646   enum {
1647     // movptr
1648     movptr1_instruction_size = 6 * MacroAssembler::instruction_size, // lui, addi, slli, addi, slli, addi.  See movptr1().
1649     movptr2_instruction_size = 5 * MacroAssembler::instruction_size, // lui, lui, slli, add, addi.  See movptr2().
1650     load_pc_relative_instruction_size = 2 * MacroAssembler::instruction_size // auipc, ld
1651   };
1652 
       // Instruction predicates: each inspects the instruction at `instr` and
       // tests its opcode field (plus funct3 and/or rd where noted in the code).
       // The inline ones assert that instr is non-null.
1653   static bool is_load_pc_relative_at(address branch);
1654   static bool is_li16u_at(address instr);
1655 
1656   static bool is_jal_at(address instr)        { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b1101111; }
1657   static bool is_jalr_at(address instr)       { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b1100111 && extract_funct3(instr) == 0b000; }
1658   static bool is_branch_at(address instr)     { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b1100011; }
1659   static bool is_ld_at(address instr)         { assert_cond(instr != nullptr); return is_load_at(instr) && extract_funct3(instr) == 0b011; }
1660   static bool is_load_at(address instr)       { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b0000011; }
1661   static bool is_float_load_at(address instr) { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b0000111; }
1662   static bool is_auipc_at(address instr)      { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b0010111; }
       // "jump" here means any of: conditional branch, jal, or jalr.
1663   static bool is_jump_at(address instr)       { assert_cond(instr != nullptr); return is_branch_at(instr) || is_jal_at(instr) || is_jalr_at(instr); }
1664   static bool is_add_at(address instr)        { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b0110011 && extract_funct3(instr) == 0b000; }
1665   static bool is_addi_at(address instr)       { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b0010011 && extract_funct3(instr) == 0b000; }
1666   static bool is_addiw_at(address instr)      { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b0011011 && extract_funct3(instr) == 0b000; }
       // The *_to_zr variants additionally require the destination register to be zr.
1667   static bool is_addiw_to_zr_at(address instr){ assert_cond(instr != nullptr); return is_addiw_at(instr) && extract_rd(instr) == zr; }
1668   static bool is_lui_at(address instr)        { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b0110111; }
1669   static bool is_lui_to_zr_at(address instr)  { assert_cond(instr != nullptr); return is_lui_at(instr) && extract_rd(instr) == zr; }
1670 
1671   static bool is_srli_at(address instr) {
         // Matches srli: opcode 0b0010011, funct3 0b101 and bits [31:26] all zero.
         // The instruction word is read through Assembler::ld_instr(), the same
         // accessor is_slli_shift_at() uses, instead of dereferencing a cast
         // pointer to the code bytes.
1672     assert_cond(instr != nullptr);
1673     return extract_opcode(instr) == 0b0010011 &&
1674            extract_funct3(instr) == 0b101 &&
                Assembler::extract(Assembler::ld_instr(instr), 31, 26) == 0b000000;
1676   }
1677 
1678   static bool is_slli_shift_at(address instr, uint32_t shift) {
         // Matches slli with one specific shift amount: opcode 0b0010011,
         // funct3 0b001, and a shamt field (bits [25:20]) equal to `shift`.
1679     assert_cond(instr != nullptr);
         if (extract_opcode(instr) != 0b0010011) {   // opcode field
           return false;
         }
         if (extract_funct3(instr) != 0b001) {       // funct3 field, select the type of operation
           return false;
         }
         return Assembler::extract(Assembler::ld_instr(instr), 25, 20) == shift;  // shamt field
1683   }
1684 
       // Recognisers for the movptr1 / movptr2 sequences (see the size enum and
       // the check_movptr*_data_dependency helpers in this class).
1685   static bool is_movptr1_at(address instr);
1686   static bool is_movptr2_at(address instr);
1687 
1688   static bool is_lwu_to_zr(address instr);
1689 
       // Field decoders for the instruction at `instr`: source registers,
       // destination register, opcode and funct3. Defined out of line.
1690   static Register extract_rs1(address instr);
1691   static Register extract_rs2(address instr);
1692   static Register extract_rd(address instr);
1693   static uint32_t extract_opcode(address instr);
1694   static uint32_t extract_funct3(address instr);
1695 
1696   // the instruction sequence of movptr is as below:
1697   //     lui
1698   //     addi
1699   //     slli
1700   //     addi
1701   //     slli
1702   //     addi/jalr/load
1703   static bool check_movptr1_data_dependency(address instr) {
         // Walk the four middle instructions (addi, slli, addi, slli). Each must
         // read (rs1) the register its predecessor wrote (rd) and write its own
         // result back to that same register.
         address producer = instr;  // starts at the lui
         for (int i = 0; i < 4; i++) {
           address consumer = producer + MacroAssembler::instruction_size;
           if (!(extract_rs1(consumer) == extract_rd(producer))) {
             return false;
           }
           if (!(extract_rs1(consumer) == extract_rd(consumer))) {
             return false;
           }
           producer = consumer;
         }
         // The final addi/jalr/load only has to consume the last slli result.
         address last_instr = producer + MacroAssembler::instruction_size;
         return extract_rs1(last_instr) == extract_rd(producer);
1719   }
1720 
1721   // the instruction sequence of movptr2 is as below:
1722   //     lui
1723   //     lui
1724   //     slli
1725   //     add
1726   //     addi/jalr/load
1727   static bool check_movptr2_data_dependency(address instr) {
         const int step = MacroAssembler::instruction_size;
         address first_lui  = instr;
         address second_lui = instr + 1 * step;
         address shift      = instr + 2 * step;
         address sum        = instr + 3 * step;
         address consumer   = instr + 4 * step;

         // add: accumulates into the second lui's register ...
         if (!(extract_rd(sum) == extract_rd(second_lui))) {
           return false;
         }
         if (!(extract_rs1(sum) == extract_rd(second_lui))) {
           return false;
         }
         // ... taking the shifted value as its other operand.
         if (!(extract_rs2(sum) == extract_rd(shift))) {
           return false;
         }
         // slli: shifts the first lui's register in place.
         if (!(extract_rs1(shift) == extract_rd(first_lui))) {
           return false;
         }
         if (!(extract_rd(shift) == extract_rd(first_lui))) {
           return false;
         }
         // The final addi/jalr/load consumes the add result.
         return extract_rs1(consumer) == extract_rd(sum);
1739   }
1740 
1741   // the instruction sequence of li16u is as below:
1742   //     lui
1743   //     srli
1744   static bool check_li16u_data_dependency(address instr) {
         address srli_pc = instr + MacroAssembler::instruction_size;
         // srli must read the register written by the lui ...
         if (!(extract_rs1(srli_pc) == extract_rd(instr))) {
           return false;
         }
         // ... and write its result back to that same register.
         return extract_rs1(srli_pc) == extract_rd(srli_pc);
1750   }
1751 
1752   // the instruction sequence of li32 is as below:
1753   //     lui
1754   //     addiw
1755   static bool check_li32_data_dependency(address instr) {
         address addiw_pc = instr + MacroAssembler::instruction_size;
         // addiw must read the register written by the lui ...
         if (!(extract_rs1(addiw_pc) == extract_rd(instr))) {
           return false;
         }
         // ... and write its result back to that same register.
         return extract_rs1(addiw_pc) == extract_rd(addiw_pc);
1761   }
1762 
1763   // the instruction sequence of pc-relative is as below:
1764   //     auipc
1765   //     jalr/addi/load/float_load
1766   static bool check_pc_relative_data_dependency(address instr) {
         // The second instruction must use the auipc destination as its base (rs1).
         address follower = instr + MacroAssembler::instruction_size;
         return extract_rs1(follower) == extract_rd(instr);
1771   }
1772 
1773   // the instruction sequence of load_label is as below:
1774   //     auipc
1775   //     load
1776   static bool check_load_pc_relative_data_dependency(address instr) {
         address load_pc = instr + MacroAssembler::instruction_size;
         // The load must target the same register the auipc wrote ...
         if (!(extract_rd(load_pc) == extract_rd(instr))) {
           return false;
         }
         // ... and use that register as its base address.
         return extract_rs1(load_pc) == extract_rd(load_pc);
1782   }
1783 
1784   static bool is_li32_at(address instr);          // recogniser for the li32 sequence (lui + addiw, see check_li32_data_dependency)
1785   static bool is_pc_relative_at(address branch);  // recogniser for the auipc-based pc-relative sequence (see check_pc_relative_data_dependency)
1786 
1787   static bool is_membar(address addr) {
         // True when the instruction at addr has opcode 0b0001111 (low 7 bits)
         // and funct3 == 0. Asserts a non-null address, matching the other
         // is_*_at predicates in this class.
         assert_cond(addr != nullptr);
1788     return (Bytes::get_native_u4(addr) & 0x7f) == 0b1111 && extract_funct3(addr) == 0;
1789   }
1790   static uint32_t get_membar_kind(address addr);                    // reads the ordering kind of the membar at addr (encoding defined in the implementation)
1791   static void set_membar_kind(address addr, uint32_t order_kind);   // presumably patches the membar at addr to order_kind -- TODO confirm
1792 };
1793 
1794 #ifdef ASSERT
     // Debug-only platform hook for AbstractAssembler; always returns false here.
1795 inline bool AbstractAssembler::pd_check_instruction_mark() { return false; }
1796 #endif
1797 
1798 #endif // CPU_RISCV_MACROASSEMBLER_RISCV_HPP