1 /*
   2  * Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved.
   3  * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
   4  * Copyright (c) 2020, 2024, Huawei Technologies Co., Ltd. All rights reserved.
   5  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   6  *
   7  * This code is free software; you can redistribute it and/or modify it
   8  * under the terms of the GNU General Public License version 2 only, as
   9  * published by the Free Software Foundation.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  *
  25  */
  26 
  27 #ifndef CPU_RISCV_MACROASSEMBLER_RISCV_HPP
  28 #define CPU_RISCV_MACROASSEMBLER_RISCV_HPP
  29 
  30 #include "asm/assembler.inline.hpp"
  31 #include "code/vmreg.hpp"
  32 #include "metaprogramming/enableIf.hpp"
  33 #include "oops/compressedOops.hpp"
  34 #include "utilities/powerOfTwo.hpp"
  35 #include "runtime/signature.hpp"
  36 
  37 class ciInlineKlass;
  38 
  39 // MacroAssembler extends Assembler by frequently used macros.
  40 //
  41 // Instructions for which a 'better' code sequence exists depending
  42 // on arguments should also go in here.
  43 
  44 class MacroAssembler: public Assembler {
  45 
  46  public:
  47 
  48   MacroAssembler(CodeBuffer* code) : Assembler(code) {}
  49 
  50   void safepoint_poll(Label& slow_path, bool at_return, bool in_nmethod, Register tmp_reg = t0);
  51 
  52   // Alignment
  53   int align(int modulus, int extra_offset = 0);
  54 
  55   static inline void assert_alignment(address pc, int alignment = MacroAssembler::instruction_size) {
  56     assert(is_aligned(pc, alignment), "bad alignment");
  57   }
  58 
  59   // nop
  60   void post_call_nop();
  61 
  62   // Stack frame creation/removal
  63   // Note that SP must be updated to the right place before saving/restoring RA and FP
  64   // because signal based thread suspend/resume could happen asynchronously.
  65   void enter() {
  66     subi(sp, sp, 2 * wordSize);
  67     sd(ra, Address(sp, wordSize));
  68     sd(fp, Address(sp));
  69     addi(fp, sp, 2 * wordSize);
  70   }
  71 
  72   void leave() {
  73     subi(sp, fp, 2 * wordSize);
  74     ld(fp, Address(sp));
  75     ld(ra, Address(sp, wordSize));
  76     addi(sp, sp, 2 * wordSize);
  77   }
  78 
  79 
  // Support for getting the JavaThread pointer (i.e., a reference to thread-local information)
  81   // The pointer will be loaded into the thread register.
  82   void get_thread(Register thread);
  83 
  84   // Support for VM calls
  85   //
  86   // It is imperative that all calls into the VM are handled via the call_VM macros.
  87   // They make sure that the stack linkage is setup correctly. call_VM's correspond
  88   // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points.
  89 
  90   void call_VM(Register oop_result,
  91                address entry_point,
  92                bool check_exceptions = true);
  93   void call_VM(Register oop_result,
  94                address entry_point,
  95                Register arg_1,
  96                bool check_exceptions = true);
  97   void call_VM(Register oop_result,
  98                address entry_point,
  99                Register arg_1, Register arg_2,
 100                bool check_exceptions = true);
 101   void call_VM(Register oop_result,
 102                address entry_point,
 103                Register arg_1, Register arg_2, Register arg_3,
 104                bool check_exceptions = true);
 105 
 106   // Overloadings with last_Java_sp
 107   void call_VM(Register oop_result,
 108                Register last_java_sp,
 109                address entry_point,
 110                int number_of_arguments = 0,
 111                bool check_exceptions = true);
 112   void call_VM(Register oop_result,
 113                Register last_java_sp,
 114                address entry_point,
 115                Register arg_1,
 116                bool check_exceptions = true);
 117   void call_VM(Register oop_result,
 118                Register last_java_sp,
 119                address entry_point,
 120                Register arg_1, Register arg_2,
 121                bool check_exceptions = true);
 122   void call_VM(Register oop_result,
 123                Register last_java_sp,
 124                address entry_point,
 125                Register arg_1, Register arg_2, Register arg_3,
 126                bool check_exceptions = true);
 127 
 128   void get_vm_result_oop(Register oop_result, Register java_thread);
 129   void get_vm_result_metadata(Register metadata_result, Register java_thread);
 130 
 131   // These always tightly bind to MacroAssembler::call_VM_leaf_base
 132   // bypassing the virtual implementation
 133   void call_VM_leaf(address entry_point,
 134                     int number_of_arguments = 0);
 135   void call_VM_leaf(address entry_point,
 136                     Register arg_0);
 137   void call_VM_leaf(address entry_point,
 138                     Register arg_0, Register arg_1);
 139   void call_VM_leaf(address entry_point,
 140                     Register arg_0, Register arg_1, Register arg_2);
 141 
 142   // These always tightly bind to MacroAssembler::call_VM_base
 143   // bypassing the virtual implementation
 144   void super_call_VM_leaf(address entry_point);
 145   void super_call_VM_leaf(address entry_point, Register arg_0);
 146   void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1);
 147   void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2);
 148   void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3);
 149 
 150   // last Java Frame (fills frame anchor)
 151   void set_last_Java_frame(Register last_java_sp, Register last_java_fp, address last_java_pc, Register tmp);
 152   void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Label &last_java_pc, Register tmp);
 153   void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Register last_java_pc);
 154 
 155   // thread in the default location (xthread)
 156   void reset_last_Java_frame(bool clear_fp);
 157 
 158   virtual void call_VM_leaf_base(
 159     address entry_point,                // the entry point
 160     int     number_of_arguments,        // the number of arguments to pop after the call
 161     Label*  retaddr = nullptr
 162   );
 163 
 164   virtual void call_VM_leaf_base(
 165     address entry_point,                // the entry point
 166     int     number_of_arguments,        // the number of arguments to pop after the call
 167     Label&  retaddr) {
 168     call_VM_leaf_base(entry_point, number_of_arguments, &retaddr);
 169   }
 170 
 171   virtual void call_VM_base(           // returns the register containing the thread upon return
 172     Register oop_result,               // where an oop-result ends up if any; use noreg otherwise
 173     Register java_thread,              // the thread if computed before     ; use noreg otherwise
 174     Register last_java_sp,             // to set up last_Java_frame in stubs; use noreg otherwise
 175     Label*   return_pc,                // to set up last_Java_frame; use nullptr otherwise
 176     address  entry_point,              // the entry point
 177     int      number_of_arguments,      // the number of arguments (w/o thread) to pop after the call
 178     bool     check_exceptions          // whether to check for pending exceptions after return
 179   );
 180 
 181   void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions);
 182 
 183   virtual void check_and_handle_earlyret(Register java_thread);
 184   virtual void check_and_handle_popframe(Register java_thread);
 185 
 186   void resolve_weak_handle(Register result, Register tmp1, Register tmp2);
 187   void resolve_oop_handle(Register result, Register tmp1, Register tmp2);
 188   void resolve_jobject(Register value, Register tmp1, Register tmp2);
 189   void resolve_global_jobject(Register value, Register tmp1, Register tmp2);
 190 
 191   void movoop(Register dst, jobject obj);
 192   void mov_metadata(Register dst, Metadata* obj);
 193   void bang_stack_size(Register size, Register tmp);
 194   void set_narrow_oop(Register dst, jobject obj);
 195   void set_narrow_klass(Register dst, Klass* k);
 196 
 197   void load_mirror(Register dst, Register method, Register tmp1, Register tmp2);
 198   void access_load_at(BasicType type, DecoratorSet decorators, Register dst,
 199                       Address src, Register tmp1, Register tmp2);
 200   void access_store_at(BasicType type, DecoratorSet decorators, Address dst,
 201                        Register val, Register tmp1, Register tmp2, Register tmp3);
 202   void load_klass(Register dst, Register src, Register tmp = t0);
 203   void load_prototype_header(Register dst, Register src, Register tmp = t0);
 204   void load_narrow_klass_compact(Register dst, Register src);
 205   void store_klass(Register dst, Register src, Register tmp = t0);
 206   void cmp_klass_beq(Register obj, Register klass,
 207                      Register tmp1, Register tmp2,
 208                      Label &L, bool is_far = false);
 209   void cmp_klass_bne(Register obj, Register klass,
 210                      Register tmp1, Register tmp2,
 211                      Label &L, bool is_far = false);
 212 
 213   void encode_klass_not_null(Register r, Register tmp = t0);
 214   void decode_klass_not_null(Register r, Register tmp = t0);
 215   void encode_klass_not_null(Register dst, Register src, Register tmp);
 216   void decode_klass_not_null(Register dst, Register src, Register tmp);
 217   void decode_heap_oop_not_null(Register r);
 218   void decode_heap_oop_not_null(Register dst, Register src);
 219   void decode_heap_oop(Register d, Register s);
 220   void decode_heap_oop(Register r) { decode_heap_oop(r, r); }
 221   void encode_heap_oop_not_null(Register r);
 222   void encode_heap_oop_not_null(Register dst, Register src);
 223   void encode_heap_oop(Register d, Register s);
 224   void encode_heap_oop(Register r) { encode_heap_oop(r, r); };
 225   void load_heap_oop(Register dst, Address src, Register tmp1,
 226                      Register tmp2, DecoratorSet decorators = 0);
 227   void load_heap_oop_not_null(Register dst, Address src, Register tmp1,
 228                               Register tmp2, DecoratorSet decorators = 0);
 229   void store_heap_oop(Address dst, Register val, Register tmp1,
 230                       Register tmp2, Register tmp3, DecoratorSet decorators = 0);
 231 
 232   void store_klass_gap(Register dst, Register src);
 233 
 234   // currently unimplemented
 235   // Used for storing null. All other oop constants should be
 236   // stored using routines that take a jobject.
 237   void store_heap_oop_null(Address dst);
 238 
 239   // This dummy is to prevent a call to store_heap_oop from
  // converting a zero (like null) into a Register by giving
 241   // the compiler two choices it can't resolve
 242 
 243   void store_heap_oop(Address dst, void* dummy);
 244 
 245   // Support for null-checks
 246   //
 247   // Generates code that causes a null OS exception if the content of reg is null.
 248   // If the accessed location is M[reg + offset] and the offset is known, provide the
  // offset. No explicit code generation is needed if the offset is within a certain
 250   // range (0 <= offset <= page_size).
 251 
 252   virtual void null_check(Register reg, int offset = -1);
 253   static bool needs_explicit_null_check(intptr_t offset);
 254   static bool uses_implicit_null_check(void* address);
 255 
 256   void test_field_is_null_free_inline_type(Register flags, Register temp_reg, Label& is_null_free);
 257   void test_field_is_not_null_free_inline_type(Register flags, Register temp_reg, Label& not_null_free_inline_type);
 258   void test_field_is_flat(Register flags, Register temp_reg, Label& is_flat);
 259 
 260   void test_markword_is_inline_type(Register markword, Label& is_inline_type);
 261   void test_oop_is_not_inline_type(Register object, Register tmp, Label& not_inline_type, bool can_be_null = true);
 262   void test_oop_prototype_bit(Register oop, Register temp_reg, int32_t tst_bit, bool jmp_set, Label& jmp_label);
 263   void test_flat_array_oop(Register klass, Register temp_reg, Label& is_flat_array);
 264   void test_null_free_array_oop(Register oop, Register temp_reg, Label& is_null_free_array);
 265   void test_non_flat_array_oop(Register oop, Register temp_reg, Label&is_non_flat_array);
 266   void test_non_null_free_array_oop(Register oop, Register temp_reg, Label&is_non_null_free_array);
 267 
 268   // Check array klass layout helper for flat or null-free arrays...
 269   void test_flat_array_layout(Register lh, Label& is_flat_array);
 270 
 271   void inline_layout_info(Register holder_klass, Register index, Register layout_info);
 272 
 273   void flat_field_copy(DecoratorSet decorators, Register src, Register dst, Register inline_layout_info);
 274 
 275   // inline type data payload offsets...
 276   void payload_offset(Register inline_klass, Register offset);
 277   void payload_address(Register oop, Register data, Register inline_klass);
 278 
 279   // interface method calling
 280   void lookup_interface_method(Register recv_klass,
 281                                Register intf_klass,
 282                                RegisterOrConstant itable_index,
 283                                Register method_result,
 284                                Register scan_tmp,
 285                                Label& no_such_interface,
 286                                bool return_method = true);
 287 
 288   void lookup_interface_method_stub(Register recv_klass,
 289                                     Register holder_klass,
 290                                     Register resolved_klass,
 291                                     Register method_result,
 292                                     Register temp_reg,
 293                                     Register temp_reg2,
 294                                     int itable_index,
 295                                     Label& L_no_such_interface);
 296 
 297   // virtual method calling
  // n.b. x86 allows RegisterOrConstant for vtable_index
 299   void lookup_virtual_method(Register recv_klass,
 300                              RegisterOrConstant vtable_index,
 301                              Register method_result);
 302 
  // Form an address from base + offset in Rd. Rd may or may not
 304   // actually be used: you must use the Address that is returned. It
 305   // is up to you to ensure that the shift provided matches the size
 306   // of your data.
 307   Address form_address(Register Rd, Register base, int64_t byte_offset);
 308 
 309   // Sometimes we get misaligned loads and stores, usually from Unsafe
 310   // accesses, and these can exceed the offset range.
 311   Address legitimize_address(Register Rd, const Address &adr) {
 312     if (adr.getMode() == Address::base_plus_offset) {
 313       if (!is_simm12(adr.offset())) {
 314         return form_address(Rd, adr.base(), adr.offset());
 315       }
 316     }
 317     return adr;
 318   }
 319 
 320   // allocation
 321 
  void tlab_allocate(
    Register obj,                   // result: pointer to object after successful allocation
    Register var_size_in_bytes,     // object size in bytes if unknown at compile time; invalid otherwise
    int      con_size_in_bytes,     // object size in bytes if   known at compile time
    Register tmp1,                  // temp register
    Register tmp2,                  // temp register
    Label&   slow_case,             // continuation point if fast allocation fails
    bool     is_far = false
  );
 331 
 332   // Test sub_klass against super_klass, with fast and slow paths.
 333 
 334   // The fast path produces a tri-state answer: yes / no / maybe-slow.
 335   // One of the three labels can be null, meaning take the fall-through.
 336   // If super_check_offset is -1, the value is loaded up from super_klass.
 337   // No registers are killed, except tmp_reg
 338   void check_klass_subtype_fast_path(Register sub_klass,
 339                                      Register super_klass,
 340                                      Register tmp_reg,
 341                                      Label* L_success,
 342                                      Label* L_failure,
 343                                      Label* L_slow_path,
 344                                      Register super_check_offset = noreg);
 345 
  // The rest of the type check; must be wired to a corresponding fast path.
 347   // It does not repeat the fast path logic, so don't use it standalone.
 348   // The tmp1_reg and tmp2_reg can be noreg, if no temps are available.
 349   // Updates the sub's secondary super cache as necessary.
 350   void check_klass_subtype_slow_path(Register sub_klass,
 351                                      Register super_klass,
 352                                      Register tmp1_reg,
 353                                      Register tmp2_reg,
 354                                      Label* L_success,
 355                                      Label* L_failure,
 356                                      bool set_cond_codes = false);
 357 
 358   void check_klass_subtype_slow_path_linear(Register sub_klass,
 359                                             Register super_klass,
 360                                             Register tmp1_reg,
 361                                             Register tmp2_reg,
 362                                             Label* L_success,
 363                                             Label* L_failure,
 364                                             bool set_cond_codes = false);
 365 
 366   void check_klass_subtype_slow_path_table(Register sub_klass,
 367                                            Register super_klass,
 368                                            Register tmp1_reg,
 369                                            Register tmp2_reg,
 370                                            Label* L_success,
 371                                            Label* L_failure,
 372                                            bool set_cond_codes = false);
 373 
 374   // If r is valid, return r.
 375   // If r is invalid, remove a register r2 from available_regs, add r2
 376   // to regs_to_push, then return r2.
 377   Register allocate_if_noreg(const Register r,
 378                              RegSetIterator<Register> &available_regs,
 379                              RegSet &regs_to_push);
 380 
 381   // Secondary subtype checking
 382   void lookup_secondary_supers_table_var(Register sub_klass,
 383                                          Register r_super_klass,
 384                                          Register result,
 385                                          Register tmp1,
 386                                          Register tmp2,
 387                                          Register tmp3,
 388                                          Register tmp4,
 389                                          Label *L_success);
 390 
 391   void population_count(Register dst, Register src, Register tmp1, Register tmp2);
 392 
 393   // As above, but with a constant super_klass.
 394   // The result is in Register result, not the condition codes.
 395   bool lookup_secondary_supers_table_const(Register r_sub_klass,
 396                                            Register r_super_klass,
 397                                            Register result,
 398                                            Register tmp1,
 399                                            Register tmp2,
 400                                            Register tmp3,
 401                                            Register tmp4,
 402                                            u1 super_klass_slot,
 403                                            bool stub_is_near = false);
 404 
 405   void verify_secondary_supers_table(Register r_sub_klass,
 406                                      Register r_super_klass,
 407                                      Register result,
 408                                      Register tmp1,
 409                                      Register tmp2,
 410                                      Register tmp3);
 411 
 412   void lookup_secondary_supers_table_slow_path(Register r_super_klass,
 413                                                Register r_array_base,
 414                                                Register r_array_index,
 415                                                Register r_bitmap,
 416                                                Register result,
 417                                                Register tmp,
 418                                                bool is_stub = true);
 419 
 420   void check_klass_subtype(Register sub_klass,
 421                            Register super_klass,
 422                            Register tmp_reg,
 423                            Label& L_success);
 424 
 425   Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0);
 426 
 427   void profile_receiver_type(Register recv, Register mdp, int mdp_offset);
 428 
 429   // only if +VerifyOops
 430   void _verify_oop(Register reg, const char* s, const char* file, int line);
 431   void _verify_oop_addr(Address addr, const char* s, const char* file, int line);
 432 
 433   void _verify_oop_checked(Register reg, const char* s, const char* file, int line) {
 434     if (VerifyOops) {
 435       _verify_oop(reg, s, file, line);
 436     }
 437   }
 438   void _verify_oop_addr_checked(Address reg, const char* s, const char* file, int line) {
 439     if (VerifyOops) {
 440       _verify_oop_addr(reg, s, file, line);
 441     }
 442   }
 443 
 444   void _verify_method_ptr(Register reg, const char* msg, const char* file, int line) {}
 445   void _verify_klass_ptr(Register reg, const char* msg, const char* file, int line) {}
 446 
 447 #define verify_oop(reg) _verify_oop_checked(reg, "broken oop " #reg, __FILE__, __LINE__)
 448 #define verify_oop_msg(reg, msg) _verify_oop_checked(reg, "broken oop " #reg ", " #msg, __FILE__, __LINE__)
 449 #define verify_oop_addr(addr) _verify_oop_addr_checked(addr, "broken oop addr " #addr, __FILE__, __LINE__)
 450 #define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__)
 451 #define verify_klass_ptr(reg) _verify_method_ptr(reg, "broken klass " #reg, __FILE__, __LINE__)
 452 
  // A more convenient access to fence for our purposes.
  // Four bits encode the read and write bits of the predecessor and successor
  // sets: bits [3:2] hold the predecessor and bits [1:0] the successor, each
  // as (r, w) with r = 0b10 and w = 0b01. The sets are extended with i for r
  // and o for w if UseConservativeFence is enabled.
  enum Membar_mask_bits {
    StoreStore = 0b0101,               // (pred = w   + succ = w)
    LoadStore  = 0b1001,               // (pred = r   + succ = w)
    StoreLoad  = 0b0110,               // (pred = w   + succ = r)
    LoadLoad   = 0b1010,               // (pred = r   + succ = r)
    AnyAny     = LoadStore | StoreLoad // (pred = rw  + succ = rw)
  };
 463 
 464   void membar(uint32_t order_constraint);
 465 
 466  private:
 467 
 468   static void membar_mask_to_pred_succ(uint32_t order_constraint,
 469                                        uint32_t& predecessor, uint32_t& successor) {
 470     predecessor = (order_constraint >> 2) & 0x3;
 471     successor = order_constraint & 0x3;
 472 
 473     // extend rw -> iorw:
 474     // 01(w) -> 0101(ow)
 475     // 10(r) -> 1010(ir)
 476     // 11(rw)-> 1111(iorw)
 477     if (UseConservativeFence) {
 478       predecessor |= predecessor << 2;
 479       successor   |= successor << 2;
 480     }
 481   }
 482 
 483   static int pred_succ_to_membar_mask(uint32_t predecessor, uint32_t successor) {
 484     return ((predecessor & 0x3) << 2) | (successor & 0x3);
 485   }
 486 
 487  public:
 488 
 489   void cmodx_fence();
 490 
  // Emits the PAUSE hint as a single fence with predecessor 'w' and an empty
  // successor set.
  void pause() {
    // Zihintpause
    // PAUSE is encoded as a FENCE instruction with pred=W, succ=0, fm=0, rd=x0, and rs1=x0.
    Assembler::fence(w, 0);
  }
 496 
 497   // prints msg, dumps registers and stops execution
 498   void stop(const char* msg);
 499 
 500   static void debug64(char* msg, int64_t pc, int64_t regs[]);
 501 
 502   void unimplemented(const char* what = "");
 503 
 504   void should_not_reach_here() { stop("should not reach here"); }
 505 
 506   static address target_addr_for_insn(address insn_addr);
 507 
 508   // Required platform-specific helpers for Label::patch_instructions.
 509   // They _shadow_ the declarations in AbstractAssembler, which are undefined.
 510   static int pd_patch_instruction_size(address branch, address target);
 511   static void pd_patch_instruction(address branch, address target, const char* file = nullptr, int line = 0) {
 512     pd_patch_instruction_size(branch, target);
 513   }
 514   static address pd_call_destination(address branch) {
 515     return target_addr_for_insn(branch);
 516   }
 517 
 518   static int patch_oop(address insn_addr, address o);
 519 
 520   static address get_target_of_li32(address insn_addr);
 521   static int patch_imm_in_li32(address branch, int32_t target);
 522 
  // Return whether code is emitted to a scratch blob.
  // This implementation always answers false; the method is virtual, so a
  // subclass can supply a different answer.
  virtual bool in_scratch_emit_size() {
    return false;
  }
 527 
 528   address emit_reloc_call_address_stub(int insts_call_instruction_offset, address target);
 529   static int max_reloc_call_address_stub_size();
 530 
 531   void emit_static_call_stub();
 532   static int static_call_stub_size();
 533 
 534   // The following 4 methods return the offset of the appropriate move instruction
 535 
 536   // Support for fast byte/short loading with zero extension (depending on particular CPU)
 537   int load_unsigned_byte(Register dst, Address src);
 538   int load_unsigned_short(Register dst, Address src);
 539 
 540   // Support for fast byte/short loading with sign extension (depending on particular CPU)
 541   int load_signed_byte(Register dst, Address src);
 542   int load_signed_short(Register dst, Address src);
 543 
 544   // Load and store values by size and signed-ness
 545   void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed);
 546   void store_sized_value(Address dst, Register src, size_t size_in_bytes);
 547 
 548   // Misaligned loads, will use the best way, according to the AvoidUnalignedAccess flag
 549   void load_short_misaligned(Register dst, Address src, Register tmp, bool is_signed, int granularity = 1);
 550   void load_int_misaligned(Register dst, Address src, Register tmp, bool is_signed, int granularity = 1);
 551   void load_long_misaligned(Register dst, Address src, Register tmp, int granularity = 1);
 552 
 553  public:
 554   // Standard pseudo instructions
 555   inline void nop() {
 556     addi(x0, x0, 0);
 557   }
 558 
 559   inline void mv(Register Rd, Register Rs) {
 560     if (Rd != Rs) {
 561       addi(Rd, Rs, 0);
 562     }
 563   }
 564 
 565   inline void notr(Register Rd, Register Rs) {
 566     if (do_compress_zcb(Rd, Rs) && (Rd == Rs)) {
 567       c_not(Rd);
 568     } else {
 569       xori(Rd, Rs, -1);
 570     }
 571   }
 572 
 573   inline void neg(Register Rd, Register Rs) {
 574     sub(Rd, x0, Rs);
 575   }
 576 
 577   inline void negw(Register Rd, Register Rs) {
 578     subw(Rd, x0, Rs);
 579   }
 580 
 581   inline void sext_w(Register Rd, Register Rs) {
 582     addiw(Rd, Rs, 0);
 583   }
 584 
 585   inline void zext_b(Register Rd, Register Rs) {
 586     if (do_compress_zcb(Rd, Rs) && (Rd == Rs)) {
 587       c_zext_b(Rd);
 588     } else {
 589       andi(Rd, Rs, 0xFF);
 590     }
 591   }
 592 
 593   inline void seqz(Register Rd, Register Rs) {
 594     sltiu(Rd, Rs, 1);
 595   }
 596 
 597   inline void snez(Register Rd, Register Rs) {
 598     sltu(Rd, x0, Rs);
 599   }
 600 
 601   inline void sltz(Register Rd, Register Rs) {
 602     slt(Rd, Rs, x0);
 603   }
 604 
 605   inline void sgtz(Register Rd, Register Rs) {
 606     slt(Rd, x0, Rs);
 607   }
 608 
 609   // Bit-manipulation extension pseudo instructions
 610   // zero extend word
 611   inline void zext_w(Register Rd, Register Rs) {
 612     assert(UseZba, "must be");
 613     if (do_compress_zcb(Rd, Rs) && (Rd == Rs)) {
 614       c_zext_w(Rd);
 615     } else {
 616       add_uw(Rd, Rs, zr);
 617     }
 618   }
 619 
 620   // Floating-point data-processing pseudo instructions
 621   inline void fmv_s(FloatRegister Rd, FloatRegister Rs) {
 622     if (Rd != Rs) {
 623       fsgnj_s(Rd, Rs, Rs);
 624     }
 625   }
 626 
 627   inline void fabs_s(FloatRegister Rd, FloatRegister Rs) {
 628     fsgnjx_s(Rd, Rs, Rs);
 629   }
 630 
 631   inline void fneg_s(FloatRegister Rd, FloatRegister Rs) {
 632     fsgnjn_s(Rd, Rs, Rs);
 633   }
 634 
 635   inline void fmv_d(FloatRegister Rd, FloatRegister Rs) {
 636     if (Rd != Rs) {
 637       fsgnj_d(Rd, Rs, Rs);
 638     }
 639   }
 640 
 641   inline void fabs_d(FloatRegister Rd, FloatRegister Rs) {
 642     fsgnjx_d(Rd, Rs, Rs);
 643   }
 644 
 645   inline void fneg_d(FloatRegister Rd, FloatRegister Rs) {
 646     fsgnjn_d(Rd, Rs, Rs);
 647   }
 648 
 649   // Control and status pseudo instructions
 650   void csrr(Register Rd, unsigned csr);         // read csr
 651   void csrw(unsigned csr, Register Rs);         // write csr
 652   void csrs(unsigned csr, Register Rs);         // set bits in csr
 653   void csrc(unsigned csr, Register Rs);         // clear bits in csr
 654   void csrwi(unsigned csr, unsigned imm);
 655   void csrsi(unsigned csr, unsigned imm);
 656   void csrci(unsigned csr, unsigned imm);
 657   void frcsr(Register Rd) { csrr(Rd, CSR_FCSR); }; // read float-point csr
 658   void fscsr(Register Rd, Register Rs);            // swap float-point csr
 659   void fscsr(Register Rs);                         // write float-point csr
 660   void frrm(Register Rd) { csrr(Rd, CSR_FRM); };   // read float-point rounding mode
 661   void fsrm(Register Rd, Register Rs);             // swap float-point rounding mode
 662   void fsrm(Register Rs);                          // write float-point rounding mode
 663   void fsrmi(Register Rd, unsigned imm);
 664   void fsrmi(unsigned imm);
 665   void frflags(Register Rd) { csrr(Rd, CSR_FFLAGS); }; // read float-point exception flags
 666   void fsflags(Register Rd, Register Rs);              // swap float-point exception flags
 667   void fsflags(Register Rs);                           // write float-point exception flags
 668   void fsflagsi(Register Rd, unsigned imm);
 669   void fsflagsi(unsigned imm);
 670   // Requires Zicntr
 671   void rdinstret(Register Rd) { csrr(Rd, CSR_INSTRET); }; // read instruction-retired counter
 672   void rdcycle(Register Rd)   { csrr(Rd, CSR_CYCLE); };   // read cycle counter
 673   void rdtime(Register Rd)    { csrr(Rd, CSR_TIME); };    // read time
 674 
  // Restore cpu control state after JNI call
  // NOTE(review): 'tmp' is presumably a scratch register that gets clobbered;
  // which control state is restored is decided in the .cpp -- confirm there.
  void restore_cpu_control_state_after_jni(Register tmp);
 677 
  // Control transfer pseudo instructions
  // Branches that test the single register Rs and target the code address
  // 'dest'. Overloads of the same names taking a Label are declared further
  // down in this class.
  // NOTE(review): per the usual RISC-V pseudo-instruction names these compare
  // Rs against zero (==, !=, <=, >=, <, >) -- definitions are in the .cpp.
  void beqz(Register Rs, const address dest);
  void bnez(Register Rs, const address dest);
  void blez(Register Rs, const address dest);
  void bgez(Register Rs, const address dest);
  void bltz(Register Rs, const address dest);
  void bgtz(Register Rs, const address dest);
 685 
  // Conditional-move helpers. Only the declarations live in this header.
  // NOTE(review): presumably each one performs dst = src when
  // "cmp1 <cond> cmp2" holds and leaves dst untouched otherwise, with <cond>
  // taken from the name suffix and a trailing 'u' selecting an unsigned
  // comparison -- confirm against the definitions in the .cpp.

  // Integer compare, integer move.
  void cmov_eq(Register cmp1, Register cmp2, Register dst, Register src);
  void cmov_ne(Register cmp1, Register cmp2, Register dst, Register src);
  void cmov_le(Register cmp1, Register cmp2, Register dst, Register src);
  void cmov_leu(Register cmp1, Register cmp2, Register dst, Register src);
  void cmov_ge(Register cmp1, Register cmp2, Register dst, Register src);
  void cmov_geu(Register cmp1, Register cmp2, Register dst, Register src);
  void cmov_lt(Register cmp1, Register cmp2, Register dst, Register src);
  void cmov_ltu(Register cmp1, Register cmp2, Register dst, Register src);
  void cmov_gt(Register cmp1, Register cmp2, Register dst, Register src);
  void cmov_gtu(Register cmp1, Register cmp2, Register dst, Register src);

  // Floating-point compare, integer move.
  // NOTE(review): is_single presumably selects a single-precision (true) or
  // double-precision (false) comparison -- confirm.
  void cmov_cmp_fp_eq(FloatRegister cmp1, FloatRegister cmp2, Register dst, Register src, bool is_single);
  void cmov_cmp_fp_ne(FloatRegister cmp1, FloatRegister cmp2, Register dst, Register src, bool is_single);
  void cmov_cmp_fp_le(FloatRegister cmp1, FloatRegister cmp2, Register dst, Register src, bool is_single);
  void cmov_cmp_fp_ge(FloatRegister cmp1, FloatRegister cmp2, Register dst, Register src, bool is_single);
  void cmov_cmp_fp_lt(FloatRegister cmp1, FloatRegister cmp2, Register dst, Register src, bool is_single);
  void cmov_cmp_fp_gt(FloatRegister cmp1, FloatRegister cmp2, Register dst, Register src, bool is_single);

  // Integer compare, floating-point move.
  // NOTE(review): is_single presumably describes the width of the moved
  // floating-point value -- confirm.
  void cmov_fp_eq(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single);
  void cmov_fp_ne(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single);
  void cmov_fp_le(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single);
  void cmov_fp_leu(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single);
  void cmov_fp_ge(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single);
  void cmov_fp_geu(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single);
  void cmov_fp_lt(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single);
  void cmov_fp_ltu(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single);
  void cmov_fp_gt(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single);
  void cmov_fp_gtu(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single);

  // Floating-point compare, floating-point move.
  // NOTE(review): cmp_single / cmov_single presumably give the width of the
  // compared and of the moved values respectively -- confirm.
  void cmov_fp_cmp_fp_eq(FloatRegister cmp1, FloatRegister cmp2, FloatRegister dst, FloatRegister src, bool cmp_single, bool cmov_single);
  void cmov_fp_cmp_fp_ne(FloatRegister cmp1, FloatRegister cmp2, FloatRegister dst, FloatRegister src, bool cmp_single, bool cmov_single);
  void cmov_fp_cmp_fp_le(FloatRegister cmp1, FloatRegister cmp2, FloatRegister dst, FloatRegister src, bool cmp_single, bool cmov_single);
  void cmov_fp_cmp_fp_ge(FloatRegister cmp1, FloatRegister cmp2, FloatRegister dst, FloatRegister src, bool cmp_single, bool cmov_single);
  void cmov_fp_cmp_fp_lt(FloatRegister cmp1, FloatRegister cmp2, FloatRegister dst, FloatRegister src, bool cmp_single, bool cmov_single);
  void cmov_fp_cmp_fp_gt(FloatRegister cmp1, FloatRegister cmp2, FloatRegister dst, FloatRegister src, bool cmp_single, bool cmov_single);
 721 
 722  public:
  // We try to follow RISC-V asm mnemonics.
  // But as we don't lay out a reachable GOT,
  // we often need to resort to movptr, li <48imm>.
 726   // https://github.com/riscv-non-isa/riscv-asm-manual/blob/main/src/asm-manual.adoc
 727 
  // HotSpot only uses the standard calling convention using x1/ra.
  // The alternative calling convention using x5/t0 is not used.
  // Using x5 as a temp causes the CPU to mispredict returns.
 731 
 732   // JALR, return address stack updates:
 733   // | rd is x1/x5 | rs1 is x1/x5 | rd=rs1 | RAS action
 734   // | ----------- | ------------ | ------ |-------------
 735   // |     No      |      No      |   -    | None
 736   // |     No      |      Yes     |   -    | Pop
 737   // |     Yes     |      No      |   -    | Push
 738   // |     Yes     |      Yes     |   No   | Pop, then push
 739   // |     Yes     |      Yes     |   Yes  | Push
 740   //
 741   // JAL, return address stack updates:
 742   // | rd is x1/x5 | RAS action
 743   // | ----------- | ----------
 744   // |     Yes     | Push
 745   // |     No      | None
 746   //
  // JUMPS   use Rd = x0/zero and Rs = x6/t1 or imm
  // CALLS   use Rd = x1/ra   and Rs = x6/t1 or imm (or x1/ra*)
  // RETURNS use Rd = x0/zero and Rs = x1/ra
  // *x1/ra should not normally be used as Rs; special cases only.
 751 
  // jump: jal x0, offset
  // For long reach uses temp register for:
  // la + jr
  // The address forms default 'temp' to t1; the Label form defaults it to
  // noreg.
  void j(const address dest, Register temp = t1);
  void j(const Address &dest, Register temp = t1);
  void j(Label &l, Register temp = noreg);

  // jump register: jalr x0, offset(rs)
  // NOTE(review): the only register parameter is named Rd, but judging by the
  // form above it is the 'rs' base register of the jump -- confirm in the .cpp.
  void jr(Register Rd, int32_t offset = 0);

  // call: la + jalr x1
  // 'temp' (t1 by default) is the register handed to la.
  void call(const address dest, Register temp = t1);

  // jalr: jalr x1, offset(rs)
  void jalr(Register Rs, int32_t offset = 0);
 767 
  // Emit a runtime call. Only invalidates the tmp register which
  // is used to keep the entry address for jalr/movptr.
  // Uses call() for intra code cache, else movptr + jalr.
  // Clobbers tmp (t1 by default)
  void rt_call(address dest, Register tmp = t1);
 773 
 774   // ret: jalr x0, 0(x1)
 775   inline void ret() {
 776     Assembler::jalr(x0, x1, 0);
 777   }
 778 
  // Branches to a Label.
  // NOTE(review): is_far presumably requests a sequence that can reach targets
  // beyond the range of a single conditional branch -- confirm in the .cpp.

  // Single-register branches (same names as the address forms declared above).
  void beqz(Register Rs, Label &l, bool is_far = false);
  void bnez(Register Rs, Label &l, bool is_far = false);
  void blez(Register Rs, Label &l, bool is_far = false);
  void bgez(Register Rs, Label &l, bool is_far = false);
  void bltz(Register Rs, Label &l, bool is_far = false);
  void bgtz(Register Rs, Label &l, bool is_far = false);

  // Two-register compare-and-branch.
  void beq (Register Rs1, Register Rs2, Label &L, bool is_far = false);
  void bne (Register Rs1, Register Rs2, Label &L, bool is_far = false);
  void blt (Register Rs1, Register Rs2, Label &L, bool is_far = false);
  void bge (Register Rs1, Register Rs2, Label &L, bool is_far = false);
  void bltu(Register Rs1, Register Rs2, Label &L, bool is_far = false);
  void bgeu(Register Rs1, Register Rs2, Label &L, bool is_far = false);

  // Greater-than / less-or-equal forms targeting a code address.
  // NOTE(review): presumably implemented by swapping the operands of
  // blt/bge/bltu/bgeu, as the RISC-V pseudo-instructions of these names do.
  void bgt (Register Rs, Register Rt, const address dest);
  void ble (Register Rs, Register Rt, const address dest);
  void bgtu(Register Rs, Register Rt, const address dest);
  void bleu(Register Rs, Register Rt, const address dest);

  // Greater-than / less-or-equal forms targeting a Label.
  void bgt (Register Rs, Register Rt, Label &l, bool is_far = false);
  void ble (Register Rs, Register Rt, Label &l, bool is_far = false);
  void bgtu(Register Rs, Register Rt, Label &l, bool is_far = false);
  void bleu(Register Rs, Register Rt, Label &l, bool is_far = false);
 803 
// Opens, but does not close, the body of a relocating overload. 'header' must
// declare parameters named 'dest' and 'rtype'. The generated prologue
//  - guarantees that rtype is relocInfo::internal_word_type,
//  - records the relocation spec of InternalAddress(dest), and
//  - declares an IncompressibleScope that lives until the closing brace.
// The macro user supplies the remaining statements and the closing '}'.
#define INSN_ENTRY_RELOC(result_type, header)                               \
  result_type header {                                                      \
    guarantee(rtype == relocInfo::internal_word_type,                       \
              "only internal_word_type relocs make sense here");            \
    relocate(InternalAddress(dest).rspec());                                \
    IncompressibleScope scope(this);  /* relocations */

// Generates two overloads of the conditional branch NAME that take a code
// address:
//  - NAME(Rs1, Rs2, dest) computes the offset from pc() to dest, guarantees it
//    is even and passes is_simm13, and emits Assembler::NAME with that offset;
//  - NAME(Rs1, Rs2, dest, rtype) runs the INSN_ENTRY_RELOC prologue and then
//    calls the first overload.
#define INSN(NAME)                                                                                       \
  void NAME(Register Rs1, Register Rs2, const address dest) {                                            \
    assert_cond(dest != nullptr);                                                                        \
    int64_t offset = dest - pc();                                                                        \
    guarantee(is_simm13(offset) && is_even(offset),                                                      \
              "offset is invalid: is_simm_13: %s offset: " INT64_FORMAT,                                 \
              BOOL_TO_STR(is_simm13(offset)), offset);                                                   \
    Assembler::NAME(Rs1, Rs2, offset);                                                                   \
  }                                                                                                      \
  INSN_ENTRY_RELOC(void, NAME(Register Rs1, Register Rs2, address dest, relocInfo::relocType rtype))     \
    NAME(Rs1, Rs2, dest);                                                                                \
  }

  // Instantiate the address overloads for the six two-register branches.
  INSN(beq);
  INSN(bne);
  INSN(bge);
  INSN(bgeu);
  INSN(blt);
  INSN(bltu);

#undef INSN

#undef INSN_ENTRY_RELOC
 834 
  // Branches to a Label on a floating-point comparison of Rs1 and Rs2.
  // NOTE(review): is_far presumably allows targets beyond the reach of a single
  // conditional branch, and is_unordered presumably selects whether the branch
  // is also taken when an operand is NaN -- confirm both in the .cpp.

  // Single-precision operands.
  void float_beq(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void float_bne(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void float_ble(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void float_bge(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void float_blt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void float_bgt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);

  // Double-precision operands.
  void double_beq(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void double_bne(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void double_ble(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void double_bge(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void double_blt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void double_bgt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
 848 
 849 private:
 850   // The signed 20-bit upper imm can materialize at most negative 0xF...F80000000, two G.
 851   // The following signed 12-bit imm can at max subtract 0x800, two K, from that previously loaded two G.
 852   bool is_valid_32bit_offset(int64_t x) {
 853     constexpr int64_t twoG = (2 * G);
 854     constexpr int64_t twoK = (2 * K);
 855     return x < (twoG - twoK) && x >= (-twoG - twoK);
 856   }
 857 
 858   // Ensure that the auipc can reach the destination at x from anywhere within
 859   // the code cache so that if it is relocated we know it will still reach.
 860   bool is_32bit_offset_from_codecache(int64_t x) {
 861     int64_t low  = (int64_t)CodeCache::low_bound();
 862     int64_t high = (int64_t)CodeCache::high_bound();
 863     return is_valid_32bit_offset(x - low) && is_valid_32bit_offset(x - high);
 864   }
 865 
 866 public:
  // Stack push and pop individual 64 bit registers
  void push_reg(Register Rs);
  void pop_reg(Register Rd);

  // Push and pop whole register sets.
  // NOTE(review): 'stack' is presumably the register used as the stack pointer
  // for the transfer, and the int result presumably reports how much was
  // pushed or popped -- the unit is not visible here, confirm in the .cpp.
  int push_reg(RegSet regset, Register stack);
  int pop_reg(RegSet regset, Register stack);

  // Same for floating-point register sets.
  int push_fp(FloatRegSet regset, Register stack);
  int pop_fp(FloatRegSet regset, Register stack);

  // Vector register sets; only declared when building with COMPILER2.
#ifdef COMPILER2
  int push_v(VectorRegSet regset, Register stack);
  int pop_v(VectorRegSet regset, Register stack);
#endif // COMPILER2
 881 
  // Push and pop everything that might be clobbered by a native
  // runtime call except t0 and t1. (They are always
  // temporary registers, so we don't have to protect them.)
  // Additional registers can be excluded in a passed RegSet.
  // The push and the matching pop are expected to be given the same
  // 'exclude' set.
  void push_call_clobbered_registers_except(RegSet exclude);
  void pop_call_clobbered_registers_except(RegSet exclude);
 888 
 889   void push_call_clobbered_registers() {
 890     push_call_clobbered_registers_except(RegSet());
 891   }
 892   void pop_call_clobbered_registers() {
 893     pop_call_clobbered_registers_except(RegSet());
 894   }
 895 
  // Save / restore the CPU state.
  // NOTE(review): save_vectors / restore_vectors presumably include the vector
  // registers, with vector_size_in_bytes giving the size of each one -- confirm
  // in the .cpp. Both default to "no vectors".
  void push_CPU_state(bool save_vectors = false, int vector_size_in_bytes = 0);
  void pop_CPU_state(bool restore_vectors = false, int vector_size_in_bytes = 0);

  // Continuation fast-path bookkeeping; java_thread defaults to xthread.
  // NOTE(review): the exact thread state touched is defined in the .cpp.
  void push_cont_fastpath(Register java_thread = xthread);
  void pop_cont_fastpath(Register java_thread = xthread);

  // if heap base register is used - reinit it with the correct value
  void reinit_heapbase();
 904 
 905   void bind(Label& L) {
 906     Assembler::bind(L);
 907     // fences across basic blocks should not be merged
 908     code()->clear_last_merge_candidate();
 909   }
 910 
 911   typedef void (MacroAssembler::* compare_and_branch_insn)(Register Rs1, Register Rs2, const address dest);
 912   typedef void (MacroAssembler::* compare_and_branch_label_insn)(Register Rs1, Register Rs2, Label &L, bool is_far);
 913   typedef void (MacroAssembler::* jal_jalr_insn)(Register Rt, address dest);
 914 
  // Apply an address-taking instruction emitter to a Label.
  // NOTE(review): presumably a bound label is resolved to its address and an
  // unbound one is recorded for later patching; the two-register form
  // presumably uses neg_insn (the inverted condition) to build the far variant
  // when is_far is set -- confirm in the .cpp.
  void wrap_label(Register r, Label &L, jal_jalr_insn insn);
  void wrap_label(Register r1, Register r2, Label &L,
                  compare_and_branch_insn insn,
                  compare_and_branch_label_insn neg_insn, bool is_far = false);
 919 
  // Load address into Rd, from a Label, a raw address or an Address.
  // NOTE(review): the overload with 'int32_t &offset' presumably leaves a low
  // part of the address in 'offset' for the following instruction's immediate
  // instead of adding it into Rd -- confirm in the .cpp.
  void la(Register Rd, Label &label);
  void la(Register Rd, const address addr);
  void la(Register Rd, const address addr, int32_t &offset);
  void la(Register Rd, const Address &adr);

  // Load immediate into Rd: unsigned 16-bit, signed 32-bit, and the general
  // 64-bit form.
  void li16u(Register Rd, uint16_t imm);
  void li32(Register Rd, int32_t imm);
  void li  (Register Rd, int64_t imm);  // optimized load immediate
 928 
 929   // mv
 930   void mv(Register Rd, address addr)                  { li(Rd, (int64_t)addr); }
 931   void mv(Register Rd, address addr, int32_t &offset) {
 932     // Split address into a lower 12-bit sign-extended offset and the remainder,
 933     // so that the offset could be encoded in jalr or load/store instruction.
 934     offset = ((int32_t)(int64_t)addr << 20) >> 20;
 935     li(Rd, (int64_t)addr - offset);
 936   }
 937 
 938   template<typename T, ENABLE_IF(std::is_integral<T>::value)>
 939   inline void mv(Register Rd, T o)                    { li(Rd, (int64_t)o); }
 940 
 941   void mv(Register Rd, RegisterOrConstant src) {
 942     if (src.is_register()) {
 943       mv(Rd, src.as_register());
 944     } else {
 945       mv(Rd, src.as_constant());
 946     }
 947   }
 948 
  // Generates a load of a 48-bit constant which can be
  // patched to any 48-bit constant, i.e. address.
  // In the common case, supply an additional temp register
  // to shorten the instruction sequence.
  // tmp defaults to noreg, i.e. no temp register is supplied.
  // NOTE(review): the overload with 'int32_t &offset' presumably returns a low
  // part of the address in 'offset' for use as the immediate of the following
  // instruction -- confirm in the .cpp.
  void movptr(Register Rd, const Address &addr, Register tmp = noreg);
  void movptr(Register Rd, address addr, Register tmp = noreg);
  void movptr(Register Rd, address addr, int32_t &offset, Register tmp = noreg);
 956 
 957  private:
  // Private movptr variants; movptr2 additionally takes a temp register.
  // NOTE(review): presumably the two instruction sequences movptr chooses
  // between, depending on whether a temp register was supplied -- confirm.
  void movptr1(Register Rd, uintptr_t addr, int32_t &offset);
  void movptr2(Register Rd, uintptr_t addr, int32_t &offset, Register tmp);
 960  public:
  // float imm move
  // The can_*_imm_load predicates are static and take a half-precision value
  // (passed as a short), a float and a double respectively.
  // NOTE(review): presumably they report whether the value can be loaded by the
  // matching fli_* below, and the short presumably carries raw half-precision
  // bits -- confirm in the .cpp.
  static bool can_hf_imm_load(short imm);
  static bool can_fp_imm_load(float imm);
  static bool can_dp_imm_load(double imm);
  void fli_h(FloatRegister Rd, short imm);
  void fli_s(FloatRegister Rd, float imm);
  void fli_d(FloatRegister Rd, double imm);
 968 
  // arith
  // Add or subtract a 64-bit constant; the 'w' forms are the word variants.
  // NOTE(review): tmp (t0 by default) is presumably only used to materialize
  // constants that do not fit an instruction immediate -- confirm in the .cpp.
  void add (Register Rd, Register Rn, int64_t increment, Register tmp = t0);
  void sub (Register Rd, Register Rn, int64_t decrement, Register tmp = t0);
  void addw(Register Rd, Register Rn, int64_t increment, Register tmp = t0);
  void subw(Register Rd, Register Rn, int64_t decrement, Register tmp = t0);
 974 
 975   void subi(Register Rd, Register Rn, int64_t decrement) {
 976     assert(is_simm12(-decrement), "Must be");
 977     addi(Rd, Rn, -decrement);
 978   }
 979 
 980   void subiw(Register Rd, Register Rn, int64_t decrement) {
 981     assert(is_simm12(-decrement), "Must be");
 982     addiw(Rd, Rn, -decrement);
 983   }
 984 
 985 #define INSN(NAME)                                               \
 986   inline void NAME(Register Rd, Register Rs1, Register Rs2) {    \
 987     Assembler::NAME(Rd, Rs1, Rs2);                               \
 988   }
 989 
 990   INSN(add);
 991   INSN(addw);
 992   INSN(sub);
 993   INSN(subw);
 994 
 995 #undef INSN
 996 
  // logic
  // NOTE(review): presumably the 32-bit (word) and/or/xor of Rs1 and Rs2;
  // how the upper half of Rd is set is defined in the .cpp -- confirm.
  void andrw(Register Rd, Register Rs1, Register Rs2);
  void orrw(Register Rd, Register Rs1, Register Rs2);
  void xorrw(Register Rd, Register Rs1, Register Rs2);

  // logic with negate
  // NOTE(review): presumably Rs2 is the negated operand, as in the Zbb
  // instructions of the same names -- confirm in the .cpp.
  void andn(Register Rd, Register Rs1, Register Rs2);
  void orn(Register Rd, Register Rs1, Register Rs2);

  // reverse bytes
  // Both take two temp registers, defaulting to t0 and t1.
  void revbw(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1);  // reverse bytes in lower word, sign-extend
  void revb(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);  // reverse bytes in doubleword

  // Rotates, with the amount in a register or as a constant.
  // NOTE(review): by their names ror rotates right and rolw rotates a word
  // left -- confirm in the .cpp.
  void ror(Register dst, Register src, Register shift, Register tmp = t0);
  void ror(Register dst, Register src, uint32_t shift, Register tmp = t0);
  void rolw(Register dst, Register src, uint32_t shift, Register tmp = t0);

  // NOTE(review): presumably ORs 'src' into the pointer-sized memory word at
  // 'adr', using tmp1/tmp2 as scratch registers -- confirm in the .cpp.
  void orptr(Address adr, RegisterOrConstant src, Register tmp1 = t0, Register tmp2 = t1);
1015 
// Load and Store Instructions

// Opens, but does not close, the body of a relocating load/store overload.
// 'header' must declare parameters named 'dest' and 'rtype'. The generated
// prologue
//  - guarantees that rtype is relocInfo::internal_word_type,
//  - records the relocation spec of InternalAddress(dest), and
//  - declares an IncompressibleScope that lives until the closing brace.
// The macro user supplies the remaining statements and the closing '}'.
#define INSN_ENTRY_RELOC(result_type, header)                               \
  result_type header {                                                      \
    guarantee(rtype == relocInfo::internal_word_type,                       \
              "only internal_word_type relocs make sense here");            \
    relocate(InternalAddress(dest).rspec());                                \
    IncompressibleScope scope(this);  /* relocations */
1023 
1024 #define INSN(NAME)                                                                                 \
1025   void NAME(Register Rd, address dest) {                                                           \
1026     assert_cond(dest != nullptr);                                                                  \
1027     if (CodeCache::contains(dest)) {                                                               \
1028       int64_t distance = dest - pc();                                                              \
1029       assert(is_valid_32bit_offset(distance), "Must be");                                          \
1030       auipc(Rd, (int32_t)distance + 0x800);                                                        \
1031       Assembler::NAME(Rd, Rd, ((int32_t)distance << 20) >> 20);                                    \
1032     } else {                                                                                       \
1033       int32_t offset = 0;                                                                          \
1034       movptr(Rd, dest, offset);                                                                    \
1035       Assembler::NAME(Rd, Rd, offset);                                                             \
1036     }                                                                                              \
1037   }                                                                                                \
1038   INSN_ENTRY_RELOC(void, NAME(Register Rd, address dest, relocInfo::relocType rtype))              \
1039     NAME(Rd, dest);                                                                                \
1040   }                                                                                                \
1041   void NAME(Register Rd, const Address &adr, Register temp = t0) {                                 \
1042     switch (adr.getMode()) {                                                                       \
1043       case Address::literal: {                                                                     \
1044         relocate(adr.rspec(), [&] {                                                                \
1045           NAME(Rd, adr.target());                                                                  \
1046         });                                                                                        \
1047         break;                                                                                     \
1048       }                                                                                            \
1049       case Address::base_plus_offset: {                                                            \
1050         if (is_simm12(adr.offset())) {                                                             \
1051           Assembler::NAME(Rd, adr.base(), adr.offset());                                           \
1052         } else {                                                                                   \
1053           int32_t offset = ((int32_t)adr.offset() << 20) >> 20;                                    \
1054           if (Rd == adr.base()) {                                                                  \
1055             la(temp, Address(adr.base(), adr.offset() - offset));                                  \
1056             Assembler::NAME(Rd, temp, offset);                                                     \
1057           } else {                                                                                 \
1058             la(Rd, Address(adr.base(), adr.offset() - offset));                                    \
1059             Assembler::NAME(Rd, Rd, offset);                                                       \
1060           }                                                                                        \
1061         }                                                                                          \
1062         break;                                                                                     \
1063       }                                                                                            \
1064       default:                                                                                     \
1065         ShouldNotReachHere();                                                                      \
1066     }                                                                                              \
1067   }                                                                                                \
1068   void NAME(Register Rd, Label &L) {                                                               \
1069     wrap_label(Rd, L, &MacroAssembler::NAME);                                                      \
1070   }
1071 
1072   INSN(lb);
1073   INSN(lbu);
1074   INSN(lh);
1075   INSN(lhu);
1076   INSN(lw);
1077   INSN(lwu);
1078   INSN(ld);
1079 
1080 #undef INSN
1081 
1082 #define INSN(NAME)                                                                                 \
1083   void NAME(FloatRegister Rd, address dest, Register temp = t0) {                                  \
1084     assert_cond(dest != nullptr);                                                                  \
1085     if (CodeCache::contains(dest)) {                                                               \
1086       int64_t distance = dest - pc();                                                              \
1087       assert(is_valid_32bit_offset(distance), "Must be");                                          \
1088       auipc(temp, (int32_t)distance + 0x800);                                                      \
1089       Assembler::NAME(Rd, temp, ((int32_t)distance << 20) >> 20);                                  \
1090     } else {                                                                                       \
1091       int32_t offset = 0;                                                                          \
1092       movptr(temp, dest, offset);                                                                  \
1093       Assembler::NAME(Rd, temp, offset);                                                           \
1094     }                                                                                              \
1095   }                                                                                                \
1096   INSN_ENTRY_RELOC(void, NAME(FloatRegister Rd, address dest,                                      \
1097                               relocInfo::relocType rtype, Register temp = t0))                     \
1098     NAME(Rd, dest, temp);                                                                          \
1099   }                                                                                                \
1100   void NAME(FloatRegister Rd, const Address &adr, Register temp = t0) {                            \
1101     switch (adr.getMode()) {                                                                       \
1102       case Address::literal: {                                                                     \
1103         relocate(adr.rspec(), [&] {                                                                \
1104           NAME(Rd, adr.target(), temp);                                                            \
1105         });                                                                                        \
1106         break;                                                                                     \
1107       }                                                                                            \
1108       case Address::base_plus_offset: {                                                            \
1109         if (is_simm12(adr.offset())) {                                                             \
1110           Assembler::NAME(Rd, adr.base(), adr.offset());                                           \
1111         } else {                                                                                   \
1112           int32_t offset = ((int32_t)adr.offset() << 20) >> 20;                                    \
1113           la(temp, Address(adr.base(), adr.offset() - offset));                                    \
1114           Assembler::NAME(Rd, temp, offset);                                                       \
1115         }                                                                                          \
1116         break;                                                                                     \
1117       }                                                                                            \
1118       default:                                                                                     \
1119         ShouldNotReachHere();                                                                      \
1120     }                                                                                              \
1121   }
1122 
1123   INSN(flh);
1124   INSN(flw);
1125   INSN(fld);
1126 
1127 #undef INSN
1128 
// Relocating store overloads. For each NAME / REGISTER pair this generates
// NAME(Rs, dest, rtype, temp), which runs the INSN_ENTRY_RELOC prologue and then
// calls the NAME(Rs, dest, temp) overload of the same store. REGISTER is the
// type of the stored register (Register or FloatRegister).
#define INSN(NAME, REGISTER)                                                                       \
  INSN_ENTRY_RELOC(void, NAME(REGISTER Rs, address dest,                                           \
                              relocInfo::relocType rtype, Register temp = t0))                     \
    NAME(Rs, dest, temp);                                                                          \
  }

  // Integer stores.
  INSN(sb,  Register);
  INSN(sh,  Register);
  INSN(sw,  Register);
  INSN(sd,  Register);
  // Floating-point stores.
  INSN(fsw, FloatRegister);
  INSN(fsd, FloatRegister);

#undef INSN
1143 
1144 #define INSN(NAME)                                                                                 \
1145   void NAME(Register Rs, address dest, Register temp = t0) {                                       \
1146     assert_cond(dest != nullptr);                                                                  \
1147     assert_different_registers(Rs, temp);                                                          \
1148     if (CodeCache::contains(dest)) {                                                               \
1149       int64_t distance = dest - pc();                                                              \
1150       assert(is_valid_32bit_offset(distance), "Must be");                                          \
1151       auipc(temp, (int32_t)distance + 0x800);                                                      \
1152       Assembler::NAME(Rs, temp, ((int32_t)distance << 20) >> 20);                                  \
1153     } else {                                                                                       \
1154       int32_t offset = 0;                                                                          \
1155       movptr(temp, dest, offset);                                                                  \
1156       Assembler::NAME(Rs, temp, offset);                                                           \
1157     }                                                                                              \
1158   }                                                                                                \
1159   void NAME(Register Rs, const Address &adr, Register temp = t0) {                                 \
1160     switch (adr.getMode()) {                                                                       \
1161       case Address::literal: {                                                                     \
1162         assert_different_registers(Rs, temp);                                                      \
1163         relocate(adr.rspec(), [&] {                                                                \
1164           NAME(Rs, adr.target(), temp);                                                            \
1165         });                                                                                        \
1166         break;                                                                                     \
1167       }                                                                                            \
1168       case Address::base_plus_offset: {                                                            \
1169         if (is_simm12(adr.offset())) {                                                             \
1170           Assembler::NAME(Rs, adr.base(), adr.offset());                                           \
1171         } else {                                                                                   \
1172           assert_different_registers(Rs, temp);                                                    \
1173           int32_t offset = ((int32_t)adr.offset() << 20) >> 20;                                    \
1174           la(temp, Address(adr.base(), adr.offset() - offset));                                    \
1175           Assembler::NAME(Rs, temp, offset);                                                       \
1176         }                                                                                          \
1177         break;                                                                                     \
1178       }                                                                                            \
1179       default:                                                                                     \
1180         ShouldNotReachHere();                                                                      \
1181     }                                                                                              \
1182   }
1183 
  // Expand INSN once per integer store instruction. Each expansion provides,
  // among others, the NAME(Register Rs, const Address &adr, Register temp = t0)
  // overload defined by the macro above.
  INSN(sb);
  INSN(sh);
  INSN(sw);
  INSN(sd);

#undef INSN
1190 
// Float-register store wrappers, expanded once per instruction name below.
// Each expansion defines two overloads of NAME:
//  - NAME(FloatRegister Rs, address dest, temp): when dest lies inside the
//    code cache, emits auipc + NAME using the pc-relative distance (the low
//    12 bits are sign-extended via (x << 20) >> 20 and the auipc immediate is
//    biased by 0x800); otherwise forms the address with movptr and stores
//    through temp with the offset movptr hands back.
//  - NAME(FloatRegister Rs, const Address &adr, temp): literal addresses are
//    wrapped in relocate() and forwarded to the overload above; base+offset
//    addresses use the instruction directly when the offset passes
//    is_simm12(), else the remainder of the offset is folded into temp via la.
// temp defaults to t0 and is overwritten whenever it is used to form an address.
#define INSN(NAME)                                                                                 \
  void NAME(FloatRegister Rs, address dest, Register temp = t0) {                                  \
    assert_cond(dest != nullptr);                                                                  \
    if (CodeCache::contains(dest)) {                                                               \
      int64_t distance = dest - pc();                                                              \
      assert(is_valid_32bit_offset(distance), "Must be");                                          \
      auipc(temp, (int32_t)distance + 0x800);                                                      \
      Assembler::NAME(Rs, temp, ((int32_t)distance << 20) >> 20);                                  \
    } else {                                                                                       \
      int32_t offset = 0;                                                                          \
      movptr(temp, dest, offset);                                                                  \
      Assembler::NAME(Rs, temp, offset);                                                           \
    }                                                                                              \
  }                                                                                                \
  void NAME(FloatRegister Rs, const Address &adr, Register temp = t0) {                            \
    switch (adr.getMode()) {                                                                       \
      case Address::literal: {                                                                     \
        relocate(adr.rspec(), [&] {                                                                \
          NAME(Rs, adr.target(), temp);                                                            \
        });                                                                                        \
        break;                                                                                     \
      }                                                                                            \
      case Address::base_plus_offset: {                                                            \
        if (is_simm12(adr.offset())) {                                                             \
          Assembler::NAME(Rs, adr.base(), adr.offset());                                           \
        } else {                                                                                   \
          int32_t offset = ((int32_t)adr.offset() << 20) >> 20;                                    \
          la(temp, Address(adr.base(), adr.offset() - offset));                                    \
          Assembler::NAME(Rs, temp, offset);                                                       \
        }                                                                                          \
        break;                                                                                     \
      }                                                                                            \
      default:                                                                                     \
        ShouldNotReachHere();                                                                      \
    }                                                                                              \
  }

  // Instantiate the wrappers for the two float store instructions.
  INSN(fsw);
  INSN(fsd);

#undef INSN
1232 
1233 #undef INSN_ENTRY_RELOC
1234 
  // Compare-and-exchange helpers; only declared here. `size` selects the
  // operand width and `acquire`/`release` the Aqrl ordering to use.
  // cmpxchg can optionally report its outcome as a bool (result_as_bool,
  // false by default).
  // NOTE(review): the *_narrow_value variants presumably implement sub-word
  // CAS on an aligned containing word (see the shift/mask/aligned_addr
  // parameters of the helper), using tmp1..tmp3 as scratch -- confirm against
  // the definitions.
  void cmpxchg(Register addr, Register expected,
               Register new_val,
               Assembler::operand_size size,
               Assembler::Aqrl acquire, Assembler::Aqrl release,
               Register result, bool result_as_bool = false);
  void weak_cmpxchg(Register addr, Register expected,
                    Register new_val,
                    Assembler::operand_size size,
                    Assembler::Aqrl acquire, Assembler::Aqrl release,
                    Register result);
  void cmpxchg_narrow_value_helper(Register addr, Register expected, Register new_val,
                                   Assembler::operand_size size,
                                   Register shift, Register mask, Register aligned_addr);
  void cmpxchg_narrow_value(Register addr, Register expected,
                            Register new_val,
                            Assembler::operand_size size,
                            Assembler::Aqrl acquire, Assembler::Aqrl release,
                            Register result, bool result_as_bool,
                            Register tmp1, Register tmp2, Register tmp3);
  void weak_cmpxchg_narrow_value(Register addr, Register expected,
                                 Register new_val,
                                 Assembler::operand_size size,
                                 Assembler::Aqrl acquire, Assembler::Aqrl release,
                                 Register result,
                                 Register tmp1, Register tmp2, Register tmp3);
1260 
  // Atomic read-modify-write helpers; only declared here.
  // NOTE(review): by name, the suffixes presumably mean `w` = 32-bit operand,
  // `al` = acquire+release ordering and `u` = unsigned (zero-extended) result
  // -- confirm against the definitions.
  void atomic_add(Register prev, RegisterOrConstant incr, Register addr);
  void atomic_addw(Register prev, RegisterOrConstant incr, Register addr);
  void atomic_addal(Register prev, RegisterOrConstant incr, Register addr);
  void atomic_addalw(Register prev, RegisterOrConstant incr, Register addr);

  void atomic_xchg(Register prev, Register newv, Register addr);
  void atomic_xchgw(Register prev, Register newv, Register addr);
  void atomic_xchgal(Register prev, Register newv, Register addr);
  void atomic_xchgalw(Register prev, Register newv, Register addr);
  void atomic_xchgwu(Register prev, Register newv, Register addr);
  void atomic_xchgalwu(Register prev, Register newv, Register addr);

  // Compare-and-swap for the given operand size. Both ordering arguments
  // default to Assembler::relaxed.
  void atomic_cas(Register prev, Register newv, Register addr, Assembler::operand_size size,
              Assembler::Aqrl acquire = Assembler::relaxed, Assembler::Aqrl release = Assembler::relaxed);
1275 
  // Emit a far call/jump. Only invalidates the tmp register, which
  // is used to keep the entry address for jalr.
  // The address must be inside the code cache.
  // Supported entry.rspec():
  // - relocInfo::external_word_type
  // - relocInfo::runtime_call_type
  // - relocInfo::none
  // Clobbers t1 by default (tmp defaults to t1).
  void far_call(const Address &entry, Register tmp = t1);
  void far_jump(const Address &entry, Register tmp = t1);
1286 
1287   static int far_branch_size() {
1288       return 2 * MacroAssembler::instruction_size;  // auipc + jalr, see far_call() & far_jump()
1289   }
1290 
  // NOTE(review): by name, presumably loads the card table's byte map base
  // into reg -- confirm against the definition.
  void load_byte_map_base(Register reg);
1292 
1293   void bang_stack_with_offset(int offset) {
1294     // stack grows down, caller passes positive offset
1295     assert(offset > 0, "must bang with negative offset");
1296     sub(t0, sp, offset);
1297     sd(zr, Address(t0));
1298   }
1299 
  // Emits a move of call_site into t1. The call_Unimplemented() macro below
  // passes the enclosing function's __PRETTY_FUNCTION__ string (cast to
  // address) as call_site, so t1 ends up holding the address of that string.
  virtual void _call_Unimplemented(address call_site) {
    mv(t1, call_site);
  }

  #define call_Unimplemented() _call_Unimplemented((address)__PRETTY_FUNCTION__)
1305 
  // Frame creation and destruction shared between JITs.
  void build_frame(int framesize);
  void remove_frame(int framesize);

  // NOTE(review): by name, presumably emits the compiled-method entry
  // sequence for compilation C, with sp_inc an extra stack adjustment --
  // confirm against the definition.
  void verified_entry(Compile* C, int sp_inc);

  void reserved_stack_check();

  // Polling-page helpers; rtype is the relocation type supplied by the caller.
  void get_polling_page(Register dest, relocInfo::relocType rtype);
  void read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype);
1316 
1317   // RISCV64 OpenJDK uses three different types of calls:
1318   //
1319   //   - far call: auipc reg, pc_relative_offset; jalr ra, reg, offset
  //     The offset has the range [-(2G + 2K), 2G - 2K). Addresses in the code
  //     cache that are out of this range require an indirect call.
1322   //     If a jump is needed rather than a call, a far jump 'jalr x0, reg, offset'
1323   //     can be used instead.
1324   //     All instructions are embedded at a call site.
1325   //
1326   //   - indirect call: movptr + jalr
1327   //     This can reach anywhere in the address space, but it cannot be patched
1328   //     while code is running, so it must only be modified at a safepoint.
1329   //     This form of call is most suitable for targets at fixed addresses,
1330   //     which will never be patched.
1331   //
1332   //   - reloc call:
1333   //     This too can reach anywhere in the address space but is only available
1334   //     in C1/C2-generated code (nmethod).
1335   //
1336   //     [Main code section]
1337   //       auipc
1338   //       ld <address_from_stub_section>
1339   //       jalr
1340   //
1341   //     [Stub section]
1342   //     address stub:
1343   //       <64-bit destination address>
1344   //
1345   //    To change the destination we simply atomically store the new
1346   //    address in the stub section.
1347   //    There is a benign race in that the other thread might observe the old
1348   //    64-bit destination address before it observes the new address. That does
1349   //    not matter because the destination method has been invalidated, so there
1350   //    will be a trap at its start.
1351 
  // Emit a reloc call and create a stub to hold the entry point address.
  // Supported entry.rspec():
  // - relocInfo::runtime_call_type
  // - relocInfo::opt_virtual_call_type
  // - relocInfo::static_call_type
  // - relocInfo::virtual_call_type
  //
  // Return: the call PC or nullptr if CodeCache is full.
  // tmp defaults to t1.
  address reloc_call(Address entry, Register tmp = t1);

  // Inline-cache call and check support; only declared here.
  // NOTE(review): ic_check() presumably returns an offset related to the
  // emitted check, padded according to end_alignment (one instruction by
  // default) -- confirm against the definition.
  address ic_call(address entry, jint method_index = 0);
  static int ic_check_size();
  int ic_check(int end_alignment = MacroAssembler::instruction_size);
1365 
  // Support for memory inc/dec
  // N.B. increment/decrement calls with an Address destination need a
  // scratch register to load the value to be incremented.
  // increment/decrement calls which add or subtract a constant other than
  // a sign-extended 12-bit immediate need a 2nd scratch register to hold
  // the constant. So an address increment/decrement may trash both scratch
  // registers, which default to t0 (tmp1) and t1 (tmp2).

  void increment(const Address dst, int64_t value = 1, Register tmp1 = t0, Register tmp2 = t1);
  void incrementw(const Address dst, int32_t value = 1, Register tmp1 = t0, Register tmp2 = t1);

  void decrement(const Address dst, int64_t value = 1, Register tmp1 = t0, Register tmp2 = t1);
  void decrementw(const Address dst, int32_t value = 1, Register tmp1 = t0, Register tmp2 = t1);
1379 
  // Class-initialization barrier on klass, using tmp as scratch.
  // L_fast_path / L_slow_path are optional branch targets (nullptr by default).
  // NOTE(review): presumably falls through on the path whose label is
  // omitted -- confirm against the definition.
  void clinit_barrier(Register klass, Register tmp, Label* L_fast_path = nullptr, Label* L_slow_path = nullptr);

  // NOTE(review): by name, these presumably load the holder class (or its
  // class loader data) of the Method in `method`, and metadata reachable from
  // `src` -- confirm against the definitions.
  void load_method_holder_cld(Register result, Register method);
  void load_method_holder(Register holder, Register method);
  void load_metadata(Register dst, Register src);
1385 
  // Match-mask helpers; only declared here.
  // NOTE(review): presumably used by the string search intrinsics, with
  // haystack_isL marking a Latin-1 haystack -- confirm against the callers.
  void compute_index(Register str1, Register trailing_zeros, Register match_mask,
                     Register result, Register char_tmp, Register tmp,
                     bool haystack_isL);
  void compute_match_mask(Register src, Register pattern, Register match_mask,
                          Register mask1, Register mask2);
1391 
  // CRC32 code for java.util.zip.CRC32::updateBytes() intrinsic.
  // kernel_crc32 takes the running crc, the buffer and its length plus four
  // table registers and six temporaries.
  // NOTE(review): update_word_crc32 / update_byte_crc32 are presumably the
  // per-word and per-byte table-lookup steps used by the kernel -- confirm
  // against the definitions.
  void kernel_crc32(Register crc, Register buf, Register len,
        Register table0, Register table1, Register table2, Register table3,
        Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5, Register tmp6);
  void update_word_crc32(Register crc, Register v, Register tmp1, Register tmp2, Register tmp3,
        Register table0, Register table1, Register table2, Register table3,
        bool upper);
  void update_byte_crc32(Register crc, Register val, Register table);
1400 
1401 #ifdef COMPILER2
  // Vector CRC32 kernels; these declarations sit inside #ifdef COMPILER2.
  // NOTE(review): by name, the *_vclmul_* variants presumably fold with
  // carry-less multiplies and the vectorsize_16 / vectorsize_32 suffixes
  // presumably select the vector length in bytes -- confirm against the
  // definitions.
  void vector_update_crc32(Register crc, Register buf, Register len,
                           Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5,
                           Register table0, Register table3);
  void kernel_crc32_vclmul_fold(Register crc, Register buf, Register len,
              Register table0, Register table1, Register table2, Register table3,
              Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5);
  void crc32_vclmul_fold_to_16_bytes_vectorsize_32(VectorRegister vx, VectorRegister vy, VectorRegister vt,
                            VectorRegister vtmp1, VectorRegister vtmp2, VectorRegister vtmp3, VectorRegister vtmp4);
  void kernel_crc32_vclmul_fold_vectorsize_32(Register crc, Register buf, Register len,
                                              Register vclmul_table, Register tmp1, Register tmp2);
  void crc32_vclmul_fold_16_bytes_vectorsize_16(VectorRegister vx, VectorRegister vt,
                      VectorRegister vtmp1, VectorRegister vtmp2, VectorRegister vtmp3, VectorRegister vtmp4,
                      Register buf, Register tmp, const int STEP);
  void crc32_vclmul_fold_16_bytes_vectorsize_16_2(VectorRegister vx, VectorRegister vy, VectorRegister vt,
                      VectorRegister vtmp1, VectorRegister vtmp2, VectorRegister vtmp3, VectorRegister vtmp4,
                      Register tmp);
  void crc32_vclmul_fold_16_bytes_vectorsize_16_3(VectorRegister vx, VectorRegister vy, VectorRegister vt,
                      VectorRegister vtmp1, VectorRegister vtmp2, VectorRegister vtmp3, VectorRegister vtmp4,
                      Register tmp);
  void kernel_crc32_vclmul_fold_vectorsize_16(Register crc, Register buf, Register len,
                                              Register vclmul_table, Register tmp1, Register tmp2);
1423 
  // Multi-word multiply/add helpers; these declarations sit inside
  // #ifdef COMPILER2.
  // NOTE(review): presumably the building blocks of the BigInteger
  // multiplyToLen / mulAdd intrinsics -- confirm against the callers.
  void mul_add(Register out, Register in, Register offset,
               Register len, Register k, Register tmp);
  void wide_mul(Register prod_lo, Register prod_hi, Register n, Register m);
  void wide_madd(Register sum_lo, Register sum_hi, Register n,
                 Register m, Register tmp1, Register tmp2);
  // Add-with-carry style helpers (cad / cadc / adc) taking an explicit carry register.
  void cad(Register dst, Register src1, Register src2, Register carry);
  void cadc(Register dst, Register src1, Register src2, Register carry);
  void adc(Register dst, Register src1, Register src2, Register carry);
  void add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo,
                       Register src1, Register src2, Register carry);
  void multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart,
                             Register y, Register y_idx, Register z,
                             Register carry, Register product,
                             Register idx, Register kdx);
  void multiply_128_x_128_loop(Register y, Register z,
                               Register carry, Register carry2,
                               Register idx, Register jdx,
                               Register yz_idx1, Register yz_idx2,
                               Register tmp, Register tmp3, Register tmp4,
                               Register tmp6, Register product_hi);
  void multiply_to_len(Register x, Register xlen, Register y, Register ylen,
                       Register z, Register tmp0,
                       Register tmp1, Register tmp2, Register tmp3, Register tmp4,
                       Register tmp5, Register tmp6, Register product_hi);
1448 
1449 #endif // COMPILER2
1450 
  // NOTE(review): by name, presumably widen the low / high 32 bits of Rs
  // into Rd -- confirm against the definitions. tmp1/tmp2 default to t0/t1.
  void inflate_lo32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);
  void inflate_hi32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);

  // NOTE(review): by name, presumably a trailing-zero count at character
  // granularity, with isLL selecting the character width -- confirm against
  // the definition. tmp1/tmp2 default to t0/t1.
  void ctzc_bits(Register Rd, Register Rs, bool isLL = false,
                 Register tmp1 = t0, Register tmp2 = t1);

  // Memory zeroing / filling helpers; only declared here. zero_words has a
  // constant-count overload and a register-count overload; the latter
  // returns an address.
  // NOTE(review): the returned address is presumably nullptr on failure,
  // as documented for reloc_call -- confirm against the definition.
  void zero_words(Register base, uint64_t cnt);
  address zero_words(Register ptr, Register cnt);
  void fill_words(Register base, Register cnt, Register value);
  void zero_memory(Register addr, Register len, Register tmp);
  void zero_dcache_blocks(Register base, Register cnt, Register tmp1, Register tmp2);

  // shift left by shamt and add
  void shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt);

  // test single bit in Rs, result is set to Rd
  void test_bit(Register Rd, Register Rs, uint32_t bit_pos);
1468 
  // The *_safe float conversion helpers below deal with special inputs.
  // Converting e.g. NaN, +Inf or -Inf to int, float or double triggers an
  // exception, so these situations need explicit handling to get correct
  // results. tmp defaults to t0.
  void fcvt_w_s_safe(Register dst, FloatRegister src, Register tmp = t0);
  void fcvt_l_s_safe(Register dst, FloatRegister src, Register tmp = t0);
  void fcvt_w_d_safe(Register dst, FloatRegister src, Register tmp = t0);
  void fcvt_l_d_safe(Register dst, FloatRegister src, Register tmp = t0);

  // NOTE(review): by name, presumably implement Java's Math.round for float
  // and double inputs -- confirm against the definitions.
  void java_round_float(Register dst, FloatRegister src, FloatRegister ftmp);
  void java_round_double(Register dst, FloatRegister src, FloatRegister ftmp);

  // Helper routine processing the slow path of NaN when converting float to float16
  void float_to_float16_NaN(Register dst, FloatRegister src, Register tmp1, Register tmp2);
1483 
1484   // vector load/store unit-stride instructions
1485   void vlex_v(VectorRegister vd, Register base, Assembler::SEW sew, VectorMask vm = unmasked) {
1486     switch (sew) {
1487       case Assembler::e64:
1488         vle64_v(vd, base, vm);
1489         break;
1490       case Assembler::e32:
1491         vle32_v(vd, base, vm);
1492         break;
1493       case Assembler::e16:
1494         vle16_v(vd, base, vm);
1495         break;
1496       case Assembler::e8: // fall through
1497       default:
1498         vle8_v(vd, base, vm);
1499         break;
1500     }
1501   }
1502 
1503   void vsex_v(VectorRegister store_data, Register base, Assembler::SEW sew, VectorMask vm = unmasked) {
1504     switch (sew) {
1505       case Assembler::e64:
1506         vse64_v(store_data, base, vm);
1507         break;
1508       case Assembler::e32:
1509         vse32_v(store_data, base, vm);
1510         break;
1511       case Assembler::e16:
1512         vse16_v(store_data, base, vm);
1513         break;
1514       case Assembler::e8: // fall through
1515       default:
1516         vse8_v(store_data, base, vm);
1517         break;
1518     }
1519   }
1520 
1521   // vector pseudo instructions
1522   // rotate vector register left with shift bits, 32-bit version
1523   inline void vrole32_vi(VectorRegister vd, uint32_t shift, VectorRegister tmp_vr) {
1524     vsrl_vi(tmp_vr, vd, 32 - shift);
1525     vsll_vi(vd, vd, shift);
1526     vor_vv(vd, vd, tmp_vr);
1527   }
1528 
  // vl1r.v vd, (rs): emitted as vl1re8.v.
  inline void vl1r_v(VectorRegister vd, Register rs) {
    vl1re8_v(vd, rs);
  }

  // vmnot.m vd, vs: emitted as vmnand.mm vd, vs, vs.
  inline void vmnot_m(VectorRegister vd, VectorRegister vs) {
    vmnand_mm(vd, vs, vs);
  }

  // vncvt.x.x.w vd, vs, vm: emitted as vnsrl.wx vd, vs, x0, vm.
  inline void vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked) {
    vnsrl_wx(vd, vs, x0, vm);
  }

  // vneg.v vd, vs, vm: emitted as vrsub.vx vd, vs, x0, vm.
  inline void vneg_v(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked) {
    vrsub_vx(vd, vs, x0, vm);
  }

  // vfneg.v vd, vs, vm: emitted as vfsgnjn.vv vd, vs, vs, vm.
  inline void vfneg_v(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked) {
    vfsgnjn_vv(vd, vs, vs, vm);
  }

  // vfabs.v vd, vs, vm: emitted as vfsgnjx.vv vd, vs, vs, vm.
  inline void vfabs_v(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked) {
    vfsgnjx_vv(vd, vs, vs, vm);
  }
1552 
  // The "greater" comparisons below are emitted as the opposite comparison
  // with the two vector sources swapped.

  // vmsgt.vv vd, vs2, vs1 -> vmslt.vv vd, vs1, vs2
  inline void vmsgt_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
    vmslt_vv(vd, vs1, vs2, vm);
  }

  // vmsgtu.vv vd, vs2, vs1 -> vmsltu.vv vd, vs1, vs2
  inline void vmsgtu_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
    vmsltu_vv(vd, vs1, vs2, vm);
  }

  // vmsge.vv vd, vs2, vs1 -> vmsle.vv vd, vs1, vs2
  inline void vmsge_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
    vmsle_vv(vd, vs1, vs2, vm);
  }

  // vmsgeu.vv vd, vs2, vs1 -> vmsleu.vv vd, vs1, vs2
  inline void vmsgeu_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
    vmsleu_vv(vd, vs1, vs2, vm);
  }

  // vmfgt.vv vd, vs2, vs1 -> vmflt.vv vd, vs1, vs2
  inline void vmfgt_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
    vmflt_vv(vd, vs1, vs2, vm);
  }

  // vmfge.vv vd, vs2, vs1 -> vmfle.vv vd, vs1, vs2
  inline void vmfge_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
    vmfle_vv(vd, vs1, vs2, vm);
  }
1576 
  // Unsigned "x < imm", emitted as "x <= imm - 1" via vmsleu.vi.
  // imm must lie in [1, 16]; this is enforced with guarantee().
  inline void vmsltu_vi(VectorRegister Vd, VectorRegister Vs2, uint32_t imm, VectorMask vm = unmasked) {
    guarantee(imm >= 1 && imm <= 16, "imm is invalid");
    vmsleu_vi(Vd, Vs2, imm-1, vm);
  }

  // Unsigned "x >= imm", emitted as "x > imm - 1" via vmsgtu.vi.
  // imm must lie in [1, 16]; this is enforced with guarantee().
  inline void vmsgeu_vi(VectorRegister Vd, VectorRegister Vs2, uint32_t imm, VectorMask vm = unmasked) {
    guarantee(imm >= 1 && imm <= 16, "imm is invalid");
    vmsgtu_vi(Vd, Vs2, imm-1, vm);
  }
1586 
  // Copy mask register: vmmv.m vd, vs is emitted as vmand.mm vd, vs, vs.
  inline void vmmv_m(VectorRegister vd, VectorRegister vs) {
    vmand_mm(vd, vs, vs);
  }

  // Clear mask register: vmclr.m vd is emitted as vmxor.mm vd, vd, vd.
  inline void vmclr_m(VectorRegister vd) {
    vmxor_mm(vd, vd, vd);
  }

  // Set mask register: vmset.m vd is emitted as vmxnor.mm vd, vd, vd.
  inline void vmset_m(VectorRegister vd) {
    vmxnor_mm(vd, vd, vd);
  }

  // Bitwise not: vnot.v Vd, Vs, vm is emitted as vxor.vi Vd, Vs, -1, vm.
  inline void vnot_v(VectorRegister Vd, VectorRegister Vs, VectorMask vm = unmasked) {
    vxor_vi(Vd, Vs, -1, vm);
  }
1605 
  // Defined out of line.
  // NOTE(review): by name, presumably the block size used by zero_words()
  // -- confirm against the definition.
  static const int zero_words_block_size;
1607 
1608   void cast_primitive_type(BasicType type, Register Rt) {
1609     switch (type) {
1610       case T_BOOLEAN:
1611         sltu(Rt, zr, Rt);
1612         break;
1613       case T_CHAR   :
1614         zext(Rt, Rt, 16);
1615         break;
1616       case T_BYTE   :
1617         sext(Rt, Rt, 8);
1618         break;
1619       case T_SHORT  :
1620         sext(Rt, Rt, 16);
1621         break;
1622       case T_INT    :
1623         sext(Rt, Rt, 32);
1624         break;
1625       case T_LONG   : /* nothing to do */        break;
1626       case T_VOID   : /* nothing to do */        break;
1627       case T_FLOAT  : /* nothing to do */        break;
1628       case T_DOUBLE : /* nothing to do */        break;
1629       default: ShouldNotReachHere();
1630     }
1631   }
1632 
  // float cmp with unordered_result
  // NOTE(review): unordered_result presumably selects the value produced
  // when the operands are unordered (a NaN is involved) -- confirm against
  // the definitions.
  void float_compare(Register result, FloatRegister Rs1, FloatRegister Rs2, int unordered_result);
  void double_compare(Register result, FloatRegister Rs1, FloatRegister Rs2, int unordered_result);

  // Zero/Sign-extend
  // `bits` is the width of the value in src to extend from
  // (cast_primitive_type passes 8, 16 and 32).
  void zext(Register dst, Register src, int bits);
  void sext(Register dst, Register src, int bits);
1640 
private:
  // NOTE(review): presumably the shared implementation behind the cmp_*2i
  // helpers below, with is_signed selecting signed vs. unsigned comparison
  // -- confirm against the definition.
  void cmp_x2i(Register dst, Register src1, Register src2, Register tmp, bool is_signed = true);

public:
  // compare src1 and src2 and get -1/0/1 in dst.
  // if [src1 > src2], dst = 1;
  // if [src1 == src2], dst = 0;
  // if [src1 < src2], dst = -1;
  // tmp defaults to t0.
  void cmp_l2i(Register dst, Register src1, Register src2, Register tmp = t0);
  void cmp_ul2i(Register dst, Register src1, Register src2, Register tmp = t0);
  void cmp_uw2i(Register dst, Register src1, Register src2, Register tmp = t0);
1652 
  // support for argument shuffling
  // Each helper moves one argument from the location described by `src` to
  // the location described by `dst`; tmp defaults to t0.
  void move32_64(VMRegPair src, VMRegPair dst, Register tmp = t0);
  void float_move(VMRegPair src, VMRegPair dst, Register tmp = t0);
  void long_move(VMRegPair src, VMRegPair dst, Register tmp = t0);
  void double_move(VMRegPair src, VMRegPair dst, Register tmp = t0);
  // NOTE(review): presumably also records the oop's location in `map` and
  // reports the receiver's slot through receiver_offset when is_receiver is
  // set -- confirm against the definition.
  void object_move(OopMap* map,
                   int oop_handle_offset,
                   int framesize_in_slots,
                   VMRegPair src,
                   VMRegPair dst,
                   bool is_receiver,
                   int* receiver_offset);
1665 
1666 #ifdef ASSERT
1667   // Template short-hand support to clean-up after a failed call to trampoline
1668   // call generation (see trampoline_call() below), when a set of Labels must
1669   // be reset (before returning).
1670   template<typename Label, typename... More>
1671   void reset_labels(Label& lbl, More&... more) {
1672     lbl.reset(); reset_labels(more...);
1673   }
1674   template<typename Label>
1675   void reset_labels(Label& lbl) {
1676     lbl.reset();
1677   }
1678 #endif
1679 
private:

  // Private helpers; only declared here.
  // NOTE(review): by name, repne_scan presumably scans `count` elements
  // starting at `addr` for `value` -- confirm against the definition.
  void repne_scan(Register addr, Register value, Register count, Register tmp);

  // NOTE(review): bitset_to_regs presumably expands `bitset` into regs[] and
  // returns the number of entries written -- confirm against the definition.
  int bitset_to_regs(unsigned int bitset, unsigned char* regs);
  Address add_memory_helper(const Address dst, Register tmp);

  // Load-reserved / store-conditional primitives, parameterized by operand
  // size and by the acquire / release ordering respectively.
  void load_reserved(Register dst, Register addr, Assembler::operand_size size, Assembler::Aqrl acquire);
  void store_conditional(Register dst, Register new_val, Register addr, Assembler::operand_size size, Assembler::Aqrl release);
1689 
public:
  // Fast-path locking / unlocking of obj using tmp1..tmp3 as scratch.
  // NOTE(review): presumably branches to `slow` when the fast path cannot
  // complete -- confirm against the definitions.
  void fast_lock(Register basic_lock, Register obj, Register tmp1, Register tmp2, Register tmp3, Label& slow);
  void fast_unlock(Register obj, Register tmp1, Register tmp2, Register tmp3, Label& slow);
1693 
public:
  // Sizes of fixed multi-instruction sequences, each expressed as a number
  // of instructions times MacroAssembler::instruction_size.
  enum {
    // movptr
    movptr1_instruction_size = 6 * MacroAssembler::instruction_size, // lui, addi, slli, addi, slli, addi.  See movptr1().
    movptr2_instruction_size = 5 * MacroAssembler::instruction_size, // lui, lui, slli, add, addi.  See movptr2().
    load_pc_relative_instruction_size = 2 * MacroAssembler::instruction_size // auipc, ld
  };
1701 
  // Instruction classifiers. Each predicate inspects the instruction at the
  // given (non-null) address and returns true when its opcode field -- and,
  // where checked, its funct3 field or destination register -- has the
  // listed value.
  static bool is_load_pc_relative_at(address branch);
  static bool is_li16u_at(address instr);

  static bool is_jal_at(address instr)        { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b1101111; }
  static bool is_jalr_at(address instr)       { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b1100111 && extract_funct3(instr) == 0b000; }
  static bool is_branch_at(address instr)     { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b1100011; }
  // ld is the load opcode with funct3 == 0b011.
  static bool is_ld_at(address instr)         { assert_cond(instr != nullptr); return is_load_at(instr) && extract_funct3(instr) == 0b011; }
  static bool is_load_at(address instr)       { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b0000011; }
  static bool is_float_load_at(address instr) { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b0000111; }
  static bool is_auipc_at(address instr)      { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b0010111; }
  // A jump is any of: conditional branch, jal, jalr.
  static bool is_jump_at(address instr)       { assert_cond(instr != nullptr); return is_branch_at(instr) || is_jal_at(instr) || is_jalr_at(instr); }
  static bool is_add_at(address instr)        { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b0110011 && extract_funct3(instr) == 0b000; }
  static bool is_addi_at(address instr)       { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b0010011 && extract_funct3(instr) == 0b000; }
  static bool is_addiw_at(address instr)      { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b0011011 && extract_funct3(instr) == 0b000; }
  // The *_to_zr_at variants additionally require the destination register to be zr.
  static bool is_addiw_to_zr_at(address instr){ assert_cond(instr != nullptr); return is_addiw_at(instr) && extract_rd(instr) == zr; }
  static bool is_lui_at(address instr)        { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b0110111; }
  static bool is_lui_to_zr_at(address instr)  { assert_cond(instr != nullptr); return is_lui_at(instr) && extract_rd(instr) == zr; }
1719 
1720   static bool is_srli_at(address instr) {
1721     assert_cond(instr != nullptr);
1722     return extract_opcode(instr) == 0b0010011 &&
1723            extract_funct3(instr) == 0b101 &&
1724            Assembler::extract(((unsigned*)instr)[0], 31, 26) == 0b000000;
1725   }
1726 
1727   static bool is_slli_shift_at(address instr, uint32_t shift) {
1728     assert_cond(instr != nullptr);
1729     return (extract_opcode(instr) == 0b0010011 && // opcode field
1730             extract_funct3(instr) == 0b001 &&     // funct3 field, select the type of operation
1731             Assembler::extract(Assembler::ld_instr(instr), 25, 20) == shift);    // shamt field
1732   }
1733 
  // Sequence recognizers for the two movptr forms; only declared here.
  static bool is_movptr1_at(address instr);
  static bool is_movptr2_at(address instr);

  static bool is_lwu_to_zr(address instr);

  // Field extractors for the instruction at `instr`: source registers,
  // destination register, opcode and funct3. Used by the is_*_at predicates
  // and the check_*_data_dependency helpers in this class.
  static Register extract_rs1(address instr);
  static Register extract_rs2(address instr);
  static Register extract_rd(address instr);
  static uint32_t extract_opcode(address instr);
  static uint32_t extract_funct3(address instr);
1744 
1745   // the instruction sequence of movptr is as below:
1746   //     lui
1747   //     addi
1748   //     slli
1749   //     addi
1750   //     slli
1751   //     addi/jalr/load
1752   static bool check_movptr1_data_dependency(address instr) {
1753     address lui = instr;
1754     address addi1 = lui + MacroAssembler::instruction_size;
1755     address slli1 = addi1 + MacroAssembler::instruction_size;
1756     address addi2 = slli1 + MacroAssembler::instruction_size;
1757     address slli2 = addi2 + MacroAssembler::instruction_size;
1758     address last_instr = slli2 + MacroAssembler::instruction_size;
1759     return extract_rs1(addi1) == extract_rd(lui) &&
1760            extract_rs1(addi1) == extract_rd(addi1) &&
1761            extract_rs1(slli1) == extract_rd(addi1) &&
1762            extract_rs1(slli1) == extract_rd(slli1) &&
1763            extract_rs1(addi2) == extract_rd(slli1) &&
1764            extract_rs1(addi2) == extract_rd(addi2) &&
1765            extract_rs1(slli2) == extract_rd(addi2) &&
1766            extract_rs1(slli2) == extract_rd(slli2) &&
1767            extract_rs1(last_instr) == extract_rd(slli2);
1768   }
1769 
1770   // the instruction sequence of movptr2 is as below:
1771   //     lui
1772   //     lui
1773   //     slli
1774   //     add
1775   //     addi/jalr/load
1776   static bool check_movptr2_data_dependency(address instr) {
1777     address lui1 = instr;
1778     address lui2 = lui1 + MacroAssembler::instruction_size;
1779     address slli = lui2 + MacroAssembler::instruction_size;
1780     address add  = slli + MacroAssembler::instruction_size;
1781     address last_instr = add + MacroAssembler::instruction_size;
1782     return extract_rd(add) == extract_rd(lui2) &&
1783            extract_rs1(add) == extract_rd(lui2) &&
1784            extract_rs2(add) == extract_rd(slli) &&
1785            extract_rs1(slli) == extract_rd(lui1) &&
1786            extract_rd(slli) == extract_rd(lui1) &&
1787            extract_rs1(last_instr) == extract_rd(add);
1788   }
1789 
1790   // the instruction sequence of li16u is as below:
1791   //     lui
1792   //     srli
1793   static bool check_li16u_data_dependency(address instr) {
1794     address lui = instr;
1795     address srli = lui + MacroAssembler::instruction_size;
1796 
1797     return extract_rs1(srli) == extract_rd(lui) &&
1798            extract_rs1(srli) == extract_rd(srli);
1799   }
1800 
1801   // the instruction sequence of li32 is as below:
1802   //     lui
1803   //     addiw
1804   static bool check_li32_data_dependency(address instr) {
1805     address lui = instr;
1806     address addiw = lui + MacroAssembler::instruction_size;
1807 
1808     return extract_rs1(addiw) == extract_rd(lui) &&
1809            extract_rs1(addiw) == extract_rd(addiw);
1810   }
1811 
1812   // the instruction sequence of pc-relative is as below:
1813   //     auipc
1814   //     jalr/addi/load/float_load
1815   static bool check_pc_relative_data_dependency(address instr) {
1816     address auipc = instr;
1817     address last_instr = auipc + MacroAssembler::instruction_size;
1818 
1819     return extract_rs1(last_instr) == extract_rd(auipc);
1820   }
1821 
1822   // the instruction sequence of load_label is as below:
1823   //     auipc
1824   //     load
1825   static bool check_load_pc_relative_data_dependency(address instr) {
1826     address auipc = instr;
1827     address load = auipc + MacroAssembler::instruction_size;
1828 
1829     return extract_rd(load) == extract_rd(auipc) &&
1830            extract_rs1(load) == extract_rd(load);
1831   }
1832 
  // Sequence recognizers; defined out of line.
  // NOTE(review): presumably the counterparts of check_li32_data_dependency()
  // and check_pc_relative_data_dependency() above -- confirm in the
  // implementation.
  static bool is_li32_at(address instr);
  static bool is_pc_relative_at(address branch);
1835 
1836   static bool is_membar(address addr) {
1837     return (Bytes::get_native_u4(addr) & 0x7f) == 0b1111 && extract_funct3(addr) == 0;
1838   }
  // Accessors for the kind of the membar at addr; defined out of line.
  // NOTE(review): by their names, these read and rewrite the ordering bits of
  // an instruction for which is_membar(addr) holds -- confirm the precondition
  // and the encoding of order_kind in the implementation.
  static uint32_t get_membar_kind(address addr);
  static void set_membar_kind(address addr, uint32_t order_kind);
1841 
1842  public:
1843   // Inline type specific methods
1844   #include "asm/macroAssembler_common.hpp"
1845 };
1846 
1847 #ifdef ASSERT
1848 inline bool AbstractAssembler::pd_check_instruction_mark() { return false; }
1849 #endif
1850 
1851 #endif // CPU_RISCV_MACROASSEMBLER_RISCV_HPP