1 /*
   2  * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved.
   3  * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
   4  * Copyright (c) 2020, 2024, Huawei Technologies Co., Ltd. All rights reserved.
   5  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   6  *
   7  * This code is free software; you can redistribute it and/or modify it
   8  * under the terms of the GNU General Public License version 2 only, as
   9  * published by the Free Software Foundation.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  *
  25  */
  26 
  27 #ifndef CPU_RISCV_MACROASSEMBLER_RISCV_HPP
  28 #define CPU_RISCV_MACROASSEMBLER_RISCV_HPP
  29 
  30 #include "asm/assembler.inline.hpp"
  31 #include "code/vmreg.hpp"
  32 #include "metaprogramming/enableIf.hpp"
  33 #include "oops/compressedOops.hpp"
  34 #include "utilities/powerOfTwo.hpp"
  35 
  36 // MacroAssembler extends Assembler by frequently used macros.
  37 //
  38 // Instructions for which a 'better' code sequence exists depending
  39 // on arguments should also go in here.
  40 
  41 class MacroAssembler: public Assembler {
  42 
  43  public:
  44 
  45   MacroAssembler(CodeBuffer* code) : Assembler(code) {}
  46 
  47   void safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod);
  48 
  49   // Alignment
  50   int align(int modulus, int extra_offset = 0);
  51 
  52   static inline void assert_alignment(address pc, int alignment = MacroAssembler::instruction_size) {
  53     assert(is_aligned(pc, alignment), "bad alignment");
  54   }
  55 
  56   // nop
  57   void post_call_nop();
  58 
  59   // Stack frame creation/removal
  60   // Note that SP must be updated to the right place before saving/restoring RA and FP
  61   // because signal based thread suspend/resume could happen asynchronously.
  62   void enter() {
  63     addi(sp, sp, - 2 * wordSize);
  64     sd(ra, Address(sp, wordSize));
  65     sd(fp, Address(sp));
  66     addi(fp, sp, 2 * wordSize);
  67   }
  68 
  69   void leave() {
  70     addi(sp, fp, - 2 * wordSize);
  71     ld(fp, Address(sp));
  72     ld(ra, Address(sp, wordSize));
  73     addi(sp, sp, 2 * wordSize);
  74   }
  75 
  76 
  77   // Support for getting the JavaThread pointer (i.e.; a reference to thread-local information)
  78   // The pointer will be loaded into the thread register.
  79   void get_thread(Register thread);
  80 
  81   // Support for VM calls
  82   //
  83   // It is imperative that all calls into the VM are handled via the call_VM macros.
  84   // They make sure that the stack linkage is setup correctly. call_VM's correspond
  85   // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points.
  86 
  87   void call_VM(Register oop_result,
  88                address entry_point,
  89                bool check_exceptions = true);
  90   void call_VM(Register oop_result,
  91                address entry_point,
  92                Register arg_1,
  93                bool check_exceptions = true);
  94   void call_VM(Register oop_result,
  95                address entry_point,
  96                Register arg_1, Register arg_2,
  97                bool check_exceptions = true);
  98   void call_VM(Register oop_result,
  99                address entry_point,
 100                Register arg_1, Register arg_2, Register arg_3,
 101                bool check_exceptions = true);
 102 
 103   // Overloadings with last_Java_sp
 104   void call_VM(Register oop_result,
 105                Register last_java_sp,
 106                address entry_point,
 107                int number_of_arguments = 0,
 108                bool check_exceptions = true);
 109   void call_VM(Register oop_result,
 110                Register last_java_sp,
 111                address entry_point,
 112                Register arg_1,
 113                bool check_exceptions = true);
 114   void call_VM(Register oop_result,
 115                Register last_java_sp,
 116                address entry_point,
 117                Register arg_1, Register arg_2,
 118                bool check_exceptions = true);
 119   void call_VM(Register oop_result,
 120                Register last_java_sp,
 121                address entry_point,
 122                Register arg_1, Register arg_2, Register arg_3,
 123                bool check_exceptions = true);
 124 
 125   void get_vm_result(Register oop_result, Register java_thread);
 126   void get_vm_result_2(Register metadata_result, Register java_thread);
 127 
 128   // These always tightly bind to MacroAssembler::call_VM_leaf_base
 129   // bypassing the virtual implementation
 130   void call_VM_leaf(address entry_point,
 131                     int number_of_arguments = 0);
 132   void call_VM_leaf(address entry_point,
 133                     Register arg_0);
 134   void call_VM_leaf(address entry_point,
 135                     Register arg_0, Register arg_1);
 136   void call_VM_leaf(address entry_point,
 137                     Register arg_0, Register arg_1, Register arg_2);
 138 
 139   // These always tightly bind to MacroAssembler::call_VM_base
 140   // bypassing the virtual implementation
 141   void super_call_VM_leaf(address entry_point, Register arg_0);
 142   void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1);
 143   void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2);
 144   void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3);
 145 
 146   // last Java Frame (fills frame anchor)
 147   void set_last_Java_frame(Register last_java_sp, Register last_java_fp, address last_java_pc, Register tmp);
 148   void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Label &last_java_pc, Register tmp);
 149   void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Register last_java_pc);
 150 
 151   // thread in the default location (xthread)
 152   void reset_last_Java_frame(bool clear_fp);
 153 
 154   virtual void call_VM_leaf_base(
 155     address entry_point,                // the entry point
 156     int     number_of_arguments,        // the number of arguments to pop after the call
 157     Label*  retaddr = nullptr
 158   );
 159 
 160   virtual void call_VM_leaf_base(
 161     address entry_point,                // the entry point
 162     int     number_of_arguments,        // the number of arguments to pop after the call
 163     Label&  retaddr) {
 164     call_VM_leaf_base(entry_point, number_of_arguments, &retaddr);
 165   }
 166 
 167   virtual void call_VM_base(           // returns the register containing the thread upon return
 168     Register oop_result,               // where an oop-result ends up if any; use noreg otherwise
 169     Register java_thread,              // the thread if computed before     ; use noreg otherwise
 170     Register last_java_sp,             // to set up last_Java_frame in stubs; use noreg otherwise
 171     address  entry_point,              // the entry point
 172     int      number_of_arguments,      // the number of arguments (w/o thread) to pop after the call
 173     bool     check_exceptions          // whether to check for pending exceptions after return
 174   );
 175 
 176   void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions);
 177 
 178   virtual void check_and_handle_earlyret(Register java_thread);
 179   virtual void check_and_handle_popframe(Register java_thread);
 180 
 181   void resolve_weak_handle(Register result, Register tmp1, Register tmp2);
 182   void resolve_oop_handle(Register result, Register tmp1, Register tmp2);
 183   void resolve_jobject(Register value, Register tmp1, Register tmp2);
 184   void resolve_global_jobject(Register value, Register tmp1, Register tmp2);
 185 
 186   void movoop(Register dst, jobject obj);
 187   void mov_metadata(Register dst, Metadata* obj);
 188   void bang_stack_size(Register size, Register tmp);
 189   void set_narrow_oop(Register dst, jobject obj);
 190   void set_narrow_klass(Register dst, Klass* k);
 191 
 192   void load_mirror(Register dst, Register method, Register tmp1, Register tmp2);
 193   void access_load_at(BasicType type, DecoratorSet decorators, Register dst,
 194                       Address src, Register tmp1, Register tmp2);
 195   void access_store_at(BasicType type, DecoratorSet decorators, Address dst,
 196                        Register val, Register tmp1, Register tmp2, Register tmp3);
 197   void load_klass(Register dst, Register src, Register tmp = t0);
 198   void store_klass(Register dst, Register src, Register tmp = t0);
 199   void cmp_klass(Register oop, Register trial_klass, Register tmp1, Register tmp2, Label &L);
 200 
 201   void encode_klass_not_null(Register r, Register tmp = t0);
 202   void decode_klass_not_null(Register r, Register tmp = t0);
 203   void encode_klass_not_null(Register dst, Register src, Register tmp);
 204   void decode_klass_not_null(Register dst, Register src, Register tmp);
 205   void decode_heap_oop_not_null(Register r);
 206   void decode_heap_oop_not_null(Register dst, Register src);
 207   void decode_heap_oop(Register d, Register s);
 208   void decode_heap_oop(Register r) { decode_heap_oop(r, r); }
 209   void encode_heap_oop_not_null(Register r);
 210   void encode_heap_oop_not_null(Register dst, Register src);
 211   void encode_heap_oop(Register d, Register s);
 212   void encode_heap_oop(Register r) { encode_heap_oop(r, r); };
 213   void load_heap_oop(Register dst, Address src, Register tmp1,
 214                      Register tmp2, DecoratorSet decorators = 0);
 215   void load_heap_oop_not_null(Register dst, Address src, Register tmp1,
 216                               Register tmp2, DecoratorSet decorators = 0);
 217   void store_heap_oop(Address dst, Register val, Register tmp1,
 218                       Register tmp2, Register tmp3, DecoratorSet decorators = 0);
 219 
 220   void store_klass_gap(Register dst, Register src);
 221 
 222   // currently unimplemented
 223   // Used for storing null. All other oop constants should be
 224   // stored using routines that take a jobject.
 225   void store_heap_oop_null(Address dst);
 226 
 227   // This dummy is to prevent a call to store_heap_oop from
 228   // converting a zero (linked null) into a Register by giving
 229   // the compiler two choices it can't resolve
 230 
 231   void store_heap_oop(Address dst, void* dummy);
 232 
 233   // Support for null-checks
 234   //
 235   // Generates code that causes a null OS exception if the content of reg is null.
 236   // If the accessed location is M[reg + offset] and the offset is known, provide the
 237   // offset. No explicit code generateion is needed if the offset is within a certain
 238   // range (0 <= offset <= page_size).
 239 
 240   virtual void null_check(Register reg, int offset = -1);
 241   static bool needs_explicit_null_check(intptr_t offset);
 242   static bool uses_implicit_null_check(void* address);
 243 
 244   // idiv variant which deals with MINLONG as dividend and -1 as divisor
 245   int corrected_idivl(Register result, Register rs1, Register rs2,
 246                       bool want_remainder, bool is_signed);
 247   int corrected_idivq(Register result, Register rs1, Register rs2,
 248                       bool want_remainder, bool is_signed);
 249 
 250   // interface method calling
 251   void lookup_interface_method(Register recv_klass,
 252                                Register intf_klass,
 253                                RegisterOrConstant itable_index,
 254                                Register method_result,
 255                                Register scan_tmp,
 256                                Label& no_such_interface,
 257                                bool return_method = true);
 258 
 259   void lookup_interface_method_stub(Register recv_klass,
 260                                     Register holder_klass,
 261                                     Register resolved_klass,
 262                                     Register method_result,
 263                                     Register temp_reg,
 264                                     Register temp_reg2,
 265                                     int itable_index,
 266                                     Label& L_no_such_interface);
 267 
 268   // virtual method calling
 269   // n.n. x86 allows RegisterOrConstant for vtable_index
 270   void lookup_virtual_method(Register recv_klass,
 271                              RegisterOrConstant vtable_index,
 272                              Register method_result);
 273 
 274   // Form an address from base + offset in Rd. Rd my or may not
 275   // actually be used: you must use the Address that is returned. It
 276   // is up to you to ensure that the shift provided matches the size
 277   // of your data.
 278   Address form_address(Register Rd, Register base, int64_t byte_offset);
 279 
 280   // Sometimes we get misaligned loads and stores, usually from Unsafe
 281   // accesses, and these can exceed the offset range.
 282   Address legitimize_address(Register Rd, const Address &adr) {
 283     if (adr.getMode() == Address::base_plus_offset) {
 284       if (!is_simm12(adr.offset())) {
 285         return form_address(Rd, adr.base(), adr.offset());
 286       }
 287     }
 288     return adr;
 289   }
 290 
 291   // allocation
 292   void tlab_allocate(
 293     Register obj,                   // result: pointer to object after successful allocation
 294     Register var_size_in_bytes,     // object size in bytes if unknown at compile time; invalid otherwise
 295     int      con_size_in_bytes,     // object size in bytes if   known at compile time
 296     Register tmp1,                  // temp register
 297     Register tmp2,                  // temp register
 298     Label&   slow_case,             // continuation point of fast allocation fails
 299     bool     is_far = false
 300   );
 301 
 302   // Test sub_klass against super_klass, with fast and slow paths.
 303 
 304   // The fast path produces a tri-state answer: yes / no / maybe-slow.
 305   // One of the three labels can be null, meaning take the fall-through.
 306   // If super_check_offset is -1, the value is loaded up from super_klass.
 307   // No registers are killed, except tmp_reg
 308   void check_klass_subtype_fast_path(Register sub_klass,
 309                                      Register super_klass,
 310                                      Register tmp_reg,
 311                                      Label* L_success,
 312                                      Label* L_failure,
 313                                      Label* L_slow_path,
 314                                      Register super_check_offset = noreg);
 315 
 316   // The reset of the type check; must be wired to a corresponding fast path.
 317   // It does not repeat the fast path logic, so don't use it standalone.
 318   // The tmp1_reg and tmp2_reg can be noreg, if no temps are available.
 319   // Updates the sub's secondary super cache as necessary.
 320   void check_klass_subtype_slow_path(Register sub_klass,
 321                                      Register super_klass,
 322                                      Register tmp1_reg,
 323                                      Register tmp2_reg,
 324                                      Label* L_success,
 325                                      Label* L_failure);
 326 
 327   void population_count(Register dst, Register src, Register tmp1, Register tmp2);
 328 
 329   // As above, but with a constant super_klass.
 330   // The result is in Register result, not the condition codes.
 331   bool lookup_secondary_supers_table(Register r_sub_klass,
 332                                      Register r_super_klass,
 333                                      Register result,
 334                                      Register tmp1,
 335                                      Register tmp2,
 336                                      Register tmp3,
 337                                      Register tmp4,
 338                                      u1 super_klass_slot,
 339                                      bool stub_is_near = false);
 340 
 341   void verify_secondary_supers_table(Register r_sub_klass,
 342                                      Register r_super_klass,
 343                                      Register result,
 344                                      Register tmp1,
 345                                      Register tmp2,
 346                                      Register tmp3);
 347 
 348   void lookup_secondary_supers_table_slow_path(Register r_super_klass,
 349                                                Register r_array_base,
 350                                                Register r_array_index,
 351                                                Register r_bitmap,
 352                                                Register result,
 353                                                Register tmp1);
 354 
 355   void check_klass_subtype(Register sub_klass,
 356                            Register super_klass,
 357                            Register tmp_reg,
 358                            Label& L_success);
 359 
 360   Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0);
 361 
 362   // only if +VerifyOops
 363   void _verify_oop(Register reg, const char* s, const char* file, int line);
 364   void _verify_oop_addr(Address addr, const char* s, const char* file, int line);
 365 
 366   void _verify_oop_checked(Register reg, const char* s, const char* file, int line) {
 367     if (VerifyOops) {
 368       _verify_oop(reg, s, file, line);
 369     }
 370   }
 371   void _verify_oop_addr_checked(Address reg, const char* s, const char* file, int line) {
 372     if (VerifyOops) {
 373       _verify_oop_addr(reg, s, file, line);
 374     }
 375   }
 376 
 377   void _verify_method_ptr(Register reg, const char* msg, const char* file, int line) {}
 378   void _verify_klass_ptr(Register reg, const char* msg, const char* file, int line) {}
 379 
 // Call-site wrappers for the _verify_* members. Each one stringizes its
 // argument into the diagnostic message and adds __FILE__/__LINE__.
 // verify_klass_ptr dispatches to _verify_klass_ptr; it used to expand to
 // _verify_method_ptr, a copy/paste slip.
 #define verify_oop(reg) _verify_oop_checked(reg, "broken oop " #reg, __FILE__, __LINE__)
 #define verify_oop_msg(reg, msg) _verify_oop_checked(reg, "broken oop " #reg ", " #msg, __FILE__, __LINE__)
 #define verify_oop_addr(addr) _verify_oop_addr_checked(addr, "broken oop addr " #addr, __FILE__, __LINE__)
 #define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__)
 #define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__)
 385 
 // A more convenient access to fence for our purposes.
 // Each mask is four bits wide: the upper two bits carry the predecessor's
 // read/write bits and the lower two the successor's. Within a pair, 0b10
 // stands for r and 0b01 for w; they are extended with i for r and o for w
 // if UseConservativeFence is enabled.
 enum Membar_mask_bits {
   StoreStore = (0b01 << 2) | 0b01,   // (pred = ow   + succ =   ow)
   LoadStore  = (0b10 << 2) | 0b01,   // (pred = ir   + succ =   ow)
   StoreLoad  = (0b01 << 2) | 0b10,   // (pred = ow   + succ =   ir)
   LoadLoad   = (0b10 << 2) | 0b10,   // (pred = ir   + succ =   ir)
   AnyAny     = LoadStore | StoreLoad // (pred = iorw + succ = iorw)
 };
 396 
 397   void membar(uint32_t order_constraint);
 398 
 399   static void membar_mask_to_pred_succ(uint32_t order_constraint,
 400                                        uint32_t& predecessor, uint32_t& successor) {
 401     predecessor = (order_constraint >> 2) & 0x3;
 402     successor = order_constraint & 0x3;
 403 
 404     // extend rw -> iorw:
 405     // 01(w) -> 0101(ow)
 406     // 10(r) -> 1010(ir)
 407     // 11(rw)-> 1111(iorw)
 408     if (UseConservativeFence) {
 409       predecessor |= predecessor << 2;
 410       successor |= successor << 2;
 411     }
 412   }
 413 
 // Packs the low two bits of predecessor and of successor back into a
 // four-bit mask: predecessor in bits 3..2, successor in bits 1..0.
 static int pred_succ_to_membar_mask(uint32_t predecessor, uint32_t successor) {
   const uint32_t pred_rw = predecessor & 0x3;
   const uint32_t succ_rw = successor & 0x3;
   return (pred_rw << 2) | succ_rw;
 }
 417 
 418   void fence(uint32_t predecessor, uint32_t successor) {
 419     if (UseZtso) {
 420       if ((pred_succ_to_membar_mask(predecessor, successor) & StoreLoad) == StoreLoad) {
 421         // TSO allows for stores to be reordered after loads. When the compiler
 422         // generates a fence to disallow that, we are required to generate the
 423         // fence for correctness.
 424         Assembler::fence(predecessor, successor);
 425       } else {
 426         // TSO guarantees other fences already.
 427       }
 428     } else {
 429       // always generate fence for RVWMO
 430       Assembler::fence(predecessor, successor);
 431     }
 432   }
 433 
 434   void cmodx_fence();
 435 
 436   void pause() {
 437     Assembler::fence(w, 0);
 438   }
 439 
 440   // prints msg, dumps registers and stops execution
 441   void stop(const char* msg);
 442 
 443   static void debug64(char* msg, int64_t pc, int64_t regs[]);
 444 
 445   void unimplemented(const char* what = "");
 446 
 447   void should_not_reach_here() { stop("should not reach here"); }
 448 
 449   static address target_addr_for_insn(address insn_addr);
 450 
 451   // Required platform-specific helpers for Label::patch_instructions.
 452   // They _shadow_ the declarations in AbstractAssembler, which are undefined.
 453   static int pd_patch_instruction_size(address branch, address target);
 454   static void pd_patch_instruction(address branch, address target, const char* file = nullptr, int line = 0) {
 455     pd_patch_instruction_size(branch, target);
 456   }
 457   static address pd_call_destination(address branch) {
 458     return target_addr_for_insn(branch);
 459   }
 460 
 461   static int patch_oop(address insn_addr, address o);
 462 
 463   static address get_target_of_li32(address insn_addr);
 464   static int patch_imm_in_li32(address branch, int32_t target);
 465 
 466   // Return whether code is emitted to a scratch blob.
 467   virtual bool in_scratch_emit_size() {
 468     return false;
 469   }
 470 
 471   address emit_address_stub(int insts_call_instruction_offset, address target);
 472   address emit_trampoline_stub(int insts_call_instruction_offset, address target);
 473   static int max_reloc_call_stub_size();
 474 
 475   void emit_static_call_stub();
 476   static int static_call_stub_size();
 477 
 478   // The following 4 methods return the offset of the appropriate move instruction
 479 
 480   // Support for fast byte/short loading with zero extension (depending on particular CPU)
 481   int load_unsigned_byte(Register dst, Address src);
 482   int load_unsigned_short(Register dst, Address src);
 483 
 484   // Support for fast byte/short loading with sign extension (depending on particular CPU)
 485   int load_signed_byte(Register dst, Address src);
 486   int load_signed_short(Register dst, Address src);
 487 
 488   // Load and store values by size and signed-ness
 489   void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed);
 490   void store_sized_value(Address dst, Register src, size_t size_in_bytes);
 491 
 492   // Misaligned loads, will use the best way, according to the AvoidUnalignedAccess flag
 493   void load_short_misaligned(Register dst, Address src, Register tmp, bool is_signed, int granularity = 1);
 494   void load_int_misaligned(Register dst, Address src, Register tmp, bool is_signed, int granularity = 1);
 495   void load_long_misaligned(Register dst, Address src, Register tmp, int granularity = 1);
 496 
 497  public:
 498   // Standard pseudo instructions
 499   inline void nop() {
 500     addi(x0, x0, 0);
 501   }
 502 
 503   inline void mv(Register Rd, Register Rs) {
 504     if (Rd != Rs) {
 505       addi(Rd, Rs, 0);
 506     }
 507   }
 508 
 509   inline void notr(Register Rd, Register Rs) {
 510     if (do_compress_zcb(Rd, Rs) && (Rd == Rs)) {
 511       c_not(Rd);
 512     } else {
 513       xori(Rd, Rs, -1);
 514     }
 515   }
 516 
 517   inline void neg(Register Rd, Register Rs) {
 518     sub(Rd, x0, Rs);
 519   }
 520 
 521   inline void negw(Register Rd, Register Rs) {
 522     subw(Rd, x0, Rs);
 523   }
 524 
 525   inline void sext_w(Register Rd, Register Rs) {
 526     addiw(Rd, Rs, 0);
 527   }
 528 
 529   inline void zext_b(Register Rd, Register Rs) {
 530     if (do_compress_zcb(Rd, Rs) && (Rd == Rs)) {
 531       c_zext_b(Rd);
 532     } else {
 533       andi(Rd, Rs, 0xFF);
 534     }
 535   }
 536 
 537   inline void seqz(Register Rd, Register Rs) {
 538     sltiu(Rd, Rs, 1);
 539   }
 540 
 541   inline void snez(Register Rd, Register Rs) {
 542     sltu(Rd, x0, Rs);
 543   }
 544 
 545   inline void sltz(Register Rd, Register Rs) {
 546     slt(Rd, Rs, x0);
 547   }
 548 
 549   inline void sgtz(Register Rd, Register Rs) {
 550     slt(Rd, x0, Rs);
 551   }
 552 
 553   // Bit-manipulation extension pseudo instructions
 554   // zero extend word
 555   inline void zext_w(Register Rd, Register Rs) {
 556     assert(UseZba, "must be");
 557     if (do_compress_zcb(Rd, Rs) && (Rd == Rs)) {
 558       c_zext_w(Rd);
 559     } else {
 560       add_uw(Rd, Rs, zr);
 561     }
 562   }
 563 
 564   // Floating-point data-processing pseudo instructions
 565   inline void fmv_s(FloatRegister Rd, FloatRegister Rs) {
 566     if (Rd != Rs) {
 567       fsgnj_s(Rd, Rs, Rs);
 568     }
 569   }
 570 
 571   inline void fabs_s(FloatRegister Rd, FloatRegister Rs) {
 572     fsgnjx_s(Rd, Rs, Rs);
 573   }
 574 
 575   inline void fneg_s(FloatRegister Rd, FloatRegister Rs) {
 576     fsgnjn_s(Rd, Rs, Rs);
 577   }
 578 
 579   inline void fmv_d(FloatRegister Rd, FloatRegister Rs) {
 580     if (Rd != Rs) {
 581       fsgnj_d(Rd, Rs, Rs);
 582     }
 583   }
 584 
 585   inline void fabs_d(FloatRegister Rd, FloatRegister Rs) {
 586     fsgnjx_d(Rd, Rs, Rs);
 587   }
 588 
 589   inline void fneg_d(FloatRegister Rd, FloatRegister Rs) {
 590     fsgnjn_d(Rd, Rs, Rs);
 591   }
 592 
 593   // Control and status pseudo instructions
 594   void rdinstret(Register Rd);                  // read instruction-retired counter
 595   void rdcycle(Register Rd);                    // read cycle counter
 596   void rdtime(Register Rd);                     // read time
 597   void csrr(Register Rd, unsigned csr);         // read csr
 598   void csrw(unsigned csr, Register Rs);         // write csr
 599   void csrs(unsigned csr, Register Rs);         // set bits in csr
 600   void csrc(unsigned csr, Register Rs);         // clear bits in csr
 601   void csrwi(unsigned csr, unsigned imm);
 602   void csrsi(unsigned csr, unsigned imm);
 603   void csrci(unsigned csr, unsigned imm);
 604   void frcsr(Register Rd);                      // read float-point csr
 605   void fscsr(Register Rd, Register Rs);         // swap float-point csr
 606   void fscsr(Register Rs);                      // write float-point csr
 607   void frrm(Register Rd);                       // read float-point rounding mode
 608   void fsrm(Register Rd, Register Rs);          // swap float-point rounding mode
 609   void fsrm(Register Rs);                       // write float-point rounding mode
 610   void fsrmi(Register Rd, unsigned imm);
 611   void fsrmi(unsigned imm);
 612   void frflags(Register Rd);                    // read float-point exception flags
 613   void fsflags(Register Rd, Register Rs);       // swap float-point exception flags
 614   void fsflags(Register Rs);                    // write float-point exception flags
 615   void fsflagsi(Register Rd, unsigned imm);
 616   void fsflagsi(unsigned imm);
 617 
 618   // Restore cpu control state after JNI call
 619   void restore_cpu_control_state_after_jni(Register tmp);
 620 
 621   // Control transfer pseudo instructions
 622   void beqz(Register Rs, const address dest);
 623   void bnez(Register Rs, const address dest);
 624   void blez(Register Rs, const address dest);
 625   void bgez(Register Rs, const address dest);
 626   void bltz(Register Rs, const address dest);
 627   void bgtz(Register Rs, const address dest);
 628 
 629  private:
 630   void load_link_jump(const address source, Register temp = t0);
 631   void jump_link(const address dest, Register temp);
 632  public:
 633   // We try to follow risc-v asm menomics.
 634   // But as we don't layout a reachable GOT,
 635   // we often need to resort to movptr, li <48imm>.
 636   // https://github.com/riscv-non-isa/riscv-asm-manual/blob/master/riscv-asm.md
 637 
 638   // jump: jal x0, offset
 639   // For long reach uses temp register for:
 640   // la + jr
 641   void j(const address dest, Register temp = t0);
 642   void j(const Address &adr, Register temp = t0);
 643   void j(Label &l, Register temp = t0);
 644 
 645   // jump register: jalr x0, offset(rs)
 646   void jr(Register Rd, int32_t offset = 0);
 647 
 648   // call: la + jalr x1
 649   void call(const address dest, Register temp = t0);
 650 
 651   // jalr: jalr x1, offset(rs)
 652   void jalr(Register Rs, int32_t offset = 0);
 653 
 654   // Emit a runtime call. Only invalidates the tmp register which
 655   // is used to keep the entry address for jalr/movptr.
 656   // Uses call() for intra code cache, else movptr + jalr.
 657   void rt_call(address dest, Register tmp = t0);
 658 
 659   // ret: jalr x0, 0(x1)
 660   inline void ret() {
 661     Assembler::jalr(x0, x1, 0);
 662   }
 663 
 664   //label
 665   void beqz(Register Rs, Label &l, bool is_far = false);
 666   void bnez(Register Rs, Label &l, bool is_far = false);
 667   void blez(Register Rs, Label &l, bool is_far = false);
 668   void bgez(Register Rs, Label &l, bool is_far = false);
 669   void bltz(Register Rs, Label &l, bool is_far = false);
 670   void bgtz(Register Rs, Label &l, bool is_far = false);
 671 
 672   void beq (Register Rs1, Register Rs2, Label &L, bool is_far = false);
 673   void bne (Register Rs1, Register Rs2, Label &L, bool is_far = false);
 674   void blt (Register Rs1, Register Rs2, Label &L, bool is_far = false);
 675   void bge (Register Rs1, Register Rs2, Label &L, bool is_far = false);
 676   void bltu(Register Rs1, Register Rs2, Label &L, bool is_far = false);
 677   void bgeu(Register Rs1, Register Rs2, Label &L, bool is_far = false);
 678 
 679   void bgt (Register Rs, Register Rt, const address dest);
 680   void ble (Register Rs, Register Rt, const address dest);
 681   void bgtu(Register Rs, Register Rt, const address dest);
 682   void bleu(Register Rs, Register Rt, const address dest);
 683 
 684   void bgt (Register Rs, Register Rt, Label &l, bool is_far = false);
 685   void ble (Register Rs, Register Rt, Label &l, bool is_far = false);
 686   void bgtu(Register Rs, Register Rt, Label &l, bool is_far = false);
 687   void bleu(Register Rs, Register Rt, Label &l, bool is_far = false);
 688 
// Opens the definition 'result_type header {' of a relocating variant.
// 'header' must declare parameters named 'dest' and 'rtype'. The prologue
// insists that rtype is internal_word_type, records an InternalAddress
// relocation for dest and opens an IncompressibleRegion for the remainder of
// the body. The macro user supplies the remaining statements and the closing
// brace.
#define INSN_ENTRY_RELOC(result_type, header)                               \
  result_type header {                                                      \
    guarantee(rtype == relocInfo::internal_word_type,                       \
              "only internal_word_type relocs make sense here");            \
    relocate(InternalAddress(dest).rspec());                                \
    IncompressibleRegion ir(this);  /* relocations */

// For each base conditional branch NAME, defines two overloads that branch to
// an absolute address:
//  - NAME(Rs1, Rs2, dest): dest must be non-null (assert_cond) and its
//    distance from the current pc must be an even simm13 (guarantee); the
//    distance is then passed to Assembler::NAME.
//  - NAME(Rs1, Rs2, dest, rtype): the same, behind the INSN_ENTRY_RELOC
//    prologue.
#define INSN(NAME)                                                                                       \
  void NAME(Register Rs1, Register Rs2, const address dest) {                                            \
    assert_cond(dest != nullptr);                                                                        \
    int64_t offset = dest - pc();                                                                        \
    guarantee(is_simm13(offset) && is_even(offset),                                                      \
              "offset is invalid: is_simm_13: %s offset: " INT64_FORMAT,                                 \
              BOOL_TO_STR(is_simm13(offset)), offset);                                                   \
    Assembler::NAME(Rs1, Rs2, offset);                                                                   \
  }                                                                                                      \
  INSN_ENTRY_RELOC(void, NAME(Register Rs1, Register Rs2, address dest, relocInfo::relocType rtype))     \
    NAME(Rs1, Rs2, dest);                                                                                \
  }

  INSN(beq);
  INSN(bne);
  INSN(bge);
  INSN(bgeu);
  INSN(blt);
  INSN(bltu);

#undef INSN

#undef INSN_ENTRY_RELOC
 719 
  // Branch to l on a comparison of two floating-point registers
  // (float_* variants; defined out of line).
  // NOTE(review): is_far presumably requests a long-range branch and
  // is_unordered presumably selects the outcome when an operand is NaN —
  // confirm against the definitions.
  void float_beq(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void float_bne(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void float_ble(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void float_bge(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void float_blt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void float_bgt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);

  // The double_* counterparts, with the same parameter list.
  void double_beq(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void double_bne(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void double_ble(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void double_bge(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void double_blt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void double_bgt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
 733 
private:
  // Bitset-based save/restore helpers behind the public RegSet overloads.
  // 'bitset' selects the registers and 'stack' names the register used as
  // the stack pointer.
  // NOTE(review): the int result is presumably the number of registers (or
  // words) transferred — confirm against the definitions.
  int push_reg(unsigned int bitset, Register stack);
  int pop_reg(unsigned int bitset, Register stack);
  int push_fp(unsigned int bitset, Register stack);
  int pop_fp(unsigned int bitset, Register stack);
#ifdef COMPILER2
  // Vector-register variants, only declared for COMPILER2 builds.
  int push_v(unsigned int bitset, Register stack);
  int pop_v(unsigned int bitset, Register stack);
#endif // COMPILER2
 743 
 744   // The signed 20-bit upper imm can materialize at most negative 0xF...F80000000, two G.
 745   // The following signed 12-bit imm can at max subtract 0x800, two K, from that previously loaded two G.
 746   bool is_valid_32bit_offset(int64_t x) {
 747     constexpr int64_t twoG = (2 * G);
 748     constexpr int64_t twoK = (2 * K);
 749     return x < (twoG - twoK) && x >= (-twoG - twoK);
 750   }
 751 
 752   // Ensure that the auipc can reach the destination at x from anywhere within
 753   // the code cache so that if it is relocated we know it will still reach.
 754   bool is_32bit_offset_from_codecache(int64_t x) {
 755     int64_t low  = (int64_t)CodeCache::low_bound();
 756     int64_t high = (int64_t)CodeCache::high_bound();
 757     return is_valid_32bit_offset(x - low) && is_valid_32bit_offset(x - high);
 758   }
 759 
public:
  // Push / pop a single register (defined out of line).
  // NOTE(review): presumably operates on the sp stack — confirm.
  void push_reg(Register Rs);
  void pop_reg(Register Rd);
 763   void push_reg(RegSet regs, Register stack) { if (regs.bits()) push_reg(regs.bits(), stack); }
 764   void pop_reg(RegSet regs, Register stack)  { if (regs.bits()) pop_reg(regs.bits(), stack); }
 765   void push_fp(FloatRegSet regs, Register stack) { if (regs.bits()) push_fp(regs.bits(), stack); }
 766   void pop_fp(FloatRegSet regs, Register stack)  { if (regs.bits()) pop_fp(regs.bits(), stack); }
 767 #ifdef COMPILER2
 768   void push_v(VectorRegSet regs, Register stack) { if (regs.bits()) push_v(regs.bits(), stack); }
 769   void pop_v(VectorRegSet regs, Register stack)  { if (regs.bits()) pop_v(regs.bits(), stack); }
 770 #endif // COMPILER2
 771 
  // Push and pop everything that might be clobbered by a native
  // runtime call, except t0 and t1. (Those are always temporary
  // registers, so we don't have to protect them.)
  // Additional registers can be left out by passing them in 'exclude'.
  void push_call_clobbered_registers_except(RegSet exclude);
  void pop_call_clobbered_registers_except(RegSet exclude);
 778 
 779   void push_call_clobbered_registers() {
 780     push_call_clobbered_registers_except(RegSet());
 781   }
 782   void pop_call_clobbered_registers() {
 783     pop_call_clobbered_registers_except(RegSet());
 784   }
 785 
  // Save / restore CPU state (defined out of line).
  // NOTE(review): save_vectors / restore_vectors presumably include the
  // vector registers, with vector_size_in_bytes giving the size of each —
  // confirm against the definitions.
  void push_CPU_state(bool save_vectors = false, int vector_size_in_bytes = 0);
  void pop_CPU_state(bool restore_vectors = false, int vector_size_in_bytes = 0);

  // Continuation fast-path bookkeeping; java_thread names the register
  // holding the current thread (xthread by default).
  void push_cont_fastpath(Register java_thread = xthread);
  void pop_cont_fastpath(Register java_thread = xthread);

  // Increment / decrement the held monitor count, using tmp as scratch.
  // NOTE(review): presumably the count kept in the current thread — confirm.
  void inc_held_monitor_count(Register tmp = t0);
  void dec_held_monitor_count(Register tmp = t0);

  // if heap base register is used - reinit it with the correct value
  void reinit_heapbase();
 797 
  // Bind L at the current position, then clear the code buffer's record of
  // the last emitted instruction.
  void bind(Label& L) {
    Assembler::bind(L);
    // fences across basic blocks should not be merged
    code()->clear_last_insn();
  }
 803 
  // Pointer-to-member types accepted by wrap_label:
  //  - compare_and_branch_insn: two-register branch to an absolute address.
  //  - compare_and_branch_label_insn: two-register branch to a Label, with
  //    the is_far flag.
  //  - jal_jalr_insn: any (Register, address) member; despite the name it is
  //    also used for the address-based loads defined further down.
  typedef void (MacroAssembler::* compare_and_branch_insn)(Register Rs1, Register Rs2, const address dest);
  typedef void (MacroAssembler::* compare_and_branch_label_insn)(Register Rs1, Register Rs2, Label &L, bool is_far);
  typedef void (MacroAssembler::* jal_jalr_insn)(Register Rt, address dest);

  // Adapt an address-taking instruction emitter to a Label target.
  // NOTE(review): presumably resolves L to an address (handling labels that
  // are not bound yet) and uses neg_insn to branch around a longer jump when
  // is_far is set — confirm against the definitions.
  void wrap_label(Register r, Label &L, jal_jalr_insn insn);
  void wrap_label(Register r1, Register r2, Label &L,
                  compare_and_branch_insn insn,
                  compare_and_branch_label_insn neg_insn, bool is_far = false);
 812 
  // Load the address of a label / absolute address / Address into Rd.
  // la will use movptr instead of GOT when not in reach for auipc.
  // NOTE(review): the 'int32_t &offset' overload presumably leaves a low
  // part of the address in 'offset' for the caller to fold into the next
  // instruction — confirm against the definition.
  void la(Register Rd, Label &label);
  void la(Register Rd, const address addr);
  void la(Register Rd, const address addr, int32_t &offset);
  void la(Register Rd, const Address &adr);

  // Load an immediate into Rd: unsigned 16-bit, signed 32-bit, signed 64-bit.
  void li16u(Register Rd, uint16_t imm);
  void li32(Register Rd, int32_t imm);
  void li  (Register Rd, int64_t imm);  // optimized load immediate
 822 
 823   // mv
 824   void mv(Register Rd, address addr)                  { li(Rd, (int64_t)addr); }
 825   void mv(Register Rd, address addr, int32_t &offset) {
 826     // Split address into a lower 12-bit sign-extended offset and the remainder,
 827     // so that the offset could be encoded in jalr or load/store instruction.
 828     offset = ((int32_t)(int64_t)addr << 20) >> 20;
 829     li(Rd, (int64_t)addr - offset);
 830   }
 831 
 832   template<typename T, ENABLE_IF(std::is_integral<T>::value)>
 833   inline void mv(Register Rd, T o)                    { li(Rd, (int64_t)o); }
 834 
 835   void mv(Register Rd, RegisterOrConstant src) {
 836     if (src.is_register()) {
 837       mv(Rd, src.as_register());
 838     } else {
 839       mv(Rd, src.as_constant());
 840     }
 841   }
 842 
  // Generates a load of a 48-bit constant which can be
  // patched to any 48-bit constant, i.e. address.
  // In the common case, supply an additional temp register
  // to shorten the instruction sequence.
  // NOTE(review): the 'int32_t &offset' overload presumably returns a low
  // part of the address for the caller to apply in the next instruction —
  // confirm against the definition.
  void movptr(Register Rd, address addr, Register tmp = noreg);
  void movptr(Register Rd, address addr, int32_t &offset, Register tmp = noreg);
 private:
  // The two underlying sequences; movptr2 is the one that takes a temp.
  void movptr1(Register Rd, uintptr_t addr, int32_t &offset);
  void movptr2(Register Rd, uintptr_t addr, int32_t &offset, Register tmp);
 public:
 853 
  // arith
  // Add / subtract an immediate (64-bit forms and 32-bit 'w' forms).
  // NOTE(review): temp is presumably used to hold the immediate when it does
  // not fit the instruction encoding — confirm against the definitions.
  void add (Register Rd, Register Rn, int64_t increment, Register temp = t0);
  void addw(Register Rd, Register Rn, int32_t increment, Register temp = t0);
  void sub (Register Rd, Register Rn, int64_t decrement, Register temp = t0);
  void subw(Register Rd, Register Rn, int32_t decrement, Register temp = t0);
 859 
// Three-register overloads of add/addw/sub/subw that forward to the
// Assembler instruction of the same name, declared alongside the immediate
// overloads in this class.
#define INSN(NAME)                                               \
  inline void NAME(Register Rd, Register Rs1, Register Rs2) {    \
    Assembler::NAME(Rd, Rs1, Rs2);                               \
  }

  INSN(add);
  INSN(addw);
  INSN(sub);
  INSN(subw);

#undef INSN
 871 
  // logic
  // NOTE(review): the 'w' suffix presumably marks 32-bit (word) results —
  // confirm against the definitions.
  void andrw(Register Rd, Register Rs1, Register Rs2);
  void orrw(Register Rd, Register Rs1, Register Rs2);
  void xorrw(Register Rd, Register Rs1, Register Rs2);

  // logic with negate
  // NOTE(review): presumably Rs1 op ~Rs2, as in the RISC-V andn/orn
  // instructions — confirm which operand is negated.
  void andn(Register Rd, Register Rs1, Register Rs2);
  void orn(Register Rd, Register Rs1, Register Rs2);
 880 
  // revb: byte-reversal helpers. Rs is the source, Rd the destination, and
  // the tmp registers are scratch; the trailing comment on each line states
  // which bytes are reversed and how the result is extended.
  void revb_h_h(Register Rd, Register Rs, Register tmp = t0);                           // reverse bytes in halfword in lower 16 bits, sign-extend
  void revb_w_w(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);      // reverse bytes in lower word, sign-extend
  void revb_h_h_u(Register Rd, Register Rs, Register tmp = t0);                         // reverse bytes in halfword in lower 16 bits, zero-extend
  void revb_h_w_u(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);    // reverse bytes in halfwords in lower 32 bits, zero-extend
  void revb_h_helper(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1);  // reverse bytes in upper 16 bits (48:63) and move to lower
  void revb_h(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1);         // reverse bytes in each halfword
  void revb_w(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1);         // reverse bytes in each word
  void revb(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);          // reverse bytes in doubleword
 890 
 891   void ror_imm(Register dst, Register src, uint32_t shift, Register tmp = t0);
 892   void rolw_imm(Register dst, Register src, uint32_t, Register tmp = t0);
 893   void andi(Register Rd, Register Rn, int64_t imm, Register tmp = t0);
 894   void orptr(Address adr, RegisterOrConstant src, Register tmp1 = t0, Register tmp2 = t1);
 895 
// Load and Store Instructions

// Opens the definition 'result_type header {' of a relocating load/store
// variant. 'header' must declare parameters named 'dest' and 'rtype'. The
// prologue insists that rtype is internal_word_type, records an
// InternalAddress relocation for dest and opens an IncompressibleRegion for
// the remainder of the body. The macro user supplies the remaining
// statements and the closing brace.
#define INSN_ENTRY_RELOC(result_type, header)                               \
  result_type header {                                                      \
    guarantee(rtype == relocInfo::internal_word_type,                       \
              "only internal_word_type relocs make sense here");            \
    relocate(InternalAddress(dest).rspec());                                \
    IncompressibleRegion ir(this);  /* relocations */
 903 
// For each integer load NAME (lb, lbu, lh, lhu, lw, lwu, ld) defines:
//  - NAME(Rd, dest): load from an absolute, non-null address. If the distance
//    from the current pc passes is_valid_32bit_offset the sequence is
//    auipc + load; otherwise movptr + load. Rd doubles as the address
//    register in both cases.
//  - NAME(Rd, dest, rtype): the same, behind the INSN_ENTRY_RELOC prologue.
//  - NAME(Rd, adr, temp): load from an Address. A literal address is
//    relocated with adr.rspec() and loaded as above. For base_plus_offset, a
//    simm12 offset is encoded directly; otherwise la() first forms base plus
//    the offset without its sign-extended low 12 bits, and the load applies
//    those 12 bits. temp holds that intermediate only when Rd == adr.base();
//    otherwise Rd itself is used. Any other addressing mode is a fatal error.
//  - NAME(Rd, L): load from a Label, routed through wrap_label.
#define INSN(NAME)                                                                                 \
  void NAME(Register Rd, address dest) {                                                           \
    assert_cond(dest != nullptr);                                                                  \
    int64_t distance = dest - pc();                                                                \
    if (is_valid_32bit_offset(distance)) {                                                         \
      auipc(Rd, (int32_t)distance + 0x800);                                                        \
      Assembler::NAME(Rd, Rd, ((int32_t)distance << 20) >> 20);                                    \
    } else {                                                                                       \
      int32_t offset = 0;                                                                          \
      movptr(Rd, dest, offset);                                                                    \
      Assembler::NAME(Rd, Rd, offset);                                                             \
    }                                                                                              \
  }                                                                                                \
  INSN_ENTRY_RELOC(void, NAME(Register Rd, address dest, relocInfo::relocType rtype))              \
    NAME(Rd, dest);                                                                                \
  }                                                                                                \
  void NAME(Register Rd, const Address &adr, Register temp = t0) {                                 \
    switch (adr.getMode()) {                                                                       \
      case Address::literal: {                                                                     \
        relocate(adr.rspec(), [&] {                                                                \
          NAME(Rd, adr.target());                                                                  \
        });                                                                                        \
        break;                                                                                     \
      }                                                                                            \
      case Address::base_plus_offset: {                                                            \
        if (is_simm12(adr.offset())) {                                                             \
          Assembler::NAME(Rd, adr.base(), adr.offset());                                           \
        } else {                                                                                   \
          int32_t offset = ((int32_t)adr.offset() << 20) >> 20;                                    \
          if (Rd == adr.base()) {                                                                  \
            la(temp, Address(adr.base(), adr.offset() - offset));                                  \
            Assembler::NAME(Rd, temp, offset);                                                     \
          } else {                                                                                 \
            la(Rd, Address(adr.base(), adr.offset() - offset));                                    \
            Assembler::NAME(Rd, Rd, offset);                                                       \
          }                                                                                        \
        }                                                                                          \
        break;                                                                                     \
      }                                                                                            \
      default:                                                                                     \
        ShouldNotReachHere();                                                                      \
    }                                                                                              \
  }                                                                                                \
  void NAME(Register Rd, Label &L) {                                                               \
    wrap_label(Rd, L, &MacroAssembler::NAME);                                                      \
  }

  INSN(lb);
  INSN(lbu);
  INSN(lh);
  INSN(lhu);
  INSN(lw);
  INSN(lwu);
  INSN(ld);

#undef INSN
 960 
// For each floating-point load NAME (flw, fld) defines:
//  - NAME(Rd, dest, temp): load from an absolute, non-null address. temp
//    holds the address: auipc + load if the distance from the current pc
//    passes is_valid_32bit_offset, otherwise movptr + load.
//  - NAME(Rd, dest, rtype, temp): the same, behind the INSN_ENTRY_RELOC
//    prologue.
//  - NAME(Rd, adr, temp): load from an Address. A literal address is
//    relocated with adr.rspec() and loaded as above. For base_plus_offset, a
//    simm12 offset is encoded directly; otherwise la() forms base plus the
//    offset without its sign-extended low 12 bits in temp, and the load
//    applies those 12 bits. Any other addressing mode is a fatal error.
#define INSN(NAME)                                                                                 \
  void NAME(FloatRegister Rd, address dest, Register temp = t0) {                                  \
    assert_cond(dest != nullptr);                                                                  \
    int64_t distance = dest - pc();                                                                \
    if (is_valid_32bit_offset(distance)) {                                                         \
      auipc(temp, (int32_t)distance + 0x800);                                                      \
      Assembler::NAME(Rd, temp, ((int32_t)distance << 20) >> 20);                                  \
    } else {                                                                                       \
      int32_t offset = 0;                                                                          \
      movptr(temp, dest, offset);                                                                  \
      Assembler::NAME(Rd, temp, offset);                                                           \
    }                                                                                              \
  }                                                                                                \
  INSN_ENTRY_RELOC(void, NAME(FloatRegister Rd, address dest,                                      \
                              relocInfo::relocType rtype, Register temp = t0))                     \
    NAME(Rd, dest, temp);                                                                          \
  }                                                                                                \
  void NAME(FloatRegister Rd, const Address &adr, Register temp = t0) {                            \
    switch (adr.getMode()) {                                                                       \
      case Address::literal: {                                                                     \
        relocate(adr.rspec(), [&] {                                                                \
          NAME(Rd, adr.target(), temp);                                                            \
        });                                                                                        \
        break;                                                                                     \
      }                                                                                            \
      case Address::base_plus_offset: {                                                            \
        if (is_simm12(adr.offset())) {                                                             \
          Assembler::NAME(Rd, adr.base(), adr.offset());                                           \
        } else {                                                                                   \
          int32_t offset = ((int32_t)adr.offset() << 20) >> 20;                                    \
          la(temp, Address(adr.base(), adr.offset() - offset));                                    \
          Assembler::NAME(Rd, temp, offset);                                                       \
        }                                                                                          \
        break;                                                                                     \
      }                                                                                            \
      default:                                                                                     \
        ShouldNotReachHere();                                                                      \
    }                                                                                              \
  }

  INSN(flw);
  INSN(fld);

#undef INSN
1005 
// Relocating store variants NAME(Rs, dest, rtype, temp) for integer and
// floating-point sources (REGISTER is the source register type). After the
// INSN_ENTRY_RELOC prologue each one delegates to the NAME(Rs, dest, temp)
// overload that the store macros following this one define.
#define INSN(NAME, REGISTER)                                                                       \
  INSN_ENTRY_RELOC(void, NAME(REGISTER Rs, address dest,                                           \
                              relocInfo::relocType rtype, Register temp = t0))                     \
    NAME(Rs, dest, temp);                                                                          \
  }

  INSN(sb,  Register);
  INSN(sh,  Register);
  INSN(sw,  Register);
  INSN(sd,  Register);
  INSN(fsw, FloatRegister);
  INSN(fsd, FloatRegister);

#undef INSN
1020 
// For each integer store NAME (sb, sh, sw, sd) defines:
//  - NAME(Rs, dest, temp): store Rs to an absolute, non-null address. temp
//    holds the address, so it must differ from Rs: auipc + store if the
//    distance from the current pc passes is_valid_32bit_offset, otherwise
//    movptr + store.
//  - NAME(Rs, adr, temp): store Rs to an Address. A literal address is
//    relocated with adr.rspec() and stored as above. For base_plus_offset, a
//    simm12 offset is encoded directly and temp is untouched; otherwise la()
//    forms base plus the offset without its sign-extended low 12 bits in
//    temp (which must then differ from Rs), and the store applies those 12
//    bits. Any other addressing mode is a fatal error.
#define INSN(NAME)                                                                                 \
  void NAME(Register Rs, address dest, Register temp = t0) {                                       \
    assert_cond(dest != nullptr);                                                                  \
    assert_different_registers(Rs, temp);                                                          \
    int64_t distance = dest - pc();                                                                \
    if (is_valid_32bit_offset(distance)) {                                                         \
      auipc(temp, (int32_t)distance + 0x800);                                                      \
      Assembler::NAME(Rs, temp, ((int32_t)distance << 20) >> 20);                                  \
    } else {                                                                                       \
      int32_t offset = 0;                                                                          \
      movptr(temp, dest, offset);                                                                  \
      Assembler::NAME(Rs, temp, offset);                                                           \
    }                                                                                              \
  }                                                                                                \
  void NAME(Register Rs, const Address &adr, Register temp = t0) {                                 \
    switch (adr.getMode()) {                                                                       \
      case Address::literal: {                                                                     \
        assert_different_registers(Rs, temp);                                                      \
        relocate(adr.rspec(), [&] {                                                                \
          NAME(Rs, adr.target(), temp);                                                            \
        });                                                                                        \
        break;                                                                                     \
      }                                                                                            \
      case Address::base_plus_offset: {                                                            \
        if (is_simm12(adr.offset())) {                                                             \
          Assembler::NAME(Rs, adr.base(), adr.offset());                                           \
        } else {                                                                                   \
          assert_different_registers(Rs, temp);                                                    \
          int32_t offset = ((int32_t)adr.offset() << 20) >> 20;                                    \
          la(temp, Address(adr.base(), adr.offset() - offset));                                    \
          Assembler::NAME(Rs, temp, offset);                                                       \
        }                                                                                          \
        break;                                                                                     \
      }                                                                                            \
      default:                                                                                     \
        ShouldNotReachHere();                                                                      \
    }                                                                                              \
  }

  INSN(sb);
  INSN(sh);
  INSN(sw);
  INSN(sd);

#undef INSN
1066 
// For each floating-point store NAME (fsw, fsd) defines:
//  - NAME(Rs, dest, temp): store Rs to an absolute, non-null address. temp
//    holds the address: auipc + store if the distance from the current pc
//    passes is_valid_32bit_offset, otherwise movptr + store.
//  - NAME(Rs, adr, temp): store Rs to an Address. A literal address is
//    relocated with adr.rspec() and stored as above. For base_plus_offset, a
//    simm12 offset is encoded directly; otherwise la() forms base plus the
//    offset without its sign-extended low 12 bits in temp, and the store
//    applies those 12 bits. Any other addressing mode is a fatal error.
#define INSN(NAME)                                                                                 \
  void NAME(FloatRegister Rs, address dest, Register temp = t0) {                                  \
    assert_cond(dest != nullptr);                                                                  \
    int64_t distance = dest - pc();                                                                \
    if (is_valid_32bit_offset(distance)) {                                                         \
      auipc(temp, (int32_t)distance + 0x800);                                                      \
      Assembler::NAME(Rs, temp, ((int32_t)distance << 20) >> 20);                                  \
    } else {                                                                                       \
      int32_t offset = 0;                                                                          \
      movptr(temp, dest, offset);                                                                  \
      Assembler::NAME(Rs, temp, offset);                                                           \
    }                                                                                              \
  }                                                                                                \
  void NAME(FloatRegister Rs, const Address &adr, Register temp = t0) {                            \
    switch (adr.getMode()) {                                                                       \
      case Address::literal: {                                                                     \
        relocate(adr.rspec(), [&] {                                                                \
          NAME(Rs, adr.target(), temp);                                                            \
        });                                                                                        \
        break;                                                                                     \
      }                                                                                            \
      case Address::base_plus_offset: {                                                            \
        if (is_simm12(adr.offset())) {                                                             \
          Assembler::NAME(Rs, adr.base(), adr.offset());                                           \
        } else {                                                                                   \
          int32_t offset = ((int32_t)adr.offset() << 20) >> 20;                                    \
          la(temp, Address(adr.base(), adr.offset() - offset));                                    \
          Assembler::NAME(Rs, temp, offset);                                                       \
        }                                                                                          \
        break;                                                                                     \
      }                                                                                            \
      default:                                                                                     \
        ShouldNotReachHere();                                                                      \
    }                                                                                              \
  }

  INSN(fsw);
  INSN(fsd);

#undef INSN

#undef INSN_ENTRY_RELOC
1109 
  // Compare-and-exchange helpers (defined out of line).
  // NOTE(review): the descriptions below are read off the signatures only —
  // confirm against the definitions.

  // Pointer-sized compare-and-exchange of oldv -> newv; the first form works
  // on the header of 'obj', the second on 'addr'. Control presumably reaches
  // 'succeed' on success and '*fail' otherwise.
  void cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp, Label &succeed, Label *fail);
  void cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, Label &succeed, Label *fail);
  // General form: 'size' gives the operand width, acquire/release the Aqrl
  // ordering, and 'result' receives the outcome (presumably a boolean when
  // result_as_bool is set).
  void cmpxchg(Register addr, Register expected,
               Register new_val,
               enum operand_size size,
               Assembler::Aqrl acquire, Assembler::Aqrl release,
               Register result, bool result_as_bool = false);
  // Weak variant with the same operands, minus result_as_bool.
  void cmpxchg_weak(Register addr, Register expected,
                    Register new_val,
                    enum operand_size size,
                    Assembler::Aqrl acquire, Assembler::Aqrl release,
                    Register result);
  // Narrow-value variants taking three extra temporaries; the helper
  // presumably prepares the operands for the two variants that follow it.
  void cmpxchg_narrow_value_helper(Register addr, Register expected,
                                   Register new_val,
                                   enum operand_size size,
                                   Register tmp1, Register tmp2, Register tmp3);
  void cmpxchg_narrow_value(Register addr, Register expected,
                            Register new_val,
                            enum operand_size size,
                            Assembler::Aqrl acquire, Assembler::Aqrl release,
                            Register result, bool result_as_bool,
                            Register tmp1, Register tmp2, Register tmp3);
  void weak_cmpxchg_narrow_value(Register addr, Register expected,
                                 Register new_val,
                                 enum operand_size size,
                                 Assembler::Aqrl acquire, Assembler::Aqrl release,
                                 Register result,
                                 Register tmp1, Register tmp2, Register tmp3);
1138 
  // Atomic read-modify-write helpers on the location in 'addr' (defined out
  // of line); 'prev' receives the previous value.
  // NOTE(review): the name suffixes presumably mean w = 32-bit word,
  // al = acquire + release ordering, l = release ordering, u = zero-extended
  // result — confirm against the definitions.

  // Atomic add of a register or constant increment.
  void atomic_add(Register prev, RegisterOrConstant incr, Register addr);
  void atomic_addw(Register prev, RegisterOrConstant incr, Register addr);
  void atomic_addal(Register prev, RegisterOrConstant incr, Register addr);
  void atomic_addalw(Register prev, RegisterOrConstant incr, Register addr);

  // Atomic exchange with newv.
  void atomic_xchg(Register prev, Register newv, Register addr);
  void atomic_xchgw(Register prev, Register newv, Register addr);
  void atomic_xchgal(Register prev, Register newv, Register addr);
  void atomic_xchgalw(Register prev, Register newv, Register addr);
  void atomic_xchgwu(Register prev, Register newv, Register addr);
  void atomic_xchgalwu(Register prev, Register newv, Register addr);

  // Atomic compare-and-swap with newv.
  void atomic_cas(Register prev, Register newv, Register addr);
  void atomic_casw(Register prev, Register newv, Register addr);
  void atomic_casl(Register prev, Register newv, Register addr);
  void atomic_caslw(Register prev, Register newv, Register addr);
  void atomic_casal(Register prev, Register newv, Register addr);
  void atomic_casalw(Register prev, Register newv, Register addr);
  void atomic_caswu(Register prev, Register newv, Register addr);
  void atomic_caslwu(Register prev, Register newv, Register addr);
  void atomic_casalwu(Register prev, Register newv, Register addr);

  // Compare-and-swap with explicit operand size and ordering; both orderings
  // default to Assembler::relaxed.
  void atomic_cas(Register prev, Register newv, Register addr, enum operand_size size,
              Assembler::Aqrl acquire = Assembler::relaxed, Assembler::Aqrl release = Assembler::relaxed);
1163 
1164   // Emit a far call/jump. Only invalidates the tmp register which
1165   // is used to keep the entry address for jalr.
1166   // The address must be inside the code cache.
1167   // Supported entry.rspec():
1168   // - relocInfo::external_word_type
1169   // - relocInfo::runtime_call_type
1170   // - relocInfo::none
       // tmp defaults to t0 for both. The emitted sequence is auipc + jalr,
       // whose size is reported by far_branch_size().
1171   void far_call(const Address &entry, Register tmp = t0);
1172   void far_jump(const Address &entry, Register tmp = t0);
1173 
1174   static int far_branch_size() {
         // Size in bytes of the sequence emitted by far_call() & far_jump():
         // one auipc followed by one jalr, 4 bytes each.
         enum { auipc_bytes = 4, jalr_bytes = 4 };
1175     return auipc_bytes + jalr_bytes;
1176   }
1177
       // Declaration only.
       // NOTE(review): presumably loads the card table's byte-map base address
       // into reg -- confirm against the definition.
1178   void load_byte_map_base(Register reg);
1179 
1180   void bang_stack_with_offset(int offset) {
         // Stores zr (a 64-bit sd) at address sp - offset, clobbering t0.
         // The stack grows down, so the caller hands in a positive distance.
1182     assert(offset > 0, "must bang with negative offset");
1181     const Register probe = t0;
1183     sub(probe, sp, offset);
1184     sd(zr, Address(probe));
1185   }
1186 
1187   virtual void _call_Unimplemented(address call_site) {
         // The only code emitted here is a move of call_site into t1.
1188     mv(t1, call_site);
1189   }
1190
       // Convenience form: passes the __PRETTY_FUNCTION__ string of the function
       // in which the macro is expanded (cast to address) as the call site.
1191   #define call_Unimplemented() _call_Unimplemented((address)__PRETTY_FUNCTION__)
1192 
1193   // Frame creation and destruction shared between JITs.
       // NOTE(review): framesize is presumably in bytes -- confirm against the
       // definitions.
1194   void build_frame(int framesize);
1195   void remove_frame(int framesize);
1196
       // NOTE(review): presumably the check that goes with the reserved stack
       // zone -- confirm against the definition.
1197   void reserved_stack_check();
1198
       // Polling-page access; rtype is the relocation type to use.
       // NOTE(review): get_polling_page presumably materializes the page address
       // in dest, and read_polling_page presumably loads from r + offset --
       // confirm against the definitions.
1199   void get_polling_page(Register dest, relocInfo::relocType rtype);
1200   void read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype);
1201 
1202   // RISCV64 OpenJDK uses five different types of calls:
1203   //   - direct call: jal pc_relative_offset
1204   //     This is the shortest and the fastest, but the offset has the range: +/-1MB.
1205   //
1206   //   - far call: auipc reg, pc_relative_offset; jalr ra, reg, offset
1207   //     This is longer than a direct call. The offset has
1208   //     the range [-(2G + 2K), 2G - 2K). Addresses in the code cache that are
1209   //     out of this range require an indirect call.
1210   //     If a jump is needed rather than a call, a far jump 'jalr x0, reg, offset' can
1211   //     be used instead.
1212   //     All instructions are embedded at a call site.
1213   //
1214   //   - indirect call: movptr + jalr
1215   //     This too can reach anywhere in the address space, but it cannot be
1216   //     patched while code is running, so it must only be modified at a safepoint.
1217   //     This form of call is most suitable for targets at fixed addresses, which
1218   //     will never be patched.
1219   //
1220   //   - reloc call:
1221   //     This is only available in C1/C2-generated code (nmethod).
1222   //
1223   //     [Main code section]
1224   //       auipc
1225   //       ld <address_from_stub_section>
1226   //       jalr
1227   //     [Stub section]
1228   //     trampoline:
1229   //       <64-bit destination address>
1230   //
1231   //    To change the destination we simply atomically store the new
1232   //    address in the stub section.
1233   //
1234   // - trampoline call (old reloc call / -XX:+UseTrampolines):
1235   //     This is only available in C1/C2-generated code (nmethod). It is a combination
1236   //     of a direct call, which is used if the destination of a call is in range,
1237   //     and a register-indirect call. It has the advantages of reaching anywhere in
1238   //     the RISCV address space and being patchable at runtime when the generated
1239   //     code is being executed by other threads.
1240   //
1241   //     [Main code section]
1242   //       jal trampoline
1243   //     [Stub code section]
1244   //     trampoline:
1245   //       ld    reg, pc + 8 (auipc + ld)
1246   //       jr    reg
1247   //       <64-bit destination address>
1248   //
1249   //     If the destination is in range when the generated code is moved to the code
1250   //     cache, 'jal trampoline' is replaced with 'jal destination' and the trampoline
1251   //     is not used.
1252   //     The optimization does not remove the trampoline from the stub section.
1253   //
1254   //     This is necessary because the trampoline may well be redirected later when
1255   //     code is patched, and the new destination may not be reachable by a simple JAL
1256   //     instruction.
1257   //
1258   // To patch a trampoline call when the JAL can't reach, we first modify
1259   // the 64-bit destination address in the trampoline, then modify the
1260   // JAL to point to the trampoline, then flush the instruction cache to
1261   // broadcast the change to all executing threads. See
1262   // NativeCall::set_destination_mt_safe for the details.
1263   //
1264   // There is a benign race in that the other thread might observe the
1265   // modified JAL before it observes the modified 64-bit destination
1266   // address. That does not matter because the destination method has been
1267   // invalidated, so there will be a trap at its start.
1268   // For this to work, the destination address in the trampoline is
1269   // always updated, even if we're not using the trampoline.
1270   // --
1271 
1272   // Emit a call to entry: a direct call if the entry address will always
1273   // be in range, otherwise a reloc call. UseTrampolines selects the
1274   // implementation: trampoline_call() when set, load_and_call() when clear.
1275   // entry.rspec() must be one of:
1276   //   relocInfo::runtime_call_type
1277   //   relocInfo::opt_virtual_call_type
1278   //   relocInfo::static_call_type
1279   //   relocInfo::virtual_call_type
1280   // Returns the call PC, or null if the CodeCache is full.
1281   address reloc_call(Address entry) {
1282     if (UseTrampolines) {
           return trampoline_call(entry);
         }
         return load_and_call(entry);
1283   }
1284  private:
       // The two implementations behind reloc_call(), which picks between them
       // based on UseTrampolines. Both take the entry Address and return an
       // address, which reloc_call() passes straight back to its caller.
1285   address trampoline_call(Address entry);
1286   address load_and_call(Address entry);
1287  public:
1288
       // Inline-cache support (declarations only).
       // method_index defaults to 0; end_alignment defaults to
       // MacroAssembler::instruction_size.
       // NOTE(review): ic_call presumably returns the call PC like reloc_call(),
       // and ic_check presumably returns a code offset -- confirm against the
       // definitions.
1289   address ic_call(address entry, jint method_index = 0);
1290   static int ic_check_size();
1291   int ic_check(int end_alignment = MacroAssembler::instruction_size);
1292 
1293   // Support for memory inc/dec
1294   // n.b. increment/decrement calls with an Address destination will
1295   // need to use a scratch register to load the value to be
1296   // incremented. increment/decrement calls which add or subtract a
1297   // constant value other than sign-extended 12-bit immediate will need
1298   // to use a 2nd scratch register to hold the constant. so, an address
1299   // increment/decrement may trash both t0 and t1.
       //
       // value defaults to 1; the scratch registers default to tmp1 = t0 and
       // tmp2 = t1. The 'w' variants take an int32_t value, the others int64_t.
1300
1301   void increment(const Address dst, int64_t value = 1, Register tmp1 = t0, Register tmp2 = t1);
1302   void incrementw(const Address dst, int32_t value = 1, Register tmp1 = t0, Register tmp2 = t1);
1303
1304   void decrement(const Address dst, int64_t value = 1, Register tmp1 = t0, Register tmp2 = t1);
1305   void decrementw(const Address dst, int32_t value = 1, Register tmp1 = t0, Register tmp2 = t1);
1306
       // NOTE(review): presumably compares src1 with the pointer loaded from src2
       // and branches to `equal` on a match -- confirm against the definition.
1307   void cmpptr(Register src1, Address src2, Label& equal);
1308
       // Class-initialization barrier and method-holder loads (declarations only).
       // Both labels of clinit_barrier are optional and default to nullptr.
       // NOTE(review): load_method_holder presumably yields the holder klass of
       // `method`, and load_method_holder_cld its class loader data -- confirm.
1309   void clinit_barrier(Register klass, Register tmp, Label* L_fast_path = nullptr, Label* L_slow_path = nullptr);
1310   void load_method_holder_cld(Register result, Register method);
1311   void load_method_holder(Register holder, Register method);
1312
       // Declarations only.
       // NOTE(review): these presumably support the string search intrinsics
       // (match_mask / trailing_zeros / haystack_isL suggest a word-at-a-time
       // character match) -- confirm against the definitions.
1313   void compute_index(Register str1, Register trailing_zeros, Register match_mask,
1314                      Register result, Register char_tmp, Register tmp,
1315                      bool haystack_isL);
1316   void compute_match_mask(Register src, Register pattern, Register match_mask,
1317                           Register mask1, Register mask2);
1318 
1319   // CRC32 code for java.util.zip.CRC32::updateBytes() intrinsic.
       // kernel_crc32 takes the running crc, the buffer and its length, four
       // table registers and six temporaries.
       // NOTE(review): update_word_crc32 / update_byte_crc32 are presumably the
       // per-word and per-byte steps used by kernel_crc32, and table0..table3
       // presumably hold lookup-table addresses -- confirm against the definitions.
1320   void kernel_crc32(Register crc, Register buf, Register len,
1321         Register table0, Register table1, Register table2, Register table3,
1322         Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5, Register tmp6);
1323   void update_word_crc32(Register crc, Register v, Register tmp1, Register tmp2, Register tmp3,
1324         Register table0, Register table1, Register table2, Register table3,
1325         bool upper);
1326   void update_byte_crc32(Register crc, Register val, Register table);
1327 
1328 #ifdef COMPILER2
       // Everything up to the matching #endif is declared only when COMPILER2
       // is defined.
       // NOTE(review): presumably the vector-instruction variant of the CRC32
       // update -- confirm against the definition.
1329   void vector_update_crc32(Register crc, Register buf, Register len,
1330                            Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5,
1331                            Register table0, Register table3);
1332
       // Multi-precision arithmetic building blocks (declarations only).
       // NOTE(review): presumably these back the big-integer multiply intrinsics;
       // the carry-in/carry-out convention of cad/cadc/adc/add2_with_carry is
       // not visible here -- confirm against the definitions.
1333   void mul_add(Register out, Register in, Register offset,
1334                Register len, Register k, Register tmp);
1335   void wide_mul(Register prod_lo, Register prod_hi, Register n, Register m);
1336   void wide_madd(Register sum_lo, Register sum_hi, Register n,
1337                  Register m, Register tmp1, Register tmp2);
1338   void cad(Register dst, Register src1, Register src2, Register carry);
1339   void cadc(Register dst, Register src1, Register src2, Register carry);
1340   void adc(Register dst, Register src1, Register src2, Register carry);
1341   void add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo,
1342                        Register src1, Register src2, Register carry);
       // Inner loops named after the operand width they process per step
       // (32, 64 and 128 bits); multiply_to_len is the top-level entry that
       // takes both operands with their lengths and the destination z.
1343   void multiply_32_x_32_loop(Register x, Register xstart, Register x_xstart,
1344                              Register y, Register y_idx, Register z,
1345                              Register carry, Register product,
1346                              Register idx, Register kdx);
1347   void multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart,
1348                              Register y, Register y_idx, Register z,
1349                              Register carry, Register product,
1350                              Register idx, Register kdx);
1351   void multiply_128_x_128_loop(Register y, Register z,
1352                                Register carry, Register carry2,
1353                                Register idx, Register jdx,
1354                                Register yz_idx1, Register yz_idx2,
1355                                Register tmp, Register tmp3, Register tmp4,
1356                                Register tmp6, Register product_hi);
1357   void multiply_to_len(Register x, Register xlen, Register y, Register ylen,
1358                        Register z, Register tmp0,
1359                        Register tmp1, Register tmp2, Register tmp3, Register tmp4,
1360                        Register tmp5, Register tmp6, Register product_hi);
1361
1362 #endif // COMPILER2
1363
       // Declarations only; tmp1/tmp2 default to t0/t1.
       // NOTE(review): presumably widen the low / high 32 bits of Rs into Rd
       // (byte-to-char style inflation) -- confirm against the definitions.
1364   void inflate_lo32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);
1365   void inflate_hi32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);
1366
       // isLL defaults to false; tmp1/tmp2 default to t0/t1.
       // NOTE(review): presumably counts trailing zero characters of Rs, with
       // isLL selecting the character width -- confirm against the definition.
1367   void ctzc_bit(Register Rd, Register Rs, bool isLL = false, Register tmp1 = t0, Register tmp2 = t1);
1368
       // Memory zeroing / filling helpers (declarations only).
       // zero_words has two overloads: a constant count (returns void) and a
       // register count (returns an address).
       // NOTE(review): the returned address is presumably null on failure to
       // emit, and the counts are presumably in words -- confirm against the
       // definitions.
1369   void zero_words(Register base, uint64_t cnt);
1370   address zero_words(Register ptr, Register cnt);
1371   void fill_words(Register base, Register cnt, Register value);
1372   void zero_memory(Register addr, Register len, Register tmp);
1373   void zero_dcache_blocks(Register base, Register cnt, Register tmp1, Register tmp2);
1374 
1375   // shift left by shamt and add
       // NOTE(review): presumably Rd = (Rs1 << shamt) + Rs2 with tmp as scratch;
       // which operand is shifted is not visible here -- confirm.
1376   void shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt);
1377
1378   // test single bit in Rs, result is set to Rd
       // bit_pos selects the bit to test.
1379   void test_bit(Register Rd, Register Rs, uint32_t bit_pos);
1380 
1381   // "Safe" float-to-integer conversions that cope with special inputs.
1382   // Converting NaN, +Inf or -Inf would otherwise trigger an exception,
1383   // so these variants deal with those situations explicitly
1384   // to get correct results.
       // Naming: w/l is the integer destination (32/64-bit), s/d the float or
       // double source, as reflected by the Register dst / FloatRegister src
       // parameters. tmp defaults to t0.
1385   void fcvt_w_s_safe(Register dst, FloatRegister src, Register tmp = t0);
1386   void fcvt_l_s_safe(Register dst, FloatRegister src, Register tmp = t0);
1387   void fcvt_w_d_safe(Register dst, FloatRegister src, Register tmp = t0);
1388   void fcvt_l_d_safe(Register dst, FloatRegister src, Register tmp = t0);
1389
       // NOTE(review): presumably implement Java's Math.round semantics for
       // float and double, using ftmp as scratch -- confirm against the definitions.
1390   void java_round_float(Register dst, FloatRegister src, FloatRegister ftmp);
1391   void java_round_double(Register dst, FloatRegister src, FloatRegister ftmp);
1392 
1393   // vector load/store unit-stride instructions
1394   void vlex_v(VectorRegister vd, Register base, Assembler::SEW sew, VectorMask vm = unmasked) {
         // Unit-stride vector load from base into vd, with the element width
         // chosen by sew. Anything other than e64/e32/e16 (e8 included) uses
         // the 8-bit form.
1395     if (sew == Assembler::e64) {
1396       vle64_v(vd, base, vm);
1397     } else if (sew == Assembler::e32) {
1398       vle32_v(vd, base, vm);
1399     } else if (sew == Assembler::e16) {
1400       vle16_v(vd, base, vm);
1401     } else {
1402       vle8_v(vd, base, vm);
1403     }
1410   }
1411 
1412   void vsex_v(VectorRegister store_data, Register base, Assembler::SEW sew, VectorMask vm = unmasked) {
         // Unit-stride vector store of store_data to base, with the element
         // width chosen by sew. Anything other than e64/e32/e16 (e8 included)
         // uses the 8-bit form.
1413     if (sew == Assembler::e64) {
1414       vse64_v(store_data, base, vm);
1415     } else if (sew == Assembler::e32) {
1416       vse32_v(store_data, base, vm);
1417     } else if (sew == Assembler::e16) {
1418       vse16_v(store_data, base, vm);
1419     } else {
1420       vse8_v(store_data, base, vm);
1421     }
1428   }
1429 
1430   // vector pseudo instructions
1431   // Rotate vd left by shift bits (32-bit version), in place:
       //   tmp_vr = vd >> (32 - shift);  vd = (vd << shift) | tmp_vr
       // tmp_vr is overwritten.
1432   inline void vrole32_vi(VectorRegister vd, uint32_t shift, VectorRegister tmp_vr) {
         const uint32_t wrapped_bits = 32 - shift;
1433     vsrl_vi(tmp_vr, vd, wrapped_bits);
1434     vsll_vi(vd, vd, shift);
1435     vor_vv(vd, vd, tmp_vr);
1436   }
1437
       // Alias: vl1r.v is emitted as vl1re8.v.
1438   inline void vl1r_v(VectorRegister vd, Register rs) {
1439     vl1re8_v(vd, rs);
1440   }
1441
       // Mask NOT: NAND of vs with itself.
1442   inline void vmnot_m(VectorRegister vd, VectorRegister vs) {
1443     vmnand_mm(vd, vs, vs);
1444   }
1445
       // Narrowing convert: a narrowing right shift whose shift amount is x0.
1446   inline void vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked) {
1447     vnsrl_wx(vd, vs, x0, vm);
1448   }
1449
       // Integer negate: reverse-subtract with x0 as the scalar operand.
1450   inline void vneg_v(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked) {
1451     vrsub_vx(vd, vs, x0, vm);
1452   }
1453
       // Floating-point negate: vfsgnjn with vs as both sources.
1454   inline void vfneg_v(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked) {
1455     vfsgnjn_vv(vd, vs, vs, vm);
1456   }
1457
       // Floating-point absolute value: vfsgnjx with vs as both sources.
1458   inline void vfabs_v(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked) {
1459     vfsgnjx_vv(vd, vs, vs, vm);
1460   }
1461
       // The "greater" compares below have no emitter of their own: each one
       // forwards to the matching "less" emitter with vs1 and vs2 swapped
       // (a > b is b < a, a >= b is b <= a).
       // Signed greater-than.
1462   inline void vmsgt_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
1463     vmslt_vv(vd, vs1, vs2, vm);
1464   }
1465
       // Unsigned greater-than.
1466   inline void vmsgtu_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
1467     vmsltu_vv(vd, vs1, vs2, vm);
1468   }
1469
       // Signed greater-or-equal.
1470   inline void vmsge_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
1471     vmsle_vv(vd, vs1, vs2, vm);
1472   }
1473
       // Unsigned greater-or-equal.
1474   inline void vmsgeu_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
1475     vmsleu_vv(vd, vs1, vs2, vm);
1476   }
1477
       // Floating-point greater-than.
1478   inline void vmfgt_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
1479     vmflt_vv(vd, vs1, vs2, vm);
1480   }
1481
       // Floating-point greater-or-equal.
1482   inline void vmfge_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
1483     vmfle_vv(vd, vs1, vs2, vm);
1484   }
1485 
1486   inline void vmsltu_vi(VectorRegister Vd, VectorRegister Vs2, uint32_t imm, VectorMask vm = unmasked) {
         // Unsigned "less than imm", emitted as "less than or equal to imm - 1".
         // imm must lie in [1, 16].
1487     guarantee(1 <= imm && imm <= 16, "imm is invalid");
         const uint32_t inclusive_bound = imm - 1;
1488     vmsleu_vi(Vd, Vs2, inclusive_bound, vm);
1489   }
1490 
1491   inline void vmsgeu_vi(VectorRegister Vd, VectorRegister Vs2, uint32_t imm, VectorMask vm = unmasked) {
         // Unsigned "greater than or equal to imm", emitted as "greater than
         // imm - 1". imm must lie in [1, 16].
1492     guarantee(1 <= imm && imm <= 16, "imm is invalid");
         const uint32_t exclusive_bound = imm - 1;
1493     vmsgtu_vi(Vd, Vs2, exclusive_bound, vm);
1494   }
1495 
1496   // Copy mask register
       // (AND of vs with itself, written to vd).
1497   inline void vmmv_m(VectorRegister vd, VectorRegister vs) {
1498     vmand_mm(vd, vs, vs);
1499   }
1500
1501   // Clear mask register
       // (XOR of vd with itself).
1502   inline void vmclr_m(VectorRegister vd) {
1503     vmxor_mm(vd, vd, vd);
1504   }
1505
1506   // Set mask register
       // (XNOR of vd with itself).
1507   inline void vmset_m(VectorRegister vd) {
1508     vmxnor_mm(vd, vd, vd);
1509   }
1510
       // Bitwise NOT of Vs into Vd: XOR with the immediate -1.
1511   inline void vnot_v(VectorRegister Vd, VectorRegister Vs, VectorMask vm = unmasked) {
1512     vxor_vi(Vd, Vs, -1, vm);
1513   }
1514
       // Declared here, defined elsewhere.
       // NOTE(review): presumably the block granularity used by zero_words() --
       // confirm against the definition.
1515   static const int zero_words_block_size;
1516 
1517   void cast_primitive_type(BasicType type, Register Rt) {
         // Normalizes the value in Rt, in place, for the given basic type:
         //   T_BOOLEAN              -> sltu against zr (1 if Rt != 0, else 0)
         //   T_CHAR                 -> zero-extend from 16 bits
         //   T_BYTE/T_SHORT/T_INT   -> sign-extend from 8/16/32 bits
         // T_LONG, T_VOID, T_FLOAT and T_DOUBLE need no adjustment; any other
         // type hits ShouldNotReachHere().
1518     switch (type) {
1519       case T_BOOLEAN: sltu(Rt, zr, Rt);         break;
1522       case T_CHAR:    zero_extend(Rt, Rt, 16);  break;
1525       case T_BYTE:    sign_extend(Rt, Rt, 8);   break;
1528       case T_SHORT:   sign_extend(Rt, Rt, 16);  break;
1531       case T_INT:     sign_extend(Rt, Rt, 32);  break;
1534       case T_LONG:    // fall through
1535       case T_VOID:    // fall through
1536       case T_FLOAT:   // fall through
1537       case T_DOUBLE:
             break;        // nothing to do
1538       default:
             ShouldNotReachHere();
1539     }
1540   }
1541 
1542   // float cmp with unordered_result
       // NOTE(review): unordered_result is presumably the value (or its sign)
       // placed in result when either operand is NaN -- confirm against the
       // definitions.
1543   void float_compare(Register result, FloatRegister Rs1, FloatRegister Rs2, int unordered_result);
1544   void double_compare(Register result, FloatRegister Rs1, FloatRegister Rs2, int unordered_result);
1545
1546   // Zero/Sign-extend
       // `bits` is the width of the source value inside src; the extended value
       // goes to dst (cast_primitive_type() uses these with dst == src).
1547   void zero_extend(Register dst, Register src, int bits);
1548   void sign_extend(Register dst, Register src, int bits);
1549 
1550 private:
       // Shared worker; is_signed (default true) selects the comparison flavour.
       // NOTE(review): presumably the common implementation behind the public
       // cmp_*2i entry points below -- confirm against the definition.
1551   void cmp_x2i(Register dst, Register src1, Register src2, Register tmp, bool is_signed = true);
1552
1553 public:
1554   // compare src1 and src2 and get -1/0/1 in dst.
1555   // if [src1 > src2], dst = 1;
1556   // if [src1 == src2], dst = 0;
1557   // if [src1 < src2], dst = -1;
       // tmp defaults to t0.
       // NOTE(review): the names suggest l = signed 64-bit, ul = unsigned 64-bit,
       // uw = unsigned 32-bit operands -- confirm against the definitions.
1558   void cmp_l2i(Register dst, Register src1, Register src2, Register tmp = t0);
1559   void cmp_ul2i(Register dst, Register src1, Register src2, Register tmp = t0);
1560   void cmp_uw2i(Register dst, Register src1, Register src2, Register tmp = t0);
1561 
1562   // support for argument shuffling
       // Each helper moves one argument described by a source VMRegPair to a
       // destination VMRegPair; tmp defaults to t0.
       // NOTE(review): either side can presumably be a register or a stack
       // slot -- confirm against the definitions.
1563   void move32_64(VMRegPair src, VMRegPair dst, Register tmp = t0);
1564   void float_move(VMRegPair src, VMRegPair dst, Register tmp = t0);
1565   void long_move(VMRegPair src, VMRegPair dst, Register tmp = t0);
1566   void double_move(VMRegPair src, VMRegPair dst, Register tmp = t0);
       // Object variant: additionally takes the OopMap, handle/frame layout
       // information and an out-parameter receiver_offset.
       // NOTE(review): receiver_offset is presumably written only when
       // is_receiver is true -- confirm against the definition.
1567   void object_move(OopMap* map,
1568                    int oop_handle_offset,
1569                    int framesize_in_slots,
1570                    VMRegPair src,
1571                    VMRegPair dst,
1572                    bool is_receiver,
1573                    int* receiver_offset);
1574 
1575 #ifdef ASSERT
1576   // Calls reset() on every argument, left to right. Short-hand used to
1577   // clean up after a failed call to trampoline call generation (see
1578   // trampoline_call()), when a set of Labels must be reset before returning.
       // Recursive case: reset the first label, then recurse on the rest.
1579   template<typename LabelT, typename... Rest>
1580   void reset_labels(LabelT& lbl, Rest&... more) {
1581     lbl.reset();
         reset_labels(more...);
1582   }
       // Base case: a single label.
1583   template<typename LabelT>
1584   void reset_labels(LabelT& lbl) {
1585     lbl.reset();
1586   }
1587 #endif
1588 
1589 private:
1590
       // NOTE(review): presumably scans up to `count` elements starting at addr
       // for `value`, in the manner of x86 "repne scas" -- confirm against the
       // definition.
1591   void repne_scan(Register addr, Register value, Register count, Register tmp);
1592 
1593   void ld_constant(Register dest, const Address &const_addr) {
         // Loads (ld) the constant stored at const_addr into dest.
1594     if (!NearCpool) {
           // Go through an InternalAddress for const_addr's target and emit the
           // la + ld pair under its relocation; la() fills in lo_offset, which
           // then serves as the displacement of the load.
1597       InternalAddress pool_slot(const_addr.target());
1598       relocate(pool_slot.rspec(), [&] {
1599         int32_t lo_offset;
1600         la(dest, pool_slot.target(), lo_offset);
1601         ld(dest, Address(dest, lo_offset));
1602       });
1596       return;
1603     }
         // NearCpool: const_addr is used directly as the load operand.
1595     ld(dest, const_addr);
1604   }
1605
       // NOTE(review): presumably expands the set bits of `bitset` into register
       // numbers stored in `regs` and returns how many were written -- confirm.
1606   int bitset_to_regs(unsigned int bitset, unsigned char* regs);
       // NOTE(review): presumably returns an Address usable for dst, using tmp
       // when dst cannot be encoded directly -- confirm.
1607   Address add_memory_helper(const Address dst, Register tmp);
1608
       // Load-reserved / store-conditional with an explicit operand size and
       // ordering (acquire on the load, release on the store).
       // NOTE(review): store_conditional's dst presumably receives the
       // success/failure status -- confirm against the definition.
1609   void load_reserved(Register dst, Register addr, enum operand_size size, Assembler::Aqrl acquire);
1610   void store_conditional(Register dst, Register new_val, Register addr, enum operand_size size, Assembler::Aqrl release);
1611 
1612 public:
       // Lightweight locking fast paths (declarations only). Both take the
       // object, three temporaries and a `slow` label; lightweight_lock also
       // takes basic_lock.
       // NOTE(review): control presumably branches to `slow` when the fast path
       // cannot complete -- confirm against the definitions.
1613   void lightweight_lock(Register basic_lock, Register obj, Register tmp1, Register tmp2, Register tmp3, Label& slow);
1614   void lightweight_unlock(Register obj, Register tmp1, Register tmp2, Register tmp3, Label& slow);
1615 
1616 public:
       // Byte sizes of fixed instruction sequences, expressed as multiples of
       // instruction_size.
1617   enum {
1618     // movptr
1619     movptr1_instruction_size = 6 * instruction_size, // lui, addi, slli, addi, slli, addi.  See movptr1().
1620     movptr2_instruction_size = 5 * instruction_size, // lui, lui, slli, add, addi.  See movptr2().
1621     load_pc_relative_instruction_size = 2 * instruction_size // auipc, ld
1622   };
1623
       // Trampoline stub layout: three instructions followed by one word of data.
       // trampoline_size is the whole stub; trampoline_data_offset is where the
       // data word starts, i.e. right after the three instructions.
1624   enum NativeShortCall {
1625     trampoline_size        = 3 * instruction_size + wordSize,
1626     trampoline_data_offset = 3 * instruction_size
1627   };
1628
       // Sequence predicates, defined elsewhere.
       // NOTE(review): presumably recognise the auipc + load sequence and the
       // lui + srli (li16u) sequence starting at the given address -- confirm.
1629   static bool is_load_pc_relative_at(address branch);
1630   static bool is_li16u_at(address instr);
1631
       // Single-instruction predicates. Each asserts instr is non-null, then
       // classifies the instruction at instr by its opcode field and, where
       // needed, funct3 and/or the destination register:
       //   jal 0b1101111, jalr 0b1100111 (funct3 000), branch 0b1100011,
       //   load 0b0000011 (ld: funct3 011), float load 0b0000111,
       //   auipc 0b0010111, add 0b0110011 (funct3 000),
       //   addi 0b0010011 (funct3 000), addiw 0b0011011 (funct3 000),
       //   lui 0b0110111.
       // is_jump_at accepts any branch, jal or jalr; the *_to_zr_at variants
       // additionally require rd == zr.
1632   static bool is_jal_at(address instr)        { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b1101111; }
1633   static bool is_jalr_at(address instr)       { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b1100111 && extract_funct3(instr) == 0b000; }
1634   static bool is_branch_at(address instr)     { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b1100011; }
1635   static bool is_ld_at(address instr)         { assert_cond(instr != nullptr); return is_load_at(instr) && extract_funct3(instr) == 0b011; }
1636   static bool is_load_at(address instr)       { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b0000011; }
1637   static bool is_float_load_at(address instr) { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b0000111; }
1638   static bool is_auipc_at(address instr)      { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b0010111; }
1639   static bool is_jump_at(address instr)       { assert_cond(instr != nullptr); return is_branch_at(instr) || is_jal_at(instr) || is_jalr_at(instr); }
1640   static bool is_add_at(address instr)        { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b0110011 && extract_funct3(instr) == 0b000; }
1641   static bool is_addi_at(address instr)       { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b0010011 && extract_funct3(instr) == 0b000; }
1642   static bool is_addiw_at(address instr)      { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b0011011 && extract_funct3(instr) == 0b000; }
1643   static bool is_addiw_to_zr_at(address instr){ assert_cond(instr != nullptr); return is_addiw_at(instr) && extract_rd(instr) == zr; }
1644   static bool is_lui_at(address instr)        { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b0110111; }
1645   static bool is_lui_to_zr_at(address instr)  { assert_cond(instr != nullptr); return is_lui_at(instr) && extract_rd(instr) == zr; }
1646 
1647   static bool is_srli_at(address instr) {
         // True if the instruction at instr (must be non-null) is an srli:
         // opcode 0b0010011, funct3 0b101 and bits [31:26] all zero.
         // The instruction word is fetched with Assembler::ld_instr(), the same
         // accessor is_slli_shift_at() uses, rather than by dereferencing instr
         // through a casted unsigned*, which assumes 4-byte alignment.
1648     assert_cond(instr != nullptr);
1649     return extract_opcode(instr) == 0b0010011 &&
1650            extract_funct3(instr) == 0b101 &&
1651            Assembler::extract(Assembler::ld_instr(instr), 31, 26) == 0b000000;
1652   }
1653 
1654   static bool is_slli_shift_at(address instr, uint32_t shift) {
         // True if the instruction at instr (must be non-null) is an slli whose
         // shift amount equals `shift`. Fields are checked in order and the
         // first mismatch ends the test.
1655     assert_cond(instr != nullptr);
1656     if (extract_opcode(instr) != 0b0010011) {  // opcode field
           return false;
         }
1657     if (extract_funct3(instr) != 0b001) {      // funct3 field, select the type of operation
           return false;
         }
1658     return Assembler::extract(Assembler::ld_instr(instr), 25, 20) == shift;  // shamt field
1659   }
1660
       // Sequence predicates for the two movptr expansions (see the
       // movptr1_instruction_size / movptr2_instruction_size constants).
1661   static bool is_movptr1_at(address instr);
1662   static bool is_movptr2_at(address instr);
1663
       // NOTE(review): presumably true for a lwu whose destination is zr --
       // confirm against the definition.
1664   static bool is_lwu_to_zr(address instr);
1665
       // Field extractors for the instruction at instr: the register fields come
       // back as Register, opcode and funct3 as raw uint32_t values.
1666   static Register extract_rs1(address instr);
1667   static Register extract_rs2(address instr);
1668   static Register extract_rd(address instr);
1669   static uint32_t extract_opcode(address instr);
1670   static uint32_t extract_funct3(address instr);
1671 
1672   // movptr is a sequence of six instructions:
1673   //     lui
1674   //     addi
1675   //     slli
1676   //     addi
1677   //     slli
1678   //     addi/jalr/load
       // Returns true when they form a single register dependency chain: each
       // of the four instructions after the lui reads its predecessor's
       // destination and writes back to that same register, and the last
       // instruction reads the final value through rs1.
1679   static bool check_movptr1_data_dependency(address instr) {
1680     // Instructions 1..4 (addi, slli, addi, slli) all obey the same rule.
1681     for (int idx = 1; idx <= 4; idx++) {
1682       address cur  = instr + idx * instruction_size;
1683       address prev = cur - instruction_size;
1684       if (!(extract_rs1(cur) == extract_rd(prev) &&
1685             extract_rs1(cur) == extract_rd(cur))) {
1686         return false;
1687       }
1688     }
1689     // The consumer only has to read what the second slli produced.
1690     address slli2      = instr + 4 * instruction_size;
1691     address last_instr = slli2 + instruction_size;
1694     return extract_rs1(last_instr) == extract_rd(slli2);
1695   }
1696 
1697   // movptr2 is a sequence of five instructions:
1698   //     lui
1699   //     lui
1700   //     slli
1701   //     add
1702   //     addi/jalr/load
       // Returns true when the register operands link up as expected across
       // the sequence starting at instr.
1703   static bool check_movptr2_data_dependency(address instr) {
1704     address lui1       = instr;
1705     address lui2       = instr + 1 * instruction_size;
1706     address slli       = instr + 2 * instruction_size;
1707     address add        = instr + 3 * instruction_size;
1708     address last_instr = instr + 4 * instruction_size;
         // add: rd and rs1 are lui2's register, rs2 is the slli result.
1709     if (!(extract_rd(add) == extract_rd(lui2) &&
1710           extract_rs1(add) == extract_rd(lui2) &&
1711           extract_rs2(add) == extract_rd(slli))) {
           return false;
         }
         // slli: shifts lui1's register in place.
1712     if (!(extract_rs1(slli) == extract_rd(lui1) &&
1713           extract_rd(slli) == extract_rd(lui1))) {
           return false;
         }
         // The consumer reads the add result through rs1.
1714     return extract_rs1(last_instr) == extract_rd(add);
1715   }
1716 
1717   // li16u is a sequence of two instructions:
1718   //     lui
1719   //     srli
       // Returns true when the srli reads the lui's destination and writes its
       // result back to the register it read.
1720   static bool check_li16u_data_dependency(address instr) {
1722     address srli = instr + instruction_size;  // the lui sits at instr itself
1724     if (!(extract_rs1(srli) == extract_rd(instr))) {
           return false;
         }
1725     return extract_rs1(srli) == extract_rd(srli);
1726   }
1727 
1728   // li32 is a sequence of two instructions:
1729   //     lui
1730   //     addiw
       // Returns true when the addiw reads the lui's destination and writes its
       // result back to the register it read.
1731   static bool check_li32_data_dependency(address instr) {
1733     address addiw = instr + instruction_size;  // the lui sits at instr itself
1735     if (!(extract_rs1(addiw) == extract_rd(instr))) {
           return false;
         }
1736     return extract_rs1(addiw) == extract_rd(addiw);
1737   }
1738 
1739   // A pc-relative sequence is two instructions:
1740   //     auipc
1741   //     jalr/addi/load/float_load
       // Returns true when the second instruction's rs1 is the auipc's
       // destination register.
1742   static bool check_pc_relative_data_dependency(address instr) {
1744     address consumer = instr + instruction_size;  // the auipc sits at instr itself
1746     return extract_rs1(consumer) == extract_rd(instr);
1747   }
1748 
1749   // load_label is a sequence of two instructions:
1750   //     auipc
1751   //     load
       // Returns true when the load's destination is the auipc's destination and
       // the load also uses that same register as its base (rs1).
1752   static bool check_load_pc_relative_data_dependency(address instr) {
1754     address load = instr + instruction_size;  // the auipc sits at instr itself
1756     if (!(extract_rd(load) == extract_rd(instr))) {
           return false;
         }
1757     return extract_rs1(load) == extract_rd(load);
1758   }
1759
       // Sequence predicates, defined elsewhere.
       // NOTE(review): presumably recognise the lui + addiw (li32) sequence and
       // the auipc-based pc-relative sequence starting at the given address --
       // confirm against the definitions.
1760   static bool is_li32_at(address instr);
1761   static bool is_pc_relative_at(address branch);
1762 
1763   static bool is_membar(address addr) {
         // A membar is recognised by the low seven bits of the instruction word
         // (0b1111) together with funct3 == 0.
         const uint32_t low7 = Bytes::get_native_u4(addr) & 0x7f;
1764     return low7 == 0b1111 && extract_funct3(addr) == 0;
1765   }
       // Accessors for the ordering kind of the membar at addr (defined elsewhere).
1766   static uint32_t get_membar_kind(address addr);
1767   static void set_membar_kind(address addr, uint32_t order_kind);
1768 };
1769 
1770 #ifdef ASSERT
     // Platform-dependent hook of AbstractAssembler, present only in ASSERT
     // builds; this definition always returns false.
1771 inline bool AbstractAssembler::pd_check_instruction_mark() { return false; }
1772 #endif
1773 
1774 /**
1775  * class SkipIfEqual:
1776  *
1777  * Instantiating this class will result in assembly code being output that will
1778  * jump around any code emitted between the creation of the instance and its
1779  * automatic destruction at the end of a scope block, depending on the value of
1780  * the flag passed to the constructor, which will be checked at run-time.
1781  */
1782 class SkipIfEqual {
1783  private:
       // NOTE(review): presumably the assembler the skip is emitted through and
       // the label the skipping jump targets -- confirm against the definitions.
1784   MacroAssembler* _masm;
1785   Label _label;
1786
1787  public:
        // flag_addr is the address of the bool checked at run-time; value is the
        // constant it is compared against.
1788    SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value);
1789    ~SkipIfEqual();
1790 };
1791 
1792 #endif // CPU_RISCV_MACROASSEMBLER_RISCV_HPP