1 /*
   2  * Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved.
   3  * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
   4  * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved.
   5  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   6  *
   7  * This code is free software; you can redistribute it and/or modify it
   8  * under the terms of the GNU General Public License version 2 only, as
   9  * published by the Free Software Foundation.
  10  *
  11  * This code is distributed in the hope that it will be useful, but WITHOUT
  12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  14  * version 2 for more details (a copy is included in the LICENSE file that
  15  * accompanied this code).
  16  *
  17  * You should have received a copy of the GNU General Public License version
  18  * 2 along with this work; if not, write to the Free Software Foundation,
  19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  20  *
  21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  22  * or visit www.oracle.com if you need additional information or have any
  23  * questions.
  24  *
  25  */
  26 
  27 #ifndef CPU_RISCV_MACROASSEMBLER_RISCV_HPP
  28 #define CPU_RISCV_MACROASSEMBLER_RISCV_HPP
  29 
  30 #include "asm/assembler.inline.hpp"
  31 #include "code/vmreg.hpp"
  32 #include "metaprogramming/enableIf.hpp"
  33 #include "nativeInst_riscv.hpp"
  34 #include "oops/compressedOops.hpp"
  35 #include "utilities/powerOfTwo.hpp"
  36 
  37 // MacroAssembler extends Assembler by frequently used macros.
  38 //
  39 // Instructions for which a 'better' code sequence exists depending
  40 // on arguments should also go in here.
  41 
  42 class MacroAssembler: public Assembler {
  43 
  44  public:
       // Builds a MacroAssembler on top of the given CodeBuffer by forwarding it
       // unchanged to the Assembler base-class constructor.
  45   MacroAssembler(CodeBuffer* code) : Assembler(code) {}
  46 
       // Safepoint poll; slow_path is the caller-supplied label for the slow case.
       // NOTE(review): the exact meaning of at_return / acquire / in_nmethod is set
       // by the out-of-line definition, which is not visible in this header.
  47   void safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod);
  48 
  49   // Alignment
       // NOTE(review): presumably pads the code stream until the offset (plus
       // extra_offset) is a multiple of modulus; what the int result denotes is
       // not visible here -- confirm against the definition.
  50   int align(int modulus, int extra_offset = 0);
  51 
       // Asserts that pc is aligned to `alignment`, which defaults to
       // NativeInstruction::instruction_size. The failure message is "bad alignment".
  52   static inline void assert_alignment(address pc, int alignment = NativeInstruction::instruction_size) {
  53     assert(is_aligned(pc, alignment), "bad alignment");
  54   }
  55 
  56   // nop
       // Declaration only. NOTE(review): by its name this emits the nop that
       // follows a call site; the actual sequence is defined out of line.
  57   void post_call_nop();
  58 
  59   // Stack frame creation/removal
  60   // SP has to be moved to its final position before RA and FP are saved or
  61   // restored, because a signal-based thread suspend/resume may hit asynchronously.
  62   void enter() {
         // Room for the {saved FP, saved RA} pair that links this frame.
         const auto link_area_bytes = 2 * wordSize;
  63     addi(sp, sp, -link_area_bytes);
  64     sd(ra, Address(sp, wordSize));   // RA goes into the upper slot
  65     sd(fp, Address(sp));             // caller's FP goes into the lower slot
  66     addi(fp, sp, link_area_bytes);   // new FP = value SP had on entry
  67   }
  68 
       // Removes the frame linkage created by enter(): SP is first re-derived from
       // FP, then the saved FP and RA are reloaded and the two slots are released.
  69   void leave() {
         // Size of the {saved FP, saved RA} pair sitting just below FP.
         const auto link_area_bytes = 2 * wordSize;
  70     addi(sp, fp, -link_area_bytes);
  71     ld(fp, Address(sp));             // lower slot: caller's FP
  72     ld(ra, Address(sp, wordSize));   // upper slot: return address
  73     addi(sp, sp, link_area_bytes);
  74   }
  75 
  76 
  77   // Support for getting the JavaThread pointer (i.e., a reference to thread-local information).
  78   // The pointer will be loaded into the thread register.
  79   void get_thread(Register thread);
  80 
  81   // Support for VM calls
  82   //
  83   // It is imperative that all calls into the VM are handled via the call_VM macros.
  84   // They make sure that the stack linkage is setup correctly. call_VM's correspond
  85   // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points.
       //
       // The four overloads below differ only in the number of register arguments
       // (none to three). check_exceptions defaults to true in each of them.
  86 
  87   void call_VM(Register oop_result,
  88                address entry_point,
  89                bool check_exceptions = true);
  90   void call_VM(Register oop_result,
  91                address entry_point,
  92                Register arg_1,
  93                bool check_exceptions = true);
  94   void call_VM(Register oop_result,
  95                address entry_point,
  96                Register arg_1, Register arg_2,
  97                bool check_exceptions = true);
  98   void call_VM(Register oop_result,
  99                address entry_point,
 100                Register arg_1, Register arg_2, Register arg_3,
 101                bool check_exceptions = true);
 102 
 103   // Overloadings with last_Java_sp
       // Here the caller also names the register supplying last_java_sp. The first
       // overload takes an argument count (default 0); the remaining ones take one
       // to three explicit argument registers. check_exceptions defaults to true.
 104   void call_VM(Register oop_result,
 105                Register last_java_sp,
 106                address entry_point,
 107                int number_of_arguments = 0,
 108                bool check_exceptions = true);
 109   void call_VM(Register oop_result,
 110                Register last_java_sp,
 111                address entry_point,
 112                Register arg_1,
 113                bool check_exceptions = true);
 114   void call_VM(Register oop_result,
 115                Register last_java_sp,
 116                address entry_point,
 117                Register arg_1, Register arg_2,
 118                bool check_exceptions = true);
 119   void call_VM(Register oop_result,
 120                Register last_java_sp,
 121                address entry_point,
 122                Register arg_1, Register arg_2, Register arg_3,
 123                bool check_exceptions = true);
 124 
       // Result fetch helpers: the first register receives the result, java_thread
       // names the register holding the current thread.
       // NOTE(review): presumably these read the thread's vm_result / vm_result_2
       // fields -- confirm in the out-of-line definitions.
 125   void get_vm_result(Register oop_result, Register java_thread);
 126   void get_vm_result_2(Register metadata_result, Register java_thread);
 127 
 128   // These always tightly bind to MacroAssembler::call_VM_leaf_base
 129   // bypassing the virtual implementation
       // The first overload takes an argument count (default 0); the others take
       // one to three explicit argument registers.
 130   void call_VM_leaf(address entry_point,
 131                     int number_of_arguments = 0);
 132   void call_VM_leaf(address entry_point,
 133                     Register arg_0);
 134   void call_VM_leaf(address entry_point,
 135                     Register arg_0, Register arg_1);
 136   void call_VM_leaf(address entry_point,
 137                     Register arg_0, Register arg_1, Register arg_2);
 138 
 139   // These always tightly bind to MacroAssembler::call_VM_base
 140   // bypassing the virtual implementation
       // NOTE(review): given the *_leaf name these presumably bind to
       // call_VM_leaf_base rather than call_VM_base -- confirm against the definitions.
       // Overloads take one to four explicit argument registers.
 141   void super_call_VM_leaf(address entry_point, Register arg_0);
 142   void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1);
 143   void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2);
 144   void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3);
 145 
 146   // last Java Frame (fills frame anchor)
       // The three overloads differ only in how last_java_pc is supplied: as a raw
       // address, as a Label, or in a register. tmp is presumably a scratch register.
 147   void set_last_Java_frame(Register last_java_sp, Register last_java_fp, address last_java_pc, Register tmp);
 148   void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Label &last_java_pc, Register tmp);
 149   void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Register last_java_pc, Register tmp);
 150 
 151   // thread in the default location (xthread)
       // NOTE(review): clear_fp presumably selects whether the recorded last-Java FP
       // is cleared as well -- confirm in the definition.
 152   void reset_last_Java_frame(bool clear_fp);
 153 
       // Virtual base routine of the call_VM_leaf family (declaration only).
       // retaddr is optional and defaults to nullptr.
       // NOTE(review): presumably it is bound at the return address of the emitted
       // call when supplied -- confirm in the definition.
 154   virtual void call_VM_leaf_base(
 155     address entry_point,                // the entry point
 156     int     number_of_arguments,        // the number of arguments to pop after the call
 157     Label*  retaddr = nullptr
 158   );
 159 
       // Label-reference convenience overload: passes the address of retaddr on to
       // the pointer-taking call_VM_leaf_base overload.
 160   virtual void call_VM_leaf_base(
 161     address entry_point,                // target of the call
 162     int     number_of_arguments,        // arguments to pop once the call returns
 163     Label&  retaddr) {
           Label* const retaddr_ptr = &retaddr;
 164     call_VM_leaf_base(entry_point, number_of_arguments, retaddr_ptr);
 165   }
 166 
 167   virtual void call_VM_base(           // returns the register containing the thread upon return
 168     Register oop_result,               // where an oop-result ends up if any; use noreg otherwise
 169     Register java_thread,              // the thread if computed before     ; use noreg otherwise
 170     Register last_java_sp,             // to set up last_Java_frame in stubs; use noreg otherwise
 171     address  entry_point,              // the entry point
 172     int      number_of_arguments,      // the number of arguments (w/o thread) to pop after the call
 173     bool     check_exceptions          // whether to check for pending exceptions after return
 174   );
       // NOTE(review): the declaration above returns void, so the "returns the
       // register containing the thread" remark on its first line looks stale.
 175 
       // NOTE(review): presumably the shared tail of the call_VM overloads that take
       // no last_java_sp -- confirm in the definition.
 176   void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions);
 177 
       // Virtual hooks taking the register that holds the current thread.
       // NOTE(review): the names suggest JVMTI early-return / pop-frame handling;
       // behavior is defined out of line and, being virtual, may be overridden.
 178   virtual void check_and_handle_earlyret(Register java_thread);
 179   virtual void check_and_handle_popframe(Register java_thread);
 180 
       // Handle resolution helpers. Only one value register is passed, so it is
       // presumably both input and output (confirm); tmp1/tmp2 are temporaries.
 181   void resolve_weak_handle(Register result, Register tmp1, Register tmp2);
 182   void resolve_oop_handle(Register result, Register tmp1, Register tmp2);
 183   void resolve_jobject(Register value, Register tmp1, Register tmp2);
 184   void resolve_global_jobject(Register value, Register tmp1, Register tmp2);
 185 
       // Helpers that place an oop / metadata / narrow constant in dst, plus
       // bang_stack_size. Declarations only; see the out-of-line definitions.
 186   void movoop(Register dst, jobject obj);
 187   void mov_metadata(Register dst, Metadata* obj);
 188   void bang_stack_size(Register size, Register tmp);
 189   void set_narrow_oop(Register dst, jobject obj);
 190   void set_narrow_klass(Register dst, Klass* k);
 191 
       // Mirror, klass and decorated-access helpers (declarations only).
       // load_klass / store_klass use t0 as temporary unless the caller passes one.
 192   void load_mirror(Register dst, Register method, Register tmp1, Register tmp2);
 193   void access_load_at(BasicType type, DecoratorSet decorators, Register dst,
 194                       Address src, Register tmp1, Register tmp2);
 195   void access_store_at(BasicType type, DecoratorSet decorators, Address dst,
 196                        Register val, Register tmp1, Register tmp2, Register tmp3);
 197   void load_klass(Register dst, Register src, Register tmp = t0);
 198   void store_klass(Register dst, Register src, Register tmp = t0);
 199   void cmp_klass(Register oop, Register trial_klass, Register tmp1, Register tmp2, Label &L);
 200 
       // Klass / heap-oop encode and decode helpers. The single-register forms
       // presumably operate in place on r (confirm); tmp defaults to t0 where a
       // default is given and must be passed explicitly in the dst/src forms.
 201   void encode_klass_not_null(Register r, Register tmp = t0);
 202   void decode_klass_not_null(Register r, Register tmp = t0);
 203   void encode_klass_not_null(Register dst, Register src, Register tmp);
 204   void decode_klass_not_null(Register dst, Register src, Register tmp);
 205   void decode_heap_oop_not_null(Register r);
 206   void decode_heap_oop_not_null(Register dst, Register src);
 207   void decode_heap_oop(Register d, Register s);
       // In-place form: forwards to the (d, s) overload with d == s == r.
 208   void decode_heap_oop(Register r) { decode_heap_oop(r, r); }
 209   void encode_heap_oop(Register d, Register s);
       // In-place form: forwards to the (d, s) overload with d == s == r.
       // (The stray ';' that used to follow this inline body has been dropped.)
 210   void encode_heap_oop(Register r) { encode_heap_oop(r, r); }
 211   void load_heap_oop(Register dst, Address src, Register tmp1,
 212                      Register tmp2, DecoratorSet decorators = 0);
 213   void load_heap_oop_not_null(Register dst, Address src, Register tmp1,
 214                               Register tmp2, DecoratorSet decorators = 0);
 215   void store_heap_oop(Address dst, Register val, Register tmp1,
 216                       Register tmp2, Register tmp3, DecoratorSet decorators = 0);
       // In the three declarations above, decorators defaults to 0.
 217 
 218   void store_klass_gap(Register dst, Register src);
 219 
 220   // currently unimplemented
 221   // Used for storing null. All other oop constants should be
 222   // stored using routines that take a jobject.
 223   void store_heap_oop_null(Address dst);
 224 
 225   // This dummy is to prevent a call to store_heap_oop from
 226   // converting a zero (linked null) into a Register by giving
 227   // the compiler two choices it can't resolve
 228 
 229   void store_heap_oop(Address dst, void* dummy);
 230 
 231   // Support for null-checks
 232   //
 233   // Generates code that causes a null OS exception if the content of reg is null.
 234   // If the accessed location is M[reg + offset] and the offset is known, provide the
 235   // offset. No explicit code generation is needed if the offset is within a certain
 236   // range (0 <= offset <= page_size).
       // The default offset of -1 stands for "offset not provided".
 237 
 238   virtual void null_check(Register reg, int offset = -1);
 239   static bool needs_explicit_null_check(intptr_t offset);
 240   static bool uses_implicit_null_check(void* address);
 241 
 242   // idiv variant which deals with MINLONG as dividend and -1 as divisor
       // corrected_idivl / corrected_idivq are the two variants (presumably 32-bit
       // and 64-bit). want_remainder selects the remainder instead of the quotient
       // (per its name). NOTE(review): the meaning of the int result is not visible here.
 243   int corrected_idivl(Register result, Register rs1, Register rs2,
 244                       bool want_remainder);
 245   int corrected_idivq(Register result, Register rs1, Register rs2,
 246                       bool want_remainder);
 247 
 248   // interface method calling
       // no_such_interface is the label supplied for a failed lookup;
       // return_method defaults to true.
 249   void lookup_interface_method(Register recv_klass,
 250                                Register intf_klass,
 251                                RegisterOrConstant itable_index,
 252                                Register method_result,
 253                                Register scan_tmp,
 254                                Label& no_such_interface,
 255                                bool return_method = true);
 256 
 257   // virtual method calling
 258   // n.b. x86 allows RegisterOrConstant for vtable_index
 259   void lookup_virtual_method(Register recv_klass,
 260                              RegisterOrConstant vtable_index,
 261                              Register method_result);
 262 
 263   // Form an address from base + offset in Rd. Rd may or may not
 264   // actually be used: you must use the Address that is returned. It
 265   // is up to you to ensure that the shift provided matches the size
 266   // of your data.
       // NOTE(review): the signature has no shift parameter, so the last sentence
       // above may be a leftover from another port -- confirm.
 267   Address form_address(Register Rd, Register base, int64_t byte_offset);
 268 
 269   // Misaligned loads and stores (usually from Unsafe accesses) can carry
 270   // offsets that exceed the offset range.
       // Returns adr unchanged unless it is a base_plus_offset address whose offset
       // is not a simm12; in that case the address is rebuilt through form_address,
       // which may use Rd.
 271   Address legitimize_address(Register Rd, const Address &adr) {
 272     const bool needs_rewrite = adr.getMode() == Address::base_plus_offset &&
 273                                !is_simm12(adr.offset());
 274     if (!needs_rewrite) {
 275       return adr;
 276     }
 277     return form_address(Rd, adr.base(), adr.offset());
 278   }
 279 
 280   // allocation
 281   void tlab_allocate(
 282     Register obj,                   // result: pointer to object after successful allocation
 283     Register var_size_in_bytes,     // object size in bytes if unknown at compile time; invalid otherwise
 284     int      con_size_in_bytes,     // object size in bytes if   known at compile time
 285     Register tmp1,                  // temp register
 286     Register tmp2,                  // temp register
 287     Label&   slow_case,             // continuation point if fast allocation fails
 288     bool     is_far = false         // defaults to false; presumably requests a far branch to slow_case
 289   );
 290 
 291   // Test sub_klass against super_klass, with fast and slow paths.
 292 
 293   // The fast path produces a tri-state answer: yes / no / maybe-slow.
 294   // One of the three labels can be null, meaning take the fall-through.
 295   // If super_check_offset is -1, the value is loaded up from super_klass.
 296   // No registers are killed, except tmp_reg
       // NOTE(review): super_check_offset is a Register defaulting to noreg here,
       // so the "-1" wording above looks inherited from another port -- confirm.
 297   void check_klass_subtype_fast_path(Register sub_klass,
 298                                      Register super_klass,
 299                                      Register tmp_reg,
 300                                      Label* L_success,
 301                                      Label* L_failure,
 302                                      Label* L_slow_path,
 303                                      Register super_check_offset = noreg);
 304 
 305   // The rest of the type check; must be wired to a corresponding fast path.
 306   // It does not repeat the fast path logic, so don't use it standalone.
 307   // The tmp1_reg and tmp2_reg can be noreg, if no temps are available.
 308   // Updates the sub's secondary super cache as necessary.
 309   void check_klass_subtype_slow_path(Register sub_klass,
 310                                      Register super_klass,
 311                                      Register tmp1_reg,
 312                                      Register tmp2_reg,
 313                                      Label* L_success,
 314                                      Label* L_failure);
 315 
       // NOTE(review): presumably the combined fast + slow check that branches to
       // L_success on a match and falls through otherwise -- confirm.
 316   void check_klass_subtype(Register sub_klass,
 317                            Register super_klass,
 318                            Register tmp_reg,
 319                            Label& L_success);
 320 
       // Address of an argument slot; extra_slot_offset defaults to 0.
       // NOTE(review): the base the slot is relative to is not visible here.
 321   Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0);
 322 
 323   // only if +VerifyOops
       // s is the message; file / line identify the call site (the verify_oop*
       // macros pass __FILE__ and __LINE__).
 324   void _verify_oop(Register reg, const char* s, const char* file, int line);
 325   void _verify_oop_addr(Address addr, const char* s, const char* file, int line);
 326 
       // Forwards to _verify_oop only when the VerifyOops flag is set; otherwise
       // nothing is done.
 327   void _verify_oop_checked(Register reg, const char* s, const char* file, int line) {
 328     if (!VerifyOops) {
 329       return;  // verification switched off
 330     }
 331     _verify_oop(reg, s, file, line);
       }
 332   void _verify_oop_addr_checked(Address reg, const char* s, const char* file, int line) {
         // Address flavour of the guarded check: forwards to _verify_oop_addr only
         // when the VerifyOops flag is set.
 333     if (!VerifyOops) {
 334       return;  // verification switched off
 335     }
 336     _verify_oop_addr(reg, s, file, line);
       }
 337 
       // Both bodies are empty, so a call does nothing and all four arguments are
       // ignored.
 338   void _verify_method_ptr(Register reg, const char* msg, const char* file, int line) {}
 339   void _verify_klass_ptr(Register reg, const char* msg, const char* file, int line) {}
 340 
       // Convenience macros: each stringifies its operand into the failure message
       // and appends the call site (__FILE__ / __LINE__).
       // verify_oop, verify_oop_msg and verify_oop_addr go through the *_checked
       // wrappers; verify_method_ptr and verify_klass_ptr expand to
       // _verify_method_ptr and _verify_klass_ptr respectively.
 341 #define verify_oop(reg) _verify_oop_checked(reg, "broken oop " #reg, __FILE__, __LINE__)
 342 #define verify_oop_msg(reg, msg) _verify_oop_checked(reg, "broken oop " #reg ", " #msg, __FILE__, __LINE__)
 343 #define verify_oop_addr(addr) _verify_oop_addr_checked(addr, "broken oop addr " #addr, __FILE__, __LINE__)
 344 #define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__)
 345 #define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__)
 346 
 347   // A more convenient access to fence for our purposes
 348   // Four bits indicate the read and write bits of the predecessor and successor sets:
 349   // bits [3:2] are the predecessor r/w pair, bits [1:0] the successor r/w pair
       // (within a pair, 10 = r and 01 = w). If UseConservativeFence is enabled the
       // sets are extended with i for r and o for w.
 350   enum Membar_mask_bits {
 351     StoreStore = 0b0101,               // (pred = ow   + succ =   ow)
 352     LoadStore  = 0b1001,               // (pred = ir   + succ =   ow)
 353     StoreLoad  = 0b0110,               // (pred = ow   + succ =   ir)
 354     LoadLoad   = 0b1010,               // (pred = ir   + succ =   ir)
 355     AnyAny     = LoadStore | StoreLoad // (pred = iorw + succ = iorw)
 356   };
 357 
       // order_constraint is a combination of the Membar_mask_bits values above.
 358   void membar(uint32_t order_constraint);
 359 
       // Splits a Membar_mask_bits style value into fence predecessor / successor sets.
       //   order_constraint        bits [3:2] = predecessor rw, bits [1:0] = successor rw
       //   predecessor, successor  out-parameters, always overwritten
       // With UseConservativeFence each rw pair is additionally copied two bits up:
       //   01 (w) -> 0101 (ow),  10 (r) -> 1010 (ir),  11 (rw) -> 1111 (iorw)
 360   static void membar_mask_to_pred_succ(uint32_t order_constraint,
 361                                        uint32_t& predecessor, uint32_t& successor) {
 362     const uint32_t rw_mask = 0x3;
 363     predecessor = (order_constraint >> 2) & rw_mask;
 364     successor = order_constraint & rw_mask;
 365 
 366     if (!UseConservativeFence) {
 367       return;  // plain rw sets are all that is wanted
 368     }
 369 
 370     // widen rw -> iorw
 371     predecessor |= predecessor << 2;
 372     successor |= successor << 2;
 373   }
 374 
       // Packs the rw bits of a predecessor and a successor set into one mask:
       // predecessor rw lands in bits [3:2], successor rw in bits [1:0]. Any bits
       // above the low two of either input are dropped.
 375   static int pred_succ_to_membar_mask(uint32_t predecessor, uint32_t successor) {
           const uint32_t pred_rw = predecessor & 0x3;
           const uint32_t succ_rw = successor & 0x3;
 376     return (pred_rw << 2) | succ_rw;
 377   }
 378 
       // Emits a fence whose predecessor set is w and whose successor set is empty.
       // NOTE(review): this looks like the Zihintpause PAUSE hint encoding -- confirm.
 379   void pause() {
 380     fence(w, 0);
 381   }
 382 
 383   // prints msg, dumps registers and stops execution
 384   void stop(const char* msg);
 385 
       // NOTE(review): presumably the runtime routine behind stop(), receiving the
       // message, the pc and the saved register values -- confirm.
 386   static void debug64(char* msg, int64_t pc, int64_t regs[]);
 387 
       // what defaults to the empty string.
 388   void unimplemented(const char* what = "");
 389 
       // Calls stop() with the fixed message "should not reach here".
 390   void should_not_reach_here() { stop("should not reach here"); }
 391 
       // NOTE(review): presumably decodes the instruction(s) at insn_addr and
       // returns the address they refer to -- confirm.
 392   static address target_addr_for_insn(address insn_addr);
 393 
 394   // Required platform-specific helpers for Label::patch_instructions.
 395   // They _shadow_ the declarations in AbstractAssembler, which are undefined.
 396   static int pd_patch_instruction_size(address branch, address target);
       // Forwards to pd_patch_instruction_size and discards its int result; file
       // and line are accepted but not used.
 397   static void pd_patch_instruction(address branch, address target, const char* file = nullptr, int line = 0) {
 398     pd_patch_instruction_size(branch, target);
 399   }
       // The call destination is whatever target_addr_for_insn reports for branch.
 400   static address pd_call_destination(address branch) {
 401     return target_addr_for_insn(branch);
 402   }
 403 
 404   static int patch_oop(address insn_addr, address o);
 405 
       // Read / patch helpers for an li32 sequence (declarations only).
 406   static address get_target_of_li32(address insn_addr);
 407   static int patch_imm_in_li32(address branch, int32_t target);
 408 
 409   // Return whether code is emitted to a scratch blob.
       // This implementation always answers false; being virtual, a subclass may
       // answer differently.
 410   virtual bool in_scratch_emit_size() {
 411     return false;
 412   }
 413 
       // Trampoline and static-call stub emitters with their size queries.
 414   address emit_trampoline_stub(int insts_call_instruction_offset, address target);
 415   static int max_trampoline_stub_size();
 416   void emit_static_call_stub();
 417   static int static_call_stub_size();
 418 
 419   // The following 4 methods return the offset of the appropriate move instruction
 420 
 421   // Support for fast byte/short loading with zero extension (depending on particular CPU)
 422   int load_unsigned_byte(Register dst, Address src);
 423   int load_unsigned_short(Register dst, Address src);
 424 
 425   // Support for fast byte/short loading with sign extension (depending on particular CPU)
 426   int load_signed_byte(Register dst, Address src);
 427   int load_signed_short(Register dst, Address src);
 428 
 429   // Load and store values by size and signed-ness
       // size_in_bytes gives the access width; is_signed exists on the load only.
 430   void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed);
 431   void store_sized_value(Address dst, Register src, size_t size_in_bytes);
 432 
 433   // Misaligned loads, will use the best way, according to the AvoidUnalignedAccess flag
       // granularity defaults to 1. NOTE(review): presumably the alignment in bytes
       // the caller can guarantee for src -- confirm in the definitions.
 434   void load_short_misaligned(Register dst, Address src, Register tmp, bool is_signed, int granularity = 1);
 435   void load_int_misaligned(Register dst, Address src, Register tmp, bool is_signed, int granularity = 1);
 436   void load_long_misaligned(Register dst, Address src, Register tmp, int granularity = 1);
 437 
 438  public:
 439   // Standard pseudo instructions
       // nop: emitted as addi x0, x0, 0.
 440   inline void nop() {
 441     addi(x0, x0, 0);
 442   }
 443 
       // mv Rd, Rs: register copy emitted as addi Rd, Rs, 0. When Rd and Rs are the
       // same register no instruction is emitted at all.
 444   inline void mv(Register Rd, Register Rs) {
 445     const bool is_real_move = (Rd != Rs);
 446     if (!is_real_move) return;  // self-move: emit nothing
 447     addi(Rd, Rs, 0);
 448   }
 449 
       // Bitwise complement of Rs into Rd, emitted as xori Rd, Rs, -1.
 450   inline void notr(Register Rd, Register Rs) {
 451     xori(Rd, Rs, -1);
 452   }
 453 
       // Negation (Rd = 0 - Rs), emitted as sub Rd, x0, Rs.
 454   inline void neg(Register Rd, Register Rs) {
 455     sub(Rd, x0, Rs);
 456   }
 457 
       // Word-sized negation, emitted as subw Rd, x0, Rs.
 458   inline void negw(Register Rd, Register Rs) {
 459     subw(Rd, x0, Rs);
 460   }
 461 
       // Sign-extend word, emitted as addiw Rd, Rs, 0.
 462   inline void sext_w(Register Rd, Register Rs) {
 463     addiw(Rd, Rs, 0);
 464   }
 465 
       // Zero-extend byte: keeps only the low 8 bits of Rs (andi Rd, Rs, 0xFF).
 466   inline void zext_b(Register Rd, Register Rs) {
 467     andi(Rd, Rs, 0xFF);
 468   }
 469 
       // Set Rd if Rs equals zero, emitted as sltiu Rd, Rs, 1
       // (unsigned "Rs < 1" holds only for Rs == 0).
 470   inline void seqz(Register Rd, Register Rs) {
 471     sltiu(Rd, Rs, 1);
 472   }
 473 
       // Set Rd if Rs is non-zero, emitted as sltu Rd, x0, Rs
       // (unsigned "0 < Rs" holds for every Rs != 0).
 474   inline void snez(Register Rd, Register Rs) {
 475     sltu(Rd, x0, Rs);
 476   }
 477 
       // Set Rd if Rs is less than zero (signed), emitted as slt Rd, Rs, x0.
 478   inline void sltz(Register Rd, Register Rs) {
 479     slt(Rd, Rs, x0);
 480   }
 481 
       // Set Rd if Rs is greater than zero (signed), emitted as slt Rd, x0, Rs.
 482   inline void sgtz(Register Rd, Register Rs) {
 483     slt(Rd, x0, Rs);
 484   }
 485 
 486   // Bit-manipulation extension pseudo instructions
 487   // zero extend word
       // Emitted through add_uw with zr as the second source operand.
 488   inline void zext_w(Register Rd, Register Rs) {
 489     add_uw(Rd, Rs, zr);
 490   }
 491 
 492   // Floating-point data-processing pseudo instructions
       // fmv_s: single-precision register copy emitted as fsgnj_s Rd, Rs, Rs. When
       // Rd and Rs are the same register no instruction is emitted.
 493   inline void fmv_s(FloatRegister Rd, FloatRegister Rs) {
 494     const bool is_real_move = (Rd != Rs);
 495     if (!is_real_move) return;  // self-move: emit nothing
 496     fsgnj_s(Rd, Rs, Rs);
 497   }
 498 
       // Single-precision absolute value, emitted as fsgnjx_s Rd, Rs, Rs.
 499   inline void fabs_s(FloatRegister Rd, FloatRegister Rs) {
 500     fsgnjx_s(Rd, Rs, Rs);
 501   }
 502 
       // Single-precision negation, emitted as fsgnjn_s Rd, Rs, Rs.
 503   inline void fneg_s(FloatRegister Rd, FloatRegister Rs) {
 504     fsgnjn_s(Rd, Rs, Rs);
 505   }
 506 
       // fmv_d: double-precision register copy emitted as fsgnj_d Rd, Rs, Rs. When
       // Rd and Rs are the same register no instruction is emitted.
 507   inline void fmv_d(FloatRegister Rd, FloatRegister Rs) {
 508     const bool is_real_move = (Rd != Rs);
 509     if (!is_real_move) return;  // self-move: emit nothing
 510     fsgnj_d(Rd, Rs, Rs);
 511   }
 512 
       // Double-precision absolute value, emitted as fsgnjx_d Rd, Rs, Rs.
 513   inline void fabs_d(FloatRegister Rd, FloatRegister Rs) {
 514     fsgnjx_d(Rd, Rs, Rs);
 515   }
 516 
       // Double-precision negation, emitted as fsgnjn_d Rd, Rs, Rs.
 517   inline void fneg_d(FloatRegister Rd, FloatRegister Rs) {
 518     fsgnjn_d(Rd, Rs, Rs);
 519   }
 520 
 521   // Control and status pseudo instructions
 522   void rdinstret(Register Rd);                  // read instruction-retired counter
 523   void rdcycle(Register Rd);                    // read cycle counter
 524   void rdtime(Register Rd);                     // read time
 525   void csrr(Register Rd, unsigned csr);         // read csr
 526   void csrw(unsigned csr, Register Rs);         // write csr
 527   void csrs(unsigned csr, Register Rs);         // set bits in csr
 528   void csrc(unsigned csr, Register Rs);         // clear bits in csr
       // Immediate forms of csrw / csrs / csrc: imm takes the place of Rs.
 529   void csrwi(unsigned csr, unsigned imm);
 530   void csrsi(unsigned csr, unsigned imm);
 531   void csrci(unsigned csr, unsigned imm);
 532   void frcsr(Register Rd);                      // read float-point csr
 533   void fscsr(Register Rd, Register Rs);         // swap float-point csr
 534   void fscsr(Register Rs);                      // write float-point csr
 535   void frrm(Register Rd);                       // read float-point rounding mode
 536   void fsrm(Register Rd, Register Rs);          // swap float-point rounding mode
 537   void fsrm(Register Rs);                       // write float-point rounding mode
       // Immediate forms of the two fsrm variants above.
 538   void fsrmi(Register Rd, unsigned imm);
 539   void fsrmi(unsigned imm);
 540   void frflags(Register Rd);                    // read float-point exception flags
 541   void fsflags(Register Rd, Register Rs);       // swap float-point exception flags
 542   void fsflags(Register Rs);                    // write float-point exception flags
       // Immediate forms of the two fsflags variants above.
 543   void fsflagsi(Register Rd, unsigned imm);
 544   void fsflagsi(unsigned imm);
 545 
 546   // Control transfer pseudo instructions
       // Compare-with-zero branches to a code address.
 547   void beqz(Register Rs, const address dest);
 548   void bnez(Register Rs, const address dest);
 549   void blez(Register Rs, const address dest);
 550   void bgez(Register Rs, const address dest);
 551   void bltz(Register Rs, const address dest);
 552   void bgtz(Register Rs, const address dest);
 553 
       // Jumps (j) and jump-and-link (jal) to a Label, a code address or an Address.
       // temp defaults to t0. NOTE(review): presumably a scratch register for
       // targets that cannot be reached directly -- confirm in the definitions.
 554   void j(Label &l, Register temp = t0);
 555   void j(const address dest, Register temp = t0);
 556   void j(const Address &adr, Register temp = t0);
 557   void jal(Label &l, Register temp = t0);
 558   void jal(const address dest, Register temp = t0);
 559   void jal(const Address &adr, Register temp = t0);
 560   void jal(Register Rd, Label &L, Register temp = t0);
 561   void jal(Register Rd, const address dest, Register temp = t0);
 562 
 563   //label
       // Label-based forms of the compare-with-zero branches; is_far defaults to false.
 564   void beqz(Register Rs, Label &l, bool is_far = false);
 565   void bnez(Register Rs, Label &l, bool is_far = false);
 566   void blez(Register Rs, Label &l, bool is_far = false);
 567   void bgez(Register Rs, Label &l, bool is_far = false);
 568   void bltz(Register Rs, Label &l, bool is_far = false);
 569   void bgtz(Register Rs, Label &l, bool is_far = false);
 570 
       // Two-register compare-and-branch to a Label; is_far defaults to false.
 571   void beq (Register Rs1, Register Rs2, Label &L, bool is_far = false);
 572   void bne (Register Rs1, Register Rs2, Label &L, bool is_far = false);
 573   void blt (Register Rs1, Register Rs2, Label &L, bool is_far = false);
 574   void bge (Register Rs1, Register Rs2, Label &L, bool is_far = false);
 575   void bltu(Register Rs1, Register Rs2, Label &L, bool is_far = false);
 576   void bgeu(Register Rs1, Register Rs2, Label &L, bool is_far = false);
 577 
       // Greater-than / less-or-equal branches (signed, and unsigned with the u
       // suffix) to a code address.
 578   void bgt (Register Rs, Register Rt, const address dest);
 579   void ble (Register Rs, Register Rt, const address dest);
 580   void bgtu(Register Rs, Register Rt, const address dest);
 581   void bleu(Register Rs, Register Rt, const address dest);
 582 
       // The same four branches to a Label; is_far defaults to false.
 583   void bgt (Register Rs, Register Rt, Label &l, bool is_far = false);
 584   void ble (Register Rs, Register Rt, Label &l, bool is_far = false);
 585   void bgtu(Register Rs, Register Rt, Label &l, bool is_far = false);
 586   void bleu(Register Rs, Register Rt, Label &l, bool is_far = false);
 587 
       // INSN_ENTRY_RELOC opens a function body that
       //   1. guarantees rtype == relocInfo::internal_word_type,
       //   2. records the relocation spec of InternalAddress(dest), and
       //   3. declares an IncompressibleRegion for the statements that follow.
       // The macro deliberately leaves the body open: its user supplies the
       // remaining statements and the closing brace.
 588 #define INSN_ENTRY_RELOC(result_type, header)                               \
 589   result_type header {                                                      \
 590     guarantee(rtype == relocInfo::internal_word_type,                       \
 591               "only internal_word_type relocs make sense here");            \
 592     relocate(InternalAddress(dest).rspec());                                \
 593     IncompressibleRegion ir(this);  /* relocations */
 594 
       // INSN(NAME) defines two address-taking overloads of a conditional branch:
       //   NAME(Rs1, Rs2, dest)         computes dest - pc(), guarantees the offset
       //                                is an even simm13 and emits Assembler::NAME;
       //   NAME(Rs1, Rs2, dest, rtype)  does the INSN_ENTRY_RELOC steps first and
       //                                then calls the three-argument overload.
 595 #define INSN(NAME)                                                                                       \
 596   void NAME(Register Rs1, Register Rs2, const address dest) {                                            \
 597     assert_cond(dest != nullptr);                                                                        \
 598     int64_t offset = dest - pc();                                                                        \
 599     guarantee(is_simm13(offset) && is_even(offset),                                                      \
 600               "offset is invalid: is_simm_13: %s offset: " INT64_FORMAT,                                 \
 601               BOOL_TO_STR(is_simm13(offset)), offset);                                                   \
 602     Assembler::NAME(Rs1, Rs2, offset);                                                                   \
 603   }                                                                                                      \
 604   INSN_ENTRY_RELOC(void, NAME(Register Rs1, Register Rs2, address dest, relocInfo::relocType rtype))     \
 605     NAME(Rs1, Rs2, dest);                                                                                \
 606   }
 607 
       // Instantiations for the six two-register conditional branches.
 608   INSN(beq);
 609   INSN(bne);
 610   INSN(bge);
 611   INSN(bgeu);
 612   INSN(blt);
 613   INSN(bltu);
 614 
 615 #undef INSN
 616 
 617 #undef INSN_ENTRY_RELOC
 618 
       // Floating-point compare-and-branch helpers to a Label: single precision
       // (float_*) first, then double precision (double_*). is_far and
       // is_unordered both default to false.
       // NOTE(review): is_unordered presumably controls whether the branch is also
       // taken when the operands compare unordered (NaN) -- confirm.
 619   void float_beq(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
 620   void float_bne(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
 621   void float_ble(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
 622   void float_bge(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
 623   void float_blt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
 624   void float_bgt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
 625 
 626   void double_beq(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
 627   void double_bne(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
 628   void double_ble(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
 629   void double_bge(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
 630   void double_blt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
 631   void double_bgt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
 632 
 633 private:
       // Raw bitset-based push/pop workers behind the public RegSet wrappers.
       // bitset selects the registers and stack names the stack-pointer register.
       // NOTE(review): the int result is presumably the number of registers
       // handled -- confirm in the definitions.
 634   int push_reg(unsigned int bitset, Register stack);
 635   int pop_reg(unsigned int bitset, Register stack);
 636   int push_fp(unsigned int bitset, Register stack);
 637   int pop_fp(unsigned int bitset, Register stack);
 638 #ifdef COMPILER2
       // Vector register variants, only declared when COMPILER2 is defined.
 639   int push_v(unsigned int bitset, Register stack);
 640   int pop_v(unsigned int bitset, Register stack);
 641 #endif // COMPILER2
 642 
 643 public:
       // Single-register push / pop (declarations only).
 644   void push_reg(Register Rs);
 645   void pop_reg(Register Rd);
 646   void push_reg(RegSet regs, Register stack) {
         // Set-based wrapper: an empty set emits nothing, otherwise the raw bitset
         // is passed to the bitset overload (whose int result is not used).
         const auto mask = regs.bits();
         if (mask) {
           push_reg(mask, stack);
         }
       }
 647   void pop_reg(RegSet regs, Register stack) {
         // Counterpart of push_reg(RegSet, Register).
         const auto mask = regs.bits();
         if (mask) {
           pop_reg(mask, stack);
         }
       }
 648   void push_fp(FloatRegSet regs, Register stack) {
         // Floating-point register set flavour.
         const auto mask = regs.bits();
         if (mask) {
           push_fp(mask, stack);
         }
       }
 649   void pop_fp(FloatRegSet regs, Register stack) {
         // Counterpart of push_fp(FloatRegSet, Register).
         const auto mask = regs.bits();
         if (mask) {
           pop_fp(mask, stack);
         }
       }
 650 #ifdef COMPILER2
 651   void push_v(VectorRegSet regs, Register stack) {
         // Vector register set flavour; only present when COMPILER2 is defined.
         const auto mask = regs.bits();
         if (mask) {
           push_v(mask, stack);
         }
       }
 652   void pop_v(VectorRegSet regs, Register stack) {
         // Counterpart of push_v(VectorRegSet, Register).
         const auto mask = regs.bits();
         if (mask) {
           pop_v(mask, stack);
         }
       }
 653 #endif // COMPILER2
 654 
 655   // Push and pop everything that might be clobbered by a native
 656   // runtime call except t0 and t1. (They are always
 657   // temporary registers, so we don't have to protect them.)
 658   // Additional registers can be excluded in a passed RegSet.
       // Pass the same exclude set to the push and its matching pop.
 659   void push_call_clobbered_registers_except(RegSet exclude);
 660   void pop_call_clobbered_registers_except(RegSet exclude);
 661 
 662   void push_call_clobbered_registers() {
 663     push_call_clobbered_registers_except(RegSet());
 664   }
 665   void pop_call_clobbered_registers() {
 666     pop_call_clobbered_registers_except(RegSet());
 667   }
 668 
       // Save / restore CPU state. Vector handling is opt-in: the flag defaults to
       // false and vector_size_in_bytes to 0.
       // NOTE(review): which registers make up the saved state is decided in the
       // implementation — confirm in the .cpp.
 669   void push_CPU_state(bool save_vectors = false, int vector_size_in_bytes = 0);
 670   void pop_CPU_state(bool restore_vectors = false, int vector_size_in_bytes = 0);
 671 
       // NOTE(review): presumably continuation fast-path bookkeeping on the thread
       // whose pointer is held in java_thread (inferred from the names) — confirm.
 672   void push_cont_fastpath(Register java_thread);
 673   void pop_cont_fastpath(Register java_thread);
 674 
 675   // If the heap base register is in use, reinitialize it with the correct value.
 676   void reinit_heapbase();
 677 
       // Bind label L through Assembler::bind, then clear the code buffer's record
       // of the last emitted instruction. The order matters: the record is cleared
       // after the label has been bound.
 678   void bind(Label& L) {
 679     Assembler::bind(L);
 680     // fences across basic blocks should not be merged
 681     code()->clear_last_insn();
 682   }
 683 
       // Pointer-to-member types for MacroAssembler emitters, consumed by the
       // wrap_label() overloads:
       //  - compare_and_branch_insn:       (Rs1, Rs2, const address dest)
       //  - compare_and_branch_label_insn: (Rs1, Rs2, Label&, bool is_far)
       //  - jal_jalr_insn:                 (Rt, address dest)
       //  - load_insn_by_temp:             (Rt, address dest, Register temp)
 684   typedef void (MacroAssembler::* compare_and_branch_insn)(Register Rs1, Register Rs2, const address dest);
 685   typedef void (MacroAssembler::* compare_and_branch_label_insn)(Register Rs1, Register Rs2, Label &L, bool is_far);
 686   typedef void (MacroAssembler::* jal_jalr_insn)(Register Rt, address dest);
 687   typedef void (MacroAssembler::* load_insn_by_temp)(Register Rt, address dest, Register temp);
 688 
       // Apply an address-taking emitter to a Label. The integer load macros in this
       // class use the jal_jalr_insn form, e.g. wrap_label(Rd, L, &MacroAssembler::ld).
       // NOTE(review): how unbound labels are handled, and the role of neg_insn and
       // is_far in the compare-and-branch form, are defined in the .cpp — confirm.
 689   void wrap_label(Register r, Label &L, Register t, load_insn_by_temp insn);
 690   void wrap_label(Register r, Label &L, jal_jalr_insn insn);
 691   void wrap_label(Register r1, Register r2, Label &L,
 692                   compare_and_branch_insn insn,
 693                   compare_and_branch_label_insn neg_insn, bool is_far = false);
 694 
       // la: place an address in Rd. The target may be a Label, a raw address or an
       // Address. The load/store macros in this class use the Address form to
       // compute base + offset into a register before the memory access.
 695   void la(Register Rd, Label &label);
 696   void la(Register Rd, const address dest);
 697   void la(Register Rd, const Address &adr);
 698 
       // Load-immediate variants, distinguished by immediate type (uint16_t,
       // int32_t, int64_t). li() is the form the mv() overloads for raw addresses
       // and integral values call.
 699   void li16u(Register Rd, uint16_t imm);
 700   void li32(Register Rd, int32_t imm);
 701   void li64(Register Rd, int64_t imm);
 702   void li  (Register Rd, int64_t imm);  // optimized load immediate
 703 
 704   // mv
       // Load a raw address into Rd as a 64-bit immediate via li().
 705   void mv(Register Rd, address addr)                  { li(Rd, (int64_t)addr); }
       // Load addr minus its sign-extended low 12 bits into Rd and hand those bits
       // back through 'offset', so that Rd + offset == addr.
 706   void mv(Register Rd, address addr, int32_t &offset) {
 707     // Split address into a lower 12-bit sign-extended offset and the remainder,
 708     // so that the offset could be encoded in jalr or load/store instruction.
 709     offset = ((int32_t)(int64_t)addr << 20) >> 20;  // low 12 bits of addr, sign-extended
 710     li(Rd, (int64_t)addr - offset);
 711   }
 712 
       // Any integral value: converted to int64_t and loaded with li(). ENABLE_IF
       // restricts this overload to types for which std::is_integral holds.
 713   template<typename T, ENABLE_IF(std::is_integral<T>::value)>
 714   inline void mv(Register Rd, T o)                    { li(Rd, (int64_t)o); }
 715 
       // Load the target of a literal Address into Rd with movptr(), emitted under
       // the Address's relocation spec. Only Address::literal is accepted; any
       // other mode trips the assert.
 716   void mv(Register Rd, Address dest) {
 717     assert(dest.getMode() == Address::literal, "Address mode should be Address::literal");
 718     relocate(dest.rspec(), [&] {
 719       movptr(Rd, dest.target());
 720     });
 721   }
 722 
       // Copy a RegisterOrConstant into Rd: the constant goes through the
       // constant overload of mv, a register through the register overload.
 723   void mv(Register Rd, RegisterOrConstant src) {
 724     if (!src.is_register()) {
 725       mv(Rd, src.as_constant());
 726       return;
 727     }
 728     mv(Rd, src.as_register());
 729   }
 730 
       // Load a pointer in split form: callers in this class add the returned
       // 'offset' to Rd (addi) or use it as the immediate of the following memory
       // access, so Rd + offset is the full address.
 731   void movptr(Register Rd, address addr, int32_t &offset);
 732 
       // Load the full pointer into Rd: split form followed by an addi that folds
       // the offset back in.
 733   void movptr(Register Rd, address addr) {
 734     int32_t offset = 0;  // same type as the reference parameter it is bound to
 735     movptr(Rd, addr, offset);
 736     addi(Rd, Rd, offset);
 737   }
 738 
       // Same, for a pointer given as an integer; simply casts to address.
 739   inline void movptr(Register Rd, uintptr_t imm64) {
 740     movptr(Rd, (address)imm64);
 741   }
 742 
 743   // arith
       // Add / subtract an immediate. The 'w' forms take an int32_t immediate, the
       // others an int64_t one.
       // NOTE(review): temp (t0 by default) is presumably clobbered only when the
       // immediate does not fit the instruction's immediate field — confirm in the .cpp.
 744   void add (Register Rd, Register Rn, int64_t increment, Register temp = t0);
 745   void addw(Register Rd, Register Rn, int32_t increment, Register temp = t0);
 746   void sub (Register Rd, Register Rn, int64_t decrement, Register temp = t0);
 747   void subw(Register Rd, Register Rn, int32_t decrement, Register temp = t0);
 748 
       // Three-register add/addw/sub/subw: forward unchanged to the Assembler
       // instruction of the same name.
       // NOTE(review): presumably re-declared here because the immediate overloads
       // declared in this class would otherwise hide the Assembler versions.
 749 #define INSN(NAME)                                               \
 750   inline void NAME(Register Rd, Register Rs1, Register Rs2) {    \
 751     Assembler::NAME(Rd, Rs1, Rs2);                               \
 752   }
 753 
 754   INSN(add);
 755   INSN(addw);
 756   INSN(sub);
 757   INSN(subw);
 758 
 759 #undef INSN
 760 
 761   // logic
       // NOTE(review): the 'w' suffix presumably denotes a 32-bit (word) result —
       // confirm in the .cpp.
 762   void andrw(Register Rd, Register Rs1, Register Rs2);
 763   void orrw(Register Rd, Register Rs1, Register Rs2);
 764   void xorrw(Register Rd, Register Rs1, Register Rs2);
 765 
 766   // logic with negate
       // NOTE(review): which operand is negated is not visible here (presumably
       // Rs2, as in the Zbb andn/orn instructions) — confirm in the .cpp.
 767   void andn(Register Rd, Register Rs1, Register Rs2);
 768   void orn(Register Rd, Register Rs1, Register Rs2);
 769 
 770   // revb: byte-reversal helpers. Scratch registers default to t0 (and t1 where two are taken).
 771   void revb_h_h(Register Rd, Register Rs, Register tmp = t0);                           // reverse bytes in halfword in lower 16 bits, sign-extend
 772   void revb_w_w(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);      // reverse bytes in lower word, sign-extend
 773   void revb_h_h_u(Register Rd, Register Rs, Register tmp = t0);                         // reverse bytes in halfword in lower 16 bits, zero-extend
 774   void revb_h_w_u(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);    // reverse bytes in halfwords in lower 32 bits, zero-extend
 775   void revb_h_helper(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1);  // reverse bytes in upper 16 bits (48:63) and move to lower
 776   void revb_h(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1);         // reverse bytes in each halfword
 777   void revb_w(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1);         // reverse bytes in each word
 778   void revb(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);          // reverse bytes in doubleword
 779 
       // Rotate src by an immediate amount into dst; tmp (t0 by default) is the
       // scratch register.
       // NOTE(review): direction and width are inferred from the names (ror = rotate
       // right, rolw = rotate left on a 32-bit word) — confirm in the .cpp.
 780   void ror_imm(Register dst, Register src, uint32_t shift, Register tmp = t0);
 781   void rolw_imm(Register dst, Register src, uint32_t shift, Register tmp = t0);
       // AND with a 64-bit immediate; tmp (t0 by default) is available as scratch.
 782   void andi(Register Rd, Register Rn, int64_t imm, Register tmp = t0);
       // NOTE(review): presumably ORs src into the word at adr, using tmp1/tmp2
       // (t0/t1 by default) as scratch — confirm in the .cpp.
 783   void orptr(Address adr, RegisterOrConstant src, Register tmp1 = t0, Register tmp2 = t1);
 784 
 785 // Load and Store Instructions
     // INSN_ENTRY_RELOC opens a function definition with the given result type and
     // header and emits a common prologue that
     //  - guarantees rtype is relocInfo::internal_word_type,
     //  - emits a relocation for InternalAddress(dest), and
     //  - opens an IncompressibleRegion that lasts until the function returns.
     // The macro leaves the body open: every use supplies the remaining statements
     // and the closing brace. 'header' must declare parameters named 'dest' and
     // 'rtype', since the prologue refers to them.
 786 #define INSN_ENTRY_RELOC(result_type, header)                               \
 787   result_type header {                                                      \
 788     guarantee(rtype == relocInfo::internal_word_type,                       \
 789               "only internal_word_type relocs make sense here");            \
 790     relocate(InternalAddress(dest).rspec());                                \
 791     IncompressibleRegion ir(this);  /* relocations */
 792 
     // Integer loads (lb, lbu, lh, lhu, lw, lwu, ld). For each NAME the macro
     // generates four overloads:
     //  - NAME(Rd, address dest): load from an absolute address. When dest - pc()
     //    fits in a signed 32-bit value, emit auipc + load, using the sign-extended
     //    low 12 bits of the distance as the load offset (0x800 is added to the
     //    auipc operand, presumably to round the upper part so that it compensates
     //    for that sign extension). Otherwise emit movptr + load. Rd doubles as the
     //    address register, so no temporary is needed.
     //  - NAME(Rd, dest, rtype): the same, preceded by the INSN_ENTRY_RELOC prologue.
     //  - NAME(Rd, const Address&, temp): a literal Address goes through the
     //    address form under the Address's relocation spec. For base_plus_offset
     //    the offset is used directly when it is a simm12; otherwise it is split
     //    and the upper part is formed with la() — in temp when Rd is also the
     //    base register, else in Rd itself. Any other mode is ShouldNotReachHere().
     //  - NAME(Rd, Label&): applies the address form to a Label via wrap_label().
 793 #define INSN(NAME)                                                                                 \
 794   void NAME(Register Rd, address dest) {                                                           \
 795     assert_cond(dest != nullptr);                                                                  \
 796     int64_t distance = dest - pc();                                                                \
 797     if (is_simm32(distance)) {                                                                     \
 798       auipc(Rd, (int32_t)distance + 0x800);                                                        \
 799       Assembler::NAME(Rd, Rd, ((int32_t)distance << 20) >> 20);                                    \
 800     } else {                                                                                       \
 801       int32_t offset = 0;                                                                          \
 802       movptr(Rd, dest, offset);                                                                    \
 803       Assembler::NAME(Rd, Rd, offset);                                                             \
 804     }                                                                                              \
 805   }                                                                                                \
 806   INSN_ENTRY_RELOC(void, NAME(Register Rd, address dest, relocInfo::relocType rtype))              \
 807     NAME(Rd, dest);                                                                                \
 808   }                                                                                                \
 809   void NAME(Register Rd, const Address &adr, Register temp = t0) {                                 \
 810     switch (adr.getMode()) {                                                                       \
 811       case Address::literal: {                                                                     \
 812         relocate(adr.rspec(), [&] {                                                                \
 813           NAME(Rd, adr.target());                                                                  \
 814         });                                                                                        \
 815         break;                                                                                     \
 816       }                                                                                            \
 817       case Address::base_plus_offset: {                                                            \
 818         if (is_simm12(adr.offset())) {                                                             \
 819           Assembler::NAME(Rd, adr.base(), adr.offset());                                           \
 820         } else {                                                                                   \
 821           int32_t offset = ((int32_t)adr.offset() << 20) >> 20;                                    \
 822           if (Rd == adr.base()) {                                                                  \
 823             la(temp, Address(adr.base(), adr.offset() - offset));                                  \
 824             Assembler::NAME(Rd, temp, offset);                                                     \
 825           } else {                                                                                 \
 826             la(Rd, Address(adr.base(), adr.offset() - offset));                                    \
 827             Assembler::NAME(Rd, Rd, offset);                                                       \
 828           }                                                                                        \
 829         }                                                                                          \
 830         break;                                                                                     \
 831       }                                                                                            \
 832       default:                                                                                     \
 833         ShouldNotReachHere();                                                                      \
 834     }                                                                                              \
 835   }                                                                                                \
 836   void NAME(Register Rd, Label &L) {                                                               \
 837     wrap_label(Rd, L, &MacroAssembler::NAME);                                                      \
 838   }
 839 
 840   INSN(lb);
 841   INSN(lbu);
 842   INSN(lh);
 843   INSN(lhu);
 844   INSN(lw);
 845   INSN(lwu);
 846   INSN(ld);
 847 
 848 #undef INSN
 849 
     // Floating-point loads (flw, fld). Same structure as the integer loads, except
     // that the address is always formed in the integer register 'temp' (t0 by
     // default), which is therefore clobbered whenever an address has to be built:
     //  - NAME(Rd, address dest, temp): auipc + load when dest - pc() fits in a
     //    signed 32-bit value, otherwise movptr + load.
     //  - NAME(Rd, dest, rtype, temp): the same after the INSN_ENTRY_RELOC prologue.
     //  - NAME(Rd, const Address&, temp): literal goes through the address form
     //    under the Address's relocation spec; base_plus_offset uses a simm12 offset
     //    directly, otherwise splits it and builds the upper part in temp with la().
     // No Label overload is generated for these.
 850 #define INSN(NAME)                                                                                 \
 851   void NAME(FloatRegister Rd, address dest, Register temp = t0) {                                  \
 852     assert_cond(dest != nullptr);                                                                  \
 853     int64_t distance = dest - pc();                                                                \
 854     if (is_simm32(distance)) {                                                                     \
 855       auipc(temp, (int32_t)distance + 0x800);                                                      \
 856       Assembler::NAME(Rd, temp, ((int32_t)distance << 20) >> 20);                                  \
 857     } else {                                                                                       \
 858       int32_t offset = 0;                                                                          \
 859       movptr(temp, dest, offset);                                                                  \
 860       Assembler::NAME(Rd, temp, offset);                                                           \
 861     }                                                                                              \
 862   }                                                                                                \
 863   INSN_ENTRY_RELOC(void, NAME(FloatRegister Rd, address dest,                                      \
 864                               relocInfo::relocType rtype, Register temp = t0))                     \
 865     NAME(Rd, dest, temp);                                                                          \
 866   }                                                                                                \
 867   void NAME(FloatRegister Rd, const Address &adr, Register temp = t0) {                            \
 868     switch (adr.getMode()) {                                                                       \
 869       case Address::literal: {                                                                     \
 870         relocate(adr.rspec(), [&] {                                                                \
 871           NAME(Rd, adr.target(), temp);                                                            \
 872         });                                                                                        \
 873         break;                                                                                     \
 874       }                                                                                            \
 875       case Address::base_plus_offset: {                                                            \
 876         if (is_simm12(adr.offset())) {                                                             \
 877           Assembler::NAME(Rd, adr.base(), adr.offset());                                           \
 878         } else {                                                                                   \
 879           int32_t offset = ((int32_t)adr.offset() << 20) >> 20;                                    \
 880           la(temp, Address(adr.base(), adr.offset() - offset));                                    \
 881           Assembler::NAME(Rd, temp, offset);                                                       \
 882         }                                                                                          \
 883         break;                                                                                     \
 884       }                                                                                            \
 885       default:                                                                                     \
 886         ShouldNotReachHere();                                                                      \
 887     }                                                                                              \
 888   }
 889 
 890   INSN(flw);
 891   INSN(fld);
 892 
 893 #undef INSN
 894 
     // Relocating store entry points for sb/sh/sw/sd (Register source) and fsw/fsd
     // (FloatRegister source): run the INSN_ENTRY_RELOC prologue, then forward to
     // the plain NAME(Rs, dest, temp) overload generated by the store macros that
     // follow in this class.
 895 #define INSN(NAME, REGISTER)                                                                       \
 896   INSN_ENTRY_RELOC(void, NAME(REGISTER Rs, address dest,                                           \
 897                               relocInfo::relocType rtype, Register temp = t0))                     \
 898     NAME(Rs, dest, temp);                                                                          \
 899   }
 900 
 901   INSN(sb,  Register);
 902   INSN(sh,  Register);
 903   INSN(sw,  Register);
 904   INSN(sd,  Register);
 905   INSN(fsw, FloatRegister);
 906   INSN(fsd, FloatRegister);
 907 
 908 #undef INSN
 909 
     // Integer stores (sb, sh, sw, sd). The address is formed in 'temp' (t0 by
     // default); every path that writes temp first asserts that it differs from
     // the value register Rs, since the value must survive until the store.
     //  - NAME(Rs, address dest, temp): auipc + store when dest - pc() fits in a
     //    signed 32-bit value, otherwise movptr + store.
     //  - NAME(Rs, const Address&, temp): literal goes through the address form
     //    under the Address's relocation spec; base_plus_offset uses a simm12 offset
     //    directly (temp untouched, no assert), otherwise splits the offset and
     //    builds the upper part in temp with la().
 910 #define INSN(NAME)                                                                                 \
 911   void NAME(Register Rs, address dest, Register temp = t0) {                                       \
 912     assert_cond(dest != nullptr);                                                                  \
 913     assert_different_registers(Rs, temp);                                                          \
 914     int64_t distance = dest - pc();                                                                \
 915     if (is_simm32(distance)) {                                                                     \
 916       auipc(temp, (int32_t)distance + 0x800);                                                      \
 917       Assembler::NAME(Rs, temp, ((int32_t)distance << 20) >> 20);                                  \
 918     } else {                                                                                       \
 919       int32_t offset = 0;                                                                          \
 920       movptr(temp, dest, offset);                                                                  \
 921       Assembler::NAME(Rs, temp, offset);                                                           \
 922     }                                                                                              \
 923   }                                                                                                \
 924   void NAME(Register Rs, const Address &adr, Register temp = t0) {                                 \
 925     switch (adr.getMode()) {                                                                       \
 926       case Address::literal: {                                                                     \
 927         assert_different_registers(Rs, temp);                                                      \
 928         relocate(adr.rspec(), [&] {                                                                \
 929           NAME(Rs, adr.target(), temp);                                                            \
 930         });                                                                                        \
 931         break;                                                                                     \
 932       }                                                                                            \
 933       case Address::base_plus_offset: {                                                            \
 934         if (is_simm12(adr.offset())) {                                                             \
 935           Assembler::NAME(Rs, adr.base(), adr.offset());                                           \
 936         } else {                                                                                   \
 937           assert_different_registers(Rs, temp);                                                    \
 938           int32_t offset = ((int32_t)adr.offset() << 20) >> 20;                                    \
 939           la(temp, Address(adr.base(), adr.offset() - offset));                                    \
 940           Assembler::NAME(Rs, temp, offset);                                                       \
 941         }                                                                                          \
 942         break;                                                                                     \
 943       }                                                                                            \
 944       default:                                                                                     \
 945         ShouldNotReachHere();                                                                      \
 946     }                                                                                              \
 947   }
 948 
 949   INSN(sb);
 950   INSN(sh);
 951   INSN(sw);
 952   INSN(sd);
 953 
 954 #undef INSN
 955 
     // Floating-point stores (fsw, fsd). Same structure as the integer stores; the
     // value lives in a FloatRegister, so it cannot collide with the integer
     // address register 'temp' and no register-distinctness assert is made.
     //  - NAME(Rs, address dest, temp): auipc + store when dest - pc() fits in a
     //    signed 32-bit value, otherwise movptr + store.
     //  - NAME(Rs, const Address&, temp): literal goes through the address form
     //    under the Address's relocation spec; base_plus_offset uses a simm12 offset
     //    directly, otherwise splits it and builds the upper part in temp with la().
 956 #define INSN(NAME)                                                                                 \
 957   void NAME(FloatRegister Rs, address dest, Register temp = t0) {                                  \
 958     assert_cond(dest != nullptr);                                                                  \
 959     int64_t distance = dest - pc();                                                                \
 960     if (is_simm32(distance)) {                                                                     \
 961       auipc(temp, (int32_t)distance + 0x800);                                                      \
 962       Assembler::NAME(Rs, temp, ((int32_t)distance << 20) >> 20);                                  \
 963     } else {                                                                                       \
 964       int32_t offset = 0;                                                                          \
 965       movptr(temp, dest, offset);                                                                  \
 966       Assembler::NAME(Rs, temp, offset);                                                           \
 967     }                                                                                              \
 968   }                                                                                                \
 969   void NAME(FloatRegister Rs, const Address &adr, Register temp = t0) {                            \
 970     switch (adr.getMode()) {                                                                       \
 971       case Address::literal: {                                                                     \
 972         relocate(adr.rspec(), [&] {                                                                \
 973           NAME(Rs, adr.target(), temp);                                                            \
 974         });                                                                                        \
 975         break;                                                                                     \
 976       }                                                                                            \
 977       case Address::base_plus_offset: {                                                            \
 978         if (is_simm12(adr.offset())) {                                                             \
 979           Assembler::NAME(Rs, adr.base(), adr.offset());                                           \
 980         } else {                                                                                   \
 981           int32_t offset = ((int32_t)adr.offset() << 20) >> 20;                                    \
 982           la(temp, Address(adr.base(), adr.offset() - offset));                                    \
 983           Assembler::NAME(Rs, temp, offset);                                                       \
 984         }                                                                                          \
 985         break;                                                                                     \
 986       }                                                                                            \
 987       default:                                                                                     \
 988         ShouldNotReachHere();                                                                      \
 989     }                                                                                              \
 990   }
 991 
 992   INSN(fsw);
 993   INSN(fsd);
 994 
 995 #undef INSN
 996 
 997 #undef INSN_ENTRY_RELOC
 998 
       // Compare-and-exchange helpers.
       // These two report the outcome through labels: 'succeed' is mandatory
       // (reference), 'fail' is optional (pointer).
       // NOTE(review): what is emitted when fail is null is decided in the .cpp —
       // confirm there.
 999   void cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp, Label &succeed, Label *fail);
1000   void cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, Label &succeed, Label *fail);
       // General form: operand width is given by 'size', ordering by the acquire
       // and release Aqrl arguments, and the outcome is delivered in 'result'.
       // NOTE(review): result_as_bool (default false) presumably selects a boolean
       // success flag rather than the value read — confirm in the .cpp.
1001   void cmpxchg(Register addr, Register expected,
1002                Register new_val,
1003                enum operand_size size,
1004                Assembler::Aqrl acquire, Assembler::Aqrl release,
1005                Register result, bool result_as_bool = false);
       // Weak variant: same operands, no result_as_bool option.
       // NOTE(review): "weak" presumably means it may fail spuriously — confirm.
1006   void cmpxchg_weak(Register addr, Register expected,
1007                     Register new_val,
1008                     enum operand_size size,
1009                     Assembler::Aqrl acquire, Assembler::Aqrl release,
1010                     Register result);
       // Narrow-value forms take three additional temporaries (tmp1..tmp3).
       // NOTE(review): presumably for operand sizes narrower than the atomic
       // instructions handle natively — confirm in the .cpp.
1011   void cmpxchg_narrow_value_helper(Register addr, Register expected,
1012                                    Register new_val,
1013                                    enum operand_size size,
1014                                    Register tmp1, Register tmp2, Register tmp3);
1015   void cmpxchg_narrow_value(Register addr, Register expected,
1016                             Register new_val,
1017                             enum operand_size size,
1018                             Assembler::Aqrl acquire, Assembler::Aqrl release,
1019                             Register result, bool result_as_bool,
1020                             Register tmp1, Register tmp2, Register tmp3);
1021   void weak_cmpxchg_narrow_value(Register addr, Register expected,
1022                                  Register new_val,
1023                                  enum operand_size size,
1024                                  Assembler::Aqrl acquire, Assembler::Aqrl release,
1025                                  Register result,
1026                                  Register tmp1, Register tmp2, Register tmp3);
1027 
       // Atomic add; 'incr' may be a register or a constant.
       // NOTE(review): inferred from the names, to be confirmed in the .cpp —
       // 'prev' receives the previous memory value, 'w' selects a 32-bit access and
       // 'al' acquire+release ordering.
1028   void atomic_add(Register prev, RegisterOrConstant incr, Register addr);
1029   void atomic_addw(Register prev, RegisterOrConstant incr, Register addr);
1030   void atomic_addal(Register prev, RegisterOrConstant incr, Register addr);
1031   void atomic_addalw(Register prev, RegisterOrConstant incr, Register addr);
1032 
       // Atomic exchange with the same suffix scheme.
       // NOTE(review): the trailing 'u' presumably zero-extends the 32-bit
       // previous value — confirm in the .cpp.
1033   void atomic_xchg(Register prev, Register newv, Register addr);
1034   void atomic_xchgw(Register prev, Register newv, Register addr);
1035   void atomic_xchgal(Register prev, Register newv, Register addr);
1036   void atomic_xchgalw(Register prev, Register newv, Register addr);
1037   void atomic_xchgwu(Register prev, Register newv, Register addr);
1038   void atomic_xchgalwu(Register prev, Register newv, Register addr);
1039 
       // True when ReservedCodeCacheSize exceeds branch_range; far_branch_size()
       // uses this to pick between the one- and two-instruction sizes.
1040   static bool far_branches() {
1041     return ReservedCodeCacheSize > branch_range;
1042   }
1043 
1044   // Emit a direct call/jump if the entry address will always be in range,
1045   // otherwise a far call/jump.
1046   // The address must be inside the code cache.
1047   // Supported entry.rspec():
1048   // - relocInfo::external_word_type
1049   // - relocInfo::runtime_call_type
1050   // - relocInfo::none
1051   // In the case of a far call/jump, the entry address is put in the tmp register
1052   // (t0 by default). The tmp register is invalidated.
1053   void far_call(Address entry, Register tmp = t0);
1054   void far_jump(Address entry, Register tmp = t0);
1055 
       // Size in bytes of a far call/jump sequence: two instructions when
       // far_branches() holds, one otherwise.
1056   static int far_branch_size() {
1057     int size_in_bytes = 4;
1058     if (far_branches()) {
1059       size_in_bytes = 2 * 4;  // auipc + jalr, see far_call() & far_jump()
1060     }
1061     return size_in_bytes;
1062   }
1063 
       // NOTE(review): presumably loads the card table's byte-map base address into
       // reg (inferred from the name) — confirm in the .cpp.
1064   void load_byte_map_base(Register reg);
1065 
       // Touch ("bang") the stack at sp - offset by storing zr there.
       // 'offset' is the positive distance below sp. Clobbers t0, which holds the
       // computed address.
1066   void bang_stack_with_offset(int offset) {
1067     // stack grows down, caller passes positive offset
1068     assert(offset > 0, "offset must be positive: the bang address is sp - offset");
1069     sub(t0, sp, offset);
1070     sd(zr, Address(t0));
1071   }
1072 
       // NOTE(review): presumably a patchable load-address sequence that leaves the
       // upper part of dest in reg1 and hands the low part back through 'offset',
       // like the split forms of mv/movptr — confirm in the .cpp.
1073   void la_patchable(Register reg1, const Address &dest, int32_t &offset);
1074 
       // Hook behind the call_Unimplemented() macro: loads the call-site value into
       // t1. Virtual, so a subclass may override it.
1075   virtual void _call_Unimplemented(address call_site) {
1076     mv(t1, call_site);
1077   }
1078 
       // Passes the enclosing function's __PRETTY_FUNCTION__ string, cast to
       // address, as the call site.
1079   #define call_Unimplemented() _call_Unimplemented((address)__PRETTY_FUNCTION__)
1080 
1081   // Frame creation and destruction shared between JITs.
       // NOTE(review): framesize is presumably in bytes — confirm in the .cpp.
1082   void build_frame(int framesize);
1083   void remove_frame(int framesize);
1084 
       // NOTE(review): presumably emits the reserved-stack-area check — confirm
       // in the .cpp.
1085   void reserved_stack_check();
1086 
       // Safepoint polling page access, parameterized by the relocation type to
       // attach.
       // NOTE(review): presumably get_ loads the page address into dest and read_
       // emits the poll load from r + offset — confirm in the .cpp.
1087   void get_polling_page(Register dest, relocInfo::relocType rtype);
1088   void read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype);
1089 
1090   // RISCV64 OpenJDK uses four different types of calls:
1091   //   - direct call: jal pc_relative_offset
1092   //     This is the shortest and the fastest, but the offset has the range: +/-1MB.
1093   //
1094   //   - far call: auipc reg, pc_relative_offset; jalr ra, reg, offset
1095   //     This is longer than a direct call. The offset has
1096   //     the range [-(2G + 2K), 2G - 2K). Addresses in the code cache that are out
1097   //     of this range require an indirect call.
1098   //     If a jump is needed rather than a call, a far jump 'jalr x0, reg, offset' can
1099   //     be used instead.
1100   //     All instructions are embedded at a call site.
1101   //
1102   //   - trampoline call:
1103   //     This is only available in C1/C2-generated code (nmethod). It is a combination
1104   //     of a direct call, which is used if the destination of a call is in range,
1105   //     and a register-indirect call. It has the advantages of reaching anywhere in
1106   //     the RISCV address space and being patchable at runtime when the generated
1107   //     code is being executed by other threads.
1108   //
1109   //     [Main code section]
1110   //       jal trampoline
1111   //     [Stub code section]
1112   //     trampoline:
1113   //       ld    reg, pc + 8 (auipc + ld)
1114   //       jr    reg
1115   //       <64-bit destination address>
1116   //
1117   //     If the destination is in range when the generated code is moved to the code
1118   //     cache, 'jal trampoline' is replaced with 'jal destination' and the trampoline
1119   //     is not used.
1120   //     The optimization does not remove the trampoline from the stub section.
1121   //
1122   //     This is necessary because the trampoline may well be redirected later when
1123   //     code is patched, and the new destination may not be reachable by a simple JAL
1124   //     instruction.
1125   //
1126   //   - indirect call: movptr + jalr
1127   //     This too can reach anywhere in the address space, but it cannot be
1128   //     patched while code is running, so it must only be modified at a safepoint.
1129   //     This form of call is most suitable for targets at fixed addresses, which
1130   //     will never be patched.
1131   //
1132   //
1133   // To patch a trampoline call when the JAL can't reach, we first modify
1134   // the 64-bit destination address in the trampoline, then modify the
1135   // JAL to point to the trampoline, then flush the instruction cache to
1136   // broadcast the change to all executing threads. See
1137   // NativeCall::set_destination_mt_safe for the details.
1138   //
1139   // There is a benign race in that the other thread might observe the
1140   // modified JAL before it observes the modified 64-bit destination
1141   // address. That does not matter because the destination method has been
1142   // invalidated, so there will be a trap at its start.
1143   // For this to work, the destination address in the trampoline is
1144   // always updated, even if we're not using the trampoline.
1145 
1146   // Emit a direct call if the entry address will always be in range,
1147   // otherwise a trampoline call.
1148   // Supported entry.rspec():
1149   // - relocInfo::runtime_call_type
1150   // - relocInfo::opt_virtual_call_type
1151   // - relocInfo::static_call_type
1152   // - relocInfo::virtual_call_type
1153   //
1154   // Return: the call PC or null if CodeCache is full.
1155   address trampoline_call(Address entry);
       // NOTE(review): only the signature is visible here. Judging by the name this
       // emits an inline-cache call to 'entry' (method_index defaults to 0); whether
       // it shares trampoline_call's "call PC or null" return contract should be
       // confirmed at the definition.
1156   address ic_call(address entry, jint method_index = 0);
1157 
1158   // Support for memory inc/dec
1159   // N.B. increment/decrement calls with an Address destination need
1160   // a scratch register to load the value to be incremented. Calls
1161   // which add or subtract a constant that does not fit in a
1162   // sign-extended 12-bit immediate need a second scratch register to
1163   // hold the constant. So an Address increment/decrement may trash both
1164   // scratch registers (tmp1 and tmp2, which default to t0 and t1).
1165 
1166   void increment(const Address dst, int64_t value = 1, Register tmp1 = t0, Register tmp2 = t1);
       // The 'w' variants take a 32-bit amount (int32_t instead of int64_t).
       // NOTE(review): presumably they update a 32-bit memory word rather than a
       // 64-bit one -- confirm against the definitions.
1167   void incrementw(const Address dst, int32_t value = 1, Register tmp1 = t0, Register tmp2 = t1);
1168
       // Mirror images of increment/incrementw, with the same defaults
       // (value = 1, tmp1 = t0, tmp2 = t1).
1169   void decrement(const Address dst, int64_t value = 1, Register tmp1 = t0, Register tmp2 = t1);
1170   void decrementw(const Address dst, int32_t value = 1, Register tmp1 = t0, Register tmp2 = t1);
1171 
1172   void cmpptr(Register src1, Address src2, Label& equal);  // NOTE(review): presumably branches to 'equal' on a match -- confirm
1173
       // Class-initialization barrier on 'klass', using 'tmp' as a temporary.
       // Both label pointers are optional and default to nullptr; how a null label
       // is treated is decided by the definition, which is not visible here.
1174   void clinit_barrier(Register klass, Register tmp, Label* L_fast_path = nullptr, Label* L_slow_path = nullptr);
       // NOTE(review): presumably load the holder class of 'method' (and, for the
       // _cld variant, that holder's class loader data) -- confirm.
1175   void load_method_holder_cld(Register result, Register method);
1176   void load_method_holder(Register holder, Register method);
1177 
1178   void compute_index(Register str1, Register trailing_zeros, Register match_mask,
1179                      Register result, Register char_tmp, Register tmp,
1180                      bool haystack_isL);  // NOTE(review): haystack_isL presumably selects a Latin-1 haystack -- confirm
       // NOTE(review): this pair looks like helper routines for a string-search
       // stub (match masks, trailing-zero counts). Which registers are inputs,
       // outputs or clobbered is defined with the implementations, not here.
1181   void compute_match_mask(Register src, Register pattern, Register match_mask,
1182                           Register mask1, Register mask2);
1183 
1184 #ifdef COMPILER2
       // The declarations in this section exist only when COMPILER2 is defined, so
       // code built without it must not reference them.
       // NOTE(review): by their names these look like multi-word multiplication
       // helpers; the per-register contracts (inputs, outputs, clobbers) are set by
       // the definitions, which are not part of this header section.
1185   void mul_add(Register out, Register in, Register offset,
1186                Register len, Register k, Register tmp);
       // Add-with-carry building blocks. NOTE(review): how cad/cadc/adc differ in
       // carry-in/carry-out handling cannot be told from the signatures -- confirm.
1187   void cad(Register dst, Register src1, Register src2, Register carry);
1188   void cadc(Register dst, Register src1, Register src2, Register carry);
1189   void adc(Register dst, Register src1, Register src2, Register carry);
1190   void add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo,
1191                        Register src1, Register src2, Register carry);
       // Inner loops of the multiplication. The 32x32 and 64x64 variants share an
       // identical parameter list; the 128x128 variant takes its own set.
1192   void multiply_32_x_32_loop(Register x, Register xstart, Register x_xstart,
1193                              Register y, Register y_idx, Register z,
1194                              Register carry, Register product,
1195                              Register idx, Register kdx);
1196   void multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart,
1197                              Register y, Register y_idx, Register z,
1198                              Register carry, Register product,
1199                              Register idx, Register kdx);
1200   void multiply_128_x_128_loop(Register y, Register z,
1201                                Register carry, Register carry2,
1202                                Register idx, Register jdx,
1203                                Register yz_idx1, Register yz_idx2,
1204                                Register tmp, Register tmp3, Register tmp4,
1205                                Register tmp6, Register product_hi);
       // Entry point taking operands x/xlen and y/ylen and destination z/zlen.
       // NOTE(review): presumably z = x * y on int arrays -- confirm at the definition.
1206   void multiply_to_len(Register x, Register xlen, Register y, Register ylen,
1207                        Register z, Register zlen,
1208                        Register tmp1, Register tmp2, Register tmp3, Register tmp4,
1209                        Register tmp5, Register tmp6, Register product_hi);
1210 #endif
1211 
1212   void inflate_lo32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);
1213   void inflate_hi32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);
1214
1215   void ctzc_bit(Register Rd, Register Rs, bool isLL = false, Register tmp1 = t0, Register tmp2 = t1);
1216
       // Memory zeroing / filling helpers.
       // zero_words is overloaded: the (Register, uint64_t) form takes the count as a
       // C++ value and returns nothing; the (Register, Register) form takes the count
       // in a register and returns an address.
       // NOTE(review): the meaning of that returned address, and whether counts are
       // in words or bytes, is not visible in this section -- confirm.
1217   void zero_words(Register base, uint64_t cnt);
1218   address zero_words(Register ptr, Register cnt);
1219   void fill_words(Register base, Register cnt, Register value);
1220   void zero_memory(Register addr, Register len, Register tmp);
1221   void zero_dcache_blocks(Register base, Register cnt, Register tmp1, Register tmp2);
1222
1223   // shift left by shamt and add
1224   void shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt);
1225
1226   // test single bit in Rs, result is set to Rd
1227   void test_bit(Register Rd, Register Rs, uint32_t bit_pos);
1228 
1229   // The "_safe" variants of the float conversion instructions deal with
1230   // some exceptional inputs, e.g. converting NaN, +Inf or -Inf to int,
1231   // float or double triggers an exception, so these situations need
1232   // special handling to get correct results.
1233   void fcvt_w_s_safe(Register dst, FloatRegister src, Register tmp = t0);
       // Naming follows the RISC-V fcvt.{w,l}.{s,d} mnemonics: w/l = 32-/64-bit
       // integer destination, s/d = single-/double-precision source. Each variant
       // takes an optional temporary register that defaults to t0.
1234   void fcvt_l_s_safe(Register dst, FloatRegister src, Register tmp = t0);
1235   void fcvt_w_d_safe(Register dst, FloatRegister src, Register tmp = t0);
1236   void fcvt_l_d_safe(Register dst, FloatRegister src, Register tmp = t0);
1237
       // NOTE(review): presumably implement Java's Math.round rounding for float and
       // double, with ftmp as a floating-point temporary -- confirm at the definitions.
1238   void java_round_float(Register dst, FloatRegister src, FloatRegister ftmp);
1239   void java_round_double(Register dst, FloatRegister src, FloatRegister ftmp);
1240 
1241   // vector load/store unit-stride instructions
1242   void vlex_v(VectorRegister vd, Register base, Assembler::SEW sew, VectorMask vm = unmasked) {
1243     // Unit-stride vector load: emit the vle<N>_v form whose element
1244     // width matches 'sew'. The operands vd, base and vm are forwarded
1245     // unchanged.
1246     if (sew == Assembler::e64) {
1247       vle64_v(vd, base, vm);
1248     } else if (sew == Assembler::e32) {
1249       vle32_v(vd, base, vm);
1250     } else if (sew == Assembler::e16) {
1251       vle16_v(vd, base, vm);
1252     } else {
1253       // Assembler::e8 lands here, and so does every other SEW value:
1254       // the 8-bit form is the fallback, exactly like a switch
1255       // 'default' label.
1256       vle8_v(vd, base, vm);
1257     }
1258   }
1259 
1260   void vsex_v(VectorRegister store_data, Register base, Assembler::SEW sew, VectorMask vm = unmasked) {
1261     // Unit-stride vector store: emit the vse<N>_v form whose element
1262     // width matches 'sew'. The operands store_data, base and vm are
1263     // forwarded unchanged.
1264     if (sew == Assembler::e64) {
1265       vse64_v(store_data, base, vm);
1266     } else if (sew == Assembler::e32) {
1267       vse32_v(store_data, base, vm);
1268     } else if (sew == Assembler::e16) {
1269       vse16_v(store_data, base, vm);
1270     } else {
1271       // Assembler::e8 lands here, and so does every other SEW value:
1272       // the 8-bit form is the fallback, exactly like a switch
1273       // 'default' label.
1274       vse8_v(store_data, base, vm);
1275     }
1276   }
1277 
1278   // vector pseudo instructions
1279   inline void vl1r_v(VectorRegister vd, Register rs) {
         // Pseudo-instruction: vl1r.v is emitted as vl1re8.v with the same operands.
1280     vl1re8_v(vd, rs);
1281   }
1282 
1283   inline void vmnot_m(VectorRegister vd, VectorRegister vs) {
         // Pseudo-instruction: vmnot.m is emitted as vmnand.mm with vs as both
         // sources (NAND of a value with itself is its complement).
1284     vmnand_mm(vd, vs, vs);
1285   }
1286 
1287   inline void vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked) {
         // Pseudo-instruction: vncvt.x.x.w is emitted as vnsrl.wx with x0 as the
         // scalar operand, i.e. a narrowing shift right by zero.
1288     vnsrl_wx(vd, vs, x0, vm);
1289   }
1290 
1291   inline void vneg_v(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked) {
         // Pseudo-instruction: vneg.v is emitted as vrsub.vx with x0 as the scalar
         // operand (reverse subtract, so each element becomes 0 - vs[i]).
1292     vrsub_vx(vd, vs, x0, vm);
1293   }
1294 
1295   inline void vfneg_v(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked) {
         // Pseudo-instruction: vfneg.v is emitted as vfsgnjn.vv with vs supplying
         // both source operands.
1296     vfsgnjn_vv(vd, vs, vs, vm);
1297   }
1298 
1299   inline void vfabs_v(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked) {
         // Pseudo-instruction: vfabs.v is emitted as vfsgnjx.vv with vs supplying
         // both source operands.
1300     vfsgnjx_vv(vd, vs, vs, vm);
1301   }
1302 
1303   inline void vmsgt_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
         // Signed "greater than" is emitted as vmslt.vv with the two sources
         // swapped (a > b  <=>  b < a).
1304     vmslt_vv(vd, vs1, vs2, vm);
1305   }
1306 
1307   inline void vmsgtu_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
         // Unsigned "greater than" is emitted as vmsltu.vv with the two sources
         // swapped (a > b  <=>  b < a).
1308     vmsltu_vv(vd, vs1, vs2, vm);
1309   }
1310 
1311   inline void vmsge_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
         // Signed "greater than or equal" is emitted as vmsle.vv with the two
         // sources swapped (a >= b  <=>  b <= a).
1312     vmsle_vv(vd, vs1, vs2, vm);
1313   }
1314 
1315   inline void vmsgeu_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
         // Unsigned "greater than or equal" is emitted as vmsleu.vv with the two
         // sources swapped (a >= b  <=>  b <= a).
1316     vmsleu_vv(vd, vs1, vs2, vm);
1317   }
1318 
1319   inline void vmfgt_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
         // Floating-point "greater than" is emitted as vmflt.vv with the two
         // sources swapped.
1320     vmflt_vv(vd, vs1, vs2, vm);
1321   }
1322 
1323   inline void vmfge_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
         // Floating-point "greater than or equal" is emitted as vmfle.vv with the
         // two sources swapped.
1324     vmfle_vv(vd, vs1, vs2, vm);
1325   }
1326 
1327   // Copy mask register
1328   inline void vmmv_m(VectorRegister vd, VectorRegister vs) {
         // Pseudo-instruction: vmmv.m is emitted as vmand.mm with vs as both
         // sources (vs & vs == vs).
1329     vmand_mm(vd, vs, vs);
1330   }
1331 
1332   // Clear mask register
1333   inline void vmclr_m(VectorRegister vd) {
         // Pseudo-instruction: vmclr.m is emitted as vmxor.mm with vd as destination
         // and both sources (vd ^ vd == 0).
1334     vmxor_mm(vd, vd, vd);
1335   }
1336 
1337   // Set mask register
1338   inline void vmset_m(VectorRegister vd) {
         // Pseudo-instruction: vmset.m is emitted as vmxnor.mm with vd as destination
         // and both sources (~(vd ^ vd) is all ones).
1339     vmxnor_mm(vd, vd, vd);
1340   }
1341 
1342   static const int zero_words_block_size;  // declared without an initializer here; the value comes from an out-of-class definition
1343 
1344   void cast_primitive_type(BasicType type, Register Rt) {
1345     // Adjust the value held in Rt, in place, according to 'type'.
1346     // Rt serves as both source and destination of every operation.
1347     switch (type) {
1348       // Rt = (zr <u Rt): 1 for any non-zero value, 0 otherwise.
1349       case T_BOOLEAN: sltu(Rt, zr, Rt);        return;
1350       // Zero-extension for char.
1351       case T_CHAR:    zero_extend(Rt, Rt, 16); return;
1352       // Sign-extension for byte, short and int.
1353       case T_BYTE:    sign_extend(Rt, Rt, 8);  return;
1354       case T_SHORT:   sign_extend(Rt, Rt, 16); return;
1355       case T_INT:     sign_extend(Rt, Rt, 32); return;
1356       // These types leave Rt untouched.
1357       case T_LONG:
1358       case T_VOID:
1359       case T_FLOAT:
1360       case T_DOUBLE:
1361         return;
1362       // Any other BasicType is not expected here.
1363       default:
1364         ShouldNotReachHere();
1365     }
1366     // Only reached through the 'default' arm above.
1367   }
1368 
1369   // Float/double compare writing an integer into 'result'.
       // NOTE(review): unordered_result presumably selects the value produced when
       // the operands are unordered (a NaN is involved) -- confirm.
1370   void float_compare(Register result, FloatRegister Rs1, FloatRegister Rs2, int unordered_result);
1371   void double_compare(Register result, FloatRegister Rs1, FloatRegister Rs2, int unordered_result);
1372
1373   // Zero/Sign-extend
       // NOTE(review): 'bits' is presumably the width being extended from;
       // cast_primitive_type in this class passes 8, 16 and 32 -- confirm.
1374   void zero_extend(Register dst, Register src, int bits);
1375   void sign_extend(Register dst, Register src, int bits);
1376
1377   // compare src1 and src2 and get -1/0/1 in dst.
1378   // if [src1 > src2], dst = 1;
1379   // if [src1 == src2], dst = 0;
1380   // if [src1 < src2], dst = -1;
1381   void cmp_l2i(Register dst, Register src1, Register src2, Register tmp = t0);
1382
1383   // support for argument shuffling
       // NOTE(review): each *_move presumably copies one argument from location
       // 'src' to location 'dst', using tmp (t0 by default) as scratch -- confirm.
1384   void move32_64(VMRegPair src, VMRegPair dst, Register tmp = t0);
1385   void float_move(VMRegPair src, VMRegPair dst, Register tmp = t0);
1386   void long_move(VMRegPair src, VMRegPair dst, Register tmp = t0);
1387   void double_move(VMRegPair src, VMRegPair dst, Register tmp = t0);
       // object_move additionally takes the OopMap and frame layout information, and
       // an out-parameter 'receiver_offset'. NOTE(review): that pointer is presumably
       // written only when is_receiver is true -- confirm.
1388   void object_move(OopMap* map,
1389                    int oop_handle_offset,
1390                    int framesize_in_slots,
1391                    VMRegPair src,
1392                    VMRegPair dst,
1393                    bool is_receiver,
1394                    int* receiver_offset);
       // NOTE(review): presumably emits a call to the runtime entry 'dest', using
       // tmp as scratch -- confirm.
1395   void rt_call(address dest, Register tmp = t0);
1396 
1397   void call(const address dest, Register temp = t0) {
         // Register-indirect call to 'dest' through 'temp' (t0 by default).
         // Both arguments are checked first: dest must be non-null and temp must be
         // a real register.
1398     assert_cond(dest != nullptr);
1399     assert(temp != noreg, "expecting a register");
         // NOTE(review): 'offset' starts at 0, is handed to mv() and is then used as
         // the jalr immediate; presumably mv() takes it by reference and stores the
         // part of the address it did not materialize in temp -- confirm.
1400     int32_t offset = 0;
1401     mv(temp, dest, offset);
         // Jump to temp + offset, with x1 as the link (destination) register.
1402     jalr(x1, temp, offset);
1403   }
1404 
1405   inline void ret() {
         // Emitted as jalr x0, x1, 0: jump to the address held in x1 (the register
         // call() links into) and discard the link value by writing it to x0.
1406     jalr(x0, x1, 0);
1407   }
1408 
1409 #ifdef ASSERT
1410   // Shorthand for cleaning up after a failed trampoline call generation (see
1411   // trampoline_call()): resets every label passed in, in argument order.
1412   template<typename LabelT>
1413   void reset_labels(LabelT& lbl) {  // base case: a single label
1414     lbl.reset();
1415   }
1416   template<typename LabelT, typename... More>
1417   void reset_labels(LabelT& lbl, More&... more) {
1418     lbl.reset();            // first label ...
1419     reset_labels(more...);  // ... then the remaining ones
1420   }
1421 #endif
1422 
1423 private:
1424 
1425   void repne_scan(Register addr, Register value, Register count, Register tmp);  // NOTE(review): presumably scans up to 'count' entries at 'addr' for 'value' -- confirm
1426 
1427   void ld_constant(Register dest, const Address &const_addr) {
         // Load the value at const_addr into dest.
         // With NearCpool set, const_addr is used directly as the ld operand.
1428     if (NearCpool) {
1429       ld(dest, const_addr);
1430       return;
1431     }
         // Otherwise wrap the target in an InternalAddress and, under that address's
         // relocation spec, let la_patchable() produce a base in dest plus an offset;
         // the value is then loaded from dest + offset back into dest.
1432     InternalAddress target_addr(const_addr.target());
1433     relocate(target_addr.rspec(), [&] {
1434       int32_t addr_offset;
1435       la_patchable(dest, target_addr, addr_offset);
1436       ld(dest, Address(dest, addr_offset));
1437     });
1438   }
1439 
1440   int bitset_to_regs(unsigned int bitset, unsigned char* regs);  // NOTE(review): presumably expands 'bitset' into register numbers in 'regs' and returns how many -- confirm
       // NOTE(review): presumably turns 'dst' into an Address usable by a memory
       // operation, with tmp as scratch -- confirm.
1441   Address add_memory_helper(const Address dst, Register tmp);
1442
       // Named after the RISC-V load-reserved / store-conditional instruction pair;
       // 'size' selects the operand size and the Aqrl argument the ordering bits.
       // NOTE(review): which registers receive the loaded value and the
       // store-conditional status is not visible in the signatures -- confirm.
1443   void load_reserved(Register addr, enum operand_size size, Assembler::Aqrl acquire);
1444   void store_conditional(Register addr, Register new_val, enum operand_size size, Assembler::Aqrl release);
1445
1446 public:
       // NOTE(review): lock/unlock sequences for object 'obj'; 'slow' is presumably
       // the label branched to when the inline path cannot complete, and hdr, tmp1
       // and tmp2 are presumably clobbered -- confirm at the definitions.
1447   void lightweight_lock(Register obj, Register hdr, Register tmp1, Register tmp2, Label& slow);
1448   void lightweight_unlock(Register obj, Register hdr, Register tmp1, Register tmp2, Label& slow);
1449 };
1450 
1451 #ifdef ASSERT
     // Defined only when ASSERT is set. This definition of the AbstractAssembler
     // member (the pd_ prefix suggests a platform-dependent hook) unconditionally
     // returns false.
1452 inline bool AbstractAssembler::pd_check_instruction_mark() { return false; }
1453 #endif
1454 
1455 /**
1456  * class SkipIfEqual:
1457  *
1458  * Instantiating this class will result in assembly code being output that will
1459  * jump around any code emitted between the creation of the instance and its
1460  * automatic destruction at the end of a scope block, depending on the value of
1461  * the flag passed to the constructor, which will be checked at run-time.
1462  */
1463 class SkipIfEqual {
1464  private:
      // NOTE(review): presumably the MacroAssembler passed to the constructor,
      // through which the skip sequence is emitted -- confirm.
1465   MacroAssembler* _masm;
      // NOTE(review): presumably the jump target that the destructor binds at the
      // end of the skipped region -- confirm.
1466   Label _label;
1467
1468  public:
      // flag_addr is the address of the bool flag and 'value' the value it is
      // compared with; the flag is checked at run time, not when the code is
      // generated.
      // NOTE(review): the class declares a destructor but does not delete its copy
      // operations; copying an instance would presumably finish the skip region
      // twice -- confirm whether that needs guarding.
1469    SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value);
1470    ~SkipIfEqual();
1471 };
1472 
1473 #endif // CPU_RISCV_MACROASSEMBLER_RISCV_HPP
--- EOF ---