/*
 * Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
 * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef CPU_RISCV_MACROASSEMBLER_RISCV_HPP
#define CPU_RISCV_MACROASSEMBLER_RISCV_HPP

#include "asm/assembler.hpp"
#include "code/vmreg.hpp"
#include "metaprogramming/enableIf.hpp"
#include "nativeInst_riscv.hpp"
#include "oops/compressedOops.hpp"
#include "utilities/powerOfTwo.hpp"

// MacroAssembler extends Assembler by frequently used macros.
//
// Instructions for which a 'better' code sequence exists depending
// on arguments should also go in here.

class MacroAssembler: public Assembler {

 public:
  MacroAssembler(CodeBuffer* code) : Assembler(code) {
  }
  virtual ~MacroAssembler() {}

  void safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod);

  // Alignment
  int align(int modulus, int extra_offset = 0);
  static inline void assert_alignment(address pc, int alignment = NativeInstruction::instruction_size) {
    assert(is_aligned(pc, alignment), "bad alignment");
  }
  // Stack frame creation/removal
  // Note that SP must be updated to the right place before saving/restoring RA and FP
  // because signal based thread suspend/resume could happen asynchronously.
  void enter() {
    addi(sp, sp, - 2 * wordSize);
    sd(ra, Address(sp, wordSize));
    sd(fp, Address(sp));
    addi(fp, sp, 2 * wordSize);
  }

  void leave() {
    addi(sp, fp, - 2 * wordSize);
    ld(fp, Address(sp));
    ld(ra, Address(sp, wordSize));
    addi(sp, sp, 2 * wordSize);
  }
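
  // A sketch of the frame enter() builds (stack grows downward; not normative):
  //
  //   fp ->  +----------+  <- old sp
  //          | saved ra |     fp - 1 * wordSize
  //          | saved fp |     fp - 2 * wordSize
  //   sp ->  +----------+
  //
  // leave() recomputes sp from fp, so it remains correct even if the frame
  // body moved sp in the meantime.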
  // Support for getting the JavaThread pointer (i.e., a reference to thread-local information)
  // The pointer will be loaded into the thread register.
  void get_thread(Register thread);

  // Support for VM calls
  //
  // It is imperative that all calls into the VM are handled via the call_VM macros.
  // They make sure that the stack linkage is set up correctly. call_VM's correspond
  // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points.

  void call_VM(Register oop_result,
               address entry_point,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               address entry_point,
               Register arg_1,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               address entry_point,
               Register arg_1, Register arg_2,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               address entry_point,
               Register arg_1, Register arg_2, Register arg_3,
               bool check_exceptions = true);

  // Overloadings with last_Java_sp
  void call_VM(Register oop_result,
               Register last_java_sp,
               address entry_point,
               int number_of_arguments = 0,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               Register last_java_sp,
               address entry_point,
               Register arg_1,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               Register last_java_sp,
               address entry_point,
               Register arg_1, Register arg_2,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               Register last_java_sp,
               address entry_point,
               Register arg_1, Register arg_2, Register arg_3,
               bool check_exceptions = true);

  void get_vm_result(Register oop_result, Register java_thread);
  void get_vm_result_2(Register metadata_result, Register java_thread);

  // These always tightly bind to MacroAssembler::call_VM_leaf_base,
  // bypassing the virtual implementation
  void call_VM_leaf(address entry_point,
                    int number_of_arguments = 0);
  void call_VM_leaf(address entry_point,
                    Register arg_0);
  void call_VM_leaf(address entry_point,
                    Register arg_0, Register arg_1);
  void call_VM_leaf(address entry_point,
                    Register arg_0, Register arg_1, Register arg_2);

  // These always tightly bind to MacroAssembler::call_VM_base,
  // bypassing the virtual implementation
  void super_call_VM_leaf(address entry_point, Register arg_0);
  void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1);
  void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2);
  void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3);

  // last Java Frame (fills frame anchor)
  void set_last_Java_frame(Register last_java_sp, Register last_java_fp, address last_java_pc, Register tmp);
  void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Label &last_java_pc, Register tmp);
  void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Register last_java_pc, Register tmp);

  // thread in the default location (xthread)
  void reset_last_Java_frame(bool clear_fp);

  virtual void call_VM_leaf_base(
    address entry_point,                // the entry point
    int     number_of_arguments,        // the number of arguments to pop after the call
    Label*  retaddr = NULL
  );

  virtual void call_VM_leaf_base(
    address entry_point,                // the entry point
    int     number_of_arguments,        // the number of arguments to pop after the call
    Label&  retaddr) {
    call_VM_leaf_base(entry_point, number_of_arguments, &retaddr);
  }

  virtual void call_VM_base(           // returns the register containing the thread upon return
    Register oop_result,               // where an oop-result ends up if any; use noreg otherwise
    Register java_thread,              // the thread if computed before     ; use noreg otherwise
    Register last_java_sp,             // to set up last_Java_frame in stubs; use noreg otherwise
    address  entry_point,              // the entry point
    int      number_of_arguments,      // the number of arguments (w/o thread) to pop after the call
    bool     check_exceptions          // whether to check for pending exceptions after return
  );

  void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions);

  virtual void check_and_handle_earlyret(Register java_thread);
  virtual void check_and_handle_popframe(Register java_thread);

  void resolve_weak_handle(Register result, Register tmp1, Register tmp2);
  void resolve_oop_handle(Register result, Register tmp1, Register tmp2);
  void resolve_jobject(Register value, Register tmp1, Register tmp2);

  void movoop(Register dst, jobject obj);
  void mov_metadata(Register dst, Metadata* obj);
  void bang_stack_size(Register size, Register tmp);
  void set_narrow_oop(Register dst, jobject obj);
  void set_narrow_klass(Register dst, Klass* k);

  void load_mirror(Register dst, Register method, Register tmp1, Register tmp2);
  void access_load_at(BasicType type, DecoratorSet decorators, Register dst,
                      Address src, Register tmp1, Register tmp2);
  void access_store_at(BasicType type, DecoratorSet decorators, Address dst,
                       Register src, Register tmp1, Register tmp2, Register tmp3);
  void load_klass(Register dst, Register src);
  void store_klass(Register dst, Register src);
  void cmp_klass(Register oop, Register trial_klass, Register tmp, Label &L);

  void encode_klass_not_null(Register r);
  void decode_klass_not_null(Register r);
  void encode_klass_not_null(Register dst, Register src, Register tmp = xheapbase);
  void decode_klass_not_null(Register dst, Register src, Register tmp = xheapbase);
  void decode_heap_oop_not_null(Register r);
  void decode_heap_oop_not_null(Register dst, Register src);
  void decode_heap_oop(Register d, Register s);
  void decode_heap_oop(Register r) { decode_heap_oop(r, r); }
  void encode_heap_oop(Register d, Register s);
  void encode_heap_oop(Register r) { encode_heap_oop(r, r); }
  void load_heap_oop(Register dst, Address src, Register tmp1 = noreg,
                     Register tmp2 = noreg, DecoratorSet decorators = 0);
  void load_heap_oop_not_null(Register dst, Address src, Register tmp1 = noreg,
                              Register tmp2 = noreg, DecoratorSet decorators = 0);
  void store_heap_oop(Address dst, Register src, Register tmp1 = noreg,
                      Register tmp2 = noreg, Register tmp3 = noreg, DecoratorSet decorators = 0);

  void store_klass_gap(Register dst, Register src);

  // currently unimplemented
  // Used for storing NULL. All other oop constants should be
  // stored using routines that take a jobject.
  void store_heap_oop_null(Address dst);

  // This dummy is to prevent a call to store_heap_oop from
  // converting a zero (linked NULL) into a Register by giving
  // the compiler two choices it can't resolve

  void store_heap_oop(Address dst, void* dummy);

  // Support for NULL-checks
  //
  // Generates code that causes a NULL OS exception if the content of reg is NULL.
  // If the accessed location is M[reg + offset] and the offset is known, provide the
  // offset. No explicit code generation is needed if the offset is within a certain
  // range (0 <= offset <= page_size).

  virtual void null_check(Register reg, int offset = -1);
  static bool needs_explicit_null_check(intptr_t offset);
  static bool uses_implicit_null_check(void* address);
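
  // For instance (a sketch; registers and the field offset are illustrative):
  // before loading a field at a small offset, no explicit check is needed,
  // because the access itself faults on NULL and the signal handler turns
  // the fault into the exception:
  //
  //   __ null_check(x10, 16);        // emits nothing: 0 <= 16 <= page_size
  //   __ ld(x11, Address(x10, 16));  // the load is the implicit NULL check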

  // idiv variants that deal with MINLONG as dividend and -1 as divisor
  int corrected_idivl(Register result, Register rs1, Register rs2,
                      bool want_remainder);
  int corrected_idivq(Register result, Register rs1, Register rs2,
                      bool want_remainder);
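
  // On RISC-V this needs no fix-up branches: the ISA already defines the
  // overflow case to produce the Java-mandated results (min_jint / -1 ==
  // min_jint, min_jint % -1 == 0). A usage sketch (the returned int is
  // presumably the pc offset of the dividing instruction, kept for
  // implicit-exception bookkeeping):
  //
  //   int off = __ corrected_idivl(x10, x10, x11, /* want_remainder */ false);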

  // interface method calling
  void lookup_interface_method(Register recv_klass,
                               Register intf_klass,
                               RegisterOrConstant itable_index,
                               Register method_result,
                               Register scan_tmp,
                               Label& no_such_interface,
                               bool return_method = true);

  // virtual method calling
  // n.b. x86 allows RegisterOrConstant for vtable_index
  void lookup_virtual_method(Register recv_klass,
                             RegisterOrConstant vtable_index,
                             Register method_result);

  // Form an address from base + offset in Rd. Rd may or may not
  // actually be used: you must use the Address that is returned. It
  // is up to you to ensure that the shift provided matches the size
  // of your data.
  Address form_address(Register Rd, Register base, long byte_offset);

  // allocation
  void tlab_allocate(
    Register obj,                   // result: pointer to object after successful allocation
    Register var_size_in_bytes,     // object size in bytes if unknown at compile time; invalid otherwise
    int      con_size_in_bytes,     // object size in bytes if   known at compile time
    Register tmp1,                  // temp register
    Register tmp2,                  // temp register
    Label&   slow_case,             // continuation point if fast allocation fails
    bool is_far = false
  );

  // Test sub_klass against super_klass, with fast and slow paths.

  // The fast path produces a tri-state answer: yes / no / maybe-slow.
  // One of the three labels can be NULL, meaning take the fall-through.
  // If super_check_offset is -1, the value is loaded up from super_klass.
  // No registers are killed, except tmp_reg.
  void check_klass_subtype_fast_path(Register sub_klass,
                                     Register super_klass,
                                     Register tmp_reg,
                                     Label* L_success,
                                     Label* L_failure,
                                     Label* L_slow_path,
                                     Register super_check_offset = noreg);

  // The rest of the type check; must be wired to a corresponding fast path.
  // It does not repeat the fast path logic, so don't use it standalone.
  // The tmp1_reg and tmp2_reg can be noreg, if no temps are available.
  // Updates the sub's secondary super cache as necessary.
  void check_klass_subtype_slow_path(Register sub_klass,
                                     Register super_klass,
                                     Register tmp1_reg,
                                     Register tmp2_reg,
                                     Label* L_success,
                                     Label* L_failure);

  void check_klass_subtype(Register sub_klass,
                           Register super_klass,
                           Register tmp_reg,
                           Label& L_success);

  Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0);

  // only if +VerifyOops
  void _verify_oop(Register reg, const char* s, const char* file, int line);
  void _verify_oop_addr(Address addr, const char* s, const char* file, int line);

  void _verify_oop_checked(Register reg, const char* s, const char* file, int line) {
    if (VerifyOops) {
      _verify_oop(reg, s, file, line);
    }
  }
  void _verify_oop_addr_checked(Address reg, const char* s, const char* file, int line) {
    if (VerifyOops) {
      _verify_oop_addr(reg, s, file, line);
    }
  }

  void _verify_method_ptr(Register reg, const char* msg, const char* file, int line) {}
  void _verify_klass_ptr(Register reg, const char* msg, const char* file, int line) {}

#define verify_oop(reg) _verify_oop_checked(reg, "broken oop " #reg, __FILE__, __LINE__)
#define verify_oop_msg(reg, msg) _verify_oop_checked(reg, "broken oop " #reg ", " #msg, __FILE__, __LINE__)
#define verify_oop_addr(addr) _verify_oop_addr_checked(addr, "broken oop addr " #addr, __FILE__, __LINE__)
#define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__)
#define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__)

  // A more convenient access to fence for our purposes
  // We use four bits to encode the read and write bits of the predecessor and
  // successor sets, extending r to ir and w to ow when UseConservativeFence is enabled.
  enum Membar_mask_bits {
    StoreStore = 0b0101,               // (pred = ow   + succ =   ow)
    LoadStore  = 0b1001,               // (pred = ir   + succ =   ow)
    StoreLoad  = 0b0110,               // (pred = ow   + succ =   ir)
    LoadLoad   = 0b1010,               // (pred = ir   + succ =   ir)
    AnyAny     = LoadStore | StoreLoad // (pred = iorw + succ = iorw)
  };

  void membar(uint32_t order_constraint);

  static void membar_mask_to_pred_succ(uint32_t order_constraint, uint32_t& predecessor, uint32_t& successor) {
    predecessor = (order_constraint >> 2) & 0x3;
    successor = order_constraint & 0x3;

    // extend rw -> iorw:
    // 01(w) -> 0101(ow)
    // 10(r) -> 1010(ir)
    // 11(rw)-> 1111(iorw)
    if (UseConservativeFence) {
      predecessor |= predecessor << 2;
      successor |= successor << 2;
    }
  }

  static int pred_succ_to_membar_mask(uint32_t predecessor, uint32_t successor) {
    return ((predecessor & 0x3) << 2) | (successor & 0x3);
  }
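
  // A worked example: membar(StoreLoad) carries mask 0b0110, so
  // membar_mask_to_pred_succ() yields predecessor = 0b01 (w) and
  // successor = 0b10 (r). With UseConservativeFence these widen to
  // 0b0101 (ow) and 0b1010 (ir), i.e. the emitted barrier is
  //
  //   fence ow, ir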

  // prints msg, dumps registers and stops execution
  void stop(const char* msg);

  static void debug64(char* msg, int64_t pc, int64_t regs[]);

  void unimplemented(const char* what = "");

  void should_not_reach_here() { stop("should not reach here"); }

  static address target_addr_for_insn(address insn_addr);

  // Required platform-specific helpers for Label::patch_instructions.
  // They _shadow_ the declarations in AbstractAssembler, which are undefined.
  static int pd_patch_instruction_size(address branch, address target);
  static void pd_patch_instruction(address branch, address target, const char* file = NULL, int line = 0) {
    pd_patch_instruction_size(branch, target);
  }
  static address pd_call_destination(address branch) {
    return target_addr_for_insn(branch);
  }

  static int patch_oop(address insn_addr, address o);
  address emit_trampoline_stub(int insts_call_instruction_offset, address target);
  void emit_static_call_stub();

  // The following 4 methods return the offset of the appropriate move instruction

  // Support for fast byte/short loading with zero extension (depending on particular CPU)
  int load_unsigned_byte(Register dst, Address src);
  int load_unsigned_short(Register dst, Address src);

  // Support for fast byte/short loading with sign extension (depending on particular CPU)
  int load_signed_byte(Register dst, Address src);
  int load_signed_short(Register dst, Address src);

  // Load and store values by size and signed-ness
  void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg);
  void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg);

 public:
  // Standard pseudoinstructions
  void nop();
  void mv(Register Rd, Register Rs);
  void notr(Register Rd, Register Rs);
  void neg(Register Rd, Register Rs);
  void negw(Register Rd, Register Rs);
  void sext_w(Register Rd, Register Rs);
  void zext_b(Register Rd, Register Rs);
  void seqz(Register Rd, Register Rs);          // set if = zero
  void snez(Register Rd, Register Rs);          // set if != zero
  void sltz(Register Rd, Register Rs);          // set if < zero
  void sgtz(Register Rd, Register Rs);          // set if > zero

  // Float pseudoinstructions
  void fmv_s(FloatRegister Rd, FloatRegister Rs);
  void fabs_s(FloatRegister Rd, FloatRegister Rs);    // single-precision absolute value
  void fneg_s(FloatRegister Rd, FloatRegister Rs);

  // Double pseudoinstructions
  void fmv_d(FloatRegister Rd, FloatRegister Rs);
  void fabs_d(FloatRegister Rd, FloatRegister Rs);
  void fneg_d(FloatRegister Rd, FloatRegister Rs);

  // Pseudoinstructions for control and status registers
  void rdinstret(Register Rd);                  // read instruction-retired counter
  void rdcycle(Register Rd);                    // read cycle counter
  void rdtime(Register Rd);                     // read time
  void csrr(Register Rd, unsigned csr);         // read csr
  void csrw(unsigned csr, Register Rs);         // write csr
  void csrs(unsigned csr, Register Rs);         // set bits in csr
  void csrc(unsigned csr, Register Rs);         // clear bits in csr
  void csrwi(unsigned csr, unsigned imm);
  void csrsi(unsigned csr, unsigned imm);
  void csrci(unsigned csr, unsigned imm);
  void frcsr(Register Rd);                      // read floating-point csr
  void fscsr(Register Rd, Register Rs);         // swap floating-point csr
  void fscsr(Register Rs);                      // write floating-point csr
  void frrm(Register Rd);                       // read floating-point rounding mode
  void fsrm(Register Rd, Register Rs);          // swap floating-point rounding mode
  void fsrm(Register Rs);                       // write floating-point rounding mode
  void fsrmi(Register Rd, unsigned imm);
  void fsrmi(unsigned imm);
  void frflags(Register Rd);                    // read floating-point exception flags
  void fsflags(Register Rd, Register Rs);       // swap floating-point exception flags
  void fsflags(Register Rs);                    // write floating-point exception flags
  void fsflagsi(Register Rd, unsigned imm);
  void fsflagsi(unsigned imm);

  void beqz(Register Rs, const address &dest);
  void bnez(Register Rs, const address &dest);
  void blez(Register Rs, const address &dest);
  void bgez(Register Rs, const address &dest);
  void bltz(Register Rs, const address &dest);
  void bgtz(Register Rs, const address &dest);
  void la(Register Rd, Label &label);
  void la(Register Rd, const address &dest);
  void la(Register Rd, const Address &adr);
  // label
  void beqz(Register Rs, Label &l, bool is_far = false);
  void bnez(Register Rs, Label &l, bool is_far = false);
  void blez(Register Rs, Label &l, bool is_far = false);
  void bgez(Register Rs, Label &l, bool is_far = false);
  void bltz(Register Rs, Label &l, bool is_far = false);
  void bgtz(Register Rs, Label &l, bool is_far = false);
  void float_beq(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void float_bne(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void float_ble(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void float_bge(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void float_blt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void float_bgt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void double_beq(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void double_bne(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void double_ble(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void double_bge(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void double_blt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void double_bgt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);

 private:
  int push_reg(unsigned int bitset, Register stack);
  int pop_reg(unsigned int bitset, Register stack);
  int push_fp(unsigned int bitset, Register stack);
  int pop_fp(unsigned int bitset, Register stack);
#ifdef COMPILER2
  int push_v(unsigned int bitset, Register stack);
  int pop_v(unsigned int bitset, Register stack);
#endif // COMPILER2

 public:
  void push_reg(Register Rs);
  void pop_reg(Register Rd);
  void push_reg(RegSet regs, Register stack) { if (regs.bits()) push_reg(regs.bits(), stack); }
  void pop_reg(RegSet regs, Register stack)  { if (regs.bits()) pop_reg(regs.bits(), stack); }
  void push_fp(FloatRegSet regs, Register stack) { if (regs.bits()) push_fp(regs.bits(), stack); }
  void pop_fp(FloatRegSet regs, Register stack)  { if (regs.bits()) pop_fp(regs.bits(), stack); }
#ifdef COMPILER2
  void push_v(VectorRegSet regs, Register stack) { if (regs.bits()) push_v(regs.bits(), stack); }
  void pop_v(VectorRegSet regs, Register stack)  { if (regs.bits()) pop_v(regs.bits(), stack); }
#endif // COMPILER2

  // Push and pop everything that might be clobbered by a native
  // runtime call except t0 and t1. (They are always
  // temporary registers, so we don't have to protect them.)
  // Additional registers can be excluded in a passed RegSet.
  void push_call_clobbered_registers_except(RegSet exclude);
  void pop_call_clobbered_registers_except(RegSet exclude);

  void push_call_clobbered_registers() {
    push_call_clobbered_registers_except(RegSet());
  }
  void pop_call_clobbered_registers() {
    pop_call_clobbered_registers_except(RegSet());
  }
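
  // Typical use (a sketch; the excluded register and the helper are
  // illustrative, not a real entry point): preserve everything a native call
  // may clobber, but keep the result register live across the call:
  //
  //   __ push_call_clobbered_registers_except(RegSet::of(x10));
  //   __ rt_call(CAST_FROM_FN_PTR(address, some_runtime_helper)); // hypothetical
  //   __ pop_call_clobbered_registers_except(RegSet::of(x10));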

  void push_CPU_state(bool save_vectors = false, int vector_size_in_bytes = 0);
  void pop_CPU_state(bool restore_vectors = false, int vector_size_in_bytes = 0);

  // if heap base register is used - reinit it with the correct value
  void reinit_heapbase();

  void bind(Label& L) {
    Assembler::bind(L);
    // fences across basic blocks should not be merged
    code()->clear_last_insn();
  }

  // mv
  void mv(Register Rd, address addr)          { li(Rd, (int64_t)addr); }

  template<typename T, ENABLE_IF(std::is_integral<T>::value)>
  inline void mv(Register Rd, T o)            { li(Rd, (int64_t)o); }

  inline void mvw(Register Rd, int32_t imm32) { mv(Rd, imm32); }

  void mv(Register Rd, Address dest);
  void mv(Register Rd, RegisterOrConstant src);

  // logic
  void andrw(Register Rd, Register Rs1, Register Rs2);
  void orrw(Register Rd, Register Rs1, Register Rs2);
  void xorrw(Register Rd, Register Rs1, Register Rs2);

  // revb
  void revb_h_h(Register Rd, Register Rs, Register tmp = t0);                           // reverse bytes in halfword in lower 16 bits, sign-extend
  void revb_w_w(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);      // reverse bytes in lower word, sign-extend
  void revb_h_h_u(Register Rd, Register Rs, Register tmp = t0);                         // reverse bytes in halfword in lower 16 bits, zero-extend
  void revb_h_w_u(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);    // reverse bytes in halfwords in lower 32 bits, zero-extend
  void revb_h_helper(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in upper 16 bits (48:63) and move to lower
  void revb_h(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);        // reverse bytes in each halfword
  void revb_w(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);        // reverse bytes in each word
  void revb(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);          // reverse bytes in doubleword

  void ror_imm(Register dst, Register src, uint32_t shift, Register tmp = t0);
  void andi(Register Rd, Register Rn, int64_t imm, Register tmp = t0);
  void orptr(Address adr, RegisterOrConstant src, Register tmp1 = t0, Register tmp2 = t1);

  void cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, Label &succeed, Label *fail);
  void cmpxchg(Register addr, Register expected,
               Register new_val,
               enum operand_size size,
               Assembler::Aqrl acquire, Assembler::Aqrl release,
               Register result, bool result_as_bool = false);
  void cmpxchg_weak(Register addr, Register expected,
                    Register new_val,
                    enum operand_size size,
                    Assembler::Aqrl acquire, Assembler::Aqrl release,
                    Register result);
  void cmpxchg_narrow_value_helper(Register addr, Register expected,
                                   Register new_val,
                                   enum operand_size size,
                                   Register tmp1, Register tmp2, Register tmp3);
  void cmpxchg_narrow_value(Register addr, Register expected,
                            Register new_val,
                            enum operand_size size,
                            Assembler::Aqrl acquire, Assembler::Aqrl release,
                            Register result, bool result_as_bool,
                            Register tmp1, Register tmp2, Register tmp3);
  void weak_cmpxchg_narrow_value(Register addr, Register expected,
                                 Register new_val,
                                 enum operand_size size,
                                 Assembler::Aqrl acquire, Assembler::Aqrl release,
                                 Register result,
                                 Register tmp1, Register tmp2, Register tmp3);

  void atomic_add(Register prev, RegisterOrConstant incr, Register addr);
  void atomic_addw(Register prev, RegisterOrConstant incr, Register addr);
  void atomic_addal(Register prev, RegisterOrConstant incr, Register addr);
  void atomic_addalw(Register prev, RegisterOrConstant incr, Register addr);

  void atomic_xchg(Register prev, Register newv, Register addr);
  void atomic_xchgw(Register prev, Register newv, Register addr);
  void atomic_xchgal(Register prev, Register newv, Register addr);
  void atomic_xchgalw(Register prev, Register newv, Register addr);
  void atomic_xchgwu(Register prev, Register newv, Register addr);
  void atomic_xchgalwu(Register prev, Register newv, Register addr);

  static bool far_branches() {
    return ReservedCodeCacheSize > branch_range;
  }

  // Emit a direct call/jump if the entry address will always be in range,
  // otherwise a far call/jump.
  // The address must be inside the code cache.
  // Supported entry.rspec():
  // - relocInfo::external_word_type
  // - relocInfo::runtime_call_type
  // - relocInfo::none
  // In the case of a far call/jump, the entry address is put in the tmp register.
  // The tmp register is invalidated.
  void far_call(Address entry, Register tmp = t0);
  void far_jump(Address entry, Register tmp = t0);
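
  // For example (a sketch; the target and relocation are illustrative):
  //
  //   // reaches anywhere in the code cache; expands to auipc + jalr when
  //   // far_branches() is true, clobbering t0, and to a single jal otherwise
  //   __ far_call(RuntimeAddress(StubRoutines::forward_exception_entry()));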

  static int far_branch_size() {
    if (far_branches()) {
      return 2 * 4;  // auipc + jalr, see far_call() & far_jump()
    } else {
      return 4;
    }
  }

  void load_byte_map_base(Register reg);

  void bang_stack_with_offset(int offset) {
    // stack grows down, caller passes positive offset
    assert(offset > 0, "must bang with positive offset");
    sub(t0, sp, offset);
    sd(zr, Address(t0));
  }

  void la_patchable(Register reg1, const Address &dest, int32_t &offset);

  virtual void _call_Unimplemented(address call_site) {
    mv(t1, call_site);
  }

  #define call_Unimplemented() _call_Unimplemented((address)__PRETTY_FUNCTION__)

  // Frame creation and destruction shared between JITs.
  void build_frame(int framesize);
  void remove_frame(int framesize);

  void reserved_stack_check();

  void get_polling_page(Register dest, relocInfo::relocType rtype);
  void read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype);

  // RISCV64 OpenJDK uses four different types of calls:
  //   - direct call: jal pc_relative_offset
  //     This is the shortest and the fastest, but the offset has the range: +/-1MB.
  //
  //   - far call: auipc reg, pc_relative_offset; jalr ra, reg, offset
  //     This is longer than a direct call. The offset has
  //     the range [-(2G + 2K), 2G - 2K). Addresses outside this range in the
  //     code cache require an indirect call.
  //     If a jump is needed rather than a call, a far jump 'jalr x0, reg, offset' can
  //     be used instead.
  //     All instructions are embedded at a call site.
  //
  //   - trampoline call:
  //     This is only available in C1/C2-generated code (nmethod). It is a combination
  //     of a direct call, which is used if the destination of a call is in range,
  //     and a register-indirect call. It has the advantages of reaching anywhere in
  //     the RISCV address space and being patchable at runtime when the generated
  //     code is being executed by other threads.
  //
  //     [Main code section]
  //       jal trampoline
  //     [Stub code section]
  //     trampoline:
  //       ld    reg, pc + 8 (auipc + ld)
  //       jr    reg
  //       <64-bit destination address>
  //
  //     If the destination is in range when the generated code is moved to the code
  //     cache, 'jal trampoline' is replaced with 'jal destination' and the trampoline
  //     is not used.
  //     The optimization does not remove the trampoline from the stub section.
  //     This is necessary because the trampoline may well be redirected later when
  //     code is patched, and the new destination may not be reachable by a simple JAL
  //     instruction.
  //
  //   - indirect call: movptr + jalr
  //     This too can reach anywhere in the address space, but it cannot be
  //     patched while code is running, so it must only be modified at a safepoint.
  //     This form of call is most suitable for targets at fixed addresses, which
  //     will never be patched.
  //
  //
  // To patch a trampoline call when the JAL can't reach, we first modify
  // the 64-bit destination address in the trampoline, then modify the
  // JAL to point to the trampoline, then flush the instruction cache to
  // broadcast the change to all executing threads. See
  // NativeCall::set_destination_mt_safe for the details.
  //
  // There is a benign race in that the other thread might observe the
  // modified JAL before it observes the modified 64-bit destination
  // address. That does not matter because the destination method has been
  // invalidated, so there will be a trap at its start.
  // For this to work, the destination address in the trampoline is
  // always updated, even if we're not using the trampoline.

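  // In outline (a sketch of the order enforced by
  // NativeCall::set_destination_mt_safe, not its exact code):
  //
  //   // 1. store the new 64-bit address into the trampoline's data slot
  //   // 2. if the JAL cannot reach, repoint it at the trampoline entry
  //   // 3. flush the icache so other harts observe the change
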
  // Emit a direct call if the entry address will always be in range,
  // otherwise a trampoline call.
  // Supported entry.rspec():
  // - relocInfo::runtime_call_type
  // - relocInfo::opt_virtual_call_type
  // - relocInfo::static_call_type
  // - relocInfo::virtual_call_type
  //
  // Return: the call PC or NULL if CodeCache is full.
  address trampoline_call(Address entry);
  address ic_call(address entry, jint method_index = 0);

  // Support for memory inc/dec
  // n.b. increment/decrement calls with an Address destination will
  // need to use a scratch register to load the value to be
  // incremented. increment/decrement calls which add or subtract a
  // constant value other than sign-extended 12-bit immediate will need
  // to use a 2nd scratch register to hold the constant. so, an address
  // increment/decrement may trash both t0 and t1.

  void increment(const Address dst, int64_t value = 1);
  void incrementw(const Address dst, int32_t value = 1);

  void decrement(const Address dst, int64_t value = 1);
  void decrementw(const Address dst, int32_t value = 1);

  void cmpptr(Register src1, Address src2, Label& equal);

  void clinit_barrier(Register klass, Register tmp, Label* L_fast_path = NULL, Label* L_slow_path = NULL);
  void load_method_holder_cld(Register result, Register method);
  void load_method_holder(Register holder, Register method);

  void compute_index(Register str1, Register trailing_zeros, Register match_mask,
                     Register result, Register char_tmp, Register tmp,
                     bool haystack_isL);
  void compute_match_mask(Register src, Register pattern, Register match_mask,
                          Register mask1, Register mask2);

#ifdef COMPILER2
  void mul_add(Register out, Register in, Register offset,
               Register len, Register k, Register tmp);
  void cad(Register dst, Register src1, Register src2, Register carry);
  void cadc(Register dst, Register src1, Register src2, Register carry);
  void adc(Register dst, Register src1, Register src2, Register carry);
  void add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo,
                       Register src1, Register src2, Register carry);
  void multiply_32_x_32_loop(Register x, Register xstart, Register x_xstart,
                             Register y, Register y_idx, Register z,
                             Register carry, Register product,
                             Register idx, Register kdx);
  void multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart,
                             Register y, Register y_idx, Register z,
                             Register carry, Register product,
                             Register idx, Register kdx);
  void multiply_128_x_128_loop(Register y, Register z,
                               Register carry, Register carry2,
                               Register idx, Register jdx,
                               Register yz_idx1, Register yz_idx2,
                               Register tmp, Register tmp3, Register tmp4,
                               Register tmp6, Register product_hi);
  void multiply_to_len(Register x, Register xlen, Register y, Register ylen,
                       Register z, Register zlen,
                       Register tmp1, Register tmp2, Register tmp3, Register tmp4,
                       Register tmp5, Register tmp6, Register product_hi);
#endif

  void inflate_lo32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);
  void inflate_hi32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);

  void ctzc_bit(Register Rd, Register Rs, bool isLL = false, Register tmp1 = t0, Register tmp2 = t1);

  void zero_words(Register base, u_int64_t cnt);
  address zero_words(Register ptr, Register cnt);
  void fill_words(Register base, Register cnt, Register value);
  void zero_memory(Register addr, Register len, Register tmp);

  // shift left by shamt and add
  void shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt);
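
  // For example (a sketch assuming shadd() mirrors the Zba sh1add/sh2add/sh3add
  // semantics, i.e. Rd = (Rs1 << shamt) + Rs2):
  //
  //   __ shadd(x10, x11, x12, t0, 3);   // x10 = x12 + (x11 << 3)
  //
  // which is the usual array-element addressing step (base + index * 8).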

  // The "_safe" variants of the float conversion instructions handle inputs
  // such as NaN, +Inf and -Inf, for which the raw conversions raise exception
  // flags and do not produce the results Java requires, and fix up the result
  // accordingly.
  void fcvt_w_s_safe(Register dst, FloatRegister src, Register tmp = t0);
  void fcvt_l_s_safe(Register dst, FloatRegister src, Register tmp = t0);
  void fcvt_w_d_safe(Register dst, FloatRegister src, Register tmp = t0);
  void fcvt_l_d_safe(Register dst, FloatRegister src, Register tmp = t0);

  // vector load/store unit-stride instructions
  void vlex_v(VectorRegister vd, Register base, Assembler::SEW sew, VectorMask vm = unmasked) {
    switch (sew) {
      case Assembler::e64:
        vle64_v(vd, base, vm);
        break;
      case Assembler::e32:
        vle32_v(vd, base, vm);
        break;
      case Assembler::e16:
        vle16_v(vd, base, vm);
        break;
      case Assembler::e8: // fall through
      default:
        vle8_v(vd, base, vm);
        break;
    }
  }

  void vsex_v(VectorRegister store_data, Register base, Assembler::SEW sew, VectorMask vm = unmasked) {
    switch (sew) {
      case Assembler::e64:
        vse64_v(store_data, base, vm);
        break;
      case Assembler::e32:
        vse32_v(store_data, base, vm);
        break;
      case Assembler::e16:
        vse16_v(store_data, base, vm);
        break;
      case Assembler::e8: // fall through
      default:
        vse8_v(store_data, base, vm);
        break;
    }
  }

  static const int zero_words_block_size;

  void cast_primitive_type(BasicType type, Register Rt) {
    switch (type) {
      case T_BOOLEAN:
        sltu(Rt, zr, Rt);
        break;
      case T_CHAR   :
        zero_extend(Rt, Rt, 16);
        break;
      case T_BYTE   :
        sign_extend(Rt, Rt, 8);
        break;
      case T_SHORT  :
        sign_extend(Rt, Rt, 16);
        break;
      case T_INT    :
        addw(Rt, Rt, zr);
        break;
      case T_LONG   : /* nothing to do */        break;
      case T_VOID   : /* nothing to do */        break;
      case T_FLOAT  : /* nothing to do */        break;
      case T_DOUBLE : /* nothing to do */        break;
      default: ShouldNotReachHere();
    }
  }

  // float cmp with unordered_result
  void float_compare(Register result, FloatRegister Rs1, FloatRegister Rs2, int unordered_result);
  void double_compare(Register result, FloatRegister Rs1, FloatRegister Rs2, int unordered_result);

  // Zero/Sign-extend
  void zero_extend(Register dst, Register src, int bits);
  void sign_extend(Register dst, Register src, int bits);

  // compare src1 and src2 and get -1/0/1 in dst.
  // if [src1 > src2], dst = 1;
  // if [src1 == src2], dst = 0;
  // if [src1 < src2], dst = -1;
  void cmp_l2i(Register dst, Register src1, Register src2, Register tmp = t0);

  // vext
  void vmnot_m(VectorRegister vd, VectorRegister vs);
  void vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked);
  void vfneg_v(VectorRegister vd, VectorRegister vs);

  // support for argument shuffling
  void move32_64(VMRegPair src, VMRegPair dst, Register tmp = t0);
  void float_move(VMRegPair src, VMRegPair dst, Register tmp = t0);
  void long_move(VMRegPair src, VMRegPair dst, Register tmp = t0);
  void double_move(VMRegPair src, VMRegPair dst, Register tmp = t0);
  void object_move(OopMap* map,
                   int oop_handle_offset,
                   int framesize_in_slots,
                   VMRegPair src,
                   VMRegPair dst,
                   bool is_receiver,
                   int* receiver_offset);

  void rt_call(address dest, Register tmp = t0);

 private:

#ifdef ASSERT
  // Template short-hand support to clean up after a failed call to trampoline
  // call generation (see trampoline_call() above), when a set of Labels must
  // be reset (before returning).
  template<typename Label, typename... More>
  void reset_labels(Label& lbl, More&... more) {
    lbl.reset(); reset_labels(more...);
  }
  template<typename Label>
  void reset_labels(Label& lbl) {
    lbl.reset();
  }
#endif
  void repne_scan(Register addr, Register value, Register count, Register tmp);

  // Return true if an address is within the 48-bit RISCV64 address space,
  // i.e. in the lower (user-space) half of the sv48 canonical range:
  // bits 63..47 must all be zero.
  bool is_valid_riscv64_address(address addr) {
    return ((uintptr_t)addr >> 47) == 0;
  }
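
  // For example (values for illustration only): 0x00007fffdeadbeef passes the
  // check, while a kernel-half sv48 address such as 0xffff800000000000 leaves
  // nonzero bits after the shift and is rejected.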

  void ld_constant(Register dest, const Address &const_addr) {
    if (NearCpool) {
      ld(dest, const_addr);
    } else {
      int32_t offset = 0;
      la_patchable(dest, InternalAddress(const_addr.target()), offset);
      ld(dest, Address(dest, offset));
    }
  }

  int bitset_to_regs(unsigned int bitset, unsigned char* regs);
  Address add_memory_helper(const Address dst);

  void load_reserved(Register addr, enum operand_size size, Assembler::Aqrl acquire);
  void store_conditional(Register addr, Register new_val, enum operand_size size, Assembler::Aqrl release);

 public:
  void fast_lock(Register obj, Register hdr, Register tmp1, Register tmp2, Register tmp3, Label& slow);
  void fast_unlock(Register obj, Register hdr, Register tmp1, Register tmp2, Label& slow);
};

#ifdef ASSERT
inline bool AbstractAssembler::pd_check_instruction_mark() { return false; }
#endif
/**
 * class SkipIfEqual:
 *
 * Instantiating this class will result in assembly code being output that will
 * jump around any code emitted between the creation of the instance and its
 * automatic destruction at the end of a scope block, depending on the value of
 * the flag passed to the constructor, which will be checked at run-time.
 */
class SkipIfEqual {
 private:
  MacroAssembler* _masm;
  Label _label;

 public:
  SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value);
  ~SkipIfEqual();
};

#endif // CPU_RISCV_MACROASSEMBLER_RISCV_HPP