1 /*
  2  * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
  3  * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
  4  * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
  5  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  6  *
  7  * This code is free software; you can redistribute it and/or modify it
  8  * under the terms of the GNU General Public License version 2 only, as
  9  * published by the Free Software Foundation.
 10  *
 11  * This code is distributed in the hope that it will be useful, but WITHOUT
 12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 14  * version 2 for more details (a copy is included in the LICENSE file that
 15  * accompanied this code).
 16  *
 17  * You should have received a copy of the GNU General Public License version
 18  * 2 along with this work; if not, write to the Free Software Foundation,
 19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 20  *
 21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 22  * or visit www.oracle.com if you need additional information or have any
 23  * questions.
 24  *
 25  */
 26 
 27 #ifndef CPU_RISCV_MACROASSEMBLER_RISCV_HPP
 28 #define CPU_RISCV_MACROASSEMBLER_RISCV_HPP
 29 
 30 #include "asm/assembler.hpp"
 31 #include "metaprogramming/enableIf.hpp"
 32 #include "oops/compressedOops.hpp"
 33 #include "utilities/powerOfTwo.hpp"
 34 
 35 // MacroAssembler extends Assembler by frequently used macros.
 36 //
 37 // Instructions for which a 'better' code sequence exists depending
 38 // on arguments should also go in here.
 39 
 40 class MacroAssembler: public Assembler {
 41 
 42  public:
  // Construct a MacroAssembler emitting into the given CodeBuffer.
  MacroAssembler(CodeBuffer* code) : Assembler(code) {
  }
  // Virtual: this class declares virtual members (call_VM_base etc.) and is
  // intended to be subclassed.
  virtual ~MacroAssembler() {}
 46 
 47   void safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod);
 48 
 49   // Place a fence.i after code may have been modified due to a safepoint.
 50   void safepoint_ifence();
 51 
 52   // Alignment
 53   void align(int modulus, int extra_offset = 0);
 54 
 55   // Stack frame creation/removal
 56   // Note that SP must be updated to the right place before saving/restoring RA and FP
 57   // because signal based thread suspend/resume could happen asynchronously.
  void enter() {
    // Allocate the two-slot frame first, then store into it, so the
    // memory below SP is never relied upon (see note above about
    // asynchronous signal-based suspend/resume).
    addi(sp, sp, - 2 * wordSize);
    sd(ra, Address(sp, wordSize));   // save return address above saved FP
    sd(fp, Address(sp));             // save caller's frame pointer
    addi(fp, sp, 2 * wordSize);      // FP points just above the saved pair
  }
 64 
  void leave() {
    // Mirror image of enter(): locate the saved pair via FP, reload
    // FP and RA, then release the frame. SP is repositioned before the
    // loads for the same async suspend/resume reason noted above enter().
    addi(sp, fp, - 2 * wordSize);
    ld(fp, Address(sp));             // restore caller's frame pointer
    ld(ra, Address(sp, wordSize));   // restore return address
    addi(sp, sp, 2 * wordSize);
  }
 71 
 72 
  // Support for getting the JavaThread pointer (i.e., a reference to thread-local information)
 74   // The pointer will be loaded into the thread register.
 75   void get_thread(Register thread);
 76 
 77   // Support for VM calls
 78   //
 79   // It is imperative that all calls into the VM are handled via the call_VM macros.
 80   // They make sure that the stack linkage is setup correctly. call_VM's correspond
 81   // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points.
 82 
 83   void call_VM(Register oop_result,
 84                address entry_point,
 85                bool check_exceptions = true);
 86   void call_VM(Register oop_result,
 87                address entry_point,
 88                Register arg_1,
 89                bool check_exceptions = true);
 90   void call_VM(Register oop_result,
 91                address entry_point,
 92                Register arg_1, Register arg_2,
 93                bool check_exceptions = true);
 94   void call_VM(Register oop_result,
 95                address entry_point,
 96                Register arg_1, Register arg_2, Register arg_3,
 97                bool check_exceptions = true);
 98 
 99   // Overloadings with last_Java_sp
100   void call_VM(Register oop_result,
101                Register last_java_sp,
102                address entry_point,
103                int number_of_arguments = 0,
104                bool check_exceptions = true);
105   void call_VM(Register oop_result,
106                Register last_java_sp,
107                address entry_point,
108                Register arg_1,
109                bool check_exceptions = true);
110   void call_VM(Register oop_result,
111                Register last_java_sp,
112                address entry_point,
113                Register arg_1, Register arg_2,
114                bool check_exceptions = true);
115   void call_VM(Register oop_result,
116                Register last_java_sp,
117                address entry_point,
118                Register arg_1, Register arg_2, Register arg_3,
119                bool check_exceptions = true);
120 
121   void get_vm_result(Register oop_result, Register java_thread);
122   void get_vm_result_2(Register metadata_result, Register java_thread);
123 
124   // These always tightly bind to MacroAssembler::call_VM_leaf_base
125   // bypassing the virtual implementation
126   void call_VM_leaf(address entry_point,
127                     int number_of_arguments = 0);
128   void call_VM_leaf(address entry_point,
129                     Register arg_0);
130   void call_VM_leaf(address entry_point,
131                     Register arg_0, Register arg_1);
132   void call_VM_leaf(address entry_point,
133                     Register arg_0, Register arg_1, Register arg_2);
134 
135   // These always tightly bind to MacroAssembler::call_VM_base
136   // bypassing the virtual implementation
137   void super_call_VM_leaf(address entry_point, Register arg_0);
138   void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1);
139   void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2);
140   void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3);
141 
142   // last Java Frame (fills frame anchor)
143   void set_last_Java_frame(Register last_java_sp, Register last_java_fp, address last_java_pc, Register tmp);
144   void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Label &last_java_pc, Register tmp);
145   void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Register last_java_pc, Register tmp);
146 
147   // thread in the default location (xthread)
148   void reset_last_Java_frame(bool clear_fp);
149 
150   void call_native(address entry_point,
151                    Register arg_0);
152   void call_native_base(
153     address entry_point,                // the entry point
154     Label*  retaddr = NULL
155   );
156 
157   virtual void call_VM_leaf_base(
158     address entry_point,                // the entry point
159     int     number_of_arguments,        // the number of arguments to pop after the call
160     Label*  retaddr = NULL
161   );
162 
  // Convenience overload taking the return-address label by reference;
  // forwards to the pointer-taking variant above.
  virtual void call_VM_leaf_base(
    address entry_point,                // the entry point
    int     number_of_arguments,        // the number of arguments to pop after the call
    Label&  retaddr) {
    call_VM_leaf_base(entry_point, number_of_arguments, &retaddr);
  }
169 
170   virtual void call_VM_base(           // returns the register containing the thread upon return
171     Register oop_result,               // where an oop-result ends up if any; use noreg otherwise
172     Register java_thread,              // the thread if computed before     ; use noreg otherwise
173     Register last_java_sp,             // to set up last_Java_frame in stubs; use noreg otherwise
174     address  entry_point,              // the entry point
175     int      number_of_arguments,      // the number of arguments (w/o thread) to pop after the call
176     bool     check_exceptions          // whether to check for pending exceptions after return
177   );
178 
179   void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions);
180 
181   virtual void check_and_handle_earlyret(Register java_thread);
182   virtual void check_and_handle_popframe(Register java_thread);
183 
184   void resolve_weak_handle(Register result, Register tmp);
185   void resolve_oop_handle(Register result, Register tmp = x15);
186   void resolve_jobject(Register value, Register thread, Register tmp);
187 
188   void movoop(Register dst, jobject obj, bool immediate = false);
189   void mov_metadata(Register dst, Metadata* obj);
190   void bang_stack_size(Register size, Register tmp);
191   void set_narrow_oop(Register dst, jobject obj);
192   void set_narrow_klass(Register dst, Klass* k);
193 
194   void load_mirror(Register dst, Register method, Register tmp = x15);
195   void access_load_at(BasicType type, DecoratorSet decorators, Register dst,
196                       Address src, Register tmp1, Register thread_tmp);
197   void access_store_at(BasicType type, DecoratorSet decorators, Address dst,
198                        Register src, Register tmp1, Register thread_tmp);
199   void load_klass(Register dst, Register src);
200   void store_klass(Register dst, Register src);
201   void cmp_klass(Register oop, Register trial_klass, Register tmp, Label &L);
202 
203   void encode_klass_not_null(Register r);
204   void decode_klass_not_null(Register r);
205   void encode_klass_not_null(Register dst, Register src, Register tmp = xheapbase);
206   void decode_klass_not_null(Register dst, Register src, Register tmp = xheapbase);
207   void decode_heap_oop_not_null(Register r);
208   void decode_heap_oop_not_null(Register dst, Register src);
209   void decode_heap_oop(Register d, Register s);
210   void decode_heap_oop(Register r) { decode_heap_oop(r, r); }
211   void encode_heap_oop(Register d, Register s);
212   void encode_heap_oop(Register r) { encode_heap_oop(r, r); };
213   void load_heap_oop(Register dst, Address src, Register tmp1 = noreg,
214                      Register thread_tmp = noreg, DecoratorSet decorators = 0);
215   void load_heap_oop_not_null(Register dst, Address src, Register tmp1 = noreg,
216                               Register thread_tmp = noreg, DecoratorSet decorators = 0);
217   void store_heap_oop(Address dst, Register src, Register tmp1 = noreg,
218                       Register thread_tmp = noreg, DecoratorSet decorators = 0);
219 
220   void store_klass_gap(Register dst, Register src);
221 
222   // currently unimplemented
223   // Used for storing NULL. All other oop constants should be
224   // stored using routines that take a jobject.
225   void store_heap_oop_null(Address dst);
226 
  // This dummy is to prevent a call to store_heap_oop from
  // converting a zero (like NULL) into a Register by giving
  // the compiler two choices it can't resolve.
230 
231   void store_heap_oop(Address dst, void* dummy);
232 
233   // Support for NULL-checks
234   //
235   // Generates code that causes a NULL OS exception if the content of reg is NULL.
236   // If the accessed location is M[reg + offset] and the offset is known, provide the
  // offset. No explicit code generation is needed if the offset is within a certain
238   // range (0 <= offset <= page_size).
239 
240   virtual void null_check(Register reg, int offset = -1);
241   static bool needs_explicit_null_check(intptr_t offset);
242   static bool uses_implicit_null_check(void* address);
243 
244   // idiv variant which deals with MINLONG as dividend and -1 as divisor
245   int corrected_idivl(Register result, Register rs1, Register rs2,
246                       bool want_remainder);
247   int corrected_idivq(Register result, Register rs1, Register rs2,
248                       bool want_remainder);
249 
250   // interface method calling
251   void lookup_interface_method(Register recv_klass,
252                                Register intf_klass,
253                                RegisterOrConstant itable_index,
254                                Register method_result,
255                                Register scan_tmp,
256                                Label& no_such_interface,
257                                bool return_method = true);
258 
259   // virtual method calling
260   // n.n. x86 allows RegisterOrConstant for vtable_index
261   void lookup_virtual_method(Register recv_klass,
262                              RegisterOrConstant vtable_index,
263                              Register method_result);
264 
  // Form an address from base + offset in Rd. Rd may or may not
  // actually be used: you must use the Address that is returned. It
  // is up to you to ensure that the shift provided matches the size
  // of your data.
269   Address form_address(Register Rd, Register base, long byte_offset);
270 
271   // allocation
272   void tlab_allocate(
273     Register obj,                   // result: pointer to object after successful allocation
274     Register var_size_in_bytes,     // object size in bytes if unknown at compile time; invalid otherwise
275     int      con_size_in_bytes,     // object size in bytes if   known at compile time
276     Register tmp1,                  // temp register
277     Register tmp2,                  // temp register
    Label&   slow_case,             // continuation point if fast allocation fails
279     bool is_far = false
280   );
281 
282   void eden_allocate(
283     Register obj,                   // result: pointer to object after successful allocation
284     Register var_size_in_bytes,     // object size in bytes if unknown at compile time; invalid otherwise
285     int      con_size_in_bytes,     // object size in bytes if   known at compile time
286     Register tmp,                   // temp register
287     Label&   slow_case,             // continuation point if fast allocation fails
288     bool is_far = false
289   );
290 
291   // Test sub_klass against super_klass, with fast and slow paths.
292 
293   // The fast path produces a tri-state answer: yes / no / maybe-slow.
294   // One of the three labels can be NULL, meaning take the fall-through.
295   // If super_check_offset is -1, the value is loaded up from super_klass.
296   // No registers are killed, except tmp_reg
297   void check_klass_subtype_fast_path(Register sub_klass,
298                                      Register super_klass,
299                                      Register tmp_reg,
300                                      Label* L_success,
301                                      Label* L_failure,
302                                      Label* L_slow_path,
303                                      Register super_check_offset = noreg);
304 
  // The rest of the type check; must be wired to a corresponding fast path.
  // It does not repeat the fast path logic, so don't use it standalone.
  // The tmp1_reg and tmp2_reg can be noreg, if no temps are available.
308   // Updates the sub's secondary super cache as necessary.
309   void check_klass_subtype_slow_path(Register sub_klass,
310                                      Register super_klass,
311                                      Register tmp1_reg,
312                                      Register tmp2_reg,
313                                      Label* L_success,
314                                      Label* L_failure);
315 
316   void check_klass_subtype(Register sub_klass,
317                            Register super_klass,
318                            Register tmp_reg,
319                            Label& L_success);
320 
321   Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0);
322 
323   // only if +VerifyOops
324   void verify_oop(Register reg, const char* s = "broken oop");
325   void verify_oop_addr(Address addr, const char* s = "broken oop addr");
326 
327   void _verify_method_ptr(Register reg, const char* msg, const char* file, int line) {}
328   void _verify_klass_ptr(Register reg, const char* msg, const char* file, int line) {}
329 
330 #define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__)
331 #define verify_klass_ptr(reg) _verify_method_ptr(reg, "broken klass " #reg, __FILE__, __LINE__)
332 
  // A more convenient access to fence for our purposes.
  // We use four bits to indicate the read and write bits of the
  // predecessor and successor sets, and extend i for r, o for w
  // when UseConservativeFence is enabled.
  enum Membar_mask_bits {
    StoreStore = 0b0101,               // (pred = ow   + succ =   ow)
    LoadStore  = 0b1001,               // (pred = ir   + succ =   ow)
    StoreLoad  = 0b0110,               // (pred = ow   + succ =   ir)
    LoadLoad   = 0b1010,               // (pred = ir   + succ =   ir)
    AnyAny     = LoadStore | StoreLoad // (pred = iorw + succ = iorw)
  };
343 
344   void membar(uint32_t order_constraint);
345 
346   static void membar_mask_to_pred_succ(uint32_t order_constraint, uint32_t& predecessor, uint32_t& successor) {
347     predecessor = (order_constraint >> 2) & 0x3;
348     successor = order_constraint & 0x3;
349 
350     // extend rw -> iorw:
351     // 01(w) -> 0101(ow)
352     // 10(r) -> 1010(ir)
353     // 11(rw)-> 1111(iorw)
354     if (UseConservativeFence) {
355       predecessor |= predecessor << 2;
356       successor |= successor << 2;
357     }
358   }
359 
360   static int pred_succ_to_membar_mask(uint32_t predecessor, uint32_t successor) {
361     return ((predecessor & 0x3) << 2) | (successor & 0x3);
362   }
363 
364   // prints msg, dumps registers and stops execution
365   void stop(const char* msg);
366 
367   static void debug64(char* msg, int64_t pc, int64_t regs[]);
368 
369   void unimplemented(const char* what = "");
370 
371   void should_not_reach_here() { stop("should not reach here"); }
372 
373   static address target_addr_for_insn(address insn_addr);
374 
375   // Required platform-specific helpers for Label::patch_instructions.
376   // They _shadow_ the declarations in AbstractAssembler, which are undefined.
  static int pd_patch_instruction_size(address branch, address target);
  // Patches the instruction at 'branch' to reach 'target'; file/line are
  // accepted for interface compatibility but unused here.
  static void pd_patch_instruction(address branch, address target, const char* file = NULL, int line = 0) {
    pd_patch_instruction_size(branch, target);
  }
  // Returns the destination of the branch/call instruction at 'branch'.
  static address pd_call_destination(address branch) {
    return target_addr_for_insn(branch);
  }
384 
385   static int patch_oop(address insn_addr, address o);
386   address emit_trampoline_stub(int insts_call_instruction_offset, address target);
387   void emit_static_call_stub();
388 
389   // The following 4 methods return the offset of the appropriate move instruction
390 
391   // Support for fast byte/short loading with zero extension (depending on particular CPU)
392   int load_unsigned_byte(Register dst, Address src);
393   int load_unsigned_short(Register dst, Address src);
394 
395   // Support for fast byte/short loading with sign extension (depending on particular CPU)
396   int load_signed_byte(Register dst, Address src);
397   int load_signed_short(Register dst, Address src);
398 
399   // Load and store values by size and signed-ness
400   void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg);
401   void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg);
402 
403  public:
404   // Standard pseudoinstruction
405   void nop();
406   void mv(Register Rd, Register Rs);
407   void notr(Register Rd, Register Rs);
408   void neg(Register Rd, Register Rs);
409   void negw(Register Rd, Register Rs);
410   void sext_w(Register Rd, Register Rs);
411   void zext_b(Register Rd, Register Rs);
412   void seqz(Register Rd, Register Rs);          // set if = zero
413   void snez(Register Rd, Register Rs);          // set if != zero
414   void sltz(Register Rd, Register Rs);          // set if < zero
415   void sgtz(Register Rd, Register Rs);          // set if > zero
416 
417   // Float pseudoinstruction
418   void fmv_s(FloatRegister Rd, FloatRegister Rs);
419   void fabs_s(FloatRegister Rd, FloatRegister Rs);    // single-precision absolute value
420   void fneg_s(FloatRegister Rd, FloatRegister Rs);
421 
422   // Double pseudoinstruction
423   void fmv_d(FloatRegister Rd, FloatRegister Rs);
424   void fabs_d(FloatRegister Rd, FloatRegister Rs);
425   void fneg_d(FloatRegister Rd, FloatRegister Rs);
426 
427   // Pseudoinstruction for control and status register
428   void rdinstret(Register Rd);                  // read instruction-retired counter
429   void rdcycle(Register Rd);                    // read cycle counter
430   void rdtime(Register Rd);                     // read time
431   void csrr(Register Rd, unsigned csr);         // read csr
432   void csrw(unsigned csr, Register Rs);         // write csr
433   void csrs(unsigned csr, Register Rs);         // set bits in csr
434   void csrc(unsigned csr, Register Rs);         // clear bits in csr
435   void csrwi(unsigned csr, unsigned imm);
436   void csrsi(unsigned csr, unsigned imm);
437   void csrci(unsigned csr, unsigned imm);
438   void frcsr(Register Rd);                      // read float-point csr
439   void fscsr(Register Rd, Register Rs);         // swap float-point csr
440   void fscsr(Register Rs);                      // write float-point csr
441   void frrm(Register Rd);                       // read float-point rounding mode
442   void fsrm(Register Rd, Register Rs);          // swap float-point rounding mode
443   void fsrm(Register Rs);                       // write float-point rounding mode
444   void fsrmi(Register Rd, unsigned imm);
445   void fsrmi(unsigned imm);
446   void frflags(Register Rd);                    // read float-point exception flags
447   void fsflags(Register Rd, Register Rs);       // swap float-point exception flags
448   void fsflags(Register Rs);                    // write float-point exception flags
449   void fsflagsi(Register Rd, unsigned imm);
450   void fsflagsi(unsigned imm);
451 
452   void beqz(Register Rs, const address &dest);
453   void bnez(Register Rs, const address &dest);
454   void blez(Register Rs, const address &dest);
455   void bgez(Register Rs, const address &dest);
456   void bltz(Register Rs, const address &dest);
457   void bgtz(Register Rs, const address &dest);
458   void la(Register Rd, Label &label);
459   void la(Register Rd, const address &dest);
460   void la(Register Rd, const Address &adr);
461   //label
462   void beqz(Register Rs, Label &l, bool is_far = false);
463   void bnez(Register Rs, Label &l, bool is_far = false);
464   void blez(Register Rs, Label &l, bool is_far = false);
465   void bgez(Register Rs, Label &l, bool is_far = false);
466   void bltz(Register Rs, Label &l, bool is_far = false);
467   void bgtz(Register Rs, Label &l, bool is_far = false);
468   void float_beq(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
469   void float_bne(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
470   void float_ble(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
471   void float_bge(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
472   void float_blt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
473   void float_bgt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
474   void double_beq(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
475   void double_bne(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
476   void double_ble(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
477   void double_bge(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
478   void double_blt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
479   void double_bgt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
480 
  // RegSet/FloatRegSet convenience wrappers over the bitset overloads;
  // no instructions are emitted for an empty set.
  void push_reg(RegSet regs, Register stack) { if (regs.bits()) { push_reg(regs.bits(), stack); } }
  void pop_reg(RegSet regs, Register stack) { if (regs.bits()) { pop_reg(regs.bits(), stack); } }
  void push_reg(Register Rs);
  void pop_reg(Register Rd);
  int  push_reg(unsigned int bitset, Register stack);
  int  pop_reg(unsigned int bitset, Register stack);
  void push_fp(FloatRegSet regs, Register stack) { if (regs.bits()) push_fp(regs.bits(), stack); }
  void pop_fp(FloatRegSet regs, Register stack) { if (regs.bits()) pop_fp(regs.bits(), stack); }
#ifdef COMPILER2
  // Vector register set wrappers (C2 only); no-ops for an empty set.
  void push_vp(VectorRegSet regs, Register stack) { if (regs.bits()) push_vp(regs.bits(), stack); }
  void pop_vp(VectorRegSet regs, Register stack) { if (regs.bits()) pop_vp(regs.bits(), stack); }
#endif // COMPILER2
493 
494   // Push and pop everything that might be clobbered by a native
495   // runtime call except t0 and t1. (They are always
496   // temporary registers, so we don't have to protect them.)
497   // Additional registers can be excluded in a passed RegSet.
498   void push_call_clobbered_registers_except(RegSet exclude);
499   void pop_call_clobbered_registers_except(RegSet exclude);
500 
  // Convenience forms of the *_except variants with an empty exclusion set.
  void push_call_clobbered_registers() {
    push_call_clobbered_registers_except(RegSet());
  }
  void pop_call_clobbered_registers() {
    pop_call_clobbered_registers_except(RegSet());
  }
507 
508   void pusha();
509   void popa();
510   void push_CPU_state(bool save_vectors = false, int vector_size_in_bytes = 0);
511   void pop_CPU_state(bool restore_vectors = false, int vector_size_in_bytes = 0);
512 
513   // if heap base register is used - reinit it with the correct value
514   void reinit_heapbase();
515 
  // Binds the label at the current code position, then resets the
  // last-instruction tracking.
  void bind(Label& L) {
    Assembler::bind(L);
    // fences across basic blocks should not be merged
    code()->clear_last_insn();
  }
521 
  // mv
  // Materialize an integral constant into Rd (via li).
  template<typename T, ENABLE_IF(std::is_integral<T>::value)>
  inline void mv(Register Rd, T o) {
    li(Rd, (int64_t)o);
  }

  // 32-bit immediate move; forwards to the integral mv template above.
  inline void mvw(Register Rd, int32_t imm32) { mv(Rd, imm32); }
529 
530   void mv(Register Rd, Address dest);
531   void mv(Register Rd, address addr);
532   void mv(Register Rd, RegisterOrConstant src);
533 
534   // logic
535   void andrw(Register Rd, Register Rs1, Register Rs2);
536   void orrw(Register Rd, Register Rs1, Register Rs2);
537   void xorrw(Register Rd, Register Rs1, Register Rs2);
538 
539   // revb
540   void revb_h_h(Register Rd, Register Rs, Register tmp = t0);                           // reverse bytes in halfword in lower 16 bits, sign-extend
541   void revb_w_w(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);      // reverse bytes in lower word, sign-extend
542   void revb_h_h_u(Register Rd, Register Rs, Register tmp = t0);                         // reverse bytes in halfword in lower 16 bits, zero-extend
543   void revb_h_w_u(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);    // reverse bytes in halfwords in lower 32 bits, zero-extend
544   void revb_h_helper(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1);  // reverse bytes in upper 16 bits (48:63) and move to lower
545   void revb_h(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1);         // reverse bytes in each halfword
546   void revb_w(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1);         // reverse bytes in each word
547   void revb(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);          // reverse bytes in doubleword
548 
549   void ror_imm(Register dst, Register src, uint32_t shift, Register tmp = t0);
550   void andi(Register Rd, Register Rn, int64_t imm, Register tmp = t0);
551   void orptr(Address adr, RegisterOrConstant src, Register tmp1 = t0, Register tmp2 = t1);
552 
553   void cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp, Label &succeed, Label *fail);
554   void cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, Label &succeed, Label *fail);
555   void cmpxchg(Register addr, Register expected,
556                Register new_val,
557                enum operand_size size,
558                Assembler::Aqrl acquire, Assembler::Aqrl release,
559                Register result, bool result_as_bool = false);
560   void cmpxchg_weak(Register addr, Register expected,
561                     Register new_val,
562                     enum operand_size size,
563                     Assembler::Aqrl acquire, Assembler::Aqrl release,
564                     Register result);
565   void cmpxchg_narrow_value_helper(Register addr, Register expected,
566                                    Register new_val,
567                                    enum operand_size size,
568                                    Register tmp1, Register tmp2, Register tmp3);
569   void cmpxchg_narrow_value(Register addr, Register expected,
570                             Register new_val,
571                             enum operand_size size,
572                             Assembler::Aqrl acquire, Assembler::Aqrl release,
573                             Register result, bool result_as_bool,
574                             Register tmp1, Register tmp2, Register tmp3);
575   void weak_cmpxchg_narrow_value(Register addr, Register expected,
576                                  Register new_val,
577                                  enum operand_size size,
578                                  Assembler::Aqrl acquire, Assembler::Aqrl release,
579                                  Register result,
580                                  Register tmp1, Register tmp2, Register tmp3);
581 
582   void atomic_add(Register prev, RegisterOrConstant incr, Register addr);
583   void atomic_addw(Register prev, RegisterOrConstant incr, Register addr);
584   void atomic_addal(Register prev, RegisterOrConstant incr, Register addr);
585   void atomic_addalw(Register prev, RegisterOrConstant incr, Register addr);
586 
587   void atomic_xchg(Register prev, Register newv, Register addr);
588   void atomic_xchgw(Register prev, Register newv, Register addr);
589   void atomic_xchgal(Register prev, Register newv, Register addr);
590   void atomic_xchgalw(Register prev, Register newv, Register addr);
591   void atomic_xchgwu(Register prev, Register newv, Register addr);
592   void atomic_xchgalwu(Register prev, Register newv, Register addr);
593 
  // True when the reserved code cache may exceed the reach of a single
  // branch, in which case far_call/far_jump sequences must be used.
  static bool far_branches() {
    return ReservedCodeCacheSize > branch_range;
  }
597 
598   // Jumps that can reach anywhere in the code cache.
599   // Trashes tmp.
600   void far_call(Address entry, CodeBuffer *cbuf = NULL, Register tmp = t0);
601   void far_jump(Address entry, CodeBuffer *cbuf = NULL, Register tmp = t0);
602 
603   static int far_branch_size() {
604     if (far_branches()) {
605       return 2 * 4;  // auipc + jalr, see far_call() & far_jump()
606     } else {
607       return 4;
608     }
609   }
610 
611   void load_byte_map_base(Register reg);
612 
  // Probes the stack by storing zero at sp - offset (via t0), forcing a
  // fault if that address is not accessible.
  void bang_stack_with_offset(int offset) {
    // stack grows down, caller passes positive offset
    assert(offset > 0, "must bang with negative offset");
    sub(t0, sp, offset);
    sd(zr, Address(t0));
  }
619 
620   void la_patchable(Register reg1, const Address &dest, int32_t &offset);
621 
  // Placeholder for unimplemented calls: records the call site in t1
  // (used through the call_Unimplemented() macro below).
  virtual void _call_Unimplemented(address call_site) {
    mv(t1, call_site);
  }
625 
626   #define call_Unimplemented() _call_Unimplemented((address)__PRETTY_FUNCTION__)
627 
  // Frame creation and destruction shared between JITs.
  void build_frame(int framesize);
  void remove_frame(int framesize);

  // Probe for the reserved stack area (StackReservedPages support).
  void reserved_stack_check();

  // Safepoint polling page: load its address / emit the poll load.
  void get_polling_page(Register dest, relocInfo::relocType rtype);
  address read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype);

  // Emits a call routed through a trampoline stub so the target can be
  // anywhere in the code cache; returns the call-site address (NULL return
  // presumably signals stub-allocation failure -- confirm in the .cpp).
  address trampoline_call(Address entry, CodeBuffer* cbuf = NULL);
  // Emits an inline-cache call to 'entry'.
  address ic_call(address entry, jint method_index = 0);

  // In-memory immediate add of the given width at address 'dst'.
  void add_memory_int64(const Address dst, int64_t imm);
  void add_memory_int32(const Address dst, int32_t imm);

  // Compares src1 with the pointer at src2, branching to 'equal' on match.
  void cmpptr(Register src1, Address src2, Label& equal);

  // Class-initialization barrier for 'klass', with optional explicit
  // fast/slow-path labels.
  void clinit_barrier(Register klass, Register tmp, Label* L_fast_path = NULL, Label* L_slow_path = NULL);
  void load_method_holder_cld(Register result, Register method);
  void load_method_holder(Register holder, Register method);

  // String-intrinsic helpers; see the definitions for the exact contracts.
  void compute_index(Register str1, Register trailing_zeros, Register match_mask,
                     Register result, Register char_tmp, Register tmp,
                     bool haystack_isL);
  void compute_match_mask(Register src, Register pattern, Register match_mask,
                          Register mask1, Register mask2);
654 
#ifdef COMPILER2
  // C2-only helpers backing the BigInteger intrinsics (multiplyToLen,
  // mulAdd); semantics follow the shared Java implementations.
  void mul_add(Register out, Register in, Register offset,
               Register len, Register k, Register tmp);
  // Carry-propagating add primitives (cad/cadc/adc) -- exact carry-in/
  // carry-out behavior should be confirmed against the definitions.
  void cad(Register dst, Register src1, Register src2, Register carry);
  void cadc(Register dst, Register src1, Register src2, Register carry);
  void adc(Register dst, Register src1, Register src2, Register carry);
  void add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo,
                       Register src1, Register src2, Register carry);
  // Inner loops of multiply_to_len for the various operand-width cases.
  void multiply_32_x_32_loop(Register x, Register xstart, Register x_xstart,
                             Register y, Register y_idx, Register z,
                             Register carry, Register product,
                             Register idx, Register kdx);
  void multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart,
                             Register y, Register y_idx, Register z,
                             Register carry, Register product,
                             Register idx, Register kdx);
  void multiply_128_x_128_loop(Register y, Register z,
                               Register carry, Register carry2,
                               Register idx, Register jdx,
                               Register yz_idx1, Register yz_idx2,
                               Register tmp, Register tmp3, Register tmp4,
                               Register tmp6, Register product_hi);
  // Entry point: z = x * y for multi-word operands.
  void multiply_to_len(Register x, Register xlen, Register y, Register ylen,
                       Register z, Register zlen,
                       Register tmp1, Register tmp2, Register tmp3, Register tmp4,
                       Register tmp5, Register tmp6, Register product_hi);
#endif
682 
  // String-inflation helpers: widen the low/high 32 bits of Rs into Rd
  // (presumably byte -> short expansion for Latin-1 to UTF-16 -- confirm
  // against the definitions).
  void inflate_lo32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);
  void inflate_hi32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);

  // Count-trailing-zeros helper; isLL presumably selects Latin-1 (8-bit)
  // versus UTF-16 (16-bit) element width -- confirm in the .cpp file.
  void ctzc_bit(Register Rd, Register Rs, bool isLL = false, Register tmp1 = t0, Register tmp2 = t1);

  // Bulk zero/fill primitives.
  void zero_words(Register base, u_int64_t cnt);
  address zero_words(Register ptr, Register cnt);
  void fill_words(Register base, Register cnt, Register value);
  void zero_memory(Register addr, Register len, Register tmp);

  // shift left by shamt and add
  void shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt);

  // The *_safe variants of the float conversions handle special inputs
  // (NaN, +Inf, -Inf) explicitly, so that converting them to an integer
  // yields the results Java requires rather than the raw hardware result.
  void fcvt_w_s_safe(Register dst, FloatRegister src, Register tmp = t0);
  void fcvt_l_s_safe(Register dst, FloatRegister src, Register tmp = t0);
  void fcvt_w_d_safe(Register dst, FloatRegister src, Register tmp = t0);
  void fcvt_l_d_safe(Register dst, FloatRegister src, Register tmp = t0);
704 
705   // vector load/store unit-stride instructions
706   void vlex_v(VectorRegister vd, Register base, Assembler::SEW sew, VectorMask vm = unmasked) {
707     switch (sew) {
708       case Assembler::e64:
709         vle64_v(vd, base, vm);
710         break;
711       case Assembler::e32:
712         vle32_v(vd, base, vm);
713         break;
714       case Assembler::e16:
715         vle16_v(vd, base, vm);
716         break;
717       case Assembler::e8: // fall through
718       default:
719         vle8_v(vd, base, vm);
720         break;
721     }
722   }
723 
724   void vsex_v(VectorRegister store_data, Register base, Assembler::SEW sew, VectorMask vm = unmasked) {
725     switch (sew) {
726       case Assembler::e64:
727         vse64_v(store_data, base, vm);
728         break;
729       case Assembler::e32:
730         vse32_v(store_data, base, vm);
731         break;
732       case Assembler::e16:
733         vse16_v(store_data, base, vm);
734         break;
735       case Assembler::e8: // fall through
736       default:
737         vse8_v(store_data, base, vm);
738         break;
739     }
740   }
741 
  // Block size (defined in the .cpp file) used by the zero_words stubs.
  static const int zero_words_block_size;

  // Canonicalizes the value in Rt for the given Java primitive 'type':
  // booleans are normalized to 0/1, sub-int integral types are truncated
  // and re-extended per their signedness, and T_INT is sign-extended from
  // 32 bits. Long/void/float/double need no register fix-up here.
  void cast_primitive_type(BasicType type, Register Rt) {
    switch (type) {
      case T_BOOLEAN:
        // Rt = (zr < Rt) unsigned, i.e. Rt = (Rt != 0) ? 1 : 0.
        sltu(Rt, zr, Rt);
        break;
      case T_CHAR   :
        zero_extend(Rt, Rt, 16);
        break;
      case T_BYTE   :
        sign_extend(Rt, Rt, 8);
        break;
      case T_SHORT  :
        sign_extend(Rt, Rt, 16);
        break;
      case T_INT    :
        // addw with zero sign-extends the low 32 bits into the full register.
        addw(Rt, Rt, zr);
        break;
      case T_LONG   : /* nothing to do */        break;
      case T_VOID   : /* nothing to do */        break;
      case T_FLOAT  : /* nothing to do */        break;
      case T_DOUBLE : /* nothing to do */        break;
      default: ShouldNotReachHere();
    }
  }
768 
  // float cmp with unordered_result:
  // 'unordered_result' selects the value produced when either input is NaN.
  void float_compare(Register result, FloatRegister Rs1, FloatRegister Rs2, int unordered_result);
  void double_compare(Register result, FloatRegister Rs1, FloatRegister Rs2, int unordered_result);

  // Zero/Sign-extend the low 'bits' bits of src into dst.
  void zero_extend(Register dst, Register src, int bits);
  void sign_extend(Register dst, Register src, int bits);

  // compare src1 and src2 and get -1/0/1 in dst.
  // if [src1 > src2], dst = 1;
  // if [src1 == src2], dst = 0;
  // if [src1 < src2], dst = -1;
  void cmp_l2i(Register dst, Register src1, Register src2, Register tmp = t0);

  // Push/pop the float (fp) and vector (vp) registers selected by 'bitset'
  // to/from the area addressed by 'stack'; the int return value's exact
  // meaning (count or bytes) should be confirmed against the definitions.
  int push_fp(unsigned int bitset, Register stack);
  int pop_fp(unsigned int bitset, Register stack);

  int push_vp(unsigned int bitset, Register stack);
  int pop_vp(unsigned int bitset, Register stack);

  // vext: pseudo-instructions composed from base vector-extension ops.
  void vmnot_m(VectorRegister vd, VectorRegister vs);
  void vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked);
  void vfneg_v(VectorRegister vd, VectorRegister vs);
793 
private:

#ifdef ASSERT
  // Template short-hand support to clean-up after a failed call to trampoline
  // call generation (see trampoline_call() below), when a set of Labels must
  // be reset (before returning).
  // Variadic case: reset the first label, then recurse over the rest.
  template<typename Label, typename... More>
  void reset_labels(Label& lbl, More&... more) {
    lbl.reset(); reset_labels(more...);
  }
  // Base case: a single label terminates the recursion.
  template<typename Label>
  void reset_labels(Label& lbl) {
    lbl.reset();
  }
#endif
  // Scan memory starting at 'addr' comparing against 'value' for up to
  // 'count' elements -- see the definition for the exact contract.
  void repne_scan(Register addr, Register value, Register count, Register tmp);

  // Return true if an address is within the 48-bit RISCV64 address space,
  // i.e. all bits above bit 47 are zero.
  bool is_valid_riscv64_address(address addr) {
    return ((uintptr_t)addr >> 48) == 0;
  }
815 
  // Loads a 64-bit constant-pool entry at 'const_addr' into 'dest'.
  void ld_constant(Register dest, const Address &const_addr) {
    if (NearCpool) {
      // Constant pool is within reach of a plain load.
      ld(dest, const_addr);
    } else {
      // Far constant pool: materialize the address with a patchable
      // sequence, then load through dest + the returned low offset.
      int32_t offset = 0;
      la_patchable(dest, InternalAddress(const_addr.target()), offset);
      ld(dest, Address(dest, offset));
    }
  }
825 
  // Expands 'bitset' into register encodings in 'regs'; presumably returns
  // the number of bits set -- confirm against the definition.
  int bitset_to_regs(unsigned int bitset, unsigned char* regs);
  // Normalizes 'dst' into a form usable by the add_memory_* helpers.
  Address add_memory_helper(const Address dst);

  // LR/SC primitives underlying cmpxchg: load-reserved from 'addr' and
  // store-conditional of 'new_val', with the requested memory ordering.
  void load_reserved(Register addr, enum operand_size size, Assembler::Aqrl acquire);
  void store_conditional(Register addr, Register new_val, enum operand_size size, Assembler::Aqrl release);

  // Check the current thread doesn't need a cross modify fence.
  void verify_cross_modify_fence_not_required() PRODUCT_RETURN;
834 };
835 
#ifdef ASSERT
// Platform hook: returning false disables instruction-mark verification
// on this port.
inline bool AbstractAssembler::pd_check_instruction_mark() { return false; }
#endif
839 
840 /**
841  * class SkipIfEqual:
842  *
843  * Instantiating this class will result in assembly code being output that will
844  * jump around any code emitted between the creation of the instance and it's
845  * automatic destruction at the end of a scope block, depending on the value of
846  * the flag passed to the constructor, which will be checked at run-time.
847  */
848 class SkipIfEqual {
849  private:
850   MacroAssembler* _masm;
851   Label _label;
852 
853  public:
854    SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value);
855    ~SkipIfEqual();
856 };
857 #endif // CPU_RISCV_MACROASSEMBLER_RISCV_HPP