1 /*
  2  * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
  3  * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
  4  * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
  5  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  6  *
  7  * This code is free software; you can redistribute it and/or modify it
  8  * under the terms of the GNU General Public License version 2 only, as
  9  * published by the Free Software Foundation.
 10  *
 11  * This code is distributed in the hope that it will be useful, but WITHOUT
 12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 14  * version 2 for more details (a copy is included in the LICENSE file that
 15  * accompanied this code).
 16  *
 17  * You should have received a copy of the GNU General Public License version
 18  * 2 along with this work; if not, write to the Free Software Foundation,
 19  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 20  *
 21  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 22  * or visit www.oracle.com if you need additional information or have any
 23  * questions.
 24  *
 25  */
 26 
 27 #ifndef CPU_RISCV_MACROASSEMBLER_RISCV_HPP
 28 #define CPU_RISCV_MACROASSEMBLER_RISCV_HPP
 29 
 30 #include "asm/assembler.hpp"
 31 #include "metaprogramming/enableIf.hpp"
 32 #include "nativeInst_riscv.hpp"
 33 #include "oops/compressedOops.hpp"
 34 #include "utilities/powerOfTwo.hpp"
 35 
 36 // MacroAssembler extends Assembler by frequently used macros.
 37 //
 38 // Instructions for which a 'better' code sequence exists depending
 39 // on arguments should also go in here.
 40 
 41 class MacroAssembler: public Assembler {
 42 
 43  public:
  // Construct a MacroAssembler that emits into the given CodeBuffer.
  MacroAssembler(CodeBuffer* code) : Assembler(code) {
  }
  virtual ~MacroAssembler() {}
 47 
 48   void safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod);
 49 
 50   // Alignment
 51   void align(int modulus, int extra_offset = 0);
  // Assert (debug builds only) that pc is aligned to 'alignment' bytes;
  // the default is the fixed machine instruction size.
  static inline void assert_alignment(address pc, int alignment = NativeInstruction::instruction_size) {
    assert(is_aligned(pc, alignment), "bad alignment");
  }
 55 
 56   // Stack frame creation/removal
 57   // Note that SP must be updated to the right place before saving/restoring RA and FP
 58   // because signal based thread suspend/resume could happen asynchronously.
  // Build a two-word frame record (saved RA and FP) on the stack.
  // The statement order matters: sp is moved before RA/FP are stored
  // (see the signal-safety note above).
  void enter() {
    addi(sp, sp, - 2 * wordSize);   // allocate space for the frame record
    sd(ra, Address(sp, wordSize));  // save return address
    sd(fp, Address(sp));            // save caller's frame pointer
    addi(fp, sp, 2 * wordSize);     // fp now points just above the record
  }

  // Tear down the frame record built by enter(), restoring RA and FP.
  void leave() {
    addi(sp, fp, - 2 * wordSize);   // sp = start of the frame record
    ld(fp, Address(sp));            // restore caller's frame pointer
    ld(ra, Address(sp, wordSize));  // restore return address
    addi(sp, sp, 2 * wordSize);     // pop the frame record
  }
 72 
 73 
 74   // Support for getting the JavaThread pointer (i.e.; a reference to thread-local information)
 75   // The pointer will be loaded into the thread register.
 76   void get_thread(Register thread);
 77 
 78   // Support for VM calls
 79   //
 80   // It is imperative that all calls into the VM are handled via the call_VM macros.
 81   // They make sure that the stack linkage is setup correctly. call_VM's correspond
 82   // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points.
 83 
 84   void call_VM(Register oop_result,
 85                address entry_point,
 86                bool check_exceptions = true);
 87   void call_VM(Register oop_result,
 88                address entry_point,
 89                Register arg_1,
 90                bool check_exceptions = true);
 91   void call_VM(Register oop_result,
 92                address entry_point,
 93                Register arg_1, Register arg_2,
 94                bool check_exceptions = true);
 95   void call_VM(Register oop_result,
 96                address entry_point,
 97                Register arg_1, Register arg_2, Register arg_3,
 98                bool check_exceptions = true);
 99 
100   // Overloadings with last_Java_sp
101   void call_VM(Register oop_result,
102                Register last_java_sp,
103                address entry_point,
104                int number_of_arguments = 0,
105                bool check_exceptions = true);
106   void call_VM(Register oop_result,
107                Register last_java_sp,
108                address entry_point,
109                Register arg_1,
110                bool check_exceptions = true);
111   void call_VM(Register oop_result,
112                Register last_java_sp,
113                address entry_point,
114                Register arg_1, Register arg_2,
115                bool check_exceptions = true);
116   void call_VM(Register oop_result,
117                Register last_java_sp,
118                address entry_point,
119                Register arg_1, Register arg_2, Register arg_3,
120                bool check_exceptions = true);
121 
122   void get_vm_result(Register oop_result, Register java_thread);
123   void get_vm_result_2(Register metadata_result, Register java_thread);
124 
125   // These always tightly bind to MacroAssembler::call_VM_leaf_base
126   // bypassing the virtual implementation
127   void call_VM_leaf(address entry_point,
128                     int number_of_arguments = 0);
129   void call_VM_leaf(address entry_point,
130                     Register arg_0);
131   void call_VM_leaf(address entry_point,
132                     Register arg_0, Register arg_1);
133   void call_VM_leaf(address entry_point,
134                     Register arg_0, Register arg_1, Register arg_2);
135 
136   // These always tightly bind to MacroAssembler::call_VM_base
137   // bypassing the virtual implementation
138   void super_call_VM_leaf(address entry_point, Register arg_0);
139   void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1);
140   void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2);
141   void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3);
142 
143   // last Java Frame (fills frame anchor)
144   void set_last_Java_frame(Register last_java_sp, Register last_java_fp, address last_java_pc, Register tmp);
145   void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Label &last_java_pc, Register tmp);
146   void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Register last_java_pc, Register tmp);
147 
148   // thread in the default location (xthread)
149   void reset_last_Java_frame(bool clear_fp);
150 
151   virtual void call_VM_leaf_base(
152     address entry_point,                // the entry point
153     int     number_of_arguments,        // the number of arguments to pop after the call
154     Label*  retaddr = NULL
155   );
156 
  // Convenience overload: forwards to the Label-pointer variant above.
  virtual void call_VM_leaf_base(
    address entry_point,                // the entry point
    int     number_of_arguments,        // the number of arguments to pop after the call
    Label&  retaddr) {
    call_VM_leaf_base(entry_point, number_of_arguments, &retaddr);
  }
163 
164   virtual void call_VM_base(           // returns the register containing the thread upon return
165     Register oop_result,               // where an oop-result ends up if any; use noreg otherwise
166     Register java_thread,              // the thread if computed before     ; use noreg otherwise
167     Register last_java_sp,             // to set up last_Java_frame in stubs; use noreg otherwise
168     address  entry_point,              // the entry point
169     int      number_of_arguments,      // the number of arguments (w/o thread) to pop after the call
170     bool     check_exceptions          // whether to check for pending exceptions after return
171   );
172 
173   void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions);
174 
175   virtual void check_and_handle_earlyret(Register java_thread);
176   virtual void check_and_handle_popframe(Register java_thread);
177 
178   void resolve_weak_handle(Register result, Register tmp);
179   void resolve_oop_handle(Register result, Register tmp = x15);
180   void resolve_jobject(Register value, Register thread, Register tmp);
181 
182   void movoop(Register dst, jobject obj, bool immediate = false);
183   void mov_metadata(Register dst, Metadata* obj);
184   void bang_stack_size(Register size, Register tmp);
185   void set_narrow_oop(Register dst, jobject obj);
186   void set_narrow_klass(Register dst, Klass* k);
187 
188   void load_mirror(Register dst, Register method, Register tmp = x15);
189   void access_load_at(BasicType type, DecoratorSet decorators, Register dst,
190                       Address src, Register tmp1, Register thread_tmp);
191   void access_store_at(BasicType type, DecoratorSet decorators, Address dst,
192                        Register src, Register tmp1, Register thread_tmp);
193   void load_klass(Register dst, Register src, Register tmp = t0);
194   void store_klass(Register dst, Register src, Register tmp = t0);
195   void cmp_klass(Register oop, Register trial_klass, Register tmp1, Register tmp2, Label &L);
196 
197   void encode_klass_not_null(Register r, Register tmp = t0);
198   void decode_klass_not_null(Register r, Register tmp = t0);
199   void encode_klass_not_null(Register dst, Register src, Register tmp);
200   void decode_klass_not_null(Register dst, Register src, Register tmp);
201   void decode_heap_oop_not_null(Register r);
202   void decode_heap_oop_not_null(Register dst, Register src);
203   void decode_heap_oop(Register d, Register s);
204   void decode_heap_oop(Register r) { decode_heap_oop(r, r); }
205   void encode_heap_oop(Register d, Register s);
206   void encode_heap_oop(Register r) { encode_heap_oop(r, r); };
207   void load_heap_oop(Register dst, Address src, Register tmp1 = noreg,
208                      Register thread_tmp = noreg, DecoratorSet decorators = 0);
209   void load_heap_oop_not_null(Register dst, Address src, Register tmp1 = noreg,
210                               Register thread_tmp = noreg, DecoratorSet decorators = 0);
211   void store_heap_oop(Address dst, Register src, Register tmp1 = noreg,
212                       Register thread_tmp = noreg, DecoratorSet decorators = 0);
213 
214   void store_klass_gap(Register dst, Register src);
215 
216   // currently unimplemented
217   // Used for storing NULL. All other oop constants should be
218   // stored using routines that take a jobject.
219   void store_heap_oop_null(Address dst);
220 
221   // This dummy is to prevent a call to store_heap_oop from
  // converting a zero (like NULL) into a Register by giving
223   // the compiler two choices it can't resolve
224 
225   void store_heap_oop(Address dst, void* dummy);
226 
227   // Support for NULL-checks
228   //
229   // Generates code that causes a NULL OS exception if the content of reg is NULL.
230   // If the accessed location is M[reg + offset] and the offset is known, provide the
  // offset. No explicit code generation is needed if the offset is within a certain
232   // range (0 <= offset <= page_size).
233 
234   virtual void null_check(Register reg, int offset = -1);
235   static bool needs_explicit_null_check(intptr_t offset);
236   static bool uses_implicit_null_check(void* address);
237 
238   // idiv variant which deals with MINLONG as dividend and -1 as divisor
239   int corrected_idivl(Register result, Register rs1, Register rs2,
240                       bool want_remainder);
241   int corrected_idivq(Register result, Register rs1, Register rs2,
242                       bool want_remainder);
243 
244   // interface method calling
245   void lookup_interface_method(Register recv_klass,
246                                Register intf_klass,
247                                RegisterOrConstant itable_index,
248                                Register method_result,
249                                Register scan_tmp,
250                                Label& no_such_interface,
251                                bool return_method = true);
252 
253   // virtual method calling
254   // n.n. x86 allows RegisterOrConstant for vtable_index
255   void lookup_virtual_method(Register recv_klass,
256                              RegisterOrConstant vtable_index,
257                              Register method_result);
258 
  // Form an address from base + offset in Rd. Rd may or may not
  // actually be used: you must use the Address that is returned. It
  // is up to you to ensure that the shift provided matches the size
  // of your data.
263   Address form_address(Register Rd, Register base, long byte_offset);
264 
265   // allocation
  // Fast-path allocation in the thread-local allocation buffer;
  // branches to slow_case on failure.
  void tlab_allocate(
    Register obj,                   // result: pointer to object after successful allocation
    Register var_size_in_bytes,     // object size in bytes if unknown at compile time; invalid otherwise
    int      con_size_in_bytes,     // object size in bytes if   known at compile time
    Register tmp1,                  // temp register
    Register tmp2,                  // temp register
    Label&   slow_case,             // continuation point if fast allocation fails
    bool is_far = false
  );

  // Fast-path allocation directly in eden; branches to slow_case on failure.
  void eden_allocate(
    Register obj,                   // result: pointer to object after successful allocation
    Register var_size_in_bytes,     // object size in bytes if unknown at compile time; invalid otherwise
    int      con_size_in_bytes,     // object size in bytes if   known at compile time
    Register tmp,                   // temp register
    Label&   slow_case,             // continuation point if fast allocation fails
    bool is_far = false
  );
284 
285   // Test sub_klass against super_klass, with fast and slow paths.
286 
287   // The fast path produces a tri-state answer: yes / no / maybe-slow.
288   // One of the three labels can be NULL, meaning take the fall-through.
289   // If super_check_offset is -1, the value is loaded up from super_klass.
290   // No registers are killed, except tmp_reg
291   void check_klass_subtype_fast_path(Register sub_klass,
292                                      Register super_klass,
293                                      Register tmp_reg,
294                                      Label* L_success,
295                                      Label* L_failure,
296                                      Label* L_slow_path,
297                                      Register super_check_offset = noreg);
298 
  // The rest of the type check; must be wired to a corresponding fast path.
  // It does not repeat the fast path logic, so don't use it standalone.
  // The tmp1_reg and tmp2_reg can be noreg, if no temps are available.
302   // Updates the sub's secondary super cache as necessary.
303   void check_klass_subtype_slow_path(Register sub_klass,
304                                      Register super_klass,
305                                      Register tmp1_reg,
306                                      Register tmp2_reg,
307                                      Label* L_success,
308                                      Label* L_failure);
309 
310   void check_klass_subtype(Register sub_klass,
311                            Register super_klass,
312                            Register tmp_reg,
313                            Label& L_success);
314 
315   Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0);
316 
317   // only if +VerifyOops
318   void verify_oop(Register reg, const char* s = "broken oop");
319   void verify_oop_addr(Address addr, const char* s = "broken oop addr");
320 
321   void _verify_method_ptr(Register reg, const char* msg, const char* file, int line) {}
322   void _verify_klass_ptr(Register reg, const char* msg, const char* file, int line) {}
323 
324 #define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__)
325 #define verify_klass_ptr(reg) _verify_method_ptr(reg, "broken klass " #reg, __FILE__, __LINE__)
326 
  // A more convenient access to fence for our purposes.
  // We use four bits to indicate the read and write bits in the predecessors and successors,
  // and extend i for r, o for w if UseConservativeFence is enabled.
  // Barrier bit masks. Each mask holds two 2-bit fields: the upper field is
  // the predecessor set, the lower field the successor set; within a field,
  // bit 1 is the read/load bit and bit 0 the write/store bit
  // (see membar_mask_to_pred_succ).
  enum Membar_mask_bits {
    StoreStore = 0b0101,               // (pred = ow   + succ =   ow)
    LoadStore  = 0b1001,               // (pred = ir   + succ =   ow)
    StoreLoad  = 0b0110,               // (pred = ow   + succ =   ir)
    LoadLoad   = 0b1010,               // (pred = ir   + succ =   ir)
    AnyAny     = LoadStore | StoreLoad // (pred = iorw + succ = iorw)
  };
337 
338   void membar(uint32_t order_constraint);
339 
340   static void membar_mask_to_pred_succ(uint32_t order_constraint, uint32_t& predecessor, uint32_t& successor) {
341     predecessor = (order_constraint >> 2) & 0x3;
342     successor = order_constraint & 0x3;
343 
344     // extend rw -> iorw:
345     // 01(w) -> 0101(ow)
346     // 10(r) -> 1010(ir)
347     // 11(rw)-> 1111(iorw)
348     if (UseConservativeFence) {
349       predecessor |= predecessor << 2;
350       successor |= successor << 2;
351     }
352   }
353 
354   static int pred_succ_to_membar_mask(uint32_t predecessor, uint32_t successor) {
355     return ((predecessor & 0x3) << 2) | (successor & 0x3);
356   }
357 
358   // prints msg, dumps registers and stops execution
359   void stop(const char* msg);
360 
361   static void debug64(char* msg, int64_t pc, int64_t regs[]);
362 
363   void unimplemented(const char* what = "");
364 
  // Halt with a diagnostic; marks code paths that must never execute.
  void should_not_reach_here() { stop("should not reach here"); }
366 
367   static address target_addr_for_insn(address insn_addr);
368 
369   // Required platform-specific helpers for Label::patch_instructions.
370   // They _shadow_ the declarations in AbstractAssembler, which are undefined.
  static int pd_patch_instruction_size(address branch, address target);
  // Patch the instruction at 'branch' to refer to 'target'. file/line are
  // accepted for interface compatibility with AbstractAssembler and unused.
  static void pd_patch_instruction(address branch, address target, const char* file = NULL, int line = 0) {
    pd_patch_instruction_size(branch, target);
  }
  // Decode the destination address encoded in the instruction at 'branch'.
  static address pd_call_destination(address branch) {
    return target_addr_for_insn(branch);
  }
378 
379   static int patch_oop(address insn_addr, address o);
380   address emit_trampoline_stub(int insts_call_instruction_offset, address target);
381   void emit_static_call_stub();
382 
383   // The following 4 methods return the offset of the appropriate move instruction
384 
385   // Support for fast byte/short loading with zero extension (depending on particular CPU)
386   int load_unsigned_byte(Register dst, Address src);
387   int load_unsigned_short(Register dst, Address src);
388 
389   // Support for fast byte/short loading with sign extension (depending on particular CPU)
390   int load_signed_byte(Register dst, Address src);
391   int load_signed_short(Register dst, Address src);
392 
393   // Load and store values by size and signed-ness
394   void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg);
395   void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg);
396 
397  public:
398   // Standard pseudoinstruction
399   void nop();
400   void mv(Register Rd, Register Rs);
401   void notr(Register Rd, Register Rs);
402   void neg(Register Rd, Register Rs);
403   void negw(Register Rd, Register Rs);
404   void sext_w(Register Rd, Register Rs);
405   void zext_b(Register Rd, Register Rs);
406   void seqz(Register Rd, Register Rs);          // set if = zero
407   void snez(Register Rd, Register Rs);          // set if != zero
408   void sltz(Register Rd, Register Rs);          // set if < zero
409   void sgtz(Register Rd, Register Rs);          // set if > zero
410 
411   // Float pseudoinstruction
412   void fmv_s(FloatRegister Rd, FloatRegister Rs);
413   void fabs_s(FloatRegister Rd, FloatRegister Rs);    // single-precision absolute value
414   void fneg_s(FloatRegister Rd, FloatRegister Rs);
415 
416   // Double pseudoinstruction
417   void fmv_d(FloatRegister Rd, FloatRegister Rs);
418   void fabs_d(FloatRegister Rd, FloatRegister Rs);
419   void fneg_d(FloatRegister Rd, FloatRegister Rs);
420 
421   // Pseudoinstruction for control and status register
422   void rdinstret(Register Rd);                  // read instruction-retired counter
423   void rdcycle(Register Rd);                    // read cycle counter
424   void rdtime(Register Rd);                     // read time
425   void csrr(Register Rd, unsigned csr);         // read csr
426   void csrw(unsigned csr, Register Rs);         // write csr
427   void csrs(unsigned csr, Register Rs);         // set bits in csr
428   void csrc(unsigned csr, Register Rs);         // clear bits in csr
429   void csrwi(unsigned csr, unsigned imm);
430   void csrsi(unsigned csr, unsigned imm);
431   void csrci(unsigned csr, unsigned imm);
432   void frcsr(Register Rd);                      // read float-point csr
433   void fscsr(Register Rd, Register Rs);         // swap float-point csr
434   void fscsr(Register Rs);                      // write float-point csr
435   void frrm(Register Rd);                       // read float-point rounding mode
436   void fsrm(Register Rd, Register Rs);          // swap float-point rounding mode
437   void fsrm(Register Rs);                       // write float-point rounding mode
438   void fsrmi(Register Rd, unsigned imm);
439   void fsrmi(unsigned imm);
440   void frflags(Register Rd);                    // read float-point exception flags
441   void fsflags(Register Rd, Register Rs);       // swap float-point exception flags
442   void fsflags(Register Rs);                    // write float-point exception flags
443   void fsflagsi(Register Rd, unsigned imm);
444   void fsflagsi(unsigned imm);
445 
446   void beqz(Register Rs, const address &dest);
447   void bnez(Register Rs, const address &dest);
448   void blez(Register Rs, const address &dest);
449   void bgez(Register Rs, const address &dest);
450   void bltz(Register Rs, const address &dest);
451   void bgtz(Register Rs, const address &dest);
452   void la(Register Rd, Label &label);
453   void la(Register Rd, const address &dest);
454   void la(Register Rd, const Address &adr);
455   //label
456   void beqz(Register Rs, Label &l, bool is_far = false);
457   void bnez(Register Rs, Label &l, bool is_far = false);
458   void blez(Register Rs, Label &l, bool is_far = false);
459   void bgez(Register Rs, Label &l, bool is_far = false);
460   void bltz(Register Rs, Label &l, bool is_far = false);
461   void bgtz(Register Rs, Label &l, bool is_far = false);
462   void float_beq(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
463   void float_bne(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
464   void float_ble(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
465   void float_bge(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
466   void float_blt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
467   void float_bgt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
468   void double_beq(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
469   void double_bne(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
470   void double_ble(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
471   void double_bge(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
472   void double_blt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
473   void double_bgt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
474 
  // Multi-register push/pop to/from the stack addressed by 'stack'.
  // The RegSet overloads are no-ops when the set is empty.
  void push_reg(RegSet regs, Register stack) { if (regs.bits()) { push_reg(regs.bits(), stack); } }
  void pop_reg(RegSet regs, Register stack) { if (regs.bits()) { pop_reg(regs.bits(), stack); } }
  void push_reg(Register Rs);
  void pop_reg(Register Rd);
  int  push_reg(unsigned int bitset, Register stack);
  int  pop_reg(unsigned int bitset, Register stack);
  // Floating-point register set variants.
  void push_fp(FloatRegSet regs, Register stack) { if (regs.bits()) push_fp(regs.bits(), stack); }
  void pop_fp(FloatRegSet regs, Register stack) { if (regs.bits()) pop_fp(regs.bits(), stack); }
#ifdef COMPILER2
  // Vector register set variants (C2 only).
  void push_vp(VectorRegSet regs, Register stack) { if (regs.bits()) push_vp(regs.bits(), stack); }
  void pop_vp(VectorRegSet regs, Register stack) { if (regs.bits()) pop_vp(regs.bits(), stack); }
#endif // COMPILER2
487 
488   // Push and pop everything that might be clobbered by a native
489   // runtime call except t0 and t1. (They are always
490   // temporary registers, so we don't have to protect them.)
491   // Additional registers can be excluded in a passed RegSet.
492   void push_call_clobbered_registers_except(RegSet exclude);
493   void pop_call_clobbered_registers_except(RegSet exclude);
494 
  // Save/restore all call-clobbered registers (minus t0/t1 — see the
  // comment above) with no additional exclusions.
  void push_call_clobbered_registers() {
    push_call_clobbered_registers_except(RegSet());
  }
  void pop_call_clobbered_registers() {
    pop_call_clobbered_registers_except(RegSet());
  }
501 
502   void push_CPU_state(bool save_vectors = false, int vector_size_in_bytes = 0);
503   void pop_CPU_state(bool restore_vectors = false, int vector_size_in_bytes = 0);
504 
505   // if heap base register is used - reinit it with the correct value
506   void reinit_heapbase();
507 
  // Bind the label at the current code position, then drop the cached
  // last instruction so peephole merging cannot occur across the label.
  void bind(Label& L) {
    Assembler::bind(L);
    // fences across basic blocks should not be merged
    code()->clear_last_insn();
  }
513 
514   // mv
  // Materialize any integral immediate into Rd; the value is widened
  // (or sign-extended) to int64_t and delegated to li.
  template<typename T, ENABLE_IF(std::is_integral<T>::value)>
  inline void mv(Register Rd, T o) {
    li(Rd, (int64_t)o);
  }

  // 32-bit immediate convenience alias for mv.
  inline void mvw(Register Rd, int32_t imm32) { mv(Rd, imm32); }
521 
522   void mv(Register Rd, Address dest);
523   void mv(Register Rd, address addr);
524   void mv(Register Rd, RegisterOrConstant src);
525 
526   // logic
527   void andrw(Register Rd, Register Rs1, Register Rs2);
528   void orrw(Register Rd, Register Rs1, Register Rs2);
529   void xorrw(Register Rd, Register Rs1, Register Rs2);
530 
531   // revb
532   void revb_h_h(Register Rd, Register Rs, Register tmp = t0);                           // reverse bytes in halfword in lower 16 bits, sign-extend
533   void revb_w_w(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);      // reverse bytes in lower word, sign-extend
534   void revb_h_h_u(Register Rd, Register Rs, Register tmp = t0);                         // reverse bytes in halfword in lower 16 bits, zero-extend
535   void revb_h_w_u(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);    // reverse bytes in halfwords in lower 32 bits, zero-extend
536   void revb_h_helper(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1);  // reverse bytes in upper 16 bits (48:63) and move to lower
537   void revb_h(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1);         // reverse bytes in each halfword
538   void revb_w(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1);         // reverse bytes in each word
539   void revb(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);          // reverse bytes in doubleword
540 
541   void ror_imm(Register dst, Register src, uint32_t shift, Register tmp = t0);
542   void andi(Register Rd, Register Rn, int64_t imm, Register tmp = t0);
543   void orptr(Address adr, RegisterOrConstant src, Register tmp1 = t0, Register tmp2 = t1);
544 
545   void cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp, Label &succeed, Label *fail);
546   void cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, Label &succeed, Label *fail);
547   void cmpxchg(Register addr, Register expected,
548                Register new_val,
549                enum operand_size size,
550                Assembler::Aqrl acquire, Assembler::Aqrl release,
551                Register result, bool result_as_bool = false);
552   void cmpxchg_weak(Register addr, Register expected,
553                     Register new_val,
554                     enum operand_size size,
555                     Assembler::Aqrl acquire, Assembler::Aqrl release,
556                     Register result);
557   void cmpxchg_narrow_value_helper(Register addr, Register expected,
558                                    Register new_val,
559                                    enum operand_size size,
560                                    Register tmp1, Register tmp2, Register tmp3);
561   void cmpxchg_narrow_value(Register addr, Register expected,
562                             Register new_val,
563                             enum operand_size size,
564                             Assembler::Aqrl acquire, Assembler::Aqrl release,
565                             Register result, bool result_as_bool,
566                             Register tmp1, Register tmp2, Register tmp3);
567   void weak_cmpxchg_narrow_value(Register addr, Register expected,
568                                  Register new_val,
569                                  enum operand_size size,
570                                  Assembler::Aqrl acquire, Assembler::Aqrl release,
571                                  Register result,
572                                  Register tmp1, Register tmp2, Register tmp3);
573 
574   void atomic_add(Register prev, RegisterOrConstant incr, Register addr);
575   void atomic_addw(Register prev, RegisterOrConstant incr, Register addr);
576   void atomic_addal(Register prev, RegisterOrConstant incr, Register addr);
577   void atomic_addalw(Register prev, RegisterOrConstant incr, Register addr);
578 
579   void atomic_xchg(Register prev, Register newv, Register addr);
580   void atomic_xchgw(Register prev, Register newv, Register addr);
581   void atomic_xchgal(Register prev, Register newv, Register addr);
582   void atomic_xchgalw(Register prev, Register newv, Register addr);
583   void atomic_xchgwu(Register prev, Register newv, Register addr);
584   void atomic_xchgalwu(Register prev, Register newv, Register addr);
585 
586   void atomic_incw(Register counter_addr, Register tmp);
  // Address-based variant: materializes counter_addr into tmp1, then
  // delegates to the register form above (tmp2 is its scratch register).
  void atomic_incw(Address counter_addr, Register tmp1, Register tmp2) {
    la(tmp1, counter_addr);
    atomic_incw(tmp1, tmp2);
  }
591 
592   // Biased locking support
593   // lock_reg and obj_reg must be loaded up with the appropriate values.
594   // swap_reg is killed.
595   // tmp_reg must be supplied and must not be t0 or t1
596   // Optional slow case is for implementations (interpreter and C1) which branch to
597   // slow case directly. Leaves condition codes set for C2's Fast_Lock done.
598   // Returns offset of first potentially-faulting instruction for null
599   // check info (currently consumed only by C1). If
  // swap_reg_contains_mark is true then returns -1 as it is assumed
601   // the calling code has already passed any potential faults.
602   void biased_locking_enter(Register lock_reg, Register obj_reg,
603                             Register swap_reg, Register tmp_Reg,
604                             bool swap_reg_contains_mark,
605                             Label& done, Label* slow_case = NULL,
606                             BiasedLockingCounters* counters = NULL,
607                             Register flag = noreg);
608   void biased_locking_exit(Register obj_reg, Register tmp_reg, Label& done, Register flag = noreg);
609 
610   static bool far_branches() {
611     return ReservedCodeCacheSize > branch_range;
612   }
613 
  // Jumps that can reach anywhere in the code cache.
  // Trashes tmp (t0 by default). cbuf, when supplied, presumably receives
  // the emitted relocation/call-site info -- see the .cpp definitions.
  void far_call(Address entry, CodeBuffer *cbuf = NULL, Register tmp = t0);
  void far_jump(Address entry, CodeBuffer *cbuf = NULL, Register tmp = t0);
618 
619   static int far_branch_size() {
620     if (far_branches()) {
621       return 2 * 4;  // auipc + jalr, see far_call() & far_jump()
622     } else {
623       return 4;
624     }
625   }
626 
  // Load the GC byte-map base into reg (see definition in the .cpp file).
  void load_byte_map_base(Register reg);

  // Touch the stack 'offset' bytes below sp so a stack overflow is
  // detected eagerly (stack banging). The effective bang address is
  // sp - offset, hence the "negative offset" wording in the assert.
  void bang_stack_with_offset(int offset) {
    // stack grows down, caller passes positive offset
    assert(offset > 0, "must bang with negative offset");
    sub(t0, sp, offset);
    sd(zr, Address(t0));
  }

  // Patchable address materialization; the low-order part of the target
  // is returned in 'offset' for use as the displacement of a following
  // memory instruction (see definition in the .cpp file).
  void la_patchable(Register reg1, const Address &dest, int32_t &offset);

  // Debugging aid: move the call site string (the caller's
  // __PRETTY_FUNCTION__, via the macro below) into t1 so it is visible
  // when the unimplemented path is reached.
  virtual void _call_Unimplemented(address call_site) {
    mv(t1, call_site);
  }

  #define call_Unimplemented() _call_Unimplemented((address)__PRETTY_FUNCTION__)
643 
  // Frame creation and destruction shared between JITs.
  void build_frame(int framesize);
  void remove_frame(int framesize);

  // Check for room on the ReservedStackArea (see definition in the .cpp file).
  void reserved_stack_check();

  // Safepoint polling-page access; rtype selects the relocation recorded.
  void get_polling_page(Register dest, relocInfo::relocType rtype);
  void read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype);

  // Call that may be routed through a trampoline stub so it can reach the
  // whole code cache (see trampoline_call() in the .cpp file).
  address trampoline_call(Address entry, CodeBuffer* cbuf = NULL);
  // Inline-cache call to entry; method_index is recorded in the relocation.
  address ic_call(address entry, jint method_index = 0);

  // Add imm to the integer stored at dst (read-modify-write in memory).
  void add_memory_int64(const Address dst, int64_t imm);
  void add_memory_int32(const Address dst, int32_t imm);

  // Compare src1 with the word loaded from src2; branch to 'equal' on match.
  void cmpptr(Register src1, Address src2, Label& equal);

  // Class-initialization barrier and Method* holder accessors.
  void clinit_barrier(Register klass, Register tmp, Label* L_fast_path = NULL, Label* L_slow_path = NULL);
  void load_method_holder_cld(Register result, Register method);
  void load_method_holder(Register holder, Register method);

  // String-intrinsic helpers (see the string ops in the .cpp file).
  // haystack_isL presumably selects Latin-1 vs UTF-16 element width -- confirm
  // against the callers in the .cpp file.
  void compute_index(Register str1, Register trailing_zeros, Register match_mask,
                     Register result, Register char_tmp, Register tmp,
                     bool haystack_isL);
  void compute_match_mask(Register src, Register pattern, Register match_mask,
                          Register mask1, Register mask2);
670 
#ifdef COMPILER2
  // Multi-precision arithmetic helpers backing C2 intrinsics such as
  // multiply_to_len (see macroAssembler_riscv.cpp for the definitions).
  void mul_add(Register out, Register in, Register offset,
               Register len, Register k, Register tmp);
  // Add helpers that propagate a carry value through the 'carry' register.
  void cad(Register dst, Register src1, Register src2, Register carry);
  void cadc(Register dst, Register src1, Register src2, Register carry);
  void adc(Register dst, Register src1, Register src2, Register carry);
  void add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo,
                       Register src1, Register src2, Register carry);
  // Inner loops of the long-multiplication routine, specialized by the
  // width of the partial products they accumulate.
  void multiply_32_x_32_loop(Register x, Register xstart, Register x_xstart,
                             Register y, Register y_idx, Register z,
                             Register carry, Register product,
                             Register idx, Register kdx);
  void multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart,
                             Register y, Register y_idx, Register z,
                             Register carry, Register product,
                             Register idx, Register kdx);
  void multiply_128_x_128_loop(Register y, Register z,
                               Register carry, Register carry2,
                               Register idx, Register jdx,
                               Register yz_idx1, Register yz_idx2,
                               Register tmp, Register tmp3, Register tmp4,
                               Register tmp6, Register product_hi);
  // Entry point: z[0..zlen) = x[0..xlen) * y[0..ylen).
  void multiply_to_len(Register x, Register xlen, Register y, Register ylen,
                       Register z, Register zlen,
                       Register tmp1, Register tmp2, Register tmp3, Register tmp4,
                       Register tmp5, Register tmp6, Register product_hi);
#endif
698 
  // Widen packed bytes in the low/high 32 bits of Rs into Rd -- presumably
  // the Latin-1 -> UTF-16 inflate used by string intrinsics; confirm
  // against the .cpp definitions.
  void inflate_lo32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);
  void inflate_hi32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);

  // Count trailing zeros at character granularity; isLL presumably selects
  // the Latin-1 (byte) element size -- see the .cpp definition.
  void ctzc_bit(Register Rd, Register Rs, bool isLL = false, Register tmp1 = t0, Register tmp2 = t1);

  // Zero / fill 'cnt' words starting at base (constant- and register-count forms).
  void zero_words(Register base, u_int64_t cnt);
  address zero_words(Register ptr, Register cnt);
  void fill_words(Register base, Register cnt, Register value);
  void zero_memory(Register addr, Register len, Register tmp);

  // shift left by shamt and add
  void shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt);

  // Here the float instructions with safe deal with some exceptions.
  // e.g. convert from NaN, +Inf, -Inf to int, float, double
  // will trigger exception, we need to deal with these situations
  // to get correct results.
  void fcvt_w_s_safe(Register dst, FloatRegister src, Register tmp = t0);
  void fcvt_l_s_safe(Register dst, FloatRegister src, Register tmp = t0);
  void fcvt_w_d_safe(Register dst, FloatRegister src, Register tmp = t0);
  void fcvt_l_d_safe(Register dst, FloatRegister src, Register tmp = t0);
721   // vector load/store unit-stride instructions
722   void vlex_v(VectorRegister vd, Register base, Assembler::SEW sew, VectorMask vm = unmasked) {
723     switch (sew) {
724       case Assembler::e64:
725         vle64_v(vd, base, vm);
726         break;
727       case Assembler::e32:
728         vle32_v(vd, base, vm);
729         break;
730       case Assembler::e16:
731         vle16_v(vd, base, vm);
732         break;
733       case Assembler::e8: // fall through
734       default:
735         vle8_v(vd, base, vm);
736         break;
737     }
738   }
739 
740   void vsex_v(VectorRegister store_data, Register base, Assembler::SEW sew, VectorMask vm = unmasked) {
741     switch (sew) {
742       case Assembler::e64:
743         vse64_v(store_data, base, vm);
744         break;
745       case Assembler::e32:
746         vse32_v(store_data, base, vm);
747         break;
748       case Assembler::e16:
749         vse16_v(store_data, base, vm);
750         break;
751       case Assembler::e8: // fall through
752       default:
753         vse8_v(store_data, base, vm);
754         break;
755     }
756   }
757 
  // Block size used by the zero_words routines; defined in the .cpp file.
  static const int zero_words_block_size;
759 
760   void cast_primitive_type(BasicType type, Register Rt) {
761     switch (type) {
762       case T_BOOLEAN:
763         sltu(Rt, zr, Rt);
764         break;
765       case T_CHAR   :
766         zero_extend(Rt, Rt, 16);
767         break;
768       case T_BYTE   :
769         sign_extend(Rt, Rt, 8);
770         break;
771       case T_SHORT  :
772         sign_extend(Rt, Rt, 16);
773         break;
774       case T_INT    :
775         addw(Rt, Rt, zr);
776         break;
777       case T_LONG   : /* nothing to do */        break;
778       case T_VOID   : /* nothing to do */        break;
779       case T_FLOAT  : /* nothing to do */        break;
780       case T_DOUBLE : /* nothing to do */        break;
781       default: ShouldNotReachHere();
782     }
783   }
784 
  // float cmp with unordered_result: compare Rs1/Rs2 into result, with
  // unordered_result selecting the value produced when either input is NaN.
  void float_compare(Register result, FloatRegister Rs1, FloatRegister Rs2, int unordered_result);
  void double_compare(Register result, FloatRegister Rs1, FloatRegister Rs2, int unordered_result);

  // Zero/Sign-extend the low 'bits' bits of src into dst.
  void zero_extend(Register dst, Register src, int bits);
  void sign_extend(Register dst, Register src, int bits);

  // compare src1 and src2 and get -1/0/1 in dst.
  // if [src1 > src2], dst = 1;
  // if [src1 == src2], dst = 0;
  // if [src1 < src2], dst = -1;
  void cmp_l2i(Register dst, Register src1, Register src2, Register tmp = t0);

  // Save/restore the float registers selected by 'bitset' relative to
  // 'stack'; return value is presumably the number of registers moved --
  // see the .cpp definitions.
  int push_fp(unsigned int bitset, Register stack);
  int pop_fp(unsigned int bitset, Register stack);

  // Same, for vector registers.
  int push_vp(unsigned int bitset, Register stack);
  int pop_vp(unsigned int bitset, Register stack);

  // vext pseudo-instructions (composed from base vector ops in the .cpp file).
  void vmnot_m(VectorRegister vd, VectorRegister vs);
  void vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked);
  void vfneg_v(VectorRegister vd, VectorRegister vs);
809 
810 private:
811 
812 #ifdef ASSERT
813   // Template short-hand support to clean-up after a failed call to trampoline
814   // call generation (see trampoline_call() below), when a set of Labels must
815   // be reset (before returning).
816   template<typename Label, typename... More>
817   void reset_labels(Label& lbl, More&... more) {
818     lbl.reset(); reset_labels(more...);
819   }
820   template<typename Label>
821   void reset_labels(Label& lbl) {
822     lbl.reset();
823   }
824 #endif
  // Scan 'count' entries starting at addr for 'value' -- presumably the
  // subtype-check scan loop (x86 repne-scas analogue); see the .cpp definition.
  void repne_scan(Register addr, Register value, Register count, Register tmp);
826 
827   // Return true if an address is within the 48-bit RISCV64 address space.
828   bool is_valid_riscv64_address(address addr) {
829     // sv48: must have bits 63–48 all equal to bit 47
830     return ((uintptr_t)addr >> 47) == 0;
831   }
832 
833   void ld_constant(Register dest, const Address &const_addr) {
834     if (NearCpool) {
835       ld(dest, const_addr);
836     } else {
837       int32_t offset = 0;
838       la_patchable(dest, InternalAddress(const_addr.target()), offset);
839       ld(dest, Address(dest, offset));
840     }
841   }
842 
  // Expand 'bitset' into register encodings written to regs; returns the
  // count (see the .cpp definition).
  int bitset_to_regs(unsigned int bitset, unsigned char* regs);
  // Produce an Address usable by the add_memory_* routines above.
  Address add_memory_helper(const Address dst);

  // LR/SC primitives underlying the atomic operations; the Aqrl argument
  // selects the memory-ordering bits of the lr/sc instruction.
  void load_reserved(Register addr, enum operand_size size, Assembler::Aqrl acquire);
  void store_conditional(Register addr, Register new_val, enum operand_size size, Assembler::Aqrl release);

  // Biased-locking helper: load the prototype mark word for src's class into dst.
  void load_prototype_header(Register dst, Register src);
850 };
851 
#ifdef ASSERT
// Instruction-mark consistency checking is disabled on this platform.
inline bool AbstractAssembler::pd_check_instruction_mark() { return false; }
#endif
855 
856 /**
857  * class SkipIfEqual:
858  *
859  * Instantiating this class will result in assembly code being output that will
860  * jump around any code emitted between the creation of the instance and it's
861  * automatic destruction at the end of a scope block, depending on the value of
862  * the flag passed to the constructor, which will be checked at run-time.
863  */
864 class SkipIfEqual {
865  private:
866   MacroAssembler* _masm;
867   Label _label;
868 
869  public:
870    SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value);
871    ~SkipIfEqual();
872 };
873 
874 #endif // CPU_RISCV_MACROASSEMBLER_RISCV_HPP