/*
 * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
 * Copyright (c) 2020, 2021, Huawei Technologies Co., Ltd. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef CPU_RISCV_MACROASSEMBLER_RISCV_HPP
#define CPU_RISCV_MACROASSEMBLER_RISCV_HPP

#include "asm/assembler.hpp"
#include "oops/compressedOops.hpp"
#include "utilities/powerOfTwo.hpp"

// MacroAssembler extends Assembler by frequently used macros.
//
// Instructions for which a 'better' code sequence exists depending
// on arguments should also go in here.

class MacroAssembler: public Assembler {

 public:
  MacroAssembler(CodeBuffer* code) : Assembler(code) {
  }
  virtual ~MacroAssembler() {}

  void safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod);

  // Place a fence.i after code that may have been modified due to a safepoint.
  void safepoint_ifence();

  // Alignment
  void align(int modulus);

  // Stack frame creation/removal
  void enter() {
    addi(sp, sp, - 2 * wordSize);
    sd(lr, Address(sp, wordSize));
    sd(fp, Address(sp));
    mv(fp, sp);
  }

  void leave() {
    mv(sp, fp);
    ld(fp, Address(sp));
    ld(lr, Address(sp, wordSize));
    addi(sp, sp, 2 * wordSize);
  }

  // Support for getting the JavaThread pointer (i.e., a reference to thread-local information)
  // The pointer will be loaded into the thread register.
  void get_thread(Register thread);

  // Support for VM calls
  //
  // It is imperative that all calls into the VM are handled via the call_VM macros.
  // They make sure that the stack linkage is set up correctly. call_VM's correspond
  // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points.

  void call_VM(Register oop_result,
               address entry_point,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               address entry_point,
               Register arg_1,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               address entry_point,
               Register arg_1, Register arg_2,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               address entry_point,
               Register arg_1, Register arg_2, Register arg_3,
               bool check_exceptions = true);

  // Overloads with last_Java_sp
  void call_VM(Register oop_result,
               Register last_java_sp,
               address entry_point,
               int number_of_arguments = 0,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               Register last_java_sp,
               address entry_point,
               Register arg_1,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               Register last_java_sp,
               address entry_point,
               Register arg_1, Register arg_2,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               Register last_java_sp,
               address entry_point,
               Register arg_1, Register arg_2, Register arg_3,
               bool check_exceptions = true);
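
  // A minimal usage sketch (not a real call site): assuming an interpreter or
  // stub context where xthread holds the current JavaThread, and with the
  // runtime entry below being a stand-in name,
  //
  //   __ call_VM(x10, CAST_FROM_FN_PTR(address, InterpreterRuntime::some_entry), c_rarg1);
  //
  // sets up last_Java_frame, passes xthread as the implicit first argument,
  // leaves the oop result (if any) in x10, and by default checks for pending
  // exceptions on return.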

  void get_vm_result(Register oop_result, Register java_thread);
  void get_vm_result_2(Register metadata_result, Register java_thread);

  // These always tightly bind to MacroAssembler::call_VM_leaf_base
  // bypassing the virtual implementation
  void call_VM_leaf(address entry_point,
                    int number_of_arguments = 0);
  void call_VM_leaf(address entry_point,
                    Register arg_0);
  void call_VM_leaf(address entry_point,
                    Register arg_0, Register arg_1);
  void call_VM_leaf(address entry_point,
                    Register arg_0, Register arg_1, Register arg_2);

  // These always tightly bind to MacroAssembler::call_VM_base
  // bypassing the virtual implementation
  void super_call_VM_leaf(address entry_point, Register arg_0);
  void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1);
  void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2);
  void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3);

  // last Java Frame (fills frame anchor)
  void set_last_Java_frame(Register last_java_sp, Register last_java_fp, address last_java_pc, Register temp);
  void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Label &last_java_pc, Register temp);
  void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Register last_java_pc, Register temp);

  // thread in the default location (xthread)
  void reset_last_Java_frame(bool clear_fp);

  void call_native(address entry_point,
                   Register arg_0);
  void call_native_base(
    address entry_point,                // the entry point
    Label*  retaddr = NULL
  );

  virtual void call_VM_leaf_base(
    address entry_point,                // the entry point
    int     number_of_arguments,        // the number of arguments to pop after the call
    Label*  retaddr = NULL
  );

  virtual void call_VM_leaf_base(
    address entry_point,                // the entry point
    int     number_of_arguments,        // the number of arguments to pop after the call
    Label&  retaddr) {
    call_VM_leaf_base(entry_point, number_of_arguments, &retaddr);
  }

  virtual void call_VM_base(           // returns the register containing the thread upon return
    Register oop_result,               // where an oop-result ends up if any; use noreg otherwise
    Register java_thread,              // the thread if computed before     ; use noreg otherwise
    Register last_java_sp,             // to set up last_Java_frame in stubs; use noreg otherwise
    address  entry_point,              // the entry point
    int      number_of_arguments,      // the number of arguments (w/o thread) to pop after the call
    bool     check_exceptions          // whether to check for pending exceptions after return
  );

  void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions);

  virtual void check_and_handle_earlyret(Register java_thread);
  virtual void check_and_handle_popframe(Register java_thread);

  void resolve_weak_handle(Register result, Register tmp);
  void resolve_oop_handle(Register result, Register tmp = x15);
  void resolve_jobject(Register value, Register thread, Register tmp);

  void movoop(Register dst, jobject obj, bool immediate = false);
  void mov_metadata(Register dst, Metadata* obj);
  void bang_stack_size(Register size, Register tmp);
  void set_narrow_oop(Register dst, jobject obj);
  void set_narrow_klass(Register dst, Klass* k);

  void load_mirror(Register dst, Register method, Register tmp = x15);
  void access_load_at(BasicType type, DecoratorSet decorators, Register dst,
                      Address src, Register tmp1, Register thread_tmp);
  void access_store_at(BasicType type, DecoratorSet decorators, Address dst,
                       Register src, Register tmp1, Register thread_tmp);
  void load_klass(Register dst, Register src);
  void store_klass(Register dst, Register src);
  void cmp_klass(Register oop, Register trial_klass, Register tmp, Label &L);

  void encode_klass_not_null(Register r);
  void decode_klass_not_null(Register r);
  void encode_klass_not_null(Register dst, Register src, Register tmp = xheapbase);
  void decode_klass_not_null(Register dst, Register src, Register tmp = xheapbase);
  void decode_heap_oop_not_null(Register r);
  void decode_heap_oop_not_null(Register dst, Register src);
  void decode_heap_oop(Register d, Register s);
  void decode_heap_oop(Register r) { decode_heap_oop(r, r); }
  void encode_heap_oop(Register d, Register s);
  void encode_heap_oop(Register r) { encode_heap_oop(r, r); }
  void load_heap_oop(Register dst, Address src, Register tmp1 = noreg,
                     Register thread_tmp = noreg, DecoratorSet decorators = 0);
  void load_heap_oop_not_null(Register dst, Address src, Register tmp1 = noreg,
                              Register thread_tmp = noreg, DecoratorSet decorators = 0);
  void store_heap_oop(Address dst, Register src, Register tmp1 = noreg,
                      Register thread_tmp = noreg, DecoratorSet decorators = 0);

  void store_klass_gap(Register dst, Register src);

  // currently unimplemented
  // Used for storing NULL. All other oop constants should be
  // stored using routines that take a jobject.
  void store_heap_oop_null(Address dst);

  // This dummy is to prevent a call to store_heap_oop from
  // converting a zero (like NULL) into a Register by giving
  // the compiler two choices it can't resolve.

  void store_heap_oop(Address dst, void* dummy);

  // Support for NULL-checks
  //
  // Generates code that causes a NULL OS exception if the content of reg is NULL.
  // If the accessed location is M[reg + offset] and the offset is known, provide the
  // offset. No explicit code generation is needed if the offset is within a certain
  // range (0 <= offset <= page_size).

  virtual void null_check(Register reg, int offset = -1);
  static bool needs_explicit_null_check(intptr_t offset);
  static bool uses_implicit_null_check(void* address);
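
  // For example, a load from a small, known offset can rely on the hardware
  // trap (the offset 8 below is purely illustrative):
  //
  //   __ null_check(x10, 8);       // emits nothing: 0 <= 8 <= page_size
  //   __ ld(x11, Address(x10, 8)); // a NULL x10 faults here and the signal
  //                                // handler turns it into the Java exception
  //
  // whereas an unknown or out-of-range offset makes null_check emit an
  // explicit test against NULL.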

  // idiv variants which deal with MIN_INT/MIN_LONG as dividend and -1 as divisor
  int corrected_idivl(Register result, Register rs1, Register rs2,
                      bool want_remainder);
  int corrected_idivq(Register result, Register rs1, Register rs2,
                      bool want_remainder);
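
  // The corner case is overflow: in Java, min_jint / -1 and min_jlong / -1
  // must wrap back to the dividend (2^31 and 2^63 are not representable) and
  // the corresponding remainder must be 0, so this operand pair cannot simply
  // be handed to a divide whose semantics differ from Java's.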

  // interface method calling
  void lookup_interface_method(Register recv_klass,
                               Register intf_klass,
                               RegisterOrConstant itable_index,
                               Register method_result,
                               Register scan_temp,
                               Label& no_such_interface,
                               bool return_method = true);

  // virtual method calling
  // n.b. x86 allows RegisterOrConstant for vtable_index
  void lookup_virtual_method(Register recv_klass,
                             RegisterOrConstant vtable_index,
                             Register method_result);

  // Form an address from base + offset in Rd. Rd may or may not
  // actually be used: you must use the Address that is returned. It
  // is up to you to ensure that the shift provided matches the size
  // of your data.
  Address form_address(Register Rd, Register base, long byte_offset);
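
  // Typical use (registers and offset are illustrative only): materialize the
  // effective address once, then access memory through the returned Address,
  //
  //   Address adr = __ form_address(t0, x10, large_offset);
  //   __ ld(x11, adr);
  //
  // where t0 is clobbered only if large_offset does not fit in a 12-bit
  // immediate.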

  // allocation
  void eden_allocate(
    Register obj,                   // result: pointer to object after successful allocation
    Register var_size_in_bytes,     // object size in bytes if unknown at compile time; invalid otherwise
    int      con_size_in_bytes,     // object size in bytes if   known at compile time
    Register tmp1,                  // temp register
    Label&   slow_case,             // continuation point if fast allocation fails
    bool is_far = false
  );
  void tlab_allocate(
    Register obj,                   // result: pointer to object after successful allocation
    Register var_size_in_bytes,     // object size in bytes if unknown at compile time; invalid otherwise
    int      con_size_in_bytes,     // object size in bytes if   known at compile time
    Register tmp1,                  // temp register
    Register tmp2,                  // temp register
    Label&   slow_case,             // continuation point if fast allocation fails
    bool is_far = false
  );

  // Test sub_klass against super_klass, with fast and slow paths.

  // The fast path produces a tri-state answer: yes / no / maybe-slow.
  // One of the three labels can be NULL, meaning take the fall-through.
  // If super_check_offset is -1, the value is loaded up from super_klass.
  // No registers are killed, except temp_reg.
  void check_klass_subtype_fast_path(Register sub_klass,
                                     Register super_klass,
                                     Register temp_reg,
                                     Label* L_success,
                                     Label* L_failure,
                                     Label* L_slow_path,
                                     Register super_check_offset = noreg);

  // The rest of the type check; must be wired to a corresponding fast path.
  // It does not repeat the fast path logic, so don't use it standalone.
  // The temp_reg and temp2_reg can be noreg, if no temps are available.
  // Updates the sub's secondary super cache as necessary.
  void check_klass_subtype_slow_path(Register sub_klass,
                                     Register super_klass,
                                     Register temp_reg,
                                     Register temp2_reg,
                                     Label* L_success,
                                     Label* L_failure);

  void check_klass_subtype(Register sub_klass,
                           Register super_klass,
                           Register temp_reg,
                           Label& L_success);
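
  // A minimal sketch of wiring the two halves together (register choices are
  // illustrative):
  //
  //   Label L_ok, L_fail;
  //   __ check_klass_subtype_fast_path(x10, x11, t0, &L_ok, &L_fail, NULL);
  //   // falling through here means "maybe": run the slow path
  //   __ check_klass_subtype_slow_path(x10, x11, t0, t1, &L_ok, &L_fail);
  //
  // check_klass_subtype() bundles exactly this pairing for the common
  // "branch to L_success, otherwise fall through" case.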

  Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0);

  // only if +VerifyOops
  void verify_oop(Register reg, const char* s = "broken oop");
  void verify_oop_addr(Address addr, const char* s = "broken oop addr");

  void _verify_method_ptr(Register reg, const char* msg, const char* file, int line) {}
  void _verify_klass_ptr(Register reg, const char* msg, const char* file, int line) {}

#define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__)
#define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__)

  // A more convenient access to fence for our purposes.
  // We use four bits to indicate the read and write bits of the predecessor
  // and successor sets, and extend i for r, o for w when UseConservativeFence
  // is enabled.
  enum Membar_mask_bits {
    StoreStore = 0b0101,               // (pred = ow   + succ =   ow)
    LoadStore  = 0b1001,               // (pred = ir   + succ =   ow)
    StoreLoad  = 0b0110,               // (pred = ow   + succ =   ir)
    LoadLoad   = 0b1010,               // (pred = ir   + succ =   ir)
    AnyAny     = LoadStore | StoreLoad // (pred = iorw + succ = iorw)
  };

  void membar(uint32_t order_constraint);

  static void membar_mask_to_pred_succ(uint32_t order_constraint, uint32_t& predecessor, uint32_t& successor) {
    predecessor = (order_constraint >> 2) & 0x3;
    successor = order_constraint & 0x3;

    // extend rw -> iorw:
    // 01(w) -> 0101(ow)
    // 10(r) -> 1010(ir)
    // 11(rw)-> 1111(iorw)
    if (UseConservativeFence) {
      predecessor |= predecessor << 2;
      successor |= successor << 2;
    }
  }

  static int pred_succ_to_membar_mask(uint32_t predecessor, uint32_t successor) {
    return ((predecessor & 0x3) << 2) | (successor & 0x3);
  }
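
  // Worked example: membar(StoreLoad) carries mask 0b0110, so
  // membar_mask_to_pred_succ() computes predecessor = 0b01 (w) and
  // successor = 0b10 (r); with UseConservativeFence these widen to
  // 0b0101 (ow) and 0b1010 (ir), corresponding to a "fence ow, ir"
  // instruction.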

  // prints msg, dumps registers and stops execution
  void stop(const char* msg);

  static void debug64(char* msg, int64_t pc, int64_t regs[]);

  void unimplemented(const char* what = "");

  void should_not_reach_here()                   { stop("should not reach here"); }

  virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr,
                                                Register tmp,
                                                int offset) {
    return RegisterOrConstant(tmp);
  }

  static address target_addr_for_insn(address insn_addr);

  // Required platform-specific helpers for Label::patch_instructions.
  // They _shadow_ the declarations in AbstractAssembler, which are undefined.
  static int pd_patch_instruction_size(address branch, address target);
  static void pd_patch_instruction(address branch, address target, const char* file = NULL, int line = 0) {
    pd_patch_instruction_size(branch, target);
  }
  static address pd_call_destination(address branch) {
    return target_addr_for_insn(branch);
  }

  static int patch_oop(address insn_addr, address o);
  address emit_trampoline_stub(int insts_call_instruction_offset, address target);
  void emit_static_call_stub();

  // The following 4 methods return the offset of the appropriate move instruction

  // Support for fast byte/short loading with zero extension (depending on particular CPU)
  int load_unsigned_byte(Register dst, Address src);
  int load_unsigned_short(Register dst, Address src);

  // Support for fast byte/short loading with sign extension (depending on particular CPU)
  int load_signed_byte(Register dst, Address src);
  int load_signed_short(Register dst, Address src);

  // Load and store values by size and signed-ness
  void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg);
  void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg);

 public:
  // enum used for riscv64-to-x86 linkage to define return type of x86 function
  enum ret_type { ret_type_void, ret_type_integral, ret_type_float, ret_type_double };

  // Standard pseudoinstructions
  void nop();
  void mv(Register Rd, Register Rs);
  void notr(Register Rd, Register Rs);
  void neg(Register Rd, Register Rs);
  void negw(Register Rd, Register Rs);
  void sext_w(Register Rd, Register Rs);        // sign-extend Rs[31:0] into Rd
  void seqz(Register Rd, Register Rs);          // set if = zero
  void snez(Register Rd, Register Rs);          // set if != zero
  void sltz(Register Rd, Register Rs);          // set if < zero
  void sgtz(Register Rd, Register Rs);          // set if > zero

  // Float pseudoinstructions
  void fmv_s(FloatRegister Rd, FloatRegister Rs);
  void fabs_s(FloatRegister Rd, FloatRegister Rs);    // single-precision absolute value
  void fneg_s(FloatRegister Rd, FloatRegister Rs);

  // Double pseudoinstructions
  void fmv_d(FloatRegister Rd, FloatRegister Rs);
  void fabs_d(FloatRegister Rd, FloatRegister Rs);
  void fneg_d(FloatRegister Rd, FloatRegister Rs);

  // Pseudoinstructions for control and status registers
  void rdinstret(Register Rd);                  // read instruction-retired counter
  void rdcycle(Register Rd);                    // read cycle counter
  void rdtime(Register Rd);                     // read time
  void csrr(Register Rd, unsigned csr);         // read csr
  void csrw(unsigned csr, Register Rs);         // write csr
  void csrs(unsigned csr, Register Rs);         // set bits in csr
  void csrc(unsigned csr, Register Rs);         // clear bits in csr
  void csrwi(unsigned csr, unsigned imm);
  void csrsi(unsigned csr, unsigned imm);
  void csrci(unsigned csr, unsigned imm);
  void frcsr(Register Rd);                      // read floating-point csr
  void fscsr(Register Rd, Register Rs);         // swap floating-point csr
  void fscsr(Register Rs);                      // write floating-point csr
  void frrm(Register Rd);                       // read floating-point rounding mode
  void fsrm(Register Rd, Register Rs);          // swap floating-point rounding mode
  void fsrm(Register Rs);                       // write floating-point rounding mode
  void fsrmi(Register Rd, unsigned imm);
  void fsrmi(unsigned imm);
  void frflags(Register Rd);                    // read floating-point exception flags
  void fsflags(Register Rd, Register Rs);       // swap floating-point exception flags
  void fsflags(Register Rs);                    // write floating-point exception flags
  void fsflagsi(Register Rd, unsigned imm);
  void fsflagsi(unsigned imm);

  void beqz(Register Rs, const address &dest);
  void blez(Register Rs, const address &dest);
  void bgez(Register Rs, const address &dest);
  void bltz(Register Rs, const address &dest);
  void bgtz(Register Rs, const address &dest);
  void bnez(Register Rs, const address &dest);
  void la(Register Rd, Label &label);
  void la(Register Rd, const address &dest);
  void la(Register Rd, const Address &adr);
  // label
  void beqz(Register Rs, Label &l, bool is_far = false);
  void bnez(Register Rs, Label &l, bool is_far = false);
  void blez(Register Rs, Label &l, bool is_far = false);
  void bgez(Register Rs, Label &l, bool is_far = false);
  void bltz(Register Rs, Label &l, bool is_far = false);
  void bgtz(Register Rs, Label &l, bool is_far = false);
  void float_beq(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void float_bne(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void float_ble(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void float_bge(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void float_blt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void float_bgt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void double_beq(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void double_bne(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void double_ble(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void double_bge(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void double_blt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void double_bgt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);

  void push_reg(RegSet regs, Register stack) { if (regs.bits()) { push_reg(regs.bits(), stack); } }
  void pop_reg(RegSet regs, Register stack) { if (regs.bits()) { pop_reg(regs.bits(), stack); } }
  void push_reg(Register Rs);
  void pop_reg(Register Rd);
  int  push_reg(unsigned int bitset, Register stack);
  int  pop_reg(unsigned int bitset, Register stack);
  void push_fp(RegSet regs, Register stack) { if (regs.bits()) push_fp(regs.bits(), stack); }
  void pop_fp(RegSet regs, Register stack) { if (regs.bits()) pop_fp(regs.bits(), stack); }
#ifdef COMPILER2
  void push_vp(RegSet regs, Register stack) { if (regs.bits()) push_vp(regs.bits(), stack); }
  void pop_vp(RegSet regs, Register stack) { if (regs.bits()) pop_vp(regs.bits(), stack); }
#endif // COMPILER2

  // Push and pop everything that might be clobbered by a native
  // runtime call except t0 and t1. (They are always
  // temporary registers, so we don't have to protect them.)
  // Additional registers can be excluded in a passed RegSet.
  void push_call_clobbered_registers_except(RegSet exclude);
  void pop_call_clobbered_registers_except(RegSet exclude);

  void push_call_clobbered_registers() {
    push_call_clobbered_registers_except(RegSet());
  }
  void pop_call_clobbered_registers() {
    pop_call_clobbered_registers_except(RegSet());
  }

  void pusha();
  void popa();
  void push_CPU_state(bool save_vectors = false, int vector_size_in_bytes = 0);
  void pop_CPU_state(bool restore_vectors = false, int vector_size_in_bytes = 0);

  // if heap base register is used - reinit it with the correct value
  void reinit_heapbase();

  void bind(Label& L) {
    Assembler::bind(L);
    // fences across basic blocks should not be merged
    code()->clear_last_insn();
  }

  // mv
  void mv(Register Rd, int64_t imm64);
  void mv(Register Rd, int imm);
  void mvw(Register Rd, int32_t imm32);
  void mv(Register Rd, Address dest);
  void mv(Register Rd, address addr);
  void mv(Register Rd, RegisterOrConstant src);

  // logic
  void andrw(Register Rd, Register Rs1, Register Rs2);
  void orrw(Register Rd, Register Rs1, Register Rs2);
  void xorrw(Register Rd, Register Rs1, Register Rs2);

  // grev
  void reverseb16(Register Rd, Register Rs, Register Rtmp1 = t0, Register Rtmp2 = t1); // reverse bytes in 16-bit and move to lower
  void reverseh32(Register Rd, Register Rs, Register Rtmp1 = t0, Register Rtmp2 = t1); // reverse half-words in 32-bit and move to lower
  void grevh(Register Rd, Register Rs, Register Rtmp = t0);                            // basic reverse bytes in 16-bit halfwords, sign-extend
  void grev16w(Register Rd, Register Rs, Register Rtmp1 = t0, Register Rtmp2 = t1);    // reverse bytes in 16-bit halfwords(32), sign-extend
  void grevw(Register Rd, Register Rs, Register Rtmp1 = t0, Register Rtmp2 = t1);      // reverse bytes(32), sign-extend
  void grev16(Register Rd, Register Rs, Register Rtmp1 = t0, Register Rtmp2 = t1);     // reverse bytes in 16-bit halfwords
  void grev32(Register Rd, Register Rs, Register Rtmp1 = t0, Register Rtmp2 = t1);     // reverse bytes in 32-bit words
  void grev(Register Rd, Register Rs, Register Rtmp1 = t0, Register Rtmp2 = t1);       // reverse bytes in 64-bit double-words
  void grevhu(Register Rd, Register Rs, Register Rtmp = t0);                           // basic reverse bytes in 16-bit halfwords, zero-extend
  void grev16wu(Register Rd, Register Rs, Register Rtmp1 = t0, Register Rtmp2 = t1);   // reverse bytes in 16-bit halfwords(32), zero-extend
  void grevwu(Register Rd, Register Rs, Register Rtmp1 = t0, Register Rtmp2 = t1);     // reverse bytes(32), zero-extend

  void andi(Register Rd, Register Rn, int64_t imm, Register tmp = t0);
  void orptr(Address adr, RegisterOrConstant src, Register tmp1 = t0, Register tmp2 = t1);

  void cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp, Label &succeed, Label *fail);
  void cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, Label &succeed, Label *fail);
  void cmpxchg(Register addr, Register expected,
               Register new_val,
               enum operand_size size,
               Assembler::Aqrl acquire, Assembler::Aqrl release,
               Register result, bool result_as_bool = false);
  void cmpxchg_weak(Register addr, Register expected,
                    Register new_val,
                    enum operand_size size,
                    Assembler::Aqrl acquire, Assembler::Aqrl release,
                    Register result);
  void cmpxchg_narrow_value_helper(Register addr, Register expected,
                                   Register new_val,
                                   enum operand_size size,
                                   Register tmp1, Register tmp2, Register tmp3);
  void cmpxchg_narrow_value(Register addr, Register expected,
                            Register new_val,
                            enum operand_size size,
                            Assembler::Aqrl acquire, Assembler::Aqrl release,
                            Register result, bool result_as_bool,
                            Register tmp1, Register tmp2, Register tmp3);
  void weak_cmpxchg_narrow_value(Register addr, Register expected,
                                 Register new_val,
                                 enum operand_size size,
                                 Assembler::Aqrl acquire, Assembler::Aqrl release,
                                 Register result,
                                 Register tmp1, Register tmp2, Register tmp3);

  void atomic_add(Register prev, RegisterOrConstant incr, Register addr);
  void atomic_addw(Register prev, RegisterOrConstant incr, Register addr);
  void atomic_addal(Register prev, RegisterOrConstant incr, Register addr);
  void atomic_addalw(Register prev, RegisterOrConstant incr, Register addr);

  void atomic_xchg(Register prev, Register newv, Register addr);
  void atomic_xchgw(Register prev, Register newv, Register addr);
  void atomic_xchgal(Register prev, Register newv, Register addr);
  void atomic_xchgalw(Register prev, Register newv, Register addr);
  void atomic_xchgwu(Register prev, Register newv, Register addr);
  void atomic_xchgalwu(Register prev, Register newv, Register addr);

  static bool far_branches() {
    return ReservedCodeCacheSize > branch_range;
  }

  // atomic
  void atomic_incw(Register counter_addr, Register tmp1);
  void atomic_incw(Address counter_addr, Register tmp1, Register tmp2) {
    la(tmp1, counter_addr);
    atomic_incw(tmp1, tmp2);
  }

  // Jumps that can reach anywhere in the code cache.
  // Trashes tmp.
  void far_call(Address entry, CodeBuffer *cbuf = NULL, Register tmp = t0);
  void far_jump(Address entry, CodeBuffer *cbuf = NULL, Register tmp = t0);

  static int far_branch_size() {
    if (far_branches()) {
      return 2 * 4;  // auipc + jalr, see far_call() & far_jump()
    } else {
      return 4;
    }
  }

  void load_byte_map_base(Register reg);

  void bang_stack_with_offset(int offset) {
    // stack grows down, caller passes positive offset
    assert(offset > 0, "must bang with negative offset");
    sub(t1, sp, offset);
    sd(zr, Address(t1));
  }

  void la_patchable(Register reg1, const Address &dest, int32_t &offset);

  virtual void _call_Unimplemented(address call_site) {
    mv(t1, call_site);
  }

  #define call_Unimplemented() _call_Unimplemented((address)__PRETTY_FUNCTION__)

  void clear_upper_bits(Register r, unsigned upper_bits) {
    assert(upper_bits < 64, "bit count to clear must be less than 64");

    int sig_bits = 64 - upper_bits; // significant bits
    if (sig_bits < 12) {
      andi(r, r, (1UL << sig_bits) - 1);
    } else {
      zero_ext(r, r, upper_bits);
    }
  }
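
  // For example, clear_upper_bits(x10, 56) leaves 8 significant bits, which
  // fits a 12-bit immediate, so a single "andi x10, x10, 0xff" suffices;
  // clear_upper_bits(x10, 32) keeps 32 bits and must go through zero_ext(),
  // since a 32-bit mask is not encodable as an andi immediate.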

  // Frame creation and destruction shared between JITs.
  void build_frame(int framesize);
  void remove_frame(int framesize);

  void reserved_stack_check();

  void get_polling_page(Register dest, relocInfo::relocType rtype);
  address read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype);

  address trampoline_call(Address entry, CodeBuffer* cbuf = NULL);
  address ic_call(address entry, jint method_index = 0);

  void add_memory_int64(const Address dst, int64_t imm);
  void add_memory_int32(const Address dst, int32_t imm);

  void cmpptr(Register src1, Address src2, Label& equal);

  void clinit_barrier(Register klass, Register tmp, Label* L_fast_path = NULL, Label* L_slow_path = NULL);
  void load_method_holder_cld(Register result, Register method);
  void load_method_holder(Register holder, Register method);

  void compute_index(Register str1, Register trailing_zeros, Register match_mask,
                     Register result, Register char_tmp, Register tmp,
                     bool haystack_isL);
  void compute_match_mask(Register src, Register pattern, Register match_mask,
                          Register mask1, Register mask2);

  void inflate_lo32(Register Rd, Register Rs, Register Rtmp1 = t0, Register Rtmp2 = t1);
  void inflate_hi32(Register Rd, Register Rs, Register Rtmp1 = t0, Register Rtmp2 = t1);

  void ctzc_bit(Register Rd, Register Rs, bool isLL = false, Register Rtmp1 = t0, Register Rtmp2 = t1);

  void zero_words(Register base, u_int64_t cnt);
  address zero_words(Register ptr, Register cnt);
  void fill_words(Register base, Register cnt, Register value);
  void zero_memory(Register addr, Register len, Register tmp1);

  // The "safe" variants of the float conversion instructions handle inputs
  // that would otherwise produce wrong results: converting NaN, +Inf or -Inf
  // to an integer must follow the Java-defined semantics (NaN converts to 0,
  // infinities saturate), so these helpers special-case such values to
  // produce the correct results.
  void fcvt_w_s_safe(Register dst, FloatRegister src, Register temp = t0);
  void fcvt_l_s_safe(Register dst, FloatRegister src, Register temp = t0);
  void fcvt_w_d_safe(Register dst, FloatRegister src, Register temp = t0);
  void fcvt_l_d_safe(Register dst, FloatRegister src, Register temp = t0);

  // vector load/store unit-stride instructions
  void vlex_v(VectorRegister vd, Register base, Assembler::SEW sew, VectorMask vm = unmasked) {
    switch (sew) {
      case Assembler::e64:
        vle64_v(vd, base, vm);
        break;
      case Assembler::e32:
        vle32_v(vd, base, vm);
        break;
      case Assembler::e16:
        vle16_v(vd, base, vm);
        break;
      case Assembler::e8: // fall through
      default:
        vle8_v(vd, base, vm);
        break;
    }
  }

  void vsex_v(VectorRegister store_data, Register base, Assembler::SEW sew, VectorMask vm = unmasked) {
    switch (sew) {
      case Assembler::e64:
        vse64_v(store_data, base, vm);
        break;
      case Assembler::e32:
        vse32_v(store_data, base, vm);
        break;
      case Assembler::e16:
        vse16_v(store_data, base, vm);
        break;
      case Assembler::e8: // fall through
      default:
        vse8_v(store_data, base, vm);
        break;
    }
  }

  static const int zero_words_block_size;

  void cast_primitive_type(BasicType type, Register Rt) {
    switch (type) {
      case T_BOOLEAN:
        sltu(Rt, zr, Rt);
        break;
      case T_CHAR   :
        zero_ext(Rt, Rt, registerSize - 16);
        break;
      case T_BYTE   :
        sign_ext(Rt, Rt, registerSize - 8);
        break;
      case T_SHORT  :
        sign_ext(Rt, Rt, registerSize - 16);
        break;
      case T_INT    :
        addw(Rt, Rt, zr);
        break;
      case T_LONG   : /* nothing to do */        break;
      case T_VOID   : /* nothing to do */        break;
      case T_FLOAT  : /* nothing to do */        break;
      case T_DOUBLE : /* nothing to do */        break;
      default: ShouldNotReachHere();
    }
  }

  // float cmp with unordered_result
  void float_compare(Register result, FloatRegister Rs1, FloatRegister Rs2, int unordered_result);
  void double_compare(Register result, FloatRegister Rs1, FloatRegister Rs2, int unordered_result);

  // Zero/Sign-extend
  void zero_ext(Register dst, Register src, int clear_bits);
  void sign_ext(Register dst, Register src, int clear_bits);

  // compare src1 and src2 and get -1/0/1 in dst.
  // if [src1 > src2], dst = 1;
  // if [src1 == src2], dst = 0;
  // if [src1 < src2], dst = -1;
  void cmp_l2i(Register dst, Register src1, Register src2, Register tmp = t0);

  int push_fp(unsigned int bitset, Register stack);
  int pop_fp(unsigned int bitset, Register stack);
  int push_vp(unsigned int bitset, Register stack);
  int pop_vp(unsigned int bitset, Register stack);

  // vext
  void vmnot_m(VectorRegister vd, VectorRegister vs);
  void vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked);
  void vfneg_v(VectorRegister vd, VectorRegister vs);

private:

#ifdef ASSERT
  // Template shorthand for resetting a set of Labels before returning, used to
  // clean up after a failed attempt at trampoline call generation (see
  // trampoline_call() above).
  template<typename Label, typename... More>
  void reset_labels(Label& lbl, More&... more) {
    lbl.reset(); reset_labels(more...);
  }
  template<typename Label>
  void reset_labels(Label& lbl) {
    lbl.reset();
  }
#endif
  void repne_scan(Register addr, Register value, Register count, Register temp);

  // Return true if an address is within the 48-bit riscv64 address
  // space.
  bool is_valid_riscv64_address(address addr) {
    return ((uintptr_t)addr >> 48) == 0;
  }

  void ld_constant(Register dest, const Address &const_addr) {
    if (NearCpool) {
      ld(dest, const_addr);
    } else {
      int32_t offset = 0;
      la_patchable(dest, InternalAddress(const_addr.target()), offset);
      ld(dest, Address(dest, offset));
    }
  }

  int bitset_to_fregs(unsigned int bitset, unsigned char* regs);
  int bitset_to_regs(unsigned int bitset, unsigned char* regs);
  Address add_memory_helper(const Address dst);

  void load_reserved(Register addr, enum operand_size size, Assembler::Aqrl acquire);
  void store_conditional(Register addr, Register new_val, enum operand_size size, Assembler::Aqrl release);

  // Check the current thread doesn't need a cross modify fence.
  void verify_cross_modify_fence_not_required() PRODUCT_RETURN;
};

#ifdef ASSERT
inline bool AbstractAssembler::pd_check_instruction_mark() { return false; }
#endif

/**
 * class SkipIfEqual:
 *
 * Instantiating this class will result in assembly code being output that will
 * jump around any code emitted between the creation of the instance and its
 * automatic destruction at the end of a scope block, depending on the value of
 * the flag passed to the constructor, which will be checked at run-time.
 */
class SkipIfEqual {
 private:
  MacroAssembler* _masm;
  Label _label;

 public:
  SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value);
  ~SkipIfEqual();
};
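
// A sketch of the intended RAII usage (the flag chosen here is illustrative;
// any bool VM flag address works):
//
//   {
//     SkipIfEqual skip(masm, &DTraceMethodProbes, false);
//     // code emitted here is jumped over at run-time while the flag is false
//   }
//   // the destructor binds the internal label; execution rejoins here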
#endif // CPU_RISCV_MACROASSEMBLER_RISCV_HPP