1 /*
2 * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
3 * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
4 * Copyright (c) 2020, 2024, Huawei Technologies Co., Ltd. All rights reserved.
5 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 *
7 * This code is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License version 2 only, as
9 * published by the Free Software Foundation.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 *
25 */
26
27 #ifndef CPU_RISCV_MACROASSEMBLER_RISCV_HPP
28 #define CPU_RISCV_MACROASSEMBLER_RISCV_HPP
29
30 #include "asm/assembler.inline.hpp"
31 #include "code/vmreg.hpp"
32 #include "metaprogramming/enableIf.hpp"
33 #include "oops/compressedOops.hpp"
34 #include "utilities/powerOfTwo.hpp"
35
36 // MacroAssembler extends Assembler by frequently used macros.
37 //
38 // Instructions for which a 'better' code sequence exists depending
39 // on arguments should also go in here.
40
41 class MacroAssembler: public Assembler {
42
43 public:
44
45 MacroAssembler(CodeBuffer* code) : Assembler(code) {}
46
47 void safepoint_poll(Label& slow_path, bool at_return, bool in_nmethod, Register tmp_reg = t0);
48
49 // Alignment
50 int align(int modulus, int extra_offset = 0);
51
52 static inline void assert_alignment(address pc, int alignment = MacroAssembler::instruction_size) {
53 assert(is_aligned(pc, alignment), "bad alignment");
54 }
55
56 // nop
57 void post_call_nop();
58
59 // Stack frame creation/removal
60 // Note that SP must be updated to the right place before saving/restoring RA and FP
61 // because signal-based thread suspend/resume could happen asynchronously.
62 void enter() {
63 subi(sp, sp, 2 * wordSize);
64 sd(ra, Address(sp, wordSize));
65 sd(fp, Address(sp));
66 addi(fp, sp, 2 * wordSize);
67 }
68
69 void leave() {
70 subi(sp, fp, 2 * wordSize);
71 ld(fp, Address(sp));
72 ld(ra, Address(sp, wordSize));
73 addi(sp, sp, 2 * wordSize);
74 }
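  // Illustrative frame layout after enter() (derived from the code above; fp ends
  // up equal to the caller's sp):
  //   [fp - wordSize]     : saved ra
  //   [fp - 2 * wordSize] : saved fp   <- sp
  // leave() unwinds this in the reverse order.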
75
76
77 // Support for getting the JavaThread pointer (i.e., a reference to thread-local information)
78 // The pointer will be loaded into the thread register.
79 void get_thread(Register thread);
80
81 // Support for VM calls
82 //
83 // It is imperative that all calls into the VM are handled via the call_VM macros.
84 // They make sure that the stack linkage is set up correctly. call_VM's correspond
85 // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points.
86
87 void call_VM(Register oop_result,
88 address entry_point,
89 bool check_exceptions = true);
90 void call_VM(Register oop_result,
91 address entry_point,
92 Register arg_1,
93 bool check_exceptions = true);
94 void call_VM(Register oop_result,
95 address entry_point,
96 Register arg_1, Register arg_2,
97 bool check_exceptions = true);
98 void call_VM(Register oop_result,
99 address entry_point,
100 Register arg_1, Register arg_2, Register arg_3,
101 bool check_exceptions = true);
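  // Illustrative use (SomeRuntime::entry is a hypothetical runtime routine; the
  // argument is assumed to already be in a register):
  //   call_VM(x10, CAST_FROM_FN_PTR(address, SomeRuntime::entry), c_rarg1);
  // The current thread is passed implicitly, the last Java frame is recorded, and
  // with check_exceptions == true any pending exception is forwarded on return.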
102
103 // Overloadings with last_Java_sp
104 void call_VM(Register oop_result,
105 Register last_java_sp,
106 address entry_point,
107 int number_of_arguments = 0,
108 bool check_exceptions = true);
109 void call_VM(Register oop_result,
110 Register last_java_sp,
111 address entry_point,
112 Register arg_1,
113 bool check_exceptions = true);
114 void call_VM(Register oop_result,
115 Register last_java_sp,
116 address entry_point,
117 Register arg_1, Register arg_2,
118 bool check_exceptions = true);
119 void call_VM(Register oop_result,
120 Register last_java_sp,
121 address entry_point,
122 Register arg_1, Register arg_2, Register arg_3,
123 bool check_exceptions = true);
124
125 void get_vm_result_oop(Register oop_result, Register java_thread);
126 void get_vm_result_metadata(Register metadata_result, Register java_thread);
127
128 // These always tightly bind to MacroAssembler::call_VM_leaf_base
129 // bypassing the virtual implementation
130 void call_VM_leaf(address entry_point,
131 int number_of_arguments = 0);
132 void call_VM_leaf(address entry_point,
133 Register arg_0);
134 void call_VM_leaf(address entry_point,
135 Register arg_0, Register arg_1);
136 void call_VM_leaf(address entry_point,
137 Register arg_0, Register arg_1, Register arg_2);
138
139 // These always tightly bind to MacroAssembler::call_VM_base
140 // bypassing the virtual implementation
141 void super_call_VM_leaf(address entry_point, Register arg_0);
142 void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1);
143 void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2);
144 void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3);
145
146 // last Java Frame (fills frame anchor)
147 void set_last_Java_frame(Register last_java_sp, Register last_java_fp, address last_java_pc, Register tmp);
148 void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Label &last_java_pc, Register tmp);
149 void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Register last_java_pc);
150
151 // thread in the default location (xthread)
152 void reset_last_Java_frame(bool clear_fp);
153
154 virtual void call_VM_leaf_base(
155 address entry_point, // the entry point
156 int number_of_arguments, // the number of arguments to pop after the call
157 Label* retaddr = nullptr
158 );
159
160 virtual void call_VM_leaf_base(
161 address entry_point, // the entry point
162 int number_of_arguments, // the number of arguments to pop after the call
163 Label& retaddr) {
164 call_VM_leaf_base(entry_point, number_of_arguments, &retaddr);
165 }
166
167 virtual void call_VM_base( // returns the register containing the thread upon return
168 Register oop_result, // where an oop-result ends up if any; use noreg otherwise
169 Register java_thread, // the thread if computed before; use noreg otherwise
170 Register last_java_sp, // to set up last_Java_frame in stubs; use noreg otherwise
171 Label* return_pc, // to set up last_Java_frame; use nullptr otherwise
172 address entry_point, // the entry point
173 int number_of_arguments, // the number of arguments (w/o thread) to pop after the call
174 bool check_exceptions // whether to check for pending exceptions after return
175 );
176
177 void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions);
178
179 virtual void check_and_handle_earlyret(Register java_thread);
180 virtual void check_and_handle_popframe(Register java_thread);
181
182 void resolve_weak_handle(Register result, Register tmp1, Register tmp2);
183 void resolve_oop_handle(Register result, Register tmp1, Register tmp2);
184 void resolve_jobject(Register value, Register tmp1, Register tmp2);
185 void resolve_global_jobject(Register value, Register tmp1, Register tmp2);
186
187 void movoop(Register dst, jobject obj);
188 void mov_metadata(Register dst, Metadata* obj);
189 void bang_stack_size(Register size, Register tmp);
190 void set_narrow_oop(Register dst, jobject obj);
191 void set_narrow_klass(Register dst, Klass* k);
192
193 void load_mirror(Register dst, Register method, Register tmp1, Register tmp2);
194 void access_load_at(BasicType type, DecoratorSet decorators, Register dst,
195 Address src, Register tmp1, Register tmp2);
196 void access_store_at(BasicType type, DecoratorSet decorators, Address dst,
197 Register val, Register tmp1, Register tmp2, Register tmp3);
198 void load_klass(Register dst, Register src, Register tmp = t0);
199 void load_narrow_klass_compact(Register dst, Register src);
200 void store_klass(Register dst, Register src, Register tmp = t0);
201 void cmp_klass_compressed(Register oop, Register trial_klass, Register tmp, Label &L, bool equal);
202
203 void encode_klass_not_null(Register r, Register tmp = t0);
204 void decode_klass_not_null(Register r, Register tmp = t0);
205 void encode_klass_not_null(Register dst, Register src, Register tmp);
206 void decode_klass_not_null(Register dst, Register src, Register tmp);
207 void decode_heap_oop_not_null(Register r);
208 void decode_heap_oop_not_null(Register dst, Register src);
209 void decode_heap_oop(Register d, Register s);
210 void decode_heap_oop(Register r) { decode_heap_oop(r, r); }
211 void encode_heap_oop_not_null(Register r);
212 void encode_heap_oop_not_null(Register dst, Register src);
213 void encode_heap_oop(Register d, Register s);
214 void encode_heap_oop(Register r) { encode_heap_oop(r, r); };
215 void load_heap_oop(Register dst, Address src, Register tmp1,
216 Register tmp2, DecoratorSet decorators = 0);
217 void load_heap_oop_not_null(Register dst, Address src, Register tmp1,
218 Register tmp2, DecoratorSet decorators = 0);
219 void store_heap_oop(Address dst, Register val, Register tmp1,
220 Register tmp2, Register tmp3, DecoratorSet decorators = 0);
221
222 void store_klass_gap(Register dst, Register src);
223
224 // currently unimplemented
225 // Used for storing null. All other oop constants should be
226 // stored using routines that take a jobject.
227 void store_heap_oop_null(Address dst);
228
229 // This dummy is to prevent a call to store_heap_oop from
230 // converting a zero (like null) into a Register by giving
231 // the compiler two choices it can't resolve
232
233 void store_heap_oop(Address dst, void* dummy);
234
235 // Support for null-checks
236 //
237 // Generates code that causes an OS exception if the content of reg is null.
238 // If the accessed location is M[reg + offset] and the offset is known, provide the
239 // offset. No explicit code generation is needed if the offset is within a certain
240 // range (0 <= offset <= page_size).
241
242 virtual void null_check(Register reg, int offset = -1);
243 static bool needs_explicit_null_check(intptr_t offset);
244 static bool uses_implicit_null_check(void* address);
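  // Illustrative (hypothetical offset): null_check(x10, 8) emits no explicit test,
  // because a subsequent access to M[x10 + 8] with a null x10 faults within the
  // first page and is handled as an implicit null check; an unknown (-1) or
  // out-of-range offset falls back to an explicit test of the register.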
245
246 // interface method calling
247 void lookup_interface_method(Register recv_klass,
248 Register intf_klass,
249 RegisterOrConstant itable_index,
250 Register method_result,
251 Register scan_tmp,
252 Label& no_such_interface,
253 bool return_method = true);
254
255 void lookup_interface_method_stub(Register recv_klass,
256 Register holder_klass,
257 Register resolved_klass,
258 Register method_result,
259 Register temp_reg,
260 Register temp_reg2,
261 int itable_index,
262 Label& L_no_such_interface);
263
264 // virtual method calling
265 // n.b. x86 allows RegisterOrConstant for vtable_index
266 void lookup_virtual_method(Register recv_klass,
267 RegisterOrConstant vtable_index,
268 Register method_result);
269
270 // Form an address from base + offset in Rd. Rd may or may not
271 // actually be used: you must use the Address that is returned. It
272 // is up to you to ensure that the shift provided matches the size
273 // of your data.
274 Address form_address(Register Rd, Register base, int64_t byte_offset);
275
276 // Sometimes we get misaligned loads and stores, usually from Unsafe
277 // accesses, and these can exceed the offset range.
278 Address legitimize_address(Register Rd, const Address &adr) {
279 if (adr.getMode() == Address::base_plus_offset) {
280 if (!is_simm12(adr.offset())) {
281 return form_address(Rd, adr.base(), adr.offset());
282 }
283 }
284 return adr;
285 }
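  // Illustrative: for Address(x12, 1 << 20) the offset does not fit in a signed
  // 12-bit immediate, so the address is re-formed around Rd via form_address and
  // the returned Address (which must be the one actually used) has an encodable
  // offset; an address whose offset already fits is returned unchanged.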
286
287 // allocation
288 void tlab_allocate(
289 Register obj, // result: pointer to object after successful allocation
290 Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise
291 int con_size_in_bytes, // object size in bytes if known at compile time
292 Register tmp1, // temp register
293 Register tmp2, // temp register
294 Label& slow_case, // continuation point if fast allocation fails
295 bool is_far = false
296 );
297
298 // Test sub_klass against super_klass, with fast and slow paths.
299
300 // The fast path produces a tri-state answer: yes / no / maybe-slow.
301 // One of the three labels can be null, meaning take the fall-through.
302 // If super_check_offset is -1, the value is loaded up from super_klass.
303 // No registers are killed, except tmp_reg
304 void check_klass_subtype_fast_path(Register sub_klass,
305 Register super_klass,
306 Register tmp_reg,
307 Label* L_success,
308 Label* L_failure,
309 Label* L_slow_path,
310 Register super_check_offset = noreg);
311
312 // The rest of the type check; must be wired to a corresponding fast path.
313 // It does not repeat the fast path logic, so don't use it standalone.
314 // The tmp1_reg and tmp2_reg can be noreg, if no temps are available.
315 // Updates the sub's secondary super cache as necessary.
316 void check_klass_subtype_slow_path(Register sub_klass,
317 Register super_klass,
318 Register tmp1_reg,
319 Register tmp2_reg,
320 Label* L_success,
321 Label* L_failure,
322 bool set_cond_codes = false);
323
324 void check_klass_subtype_slow_path_linear(Register sub_klass,
325 Register super_klass,
326 Register tmp1_reg,
327 Register tmp2_reg,
328 Label* L_success,
329 Label* L_failure,
330 bool set_cond_codes = false);
331
332 void check_klass_subtype_slow_path_table(Register sub_klass,
333 Register super_klass,
334 Register tmp1_reg,
335 Register tmp2_reg,
336 Label* L_success,
337 Label* L_failure,
338 bool set_cond_codes = false);
339
340 // If r is valid, return r.
341 // If r is invalid, remove a register r2 from available_regs, add r2
342 // to regs_to_push, then return r2.
343 Register allocate_if_noreg(const Register r,
344 RegSetIterator<Register> &available_regs,
345 RegSet ®s_to_push);
346
347 // Secondary subtype checking
348 void lookup_secondary_supers_table_var(Register sub_klass,
349 Register r_super_klass,
350 Register result,
351 Register tmp1,
352 Register tmp2,
353 Register tmp3,
354 Register tmp4,
355 Label *L_success);
356
357 void population_count(Register dst, Register src, Register tmp1, Register tmp2);
358
359 // As above, but with a constant super_klass.
360 // The result is in Register result, not the condition codes.
361 bool lookup_secondary_supers_table_const(Register r_sub_klass,
362 Register r_super_klass,
363 Register result,
364 Register tmp1,
365 Register tmp2,
366 Register tmp3,
367 Register tmp4,
368 u1 super_klass_slot,
369 bool stub_is_near = false);
370
371 void verify_secondary_supers_table(Register r_sub_klass,
372 Register r_super_klass,
373 Register result,
374 Register tmp1,
375 Register tmp2,
376 Register tmp3);
377
378 void lookup_secondary_supers_table_slow_path(Register r_super_klass,
379 Register r_array_base,
380 Register r_array_index,
381 Register r_bitmap,
382 Register result,
383 Register tmp,
384 bool is_stub = true);
385
386 void check_klass_subtype(Register sub_klass,
387 Register super_klass,
388 Register tmp_reg,
389 Label& L_success);
390
391 Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0);
392
393 // only if +VerifyOops
394 void _verify_oop(Register reg, const char* s, const char* file, int line);
395 void _verify_oop_addr(Address addr, const char* s, const char* file, int line);
396
397 void _verify_oop_checked(Register reg, const char* s, const char* file, int line) {
398 if (VerifyOops) {
399 _verify_oop(reg, s, file, line);
400 }
401 }
402 void _verify_oop_addr_checked(Address reg, const char* s, const char* file, int line) {
403 if (VerifyOops) {
404 _verify_oop_addr(reg, s, file, line);
405 }
406 }
407
408 void _verify_method_ptr(Register reg, const char* msg, const char* file, int line) {}
409 void _verify_klass_ptr(Register reg, const char* msg, const char* file, int line) {}
410
411 #define verify_oop(reg) _verify_oop_checked(reg, "broken oop " #reg, __FILE__, __LINE__)
412 #define verify_oop_msg(reg, msg) _verify_oop_checked(reg, "broken oop " #reg ", " #msg, __FILE__, __LINE__)
413 #define verify_oop_addr(addr) _verify_oop_addr_checked(addr, "broken oop addr " #addr, __FILE__, __LINE__)
414 #define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__)
415 #define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__)
416
417 // A more convenient access to fence for our purposes
418 // We use four bits to indicate the read and write bits in the predecessors and successors,
419 // and extend r to ir and w to ow if UseConservativeFence is enabled.
420 enum Membar_mask_bits {
421 StoreStore = 0b0101, // (pred = w + succ = w)
422 LoadStore = 0b1001, // (pred = r + succ = w)
423 StoreLoad = 0b0110, // (pred = w + succ = r)
424 LoadLoad = 0b1010, // (pred = r + succ = r)
425 AnyAny = LoadStore | StoreLoad // (pred = rw + succ = rw)
426 };
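  // Illustrative mapping (see membar_mask_to_pred_succ below): membar(StoreLoad)
  // has pred = 0b01 (w) and succ = 0b10 (r), i.e. roughly "fence w, r", widened
  // to "fence ow, ir" when UseConservativeFence is enabled.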
427
428 void membar(uint32_t order_constraint);
429
430 private:
431
432 static void membar_mask_to_pred_succ(uint32_t order_constraint,
433 uint32_t& predecessor, uint32_t& successor) {
434 predecessor = (order_constraint >> 2) & 0x3;
435 successor = order_constraint & 0x3;
436
437 // extend rw -> iorw:
438 // 01(w) -> 0101(ow)
439 // 10(r) -> 1010(ir)
440 // 11(rw)-> 1111(iorw)
441 if (UseConservativeFence) {
442 predecessor |= predecessor << 2;
443 successor |= successor << 2;
444 }
445 }
446
447 static int pred_succ_to_membar_mask(uint32_t predecessor, uint32_t successor) {
448 return ((predecessor & 0x3) << 2) | (successor & 0x3);
449 }
450
451 public:
452
453 void cmodx_fence();
454
455 void pause() {
456 // Zihintpause
457 // PAUSE is encoded as a FENCE instruction with pred=W, succ=0, fm=0, rd=x0, and rs1=x0.
458 Assembler::fence(w, 0);
459 }
460
461 // prints msg, dumps registers and stops execution
462 void stop(const char* msg);
463
464 static void debug64(char* msg, int64_t pc, int64_t regs[]);
465
466 void unimplemented(const char* what = "");
467
468 void should_not_reach_here() { stop("should not reach here"); }
469
470 static address target_addr_for_insn(address insn_addr);
471
472 // Required platform-specific helpers for Label::patch_instructions.
473 // They _shadow_ the declarations in AbstractAssembler, which are undefined.
474 static int pd_patch_instruction_size(address branch, address target);
475 static void pd_patch_instruction(address branch, address target, const char* file = nullptr, int line = 0) {
476 pd_patch_instruction_size(branch, target);
477 }
478 static address pd_call_destination(address branch) {
479 return target_addr_for_insn(branch);
480 }
481
482 static int patch_oop(address insn_addr, address o);
483
484 static address get_target_of_li32(address insn_addr);
485 static int patch_imm_in_li32(address branch, int32_t target);
486
487 // Return whether code is emitted to a scratch blob.
488 virtual bool in_scratch_emit_size() {
489 return false;
490 }
491
492 address emit_reloc_call_address_stub(int insts_call_instruction_offset, address target);
493 static int max_reloc_call_address_stub_size();
494
495 void emit_static_call_stub();
496 static int static_call_stub_size();
497
498 // The following 4 methods return the offset of the appropriate move instruction
499
500 // Support for fast byte/short loading with zero extension (depending on particular CPU)
501 int load_unsigned_byte(Register dst, Address src);
502 int load_unsigned_short(Register dst, Address src);
503
504 // Support for fast byte/short loading with sign extension (depending on particular CPU)
505 int load_signed_byte(Register dst, Address src);
506 int load_signed_short(Register dst, Address src);
507
508 // Load and store values by size and signed-ness
509 void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed);
510 void store_sized_value(Address dst, Register src, size_t size_in_bytes);
511
512 // Misaligned loads: these use the best available sequence according to the AvoidUnalignedAccess flag
513 void load_short_misaligned(Register dst, Address src, Register tmp, bool is_signed, int granularity = 1);
514 void load_int_misaligned(Register dst, Address src, Register tmp, bool is_signed, int granularity = 1);
515 void load_long_misaligned(Register dst, Address src, Register tmp, int granularity = 1);
516
517 public:
518 // Standard pseudo instructions
519 inline void nop() {
520 addi(x0, x0, 0);
521 }
522
523 inline void mv(Register Rd, Register Rs) {
524 if (Rd != Rs) {
525 addi(Rd, Rs, 0);
526 }
527 }
528
529 inline void notr(Register Rd, Register Rs) {
530 if (do_compress_zcb(Rd, Rs) && (Rd == Rs)) {
531 c_not(Rd);
532 } else {
533 xori(Rd, Rs, -1);
534 }
535 }
536
537 inline void neg(Register Rd, Register Rs) {
538 sub(Rd, x0, Rs);
539 }
540
541 inline void negw(Register Rd, Register Rs) {
542 subw(Rd, x0, Rs);
543 }
544
545 inline void sext_w(Register Rd, Register Rs) {
546 addiw(Rd, Rs, 0);
547 }
548
549 inline void zext_b(Register Rd, Register Rs) {
550 if (do_compress_zcb(Rd, Rs) && (Rd == Rs)) {
551 c_zext_b(Rd);
552 } else {
553 andi(Rd, Rs, 0xFF);
554 }
555 }
556
557 inline void seqz(Register Rd, Register Rs) {
558 sltiu(Rd, Rs, 1);
559 }
560
561 inline void snez(Register Rd, Register Rs) {
562 sltu(Rd, x0, Rs);
563 }
564
565 inline void sltz(Register Rd, Register Rs) {
566 slt(Rd, Rs, x0);
567 }
568
569 inline void sgtz(Register Rd, Register Rs) {
570 slt(Rd, x0, Rs);
571 }
572
573 // Bit-manipulation extension pseudo instructions
574 // zero extend word
575 inline void zext_w(Register Rd, Register Rs) {
576 assert(UseZba, "must be");
577 if (do_compress_zcb(Rd, Rs) && (Rd == Rs)) {
578 c_zext_w(Rd);
579 } else {
580 add_uw(Rd, Rs, zr);
581 }
582 }
583
584 // Floating-point data-processing pseudo instructions
585 inline void fmv_s(FloatRegister Rd, FloatRegister Rs) {
586 if (Rd != Rs) {
587 fsgnj_s(Rd, Rs, Rs);
588 }
589 }
590
591 inline void fabs_s(FloatRegister Rd, FloatRegister Rs) {
592 fsgnjx_s(Rd, Rs, Rs);
593 }
594
595 inline void fneg_s(FloatRegister Rd, FloatRegister Rs) {
596 fsgnjn_s(Rd, Rs, Rs);
597 }
598
599 inline void fmv_d(FloatRegister Rd, FloatRegister Rs) {
600 if (Rd != Rs) {
601 fsgnj_d(Rd, Rs, Rs);
602 }
603 }
604
605 inline void fabs_d(FloatRegister Rd, FloatRegister Rs) {
606 fsgnjx_d(Rd, Rs, Rs);
607 }
608
609 inline void fneg_d(FloatRegister Rd, FloatRegister Rs) {
610 fsgnjn_d(Rd, Rs, Rs);
611 }
612
613 // Control and status pseudo instructions
614 void csrr(Register Rd, unsigned csr); // read csr
615 void csrw(unsigned csr, Register Rs); // write csr
616 void csrs(unsigned csr, Register Rs); // set bits in csr
617 void csrc(unsigned csr, Register Rs); // clear bits in csr
618 void csrwi(unsigned csr, unsigned imm);
619 void csrsi(unsigned csr, unsigned imm);
620 void csrci(unsigned csr, unsigned imm);
621 void frcsr(Register Rd) { csrr(Rd, CSR_FCSR); }; // read floating-point csr
622 void fscsr(Register Rd, Register Rs); // swap floating-point csr
623 void fscsr(Register Rs); // write floating-point csr
624 void frrm(Register Rd) { csrr(Rd, CSR_FRM); }; // read floating-point rounding mode
625 void fsrm(Register Rd, Register Rs); // swap floating-point rounding mode
626 void fsrm(Register Rs); // write floating-point rounding mode
627 void fsrmi(Register Rd, unsigned imm);
628 void fsrmi(unsigned imm);
629 void frflags(Register Rd) { csrr(Rd, CSR_FFLAGS); }; // read floating-point exception flags
630 void fsflags(Register Rd, Register Rs); // swap floating-point exception flags
631 void fsflags(Register Rs); // write floating-point exception flags
632 void fsflagsi(Register Rd, unsigned imm);
633 void fsflagsi(unsigned imm);
634 // Requires Zicntr
635 void rdinstret(Register Rd) { csrr(Rd, CSR_INSTRET); }; // read instruction-retired counter
636 void rdcycle(Register Rd) { csrr(Rd, CSR_CYCLE); }; // read cycle counter
637 void rdtime(Register Rd) { csrr(Rd, CSR_TIME); }; // read time
638
639 // Restore cpu control state after JNI call
640 void restore_cpu_control_state_after_jni(Register tmp);
641
642 // Control transfer pseudo instructions
643 void beqz(Register Rs, const address dest);
644 void bnez(Register Rs, const address dest);
645 void blez(Register Rs, const address dest);
646 void bgez(Register Rs, const address dest);
647 void bltz(Register Rs, const address dest);
648 void bgtz(Register Rs, const address dest);
649
650 void cmov_eq(Register cmp1, Register cmp2, Register dst, Register src);
651 void cmov_ne(Register cmp1, Register cmp2, Register dst, Register src);
652 void cmov_le(Register cmp1, Register cmp2, Register dst, Register src);
653 void cmov_leu(Register cmp1, Register cmp2, Register dst, Register src);
654 void cmov_ge(Register cmp1, Register cmp2, Register dst, Register src);
655 void cmov_geu(Register cmp1, Register cmp2, Register dst, Register src);
656 void cmov_lt(Register cmp1, Register cmp2, Register dst, Register src);
657 void cmov_ltu(Register cmp1, Register cmp2, Register dst, Register src);
658 void cmov_gt(Register cmp1, Register cmp2, Register dst, Register src);
659 void cmov_gtu(Register cmp1, Register cmp2, Register dst, Register src);
660
661 void cmov_cmp_fp_eq(FloatRegister cmp1, FloatRegister cmp2, Register dst, Register src, bool is_single);
662 void cmov_cmp_fp_ne(FloatRegister cmp1, FloatRegister cmp2, Register dst, Register src, bool is_single);
663 void cmov_cmp_fp_le(FloatRegister cmp1, FloatRegister cmp2, Register dst, Register src, bool is_single);
664 void cmov_cmp_fp_ge(FloatRegister cmp1, FloatRegister cmp2, Register dst, Register src, bool is_single);
665 void cmov_cmp_fp_lt(FloatRegister cmp1, FloatRegister cmp2, Register dst, Register src, bool is_single);
666 void cmov_cmp_fp_gt(FloatRegister cmp1, FloatRegister cmp2, Register dst, Register src, bool is_single);
667
668 public:
669 // We try to follow the risc-v asm mnemonics.
670 // But as we don't lay out a reachable GOT,
671 // we often need to resort to movptr, li <48imm>.
672 // https://github.com/riscv-non-isa/riscv-asm-manual/blob/main/src/asm-manual.adoc
673
674 // HotSpot only uses the standard calling convention using x1/ra.
675 // The alternative calling convention using x5/t0 is not used.
676 // Using x5 as a temp causes the CPU to mispredict returns.
677
678 // JALR, return address stack updates:
679 // | rd is x1/x5 | rs1 is x1/x5 | rd=rs1 | RAS action
680 // | ----------- | ------------ | ------ |-------------
681 // | No | No | - | None
682 // | No | Yes | - | Pop
683 // | Yes | No | - | Push
684 // | Yes | Yes | No | Pop, then push
685 // | Yes | Yes | Yes | Push
686 //
687 // JAL, return address stack updates:
688 // | rd is x1/x5 | RAS action
689 // | ----------- | ----------
690 // | Yes | Push
691 // | No | None
692 //
693 // JUMPs use Rd = x0/zero and Rs = x6/t1 or imm
694 // CALLs use Rd = x1/ra and Rs = x6/t1 or imm (or x1/ra*)
695 // RETURNs use Rd = x0/zero and Rs = x1/ra
696 // *x1/ra as Rs should not normally be used; special cases only.
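  // Illustrative consequence of the tables above: a call emitted as
  // "jalr ra, t1, 0" pushes the return address onto the RAS and the matching
  // "ret" ("jalr x0, ra, 0") pops it, whereas routing a call or temp through
  // x5/t0 would engage the alternate link-register convention and hurt
  // return-address prediction.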
697
698 // jump: jal x0, offset
699 // For long reach, uses a temp register for:
700 // la + jr
701 void j(const address dest, Register temp = t1);
702 void j(const Address &dest, Register temp = t1);
703 void j(Label &l, Register temp = noreg);
704
705 // jump register: jalr x0, offset(rs)
706 void jr(Register Rd, int32_t offset = 0);
707
708 // call: la + jalr x1
709 void call(const address dest, Register temp = t1);
710
711 // jalr: jalr x1, offset(rs)
712 void jalr(Register Rs, int32_t offset = 0);
713
714 // Emit a runtime call. Only invalidates the tmp register which
715 // is used to keep the entry address for jalr/movptr.
716 // Uses call() for targets inside the code cache, else movptr + jalr.
717 // Clobbers t1.
718 void rt_call(address dest, Register tmp = t1);
719
720 // ret: jalr x0, 0(x1)
721 inline void ret() {
722 Assembler::jalr(x0, x1, 0);
723 }
724
725 //label
726 void beqz(Register Rs, Label &l, bool is_far = false);
727 void bnez(Register Rs, Label &l, bool is_far = false);
728 void blez(Register Rs, Label &l, bool is_far = false);
729 void bgez(Register Rs, Label &l, bool is_far = false);
730 void bltz(Register Rs, Label &l, bool is_far = false);
731 void bgtz(Register Rs, Label &l, bool is_far = false);
732
733 void beq (Register Rs1, Register Rs2, Label &L, bool is_far = false);
734 void bne (Register Rs1, Register Rs2, Label &L, bool is_far = false);
735 void blt (Register Rs1, Register Rs2, Label &L, bool is_far = false);
736 void bge (Register Rs1, Register Rs2, Label &L, bool is_far = false);
737 void bltu(Register Rs1, Register Rs2, Label &L, bool is_far = false);
738 void bgeu(Register Rs1, Register Rs2, Label &L, bool is_far = false);
739
740 void bgt (Register Rs, Register Rt, const address dest);
741 void ble (Register Rs, Register Rt, const address dest);
742 void bgtu(Register Rs, Register Rt, const address dest);
743 void bleu(Register Rs, Register Rt, const address dest);
744
745 void bgt (Register Rs, Register Rt, Label &l, bool is_far = false);
746 void ble (Register Rs, Register Rt, Label &l, bool is_far = false);
747 void bgtu(Register Rs, Register Rt, Label &l, bool is_far = false);
748 void bleu(Register Rs, Register Rt, Label &l, bool is_far = false);
749
750 #define INSN_ENTRY_RELOC(result_type, header) \
751 result_type header { \
752 guarantee(rtype == relocInfo::internal_word_type, \
753 "only internal_word_type relocs make sense here"); \
754 relocate(InternalAddress(dest).rspec()); \
755 IncompressibleScope scope(this); /* relocations */
756
757 #define INSN(NAME) \
758 void NAME(Register Rs1, Register Rs2, const address dest) { \
759 assert_cond(dest != nullptr); \
760 int64_t offset = dest - pc(); \
761 guarantee(is_simm13(offset) && is_even(offset), \
762 "offset is invalid: is_simm_13: %s offset: " INT64_FORMAT, \
763 BOOL_TO_STR(is_simm13(offset)), offset); \
764 Assembler::NAME(Rs1, Rs2, offset); \
765 } \
766 INSN_ENTRY_RELOC(void, NAME(Register Rs1, Register Rs2, address dest, relocInfo::relocType rtype)) \
767 NAME(Rs1, Rs2, dest); \
768 }
769
770 INSN(beq);
771 INSN(bne);
772 INSN(bge);
773 INSN(bgeu);
774 INSN(blt);
775 INSN(bltu);
776
777 #undef INSN
778
779 #undef INSN_ENTRY_RELOC
780
781 void float_beq(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
782 void float_bne(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
783 void float_ble(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
784 void float_bge(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
785 void float_blt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
786 void float_bgt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
787
788 void double_beq(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
789 void double_bne(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
790 void double_ble(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
791 void double_bge(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
792 void double_blt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
793 void double_bgt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
794
795 private:
796 int push_reg(unsigned int bitset, Register stack);
797 int pop_reg(unsigned int bitset, Register stack);
798 int push_fp(unsigned int bitset, Register stack);
799 int pop_fp(unsigned int bitset, Register stack);
800 #ifdef COMPILER2
801 int push_v(unsigned int bitset, Register stack);
802 int pop_v(unsigned int bitset, Register stack);
803 #endif // COMPILER2
804
805 // The signed 20-bit upper imm can materialize at most -0x80000000, i.e. negative two G.
806 // The following signed 12-bit imm can subtract at most another 0x800, two K, from that.
807 bool is_valid_32bit_offset(int64_t x) {
808 constexpr int64_t twoG = (2 * G);
809 constexpr int64_t twoK = (2 * K);
810 return x < (twoG - twoK) && x >= (-twoG - twoK);
811 }
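  // Illustrative: the most negative auipc immediate reaches pc - 2G, and the
  // following 12-bit immediate can subtract up to another 0x800 (2K), so the
  // lowest reachable byte is at -(2G + 2K) while the highest is just short of
  // 2G - 2K; hence the asymmetric bounds above.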
812
813 // Ensure that the auipc can reach the destination at x from anywhere within
814 // the code cache so that if it is relocated we know it will still reach.
815 bool is_32bit_offset_from_codecache(int64_t x) {
816 int64_t low = (int64_t)CodeCache::low_bound();
817 int64_t high = (int64_t)CodeCache::high_bound();
818 return is_valid_32bit_offset(x - low) && is_valid_32bit_offset(x - high);
819 }
820
821 public:
822 void push_reg(Register Rs);
823 void pop_reg(Register Rd);
824 void push_reg(RegSet regs, Register stack) { if (regs.bits()) push_reg(regs.bits(), stack); }
825 void pop_reg(RegSet regs, Register stack) { if (regs.bits()) pop_reg(regs.bits(), stack); }
826 void push_fp(FloatRegSet regs, Register stack) { if (regs.bits()) push_fp(regs.bits(), stack); }
827 void pop_fp(FloatRegSet regs, Register stack) { if (regs.bits()) pop_fp(regs.bits(), stack); }
828 #ifdef COMPILER2
829 void push_v(VectorRegSet regs, Register stack) { if (regs.bits()) push_v(regs.bits(), stack); }
830 void pop_v(VectorRegSet regs, Register stack) { if (regs.bits()) pop_v(regs.bits(), stack); }
831 #endif // COMPILER2
832
833 // Push and pop everything that might be clobbered by a native
834 // runtime call except t0 and t1. (They are always
835 // temporary registers, so we don't have to protect them.)
836 // Additional registers can be excluded in a passed RegSet.
837 void push_call_clobbered_registers_except(RegSet exclude);
838 void pop_call_clobbered_registers_except(RegSet exclude);
839
840 void push_call_clobbered_registers() {
841 push_call_clobbered_registers_except(RegSet());
842 }
843 void pop_call_clobbered_registers() {
844 pop_call_clobbered_registers_except(RegSet());
845 }
846
847 void push_CPU_state(bool save_vectors = false, int vector_size_in_bytes = 0);
848 void pop_CPU_state(bool restore_vectors = false, int vector_size_in_bytes = 0);
849
850 void push_cont_fastpath(Register java_thread = xthread);
851 void pop_cont_fastpath(Register java_thread = xthread);
852
853 // If the heap base register is used, reinitialize it with the correct value
854 void reinit_heapbase();
855
856 void bind(Label& L) {
857 Assembler::bind(L);
858 // fences across basic blocks should not be merged
859 code()->clear_last_insn();
860 }
861
862 typedef void (MacroAssembler::* compare_and_branch_insn)(Register Rs1, Register Rs2, const address dest);
863 typedef void (MacroAssembler::* compare_and_branch_label_insn)(Register Rs1, Register Rs2, Label &L, bool is_far);
864 typedef void (MacroAssembler::* jal_jalr_insn)(Register Rt, address dest);
865
866 void wrap_label(Register r, Label &L, jal_jalr_insn insn);
867 void wrap_label(Register r1, Register r2, Label &L,
868 compare_and_branch_insn insn,
869 compare_and_branch_label_insn neg_insn, bool is_far = false);
870
871 void la(Register Rd, Label &label);
872 void la(Register Rd, const address addr);
873 void la(Register Rd, const address addr, int32_t &offset);
874 void la(Register Rd, const Address &adr);
875
876 void li16u(Register Rd, uint16_t imm);
877 void li32(Register Rd, int32_t imm);
878 void li (Register Rd, int64_t imm); // optimized load immediate
879
880 // mv
881 void mv(Register Rd, address addr) { li(Rd, (int64_t)addr); }
882 void mv(Register Rd, address addr, int32_t &offset) {
883 // Split address into a lower 12-bit sign-extended offset and the remainder,
884 // so that the offset can be encoded in a jalr or load/store instruction.
885 offset = ((int32_t)(int64_t)addr << 20) >> 20;
886 li(Rd, (int64_t)addr - offset);
887 }
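  // Illustrative split: if the low 12 bits of addr are 0x801, offset becomes
  // 0x801 - 0x1000 = -0x7ff, li materializes addr + 0x7ff, and Rd + offset
  // reproduces the original address.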
888
889 template<typename T, ENABLE_IF(std::is_integral<T>::value)>
890 inline void mv(Register Rd, T o) { li(Rd, (int64_t)o); }
891
892 void mv(Register Rd, RegisterOrConstant src) {
893 if (src.is_register()) {
894 mv(Rd, src.as_register());
895 } else {
896 mv(Rd, src.as_constant());
897 }
898 }
899
900 // Generates a load of a 48-bit constant which can be
901 // patched to any other 48-bit constant, i.e. an address.
902 // In the common case, supply an additional temp register
903 // to shorten the instruction sequence.
904 void movptr(Register Rd, const Address &addr, Register tmp = noreg);
905 void movptr(Register Rd, address addr, Register tmp = noreg);
906 void movptr(Register Rd, address addr, int32_t &offset, Register tmp = noreg);
907
908 private:
909 void movptr1(Register Rd, uintptr_t addr, int32_t &offset);
910 void movptr2(Register Rd, uintptr_t addr, int32_t &offset, Register tmp);
911 public:
912 // float imm move
913 static bool can_hf_imm_load(short imm);
914 static bool can_fp_imm_load(float imm);
915 static bool can_dp_imm_load(double imm);
916 void fli_h(FloatRegister Rd, short imm);
917 void fli_s(FloatRegister Rd, float imm);
918 void fli_d(FloatRegister Rd, double imm);
919
920 // arith
921 void add (Register Rd, Register Rn, int64_t increment, Register tmp = t0);
922 void sub (Register Rd, Register Rn, int64_t decrement, Register tmp = t0);
923 void addw(Register Rd, Register Rn, int64_t increment, Register tmp = t0);
924 void subw(Register Rd, Register Rn, int64_t decrement, Register tmp = t0);
925
926 void subi(Register Rd, Register Rn, int64_t decrement) {
927 assert(is_simm12(-decrement), "Must be");
928 addi(Rd, Rn, -decrement);
929 }
930
931 void subiw(Register Rd, Register Rn, int64_t decrement) {
932 assert(is_simm12(-decrement), "Must be");
933 addiw(Rd, Rn, -decrement);
934 }
935
936 #define INSN(NAME) \
937 inline void NAME(Register Rd, Register Rs1, Register Rs2) { \
938 Assembler::NAME(Rd, Rs1, Rs2); \
939 }
940
941 INSN(add);
942 INSN(addw);
943 INSN(sub);
944 INSN(subw);
945
946 #undef INSN
947
948 // logic
949 void andrw(Register Rd, Register Rs1, Register Rs2);
950 void orrw(Register Rd, Register Rs1, Register Rs2);
951 void xorrw(Register Rd, Register Rs1, Register Rs2);
952
953 // logic with negate
954 void andn(Register Rd, Register Rs1, Register Rs2);
955 void orn(Register Rd, Register Rs1, Register Rs2);
956
957 // reverse bytes
958 void revbw(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in lower word, sign-extend
959 void revb(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in doubleword
960
961 void ror(Register dst, Register src, Register shift, Register tmp = t0);
962 void ror(Register dst, Register src, uint32_t shift, Register tmp = t0);
963 void rolw(Register dst, Register src, uint32_t shift, Register tmp = t0);
964
965 void orptr(Address adr, RegisterOrConstant src, Register tmp1 = t0, Register tmp2 = t1);
966
967 // Load and Store Instructions
968 #define INSN_ENTRY_RELOC(result_type, header) \
969 result_type header { \
970 guarantee(rtype == relocInfo::internal_word_type, \
971 "only internal_word_type relocs make sense here"); \
972 relocate(InternalAddress(dest).rspec()); \
973 IncompressibleScope scope(this); /* relocations */
974
975 #define INSN(NAME) \
976 void NAME(Register Rd, address dest) { \
977 assert_cond(dest != nullptr); \
978 if (CodeCache::contains(dest)) { \
979 int64_t distance = dest - pc(); \
980 assert(is_valid_32bit_offset(distance), "Must be"); \
981 auipc(Rd, (int32_t)distance + 0x800); \
982 Assembler::NAME(Rd, Rd, ((int32_t)distance << 20) >> 20); \
983 } else { \
984 int32_t offset = 0; \
985 movptr(Rd, dest, offset); \
986 Assembler::NAME(Rd, Rd, offset); \
987 } \
988 } \
989 INSN_ENTRY_RELOC(void, NAME(Register Rd, address dest, relocInfo::relocType rtype)) \
990 NAME(Rd, dest); \
991 } \
992 void NAME(Register Rd, const Address &adr, Register temp = t0) { \
993 switch (adr.getMode()) { \
994 case Address::literal: { \
995 relocate(adr.rspec(), [&] { \
996 NAME(Rd, adr.target()); \
997 }); \
998 break; \
999 } \
1000 case Address::base_plus_offset: { \
1001 if (is_simm12(adr.offset())) { \
1002 Assembler::NAME(Rd, adr.base(), adr.offset()); \
1003 } else { \
1004 int32_t offset = ((int32_t)adr.offset() << 20) >> 20; \
1005 if (Rd == adr.base()) { \
1006 la(temp, Address(adr.base(), adr.offset() - offset)); \
1007 Assembler::NAME(Rd, temp, offset); \
1008 } else { \
1009 la(Rd, Address(adr.base(), adr.offset() - offset)); \
1010 Assembler::NAME(Rd, Rd, offset); \
1011 } \
1012 } \
1013 break; \
1014 } \
1015 default: \
1016 ShouldNotReachHere(); \
1017 } \
1018 } \
1019 void NAME(Register Rd, Label &L) { \
1020 wrap_label(Rd, L, &MacroAssembler::NAME); \
1021 }
1022
1023 INSN(lb);
1024 INSN(lbu);
1025 INSN(lh);
1026 INSN(lhu);
1027 INSN(lw);
1028 INSN(lwu);
1029 INSN(ld);
1030
1031 #undef INSN
1032
1033 #define INSN(NAME) \
1034 void NAME(FloatRegister Rd, address dest, Register temp = t0) { \
1035 assert_cond(dest != nullptr); \
1036 if (CodeCache::contains(dest)) { \
1037 int64_t distance = dest - pc(); \
1038 assert(is_valid_32bit_offset(distance), "Must be"); \
1039 auipc(temp, (int32_t)distance + 0x800); \
1040 Assembler::NAME(Rd, temp, ((int32_t)distance << 20) >> 20); \
1041 } else { \
1042 int32_t offset = 0; \
1043 movptr(temp, dest, offset); \
1044 Assembler::NAME(Rd, temp, offset); \
1045 } \
1046 } \
1047 INSN_ENTRY_RELOC(void, NAME(FloatRegister Rd, address dest, \
1048 relocInfo::relocType rtype, Register temp = t0)) \
1049 NAME(Rd, dest, temp); \
1050 } \
1051 void NAME(FloatRegister Rd, const Address &adr, Register temp = t0) { \
1052 switch (adr.getMode()) { \
1053 case Address::literal: { \
1054 relocate(adr.rspec(), [&] { \
1055 NAME(Rd, adr.target(), temp); \
1056 }); \
1057 break; \
1058 } \
1059 case Address::base_plus_offset: { \
1060 if (is_simm12(adr.offset())) { \
1061 Assembler::NAME(Rd, adr.base(), adr.offset()); \
1062 } else { \
1063 int32_t offset = ((int32_t)adr.offset() << 20) >> 20; \
1064 la(temp, Address(adr.base(), adr.offset() - offset)); \
1065 Assembler::NAME(Rd, temp, offset); \
1066 } \
1067 break; \
1068 } \
1069 default: \
1070 ShouldNotReachHere(); \
1071 } \
1072 }
1073
1074 INSN(flh);
1075 INSN(flw);
1076 INSN(fld);
1077
1078 #undef INSN
1079
1080 #define INSN(NAME, REGISTER) \
1081 INSN_ENTRY_RELOC(void, NAME(REGISTER Rs, address dest, \
1082 relocInfo::relocType rtype, Register temp = t0)) \
1083 NAME(Rs, dest, temp); \
1084 }
1085
1086 INSN(sb, Register);
1087 INSN(sh, Register);
1088 INSN(sw, Register);
1089 INSN(sd, Register);
1090 INSN(fsw, FloatRegister);
1091 INSN(fsd, FloatRegister);
1092
1093 #undef INSN
1094
1095 #define INSN(NAME) \
1096 void NAME(Register Rs, address dest, Register temp = t0) { \
1097 assert_cond(dest != nullptr); \
1098 assert_different_registers(Rs, temp); \
1099 if (CodeCache::contains(dest)) { \
1100 int64_t distance = dest - pc(); \
1101 assert(is_valid_32bit_offset(distance), "Must be"); \
1102 auipc(temp, (int32_t)distance + 0x800); \
1103 Assembler::NAME(Rs, temp, ((int32_t)distance << 20) >> 20); \
1104 } else { \
1105 int32_t offset = 0; \
1106 movptr(temp, dest, offset); \
1107 Assembler::NAME(Rs, temp, offset); \
1108 } \
1109 } \
1110 void NAME(Register Rs, const Address &adr, Register temp = t0) { \
1111 switch (adr.getMode()) { \
1112 case Address::literal: { \
1113 assert_different_registers(Rs, temp); \
1114 relocate(adr.rspec(), [&] { \
1115 NAME(Rs, adr.target(), temp); \
1116 }); \
1117 break; \
1118 } \
1119 case Address::base_plus_offset: { \
1120 if (is_simm12(adr.offset())) { \
1121 Assembler::NAME(Rs, adr.base(), adr.offset()); \
1122 } else { \
1123 assert_different_registers(Rs, temp); \
1124 int32_t offset = ((int32_t)adr.offset() << 20) >> 20; \
1125 la(temp, Address(adr.base(), adr.offset() - offset)); \
1126 Assembler::NAME(Rs, temp, offset); \
1127 } \
1128 break; \
1129 } \
1130 default: \
1131 ShouldNotReachHere(); \
1132 } \
1133 }
1134
1135 INSN(sb);
1136 INSN(sh);
1137 INSN(sw);
1138 INSN(sd);
1139
1140 #undef INSN
1141
1142 #define INSN(NAME) \
1143 void NAME(FloatRegister Rs, address dest, Register temp = t0) { \
1144 assert_cond(dest != nullptr); \
1145 if (CodeCache::contains(dest)) { \
1146 int64_t distance = dest - pc(); \
1147 assert(is_valid_32bit_offset(distance), "Must be"); \
1148 auipc(temp, (int32_t)distance + 0x800); \
1149 Assembler::NAME(Rs, temp, ((int32_t)distance << 20) >> 20); \
1150 } else { \
1151 int32_t offset = 0; \
1152 movptr(temp, dest, offset); \
1153 Assembler::NAME(Rs, temp, offset); \
1154 } \
1155 } \
1156 void NAME(FloatRegister Rs, const Address &adr, Register temp = t0) { \
1157 switch (adr.getMode()) { \
1158 case Address::literal: { \
1159 relocate(adr.rspec(), [&] { \
1160 NAME(Rs, adr.target(), temp); \
1161 }); \
1162 break; \
1163 } \
1164 case Address::base_plus_offset: { \
1165 if (is_simm12(adr.offset())) { \
1166 Assembler::NAME(Rs, adr.base(), adr.offset()); \
1167 } else { \
1168 int32_t offset = ((int32_t)adr.offset() << 20) >> 20; \
1169 la(temp, Address(adr.base(), adr.offset() - offset)); \
1170 Assembler::NAME(Rs, temp, offset); \
1171 } \
1172 break; \
1173 } \
1174 default: \
1175 ShouldNotReachHere(); \
1176 } \
1177 }
1178
1179 INSN(fsw);
1180 INSN(fsd);
1181
1182 #undef INSN
1183
1184 #undef INSN_ENTRY_RELOC
1185
1186 void cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp, Label &succeed, Label *fail);
1187 void cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, Label &succeed, Label *fail);
1188 void cmpxchg(Register addr, Register expected,
1189 Register new_val,
1190 Assembler::operand_size size,
1191 Assembler::Aqrl acquire, Assembler::Aqrl release,
1192 Register result, bool result_as_bool = false);
1193 void weak_cmpxchg(Register addr, Register expected,
1194 Register new_val,
1195 Assembler::operand_size size,
1196 Assembler::Aqrl acquire, Assembler::Aqrl release,
1197 Register result);
1198 void cmpxchg_narrow_value_helper(Register addr, Register expected, Register new_val,
1199 Assembler::operand_size size,
1200 Register shift, Register mask, Register aligned_addr);
1201 void cmpxchg_narrow_value(Register addr, Register expected,
1202 Register new_val,
1203 Assembler::operand_size size,
1204 Assembler::Aqrl acquire, Assembler::Aqrl release,
1205 Register result, bool result_as_bool,
1206 Register tmp1, Register tmp2, Register tmp3);
1207 void weak_cmpxchg_narrow_value(Register addr, Register expected,
1208 Register new_val,
1209 Assembler::operand_size size,
1210 Assembler::Aqrl acquire, Assembler::Aqrl release,
1211 Register result,
1212 Register tmp1, Register tmp2, Register tmp3);
1213
1214 void atomic_add(Register prev, RegisterOrConstant incr, Register addr);
1215 void atomic_addw(Register prev, RegisterOrConstant incr, Register addr);
1216 void atomic_addal(Register prev, RegisterOrConstant incr, Register addr);
1217 void atomic_addalw(Register prev, RegisterOrConstant incr, Register addr);
1218
1219 void atomic_xchg(Register prev, Register newv, Register addr);
1220 void atomic_xchgw(Register prev, Register newv, Register addr);
1221 void atomic_xchgal(Register prev, Register newv, Register addr);
1222 void atomic_xchgalw(Register prev, Register newv, Register addr);
1223 void atomic_xchgwu(Register prev, Register newv, Register addr);
1224 void atomic_xchgalwu(Register prev, Register newv, Register addr);
1225
1226 void atomic_cas(Register prev, Register newv, Register addr, Assembler::operand_size size,
1227 Assembler::Aqrl acquire = Assembler::relaxed, Assembler::Aqrl release = Assembler::relaxed);
1228
1229 // Emit a far call/jump. Only invalidates the tmp register which
1230 // is used to keep the entry address for jalr.
1231 // The address must be inside the code cache.
1232 // Supported entry.rspec():
1233 // - relocInfo::external_word_type
1234 // - relocInfo::runtime_call_type
1235 // - relocInfo::none
1236 // Clobbers t1 by default.
1237 void far_call(const Address &entry, Register tmp = t1);
1238 void far_jump(const Address &entry, Register tmp = t1);
1239
1240 static int far_branch_size() {
1241 return 2 * MacroAssembler::instruction_size; // auipc + jalr, see far_call() & far_jump()
1242 }
1243
1244 void load_byte_map_base(Register reg);
1245
1246 void bang_stack_with_offset(int offset) {
1247 // stack grows down, caller passes positive offset
1248 assert(offset > 0, "must bang with negative offset");
1249 sub(t0, sp, offset);
1250 sd(zr, Address(t0));
1251 }
1252
1253 virtual void _call_Unimplemented(address call_site) {
1254 mv(t1, call_site);
1255 }
1256
1257 #define call_Unimplemented() _call_Unimplemented((address)__PRETTY_FUNCTION__)
1258
1259 // Frame creation and destruction shared between JITs.
1260 void build_frame(int framesize);
1261 void remove_frame(int framesize);
1262
1263 void reserved_stack_check();
1264
1265 void get_polling_page(Register dest, relocInfo::relocType rtype);
1266 void read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype);
1267
1268 // RISCV64 OpenJDK uses three different types of calls:
1269 //
1270 // - far call: auipc reg, pc_relative_offset; jalr ra, reg, offset
1271 // The offset has the range [-(2G + 2K), 2G - 2K). Addresses out of this
1272 // range in the code cache require an indirect call.
1273 // If a jump is needed rather than a call, a far jump 'jalr x0, reg, offset'
1274 // can be used instead.
1275 // All instructions are embedded at a call site.
1276 //
1277 // - indirect call: movptr + jalr
1278 // This can reach anywhere in the address space, but it cannot be patched
1279 // while code is running, so it must only be modified at a safepoint.
1280 // This form of call is most suitable for targets at fixed addresses,
1281 // which will never be patched.
1282 //
1283 // - reloc call:
1284 // This too can reach anywhere in the address space but is only available
1285 // in C1/C2-generated code (nmethod).
1286 //
1287 // [Main code section]
1288 // auipc
1289 // ld <address_from_stub_section>
1290 // jalr
1291 //
1292 // [Stub section]
1293 // address stub:
1294 // <64-bit destination address>
1295 //
1296 // To change the destination we simply atomically store the new
1297 // address in the stub section.
1298 // There is a benign race in that the other thread might observe the old
1299 // 64-bit destination address before it observes the new address. That does
1300 // not matter because the destination method has been invalidated, so there
1301 // will be a trap at its start.
1302
1303 // Emit a reloc call and create a stub to hold the entry point address.
1304 // Supported entry.rspec():
1305 // - relocInfo::runtime_call_type
1306 // - relocInfo::opt_virtual_call_type
1307 // - relocInfo::static_call_type
1308 // - relocInfo::virtual_call_type
1309 //
1310 // Return: the call PC or nullptr if CodeCache is full.
1311 address reloc_call(Address entry, Register tmp = t1);
1312
1313 address ic_call(address entry, jint method_index = 0);
1314 static int ic_check_size();
1315 int ic_check(int end_alignment = MacroAssembler::instruction_size);
1316
1317 // Support for memory inc/dec
1318 // n.b. increment/decrement calls with an Address destination will
1319 // need to use a scratch register to load the value to be
1320 // incremented. increment/decrement calls which add or subtract a
1321 // constant value other than sign-extended 12-bit immediate will need
1322 // to use a 2nd scratch register to hold the constant. so, an address
1323 // increment/decrement may trash both t0 and t1.
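  // Illustrative: incrementing a field at a simm12-encodable offset by 1 only
  // needs one scratch for the loaded value, whereas adding a constant such as
  // 0x12345 (not a simm12) also needs a second scratch for the constant, so
  // callers should assume both t0 and t1 may be clobbered.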
1324
1325 void increment(const Address dst, int64_t value = 1, Register tmp1 = t0, Register tmp2 = t1);
1326 void incrementw(const Address dst, int32_t value = 1, Register tmp1 = t0, Register tmp2 = t1);
1327
1328 void decrement(const Address dst, int64_t value = 1, Register tmp1 = t0, Register tmp2 = t1);
1329 void decrementw(const Address dst, int32_t value = 1, Register tmp1 = t0, Register tmp2 = t1);
1330
1331 void cmpptr(Register src1, const Address &src2, Label& equal, Register tmp = t0);
1332
1333 void clinit_barrier(Register klass, Register tmp, Label* L_fast_path = nullptr, Label* L_slow_path = nullptr);
1334 void load_method_holder_cld(Register result, Register method);
1335 void load_method_holder(Register holder, Register method);
1336
1337 void compute_index(Register str1, Register trailing_zeros, Register match_mask,
1338 Register result, Register char_tmp, Register tmp,
1339 bool haystack_isL);
1340 void compute_match_mask(Register src, Register pattern, Register match_mask,
1341 Register mask1, Register mask2);
1342
1343 // CRC32 code for java.util.zip.CRC32::updateBytes() intrinsic.
1344 void kernel_crc32(Register crc, Register buf, Register len,
1345 Register table0, Register table1, Register table2, Register table3,
1346 Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5, Register tmp6);
1347 void update_word_crc32(Register crc, Register v, Register tmp1, Register tmp2, Register tmp3,
1348 Register table0, Register table1, Register table2, Register table3,
1349 bool upper);
1350 void update_byte_crc32(Register crc, Register val, Register table);
1351
1352 #ifdef COMPILER2
1353 void vector_update_crc32(Register crc, Register buf, Register len,
1354 Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5,
1355 Register table0, Register table3);
1356 void kernel_crc32_vclmul_fold(Register crc, Register buf, Register len,
1357 Register table0, Register table1, Register table2, Register table3,
1358 Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5);
1359 void crc32_vclmul_fold_to_16_bytes_vectorsize_32(VectorRegister vx, VectorRegister vy, VectorRegister vt,
1360 VectorRegister vtmp1, VectorRegister vtmp2, VectorRegister vtmp3, VectorRegister vtmp4);
1361 void kernel_crc32_vclmul_fold_vectorsize_32(Register crc, Register buf, Register len,
1362 Register vclmul_table, Register tmp1, Register tmp2);
1363 void crc32_vclmul_fold_16_bytes_vectorsize_16(VectorRegister vx, VectorRegister vt,
1364 VectorRegister vtmp1, VectorRegister vtmp2, VectorRegister vtmp3, VectorRegister vtmp4,
1365 Register buf, Register tmp, const int STEP);
1366 void crc32_vclmul_fold_16_bytes_vectorsize_16_2(VectorRegister vx, VectorRegister vy, VectorRegister vt,
1367 VectorRegister vtmp1, VectorRegister vtmp2, VectorRegister vtmp3, VectorRegister vtmp4,
1368 Register tmp);
1369 void crc32_vclmul_fold_16_bytes_vectorsize_16_3(VectorRegister vx, VectorRegister vy, VectorRegister vt,
1370 VectorRegister vtmp1, VectorRegister vtmp2, VectorRegister vtmp3, VectorRegister vtmp4,
1371 Register tmp);
1372 void kernel_crc32_vclmul_fold_vectorsize_16(Register crc, Register buf, Register len,
1373 Register vclmul_table, Register tmp1, Register tmp2);
1374
1375 void mul_add(Register out, Register in, Register offset,
1376 Register len, Register k, Register tmp);
1377 void wide_mul(Register prod_lo, Register prod_hi, Register n, Register m);
1378 void wide_madd(Register sum_lo, Register sum_hi, Register n,
1379 Register m, Register tmp1, Register tmp2);
1380 void cad(Register dst, Register src1, Register src2, Register carry);
1381 void cadc(Register dst, Register src1, Register src2, Register carry);
1382 void adc(Register dst, Register src1, Register src2, Register carry);
1383 void add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo,
1384 Register src1, Register src2, Register carry);
1385 void multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart,
1386 Register y, Register y_idx, Register z,
1387 Register carry, Register product,
1388 Register idx, Register kdx);
1389 void multiply_128_x_128_loop(Register y, Register z,
1390 Register carry, Register carry2,
1391 Register idx, Register jdx,
1392 Register yz_idx1, Register yz_idx2,
1393 Register tmp, Register tmp3, Register tmp4,
1394 Register tmp6, Register product_hi);
1395 void multiply_to_len(Register x, Register xlen, Register y, Register ylen,
1396 Register z, Register tmp0,
1397 Register tmp1, Register tmp2, Register tmp3, Register tmp4,
1398 Register tmp5, Register tmp6, Register product_hi);
1399
1400 #endif // COMPILER2
1401
1402 void inflate_lo32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);
1403 void inflate_hi32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);
1404
1405 void ctzc_bits(Register Rd, Register Rs, bool isLL = false,
1406 Register tmp1 = t0, Register tmp2 = t1);
1407
1408 void zero_words(Register base, uint64_t cnt);
1409 address zero_words(Register ptr, Register cnt);
1410 void fill_words(Register base, Register cnt, Register value);
1411 void zero_memory(Register addr, Register len, Register tmp);
1412 void zero_dcache_blocks(Register base, Register cnt, Register tmp1, Register tmp2);
1413
  // Shift left by shamt and add (Rd = Rs2 + (Rs1 << shamt), mirroring the Zba shNadd operand order); tmp may be clobbered.
1415 void shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt);
1416
  // Test a single bit in Rs; Rd is set to a non-zero value iff that bit is set.
1418 void test_bit(Register Rd, Register Rs, uint32_t bit_pos);
1419
  // The '_safe' variants of the float-to-integer conversions below handle special
  // inputs such as NaN, +Inf and -Inf: the plain fcvt instructions raise the
  // invalid-operation flag for these values and do not always produce the result
  // Java semantics require (e.g. NaN must convert to 0), so extra handling is
  // needed to get correct results.
1424 void fcvt_w_s_safe(Register dst, FloatRegister src, Register tmp = t0);
1425 void fcvt_l_s_safe(Register dst, FloatRegister src, Register tmp = t0);
1426 void fcvt_w_d_safe(Register dst, FloatRegister src, Register tmp = t0);
1427 void fcvt_l_d_safe(Register dst, FloatRegister src, Register tmp = t0);
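
  // For reference, a sketch of the Java narrowing-conversion semantics (JLS 5.1.3)
  // these helpers are expected to match (not emitted code):
  //   (int)  Float.NaN               == 0
  //   (int)  Float.POSITIVE_INFINITY == Integer.MAX_VALUE
  //   (int)  Float.NEGATIVE_INFINITY == Integer.MIN_VALUE
  //   (long) Double.NaN              == 0L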
1428
1429 void java_round_float(Register dst, FloatRegister src, FloatRegister ftmp);
1430 void java_round_double(Register dst, FloatRegister src, FloatRegister ftmp);
1431
  // Helper routine for the slow path (NaN handling) when converting float to float16.
1433 void float_to_float16_NaN(Register dst, FloatRegister src, Register tmp1, Register tmp2);
1434
1435 // vector load/store unit-stride instructions
1436 void vlex_v(VectorRegister vd, Register base, Assembler::SEW sew, VectorMask vm = unmasked) {
1437 switch (sew) {
1438 case Assembler::e64:
1439 vle64_v(vd, base, vm);
1440 break;
1441 case Assembler::e32:
1442 vle32_v(vd, base, vm);
1443 break;
1444 case Assembler::e16:
1445 vle16_v(vd, base, vm);
1446 break;
1447 case Assembler::e8: // fall through
1448 default:
1449 vle8_v(vd, base, vm);
1450 break;
1451 }
1452 }
1453
1454 void vsex_v(VectorRegister store_data, Register base, Assembler::SEW sew, VectorMask vm = unmasked) {
1455 switch (sew) {
1456 case Assembler::e64:
1457 vse64_v(store_data, base, vm);
1458 break;
1459 case Assembler::e32:
1460 vse32_v(store_data, base, vm);
1461 break;
1462 case Assembler::e16:
1463 vse16_v(store_data, base, vm);
1464 break;
1465 case Assembler::e8: // fall through
1466 default:
1467 vse8_v(store_data, base, vm);
1468 break;
1469 }
1470 }
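
  // Illustration (a sketch; register choices are arbitrary):
  //   vlex_v(v1, a0, Assembler::e32);   // emits vle32.v v1, (a0)
  //   vsex_v(v1, a1, Assembler::e32);   // emits vse32.v v1, (a1)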
1471
  // vector pseudo instructions
  // Rotate each 32-bit element of vd left by `shift` bits (assumes the current SEW is e32).
  // Note: both shift amounts must fit a uimm5 field, so 0 < shift < 32 is assumed.
1474 inline void vrole32_vi(VectorRegister vd, uint32_t shift, VectorRegister tmp_vr) {
1475 vsrl_vi(tmp_vr, vd, 32 - shift);
1476 vsll_vi(vd, vd, shift);
1477 vor_vv(vd, vd, tmp_vr);
1478 }
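
  // For example (derived from the body above), vrole32_vi(v4, 8, v5) emits:
  //   vsrl.vi v5, v4, 24
  //   vsll.vi v4, v4, 8
  //   vor.vv  v4, v4, v5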
1479
1480 inline void vl1r_v(VectorRegister vd, Register rs) {
1481 vl1re8_v(vd, rs);
1482 }
1483
1484 inline void vmnot_m(VectorRegister vd, VectorRegister vs) {
1485 vmnand_mm(vd, vs, vs);
1486 }
1487
1488 inline void vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked) {
1489 vnsrl_wx(vd, vs, x0, vm);
1490 }
1491
1492 inline void vneg_v(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked) {
1493 vrsub_vx(vd, vs, x0, vm);
1494 }
1495
1496 inline void vfneg_v(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked) {
1497 vfsgnjn_vv(vd, vs, vs, vm);
1498 }
1499
1500 inline void vfabs_v(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked) {
1501 vfsgnjx_vv(vd, vs, vs, vm);
1502 }
1503
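  // The greater-than / greater-or-equal comparisons below are pseudo-instructions:
  // each is implemented by issuing the reverse native comparison with the source
  // operands swapped.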
1504 inline void vmsgt_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
1505 vmslt_vv(vd, vs1, vs2, vm);
1506 }
1507
1508 inline void vmsgtu_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
1509 vmsltu_vv(vd, vs1, vs2, vm);
1510 }
1511
1512 inline void vmsge_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
1513 vmsle_vv(vd, vs1, vs2, vm);
1514 }
1515
1516 inline void vmsgeu_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
1517 vmsleu_vv(vd, vs1, vs2, vm);
1518 }
1519
1520 inline void vmfgt_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
1521 vmflt_vv(vd, vs1, vs2, vm);
1522 }
1523
1524 inline void vmfge_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
1525 vmfle_vv(vd, vs1, vs2, vm);
1526 }
1527
1528 inline void vmsltu_vi(VectorRegister Vd, VectorRegister Vs2, uint32_t imm, VectorMask vm = unmasked) {
1529 guarantee(imm >= 1 && imm <= 16, "imm is invalid");
1530 vmsleu_vi(Vd, Vs2, imm-1, vm);
1531 }
1532
1533 inline void vmsgeu_vi(VectorRegister Vd, VectorRegister Vs2, uint32_t imm, VectorMask vm = unmasked) {
1534 guarantee(imm >= 1 && imm <= 16, "imm is invalid");
1535 vmsgtu_vi(Vd, Vs2, imm-1, vm);
1536 }
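
  // Illustration: vmsltu_vi(Vd, Vs2, imm) is encoded as vmsleu_vi(Vd, Vs2, imm - 1),
  // and vmsgeu_vi as vmsgtu_vi with imm - 1, which is why imm must lie in [1, 16].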
1537
1538 // Copy mask register
1539 inline void vmmv_m(VectorRegister vd, VectorRegister vs) {
1540 vmand_mm(vd, vs, vs);
1541 }
1542
1543 // Clear mask register
1544 inline void vmclr_m(VectorRegister vd) {
1545 vmxor_mm(vd, vd, vd);
1546 }
1547
1548 // Set mask register
1549 inline void vmset_m(VectorRegister vd) {
1550 vmxnor_mm(vd, vd, vd);
1551 }
1552
1553 inline void vnot_v(VectorRegister Vd, VectorRegister Vs, VectorMask vm = unmasked) {
1554 vxor_vi(Vd, Vs, -1, vm);
1555 }
1556
1557 static const int zero_words_block_size;
1558
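  // Normalize the integer value in Rt to the canonical representation of the given
  // BasicType: for example, T_BOOLEAN maps any non-zero value to 1 and T_CHAR
  // zero-extends to 16 bits; 64-bit and floating-point types are left untouched.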
1559 void cast_primitive_type(BasicType type, Register Rt) {
1560 switch (type) {
1561 case T_BOOLEAN:
1562 sltu(Rt, zr, Rt);
1563 break;
1564 case T_CHAR :
1565 zext(Rt, Rt, 16);
1566 break;
1567 case T_BYTE :
1568 sext(Rt, Rt, 8);
1569 break;
1570 case T_SHORT :
1571 sext(Rt, Rt, 16);
1572 break;
1573 case T_INT :
1574 sext(Rt, Rt, 32);
1575 break;
1576 case T_LONG : /* nothing to do */ break;
1577 case T_VOID : /* nothing to do */ break;
1578 case T_FLOAT : /* nothing to do */ break;
1579 case T_DOUBLE : /* nothing to do */ break;
1580 default: ShouldNotReachHere();
1581 }
1582 }
1583
  // Float/double compare; unordered_result selects the value produced when the
  // comparison is unordered (i.e. either input is NaN).
1585 void float_compare(Register result, FloatRegister Rs1, FloatRegister Rs2, int unordered_result);
1586 void double_compare(Register result, FloatRegister Rs1, FloatRegister Rs2, int unordered_result);
1587
1588 // Zero/Sign-extend
1589 void zext(Register dst, Register src, int bits);
1590 void sext(Register dst, Register src, int bits);
1591
1592 private:
1593 void cmp_x2i(Register dst, Register src1, Register src2, Register tmp, bool is_signed = true);
1594
1595 public:
1596 // compare src1 and src2 and get -1/0/1 in dst.
1597 // if [src1 > src2], dst = 1;
1598 // if [src1 == src2], dst = 0;
1599 // if [src1 < src2], dst = -1;
1600 void cmp_l2i(Register dst, Register src1, Register src2, Register tmp = t0);
1601 void cmp_ul2i(Register dst, Register src1, Register src2, Register tmp = t0);
1602 void cmp_uw2i(Register dst, Register src1, Register src2, Register tmp = t0);
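
  // Illustration (assumed naming convention, not emitted code):
  //   cmp_l2i (dst, a, b)  -- signed 64-bit compare, like Long.compare(a, b) normalized to -1/0/1
  //   cmp_ul2i(dst, a, b)  -- unsigned 64-bit compare, like Long.compareUnsigned(a, b) normalized to -1/0/1
  //   cmp_uw2i(dst, a, b)  -- unsigned 32-bit compare, like Integer.compareUnsigned(a, b) normalized to -1/0/1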
1603
1604 // support for argument shuffling
1605 void move32_64(VMRegPair src, VMRegPair dst, Register tmp = t0);
1606 void float_move(VMRegPair src, VMRegPair dst, Register tmp = t0);
1607 void long_move(VMRegPair src, VMRegPair dst, Register tmp = t0);
1608 void double_move(VMRegPair src, VMRegPair dst, Register tmp = t0);
1609 void object_move(OopMap* map,
1610 int oop_handle_offset,
1611 int framesize_in_slots,
1612 VMRegPair src,
1613 VMRegPair dst,
1614 bool is_receiver,
1615 int* receiver_offset);
1616
1617 #ifdef ASSERT
  // Template shorthand to clean up after a failed attempt at trampoline call
  // generation (see trampoline_call()), when a set of Labels must be reset
  // before returning.
1621 template<typename Label, typename... More>
1622 void reset_labels(Label& lbl, More&... more) {
1623 lbl.reset(); reset_labels(more...);
1624 }
1625 template<typename Label>
1626 void reset_labels(Label& lbl) {
1627 lbl.reset();
1628 }
1629 #endif
1630
1631 private:
1632
1633 void repne_scan(Register addr, Register value, Register count, Register tmp);
1634
1635 int bitset_to_regs(unsigned int bitset, unsigned char* regs);
1636 Address add_memory_helper(const Address dst, Register tmp);
1637
1638 void load_reserved(Register dst, Register addr, Assembler::operand_size size, Assembler::Aqrl acquire);
1639 void store_conditional(Register dst, Register new_val, Register addr, Assembler::operand_size size, Assembler::Aqrl release);
1640
1641 public:
1642 void lightweight_lock(Register basic_lock, Register obj, Register tmp1, Register tmp2, Register tmp3, Label& slow);
1643 void lightweight_unlock(Register obj, Register tmp1, Register tmp2, Register tmp3, Label& slow);
1644
1645 public:
1646 enum {
1647 // movptr
1648 movptr1_instruction_size = 6 * MacroAssembler::instruction_size, // lui, addi, slli, addi, slli, addi. See movptr1().
1649 movptr2_instruction_size = 5 * MacroAssembler::instruction_size, // lui, lui, slli, add, addi. See movptr2().
1650 load_pc_relative_instruction_size = 2 * MacroAssembler::instruction_size // auipc, ld
1651 };
1652
1653 static bool is_load_pc_relative_at(address branch);
1654 static bool is_li16u_at(address instr);
1655
1656 static bool is_jal_at(address instr) { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b1101111; }
1657 static bool is_jalr_at(address instr) { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b1100111 && extract_funct3(instr) == 0b000; }
1658 static bool is_branch_at(address instr) { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b1100011; }
1659 static bool is_ld_at(address instr) { assert_cond(instr != nullptr); return is_load_at(instr) && extract_funct3(instr) == 0b011; }
1660 static bool is_load_at(address instr) { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b0000011; }
1661 static bool is_float_load_at(address instr) { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b0000111; }
1662 static bool is_auipc_at(address instr) { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b0010111; }
1663 static bool is_jump_at(address instr) { assert_cond(instr != nullptr); return is_branch_at(instr) || is_jal_at(instr) || is_jalr_at(instr); }
1664 static bool is_add_at(address instr) { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b0110011 && extract_funct3(instr) == 0b000; }
1665 static bool is_addi_at(address instr) { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b0010011 && extract_funct3(instr) == 0b000; }
1666 static bool is_addiw_at(address instr) { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b0011011 && extract_funct3(instr) == 0b000; }
1667 static bool is_addiw_to_zr_at(address instr){ assert_cond(instr != nullptr); return is_addiw_at(instr) && extract_rd(instr) == zr; }
1668 static bool is_lui_at(address instr) { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b0110111; }
1669 static bool is_lui_to_zr_at(address instr) { assert_cond(instr != nullptr); return is_lui_at(instr) && extract_rd(instr) == zr; }
1670
  static bool is_srli_at(address instr) {
    assert_cond(instr != nullptr);
    return extract_opcode(instr) == 0b0010011 &&
           extract_funct3(instr) == 0b101 &&
           Assembler::extract(Assembler::ld_instr(instr), 31, 26) == 0b000000;
  }
1677
1678 static bool is_slli_shift_at(address instr, uint32_t shift) {
1679 assert_cond(instr != nullptr);
1680 return (extract_opcode(instr) == 0b0010011 && // opcode field
1681 extract_funct3(instr) == 0b001 && // funct3 field, select the type of operation
1682 Assembler::extract(Assembler::ld_instr(instr), 25, 20) == shift); // shamt field
1683 }
1684
1685 static bool is_movptr1_at(address instr);
1686 static bool is_movptr2_at(address instr);
1687
1688 static bool is_lwu_to_zr(address instr);
1689
1690 static Register extract_rs1(address instr);
1691 static Register extract_rs2(address instr);
1692 static Register extract_rd(address instr);
1693 static uint32_t extract_opcode(address instr);
1694 static uint32_t extract_funct3(address instr);
1695
  // the instruction sequence of movptr1 is as below:
1697 // lui
1698 // addi
1699 // slli
1700 // addi
1701 // slli
1702 // addi/jalr/load
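  // The check below verifies the data dependencies of this sequence: each instruction
  // reads the result of the one before it and (except the last) writes that same register.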
1703 static bool check_movptr1_data_dependency(address instr) {
1704 address lui = instr;
1705 address addi1 = lui + MacroAssembler::instruction_size;
1706 address slli1 = addi1 + MacroAssembler::instruction_size;
1707 address addi2 = slli1 + MacroAssembler::instruction_size;
1708 address slli2 = addi2 + MacroAssembler::instruction_size;
1709 address last_instr = slli2 + MacroAssembler::instruction_size;
1710 return extract_rs1(addi1) == extract_rd(lui) &&
1711 extract_rs1(addi1) == extract_rd(addi1) &&
1712 extract_rs1(slli1) == extract_rd(addi1) &&
1713 extract_rs1(slli1) == extract_rd(slli1) &&
1714 extract_rs1(addi2) == extract_rd(slli1) &&
1715 extract_rs1(addi2) == extract_rd(addi2) &&
1716 extract_rs1(slli2) == extract_rd(addi2) &&
1717 extract_rs1(slli2) == extract_rd(slli2) &&
1718 extract_rs1(last_instr) == extract_rd(slli2);
1719 }
1720
1721 // the instruction sequence of movptr2 is as below:
1722 // lui
1723 // lui
1724 // slli
1725 // add
1726 // addi/jalr/load
1727 static bool check_movptr2_data_dependency(address instr) {
1728 address lui1 = instr;
1729 address lui2 = lui1 + MacroAssembler::instruction_size;
1730 address slli = lui2 + MacroAssembler::instruction_size;
1731 address add = slli + MacroAssembler::instruction_size;
1732 address last_instr = add + MacroAssembler::instruction_size;
1733 return extract_rd(add) == extract_rd(lui2) &&
1734 extract_rs1(add) == extract_rd(lui2) &&
1735 extract_rs2(add) == extract_rd(slli) &&
1736 extract_rs1(slli) == extract_rd(lui1) &&
1737 extract_rd(slli) == extract_rd(lui1) &&
1738 extract_rs1(last_instr) == extract_rd(add);
1739 }
1740
1741 // the instruction sequence of li16u is as below:
1742 // lui
1743 // srli
1744 static bool check_li16u_data_dependency(address instr) {
1745 address lui = instr;
1746 address srli = lui + MacroAssembler::instruction_size;
1747
1748 return extract_rs1(srli) == extract_rd(lui) &&
1749 extract_rs1(srli) == extract_rd(srli);
1750 }
1751
1752 // the instruction sequence of li32 is as below:
1753 // lui
1754 // addiw
1755 static bool check_li32_data_dependency(address instr) {
1756 address lui = instr;
1757 address addiw = lui + MacroAssembler::instruction_size;
1758
1759 return extract_rs1(addiw) == extract_rd(lui) &&
1760 extract_rs1(addiw) == extract_rd(addiw);
1761 }
1762
  // the instruction sequence of a pc-relative access is as below:
1764 // auipc
1765 // jalr/addi/load/float_load
1766 static bool check_pc_relative_data_dependency(address instr) {
1767 address auipc = instr;
1768 address last_instr = auipc + MacroAssembler::instruction_size;
1769
1770 return extract_rs1(last_instr) == extract_rd(auipc);
1771 }
1772
  // the instruction sequence of load_label (a pc-relative load) is as below:
1774 // auipc
1775 // load
1776 static bool check_load_pc_relative_data_dependency(address instr) {
1777 address auipc = instr;
1778 address load = auipc + MacroAssembler::instruction_size;
1779
1780 return extract_rd(load) == extract_rd(auipc) &&
1781 extract_rs1(load) == extract_rd(load);
1782 }
1783
1784 static bool is_li32_at(address instr);
1785 static bool is_pc_relative_at(address branch);
1786
  static bool is_membar(address addr) {
    // 0b0001111 is the MISC-MEM (FENCE) opcode; funct3 == 0 selects a plain FENCE.
    return (Bytes::get_native_u4(addr) & 0x7f) == 0b1111 && extract_funct3(addr) == 0;
  }
1790 static uint32_t get_membar_kind(address addr);
1791 static void set_membar_kind(address addr, uint32_t order_kind);
1792 };
1793
1794 #ifdef ASSERT
1795 inline bool AbstractAssembler::pd_check_instruction_mark() { return false; }
1796 #endif
1797
1798 #endif // CPU_RISCV_MACROASSEMBLER_RISCV_HPP