1 /*
2 * Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved.
3 * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
4 * Copyright (c) 2020, 2024, Huawei Technologies Co., Ltd. All rights reserved.
5 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 *
7 * This code is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License version 2 only, as
9 * published by the Free Software Foundation.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 *
25 */
26
27 #ifndef CPU_RISCV_MACROASSEMBLER_RISCV_HPP
28 #define CPU_RISCV_MACROASSEMBLER_RISCV_HPP
29
30 #include "asm/assembler.inline.hpp"
31 #include "code/vmreg.hpp"
32 #include "metaprogramming/enableIf.hpp"
33 #include "oops/compressedOops.hpp"
34 #include "utilities/powerOfTwo.hpp"
35
36 class ciInlineKlass;
37 class SigEntry;
38 class VMRegPair;
39
40 // MacroAssembler extends Assembler by frequently used macros.
41 //
42 // Instructions for which a 'better' code sequence exists depending
43 // on arguments should also go in here.
44
45 class MacroAssembler: public Assembler {
46
47 public:
48
  // Construct a MacroAssembler that emits into the given CodeBuffer.
  MacroAssembler(CodeBuffer* code) : Assembler(code) {}
50
51 void safepoint_poll(Label& slow_path, bool at_return, bool in_nmethod, Register tmp_reg = t0);
52
53 // Alignment
54 int align(int modulus, int extra_offset = 0);
55
  // Debug check that 'pc' is aligned to 'alignment' bytes (defaults to the
  // size of one instruction). Emits no code.
  static inline void assert_alignment(address pc, int alignment = MacroAssembler::instruction_size) {
    assert(is_aligned(pc, alignment), "bad alignment");
  }
59
60 // nop
61 void post_call_nop();
62
63 // Stack frame creation/removal
64 // Note that SP must be updated to the right place before saving/restoring RA and FP
65 // because signal based thread suspend/resume could happen asynchronously.
  // Prologue: push a two-slot frame (RA at sp+wordSize, FP at sp) and point
  // FP back at the caller's SP. SP is moved first — see note above about
  // asynchronous signal-based suspend/resume.
  void enter() {
    subi(sp, sp, 2 * wordSize);
    sd(ra, Address(sp, wordSize));
    sd(fp, Address(sp));
    addi(fp, sp, 2 * wordSize);      // fp = caller's sp
  }

  // Epilogue: undo enter() — reload FP and RA from the two slots below the
  // current FP, then pop them.
  void leave() {
    subi(sp, fp, 2 * wordSize);      // sp = start of our two-slot frame
    ld(fp, Address(sp));
    ld(ra, Address(sp, wordSize));
    addi(sp, sp, 2 * wordSize);
  }
79
80
81 // Support for getting the JavaThread pointer (i.e.; a reference to thread-local information)
82 // The pointer will be loaded into the thread register.
83 void get_thread(Register thread);
84
85 // Support for VM calls
86 //
87 // It is imperative that all calls into the VM are handled via the call_VM macros.
88 // They make sure that the stack linkage is setup correctly. call_VM's correspond
89 // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points.
90
91 void call_VM(Register oop_result,
92 address entry_point,
93 bool check_exceptions = true);
94 void call_VM(Register oop_result,
95 address entry_point,
96 Register arg_1,
97 bool check_exceptions = true);
98 void call_VM(Register oop_result,
99 address entry_point,
100 Register arg_1, Register arg_2,
101 bool check_exceptions = true);
102 void call_VM(Register oop_result,
103 address entry_point,
104 Register arg_1, Register arg_2, Register arg_3,
105 bool check_exceptions = true);
106
107 // Overloadings with last_Java_sp
108 void call_VM(Register oop_result,
109 Register last_java_sp,
110 address entry_point,
111 int number_of_arguments = 0,
112 bool check_exceptions = true);
113 void call_VM(Register oop_result,
114 Register last_java_sp,
115 address entry_point,
116 Register arg_1,
117 bool check_exceptions = true);
118 void call_VM(Register oop_result,
119 Register last_java_sp,
120 address entry_point,
121 Register arg_1, Register arg_2,
122 bool check_exceptions = true);
123 void call_VM(Register oop_result,
124 Register last_java_sp,
125 address entry_point,
126 Register arg_1, Register arg_2, Register arg_3,
127 bool check_exceptions = true);
128
129 void get_vm_result_oop(Register oop_result, Register java_thread);
130 void get_vm_result_metadata(Register metadata_result, Register java_thread);
131
132 // These always tightly bind to MacroAssembler::call_VM_leaf_base
133 // bypassing the virtual implementation
134 void call_VM_leaf(address entry_point,
135 int number_of_arguments = 0);
136 void call_VM_leaf(address entry_point,
137 Register arg_0);
138 void call_VM_leaf(address entry_point,
139 Register arg_0, Register arg_1);
140 void call_VM_leaf(address entry_point,
141 Register arg_0, Register arg_1, Register arg_2);
142
143 // These always tightly bind to MacroAssembler::call_VM_base
144 // bypassing the virtual implementation
145 void super_call_VM_leaf(address entry_point, Register arg_0);
146 void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1);
147 void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2);
148 void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3);
149
150 // last Java Frame (fills frame anchor)
151 void set_last_Java_frame(Register last_java_sp, Register last_java_fp, address last_java_pc, Register tmp);
152 void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Label &last_java_pc, Register tmp);
153 void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Register last_java_pc);
154
155 // thread in the default location (xthread)
156 void reset_last_Java_frame(bool clear_fp);
157
158 virtual void call_VM_leaf_base(
159 address entry_point, // the entry point
160 int number_of_arguments, // the number of arguments to pop after the call
161 Label* retaddr = nullptr
162 );
163
  // Convenience overload: binds the return-address label and forwards to the
  // Label* variant above.
  virtual void call_VM_leaf_base(
    address entry_point,              // the entry point
    int number_of_arguments,          // the number of arguments to pop after the call
    Label& retaddr) {
    call_VM_leaf_base(entry_point, number_of_arguments, &retaddr);
  }
170
171 virtual void call_VM_base( // returns the register containing the thread upon return
172 Register oop_result, // where an oop-result ends up if any; use noreg otherwise
173 Register java_thread, // the thread if computed before ; use noreg otherwise
174 Register last_java_sp, // to set up last_Java_frame in stubs; use noreg otherwise
175 Label* return_pc, // to set up last_Java_frame; use nullptr otherwise
176 address entry_point, // the entry point
177 int number_of_arguments, // the number of arguments (w/o thread) to pop after the call
178 bool check_exceptions // whether to check for pending exceptions after return
179 );
180
181 void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions);
182
183 virtual void check_and_handle_earlyret(Register java_thread);
184 virtual void check_and_handle_popframe(Register java_thread);
185
186 void resolve_weak_handle(Register result, Register tmp1, Register tmp2);
187 void resolve_oop_handle(Register result, Register tmp1, Register tmp2);
188 void resolve_jobject(Register value, Register tmp1, Register tmp2);
189 void resolve_global_jobject(Register value, Register tmp1, Register tmp2);
190
191 void movoop(Register dst, jobject obj);
192 void mov_metadata(Register dst, Metadata* obj);
193 void bang_stack_size(Register size, Register tmp);
194 void set_narrow_oop(Register dst, jobject obj);
195 void set_narrow_klass(Register dst, Klass* k);
196
197 void load_mirror(Register dst, Register method, Register tmp1, Register tmp2);
198 void access_load_at(BasicType type, DecoratorSet decorators, Register dst,
199 Address src, Register tmp1, Register tmp2);
200 void access_store_at(BasicType type, DecoratorSet decorators, Address dst,
201 Register val, Register tmp1, Register tmp2, Register tmp3);
202 void load_klass(Register dst, Register src, Register tmp = t0);
203 void load_narrow_klass_compact(Register dst, Register src);
204 void store_klass(Register dst, Register src, Register tmp = t0);
205 void cmp_klass_beq(Register obj, Register klass,
206 Register tmp1, Register tmp2,
207 Label &L, bool is_far = false);
208 void cmp_klass_bne(Register obj, Register klass,
209 Register tmp1, Register tmp2,
210 Label &L, bool is_far = false);
211
212 void encode_klass_not_null(Register r, Register tmp = t0);
213 void decode_klass_not_null(Register r, Register tmp = t0);
214 void encode_klass_not_null(Register dst, Register src, Register tmp);
215 void decode_klass_not_null(Register dst, Register src, Register tmp);
216 void decode_heap_oop_not_null(Register r);
217 void decode_heap_oop_not_null(Register dst, Register src);
218 void decode_heap_oop(Register d, Register s);
219 void decode_heap_oop(Register r) { decode_heap_oop(r, r); }
220 void encode_heap_oop_not_null(Register r);
221 void encode_heap_oop_not_null(Register dst, Register src);
222 void encode_heap_oop(Register d, Register s);
223 void encode_heap_oop(Register r) { encode_heap_oop(r, r); };
224 void load_heap_oop(Register dst, Address src, Register tmp1,
225 Register tmp2, DecoratorSet decorators = 0);
226 void load_heap_oop_not_null(Register dst, Address src, Register tmp1,
227 Register tmp2, DecoratorSet decorators = 0);
228 void store_heap_oop(Address dst, Register val, Register tmp1,
229 Register tmp2, Register tmp3, DecoratorSet decorators = 0);
230
231 void store_klass_gap(Register dst, Register src);
232
233 // currently unimplemented
234 // Used for storing null. All other oop constants should be
235 // stored using routines that take a jobject.
236 void store_heap_oop_null(Address dst);
237
238 // This dummy is to prevent a call to store_heap_oop from
239 // converting a zero (linked null) into a Register by giving
240 // the compiler two choices it can't resolve
241
242 void store_heap_oop(Address dst, void* dummy);
243
244 // Support for null-checks
245 //
246 // Generates code that causes a null OS exception if the content of reg is null.
247 // If the accessed location is M[reg + offset] and the offset is known, provide the
  // offset. No explicit code generation is needed if the offset is within a certain
249 // range (0 <= offset <= page_size).
250
251 virtual void null_check(Register reg, int offset = -1);
252 static bool needs_explicit_null_check(intptr_t offset);
253 static bool uses_implicit_null_check(void* address);
254
255 // interface method calling
256 void lookup_interface_method(Register recv_klass,
257 Register intf_klass,
258 RegisterOrConstant itable_index,
259 Register method_result,
260 Register scan_tmp,
261 Label& no_such_interface,
262 bool return_method = true);
263
264 void lookup_interface_method_stub(Register recv_klass,
265 Register holder_klass,
266 Register resolved_klass,
267 Register method_result,
268 Register temp_reg,
269 Register temp_reg2,
270 int itable_index,
271 Label& L_no_such_interface);
272
273 // virtual method calling
  // N.B. x86 allows RegisterOrConstant for vtable_index
275 void lookup_virtual_method(Register recv_klass,
276 RegisterOrConstant vtable_index,
277 Register method_result);
278
  // Form an address from base + offset in Rd. Rd may or may not
280 // actually be used: you must use the Address that is returned. It
281 // is up to you to ensure that the shift provided matches the size
282 // of your data.
283 Address form_address(Register Rd, Register base, int64_t byte_offset);
284
285 // Sometimes we get misaligned loads and stores, usually from Unsafe
286 // accesses, and these can exceed the offset range.
287 Address legitimize_address(Register Rd, const Address &adr) {
288 if (adr.getMode() == Address::base_plus_offset) {
289 if (!is_simm12(adr.offset())) {
290 return form_address(Rd, adr.base(), adr.offset());
291 }
292 }
293 return adr;
294 }
295
296 // allocation
297 void tlab_allocate(
298 Register obj, // result: pointer to object after successful allocation
299 Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise
300 int con_size_in_bytes, // object size in bytes if known at compile time
301 Register tmp1, // temp register
302 Register tmp2, // temp register
303 Label& slow_case, // continuation point of fast allocation fails
304 bool is_far = false
305 );
306
307 // Test sub_klass against super_klass, with fast and slow paths.
308
309 // The fast path produces a tri-state answer: yes / no / maybe-slow.
310 // One of the three labels can be null, meaning take the fall-through.
311 // If super_check_offset is -1, the value is loaded up from super_klass.
312 // No registers are killed, except tmp_reg
313 void check_klass_subtype_fast_path(Register sub_klass,
314 Register super_klass,
315 Register tmp_reg,
316 Label* L_success,
317 Label* L_failure,
318 Label* L_slow_path,
319 Register super_check_offset = noreg);
320
  // The rest of the type check; must be wired to a corresponding fast path.
322 // It does not repeat the fast path logic, so don't use it standalone.
323 // The tmp1_reg and tmp2_reg can be noreg, if no temps are available.
324 // Updates the sub's secondary super cache as necessary.
325 void check_klass_subtype_slow_path(Register sub_klass,
326 Register super_klass,
327 Register tmp1_reg,
328 Register tmp2_reg,
329 Label* L_success,
330 Label* L_failure,
331 bool set_cond_codes = false);
332
333 void check_klass_subtype_slow_path_linear(Register sub_klass,
334 Register super_klass,
335 Register tmp1_reg,
336 Register tmp2_reg,
337 Label* L_success,
338 Label* L_failure,
339 bool set_cond_codes = false);
340
341 void check_klass_subtype_slow_path_table(Register sub_klass,
342 Register super_klass,
343 Register tmp1_reg,
344 Register tmp2_reg,
345 Label* L_success,
346 Label* L_failure,
347 bool set_cond_codes = false);
348
349 // If r is valid, return r.
350 // If r is invalid, remove a register r2 from available_regs, add r2
351 // to regs_to_push, then return r2.
352 Register allocate_if_noreg(const Register r,
353 RegSetIterator<Register> &available_regs,
354 RegSet ®s_to_push);
355
356 // Secondary subtype checking
357 void lookup_secondary_supers_table_var(Register sub_klass,
358 Register r_super_klass,
359 Register result,
360 Register tmp1,
361 Register tmp2,
362 Register tmp3,
363 Register tmp4,
364 Label *L_success);
365
366 void population_count(Register dst, Register src, Register tmp1, Register tmp2);
367
368 // As above, but with a constant super_klass.
369 // The result is in Register result, not the condition codes.
370 bool lookup_secondary_supers_table_const(Register r_sub_klass,
371 Register r_super_klass,
372 Register result,
373 Register tmp1,
374 Register tmp2,
375 Register tmp3,
376 Register tmp4,
377 u1 super_klass_slot,
378 bool stub_is_near = false);
379
380 void verify_secondary_supers_table(Register r_sub_klass,
381 Register r_super_klass,
382 Register result,
383 Register tmp1,
384 Register tmp2,
385 Register tmp3);
386
387 void lookup_secondary_supers_table_slow_path(Register r_super_klass,
388 Register r_array_base,
389 Register r_array_index,
390 Register r_bitmap,
391 Register result,
392 Register tmp,
393 bool is_stub = true);
394
395 void check_klass_subtype(Register sub_klass,
396 Register super_klass,
397 Register tmp_reg,
398 Label& L_success);
399
400 Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0);
401
402 void profile_receiver_type(Register recv, Register mdp, int mdp_offset);
403
404 // only if +VerifyOops
405 void _verify_oop(Register reg, const char* s, const char* file, int line);
406 void _verify_oop_addr(Address addr, const char* s, const char* file, int line);
407
  // Verify the oop in 'reg', but only when running with +VerifyOops;
  // otherwise emits nothing.
  void _verify_oop_checked(Register reg, const char* s, const char* file, int line) {
    if (VerifyOops) {
      _verify_oop(reg, s, file, line);
    }
  }
  // As above, but for an oop located at the given address.
  void _verify_oop_addr_checked(Address reg, const char* s, const char* file, int line) {
    if (VerifyOops) {
      _verify_oop_addr(reg, s, file, line);
    }
  }
418
419 void _verify_method_ptr(Register reg, const char* msg, const char* file, int line) {}
420 void _verify_klass_ptr(Register reg, const char* msg, const char* file, int line) {}
421
422 #define verify_oop(reg) _verify_oop_checked(reg, "broken oop " #reg, __FILE__, __LINE__)
423 #define verify_oop_msg(reg, msg) _verify_oop_checked(reg, "broken oop " #reg ", " #msg, __FILE__, __LINE__)
424 #define verify_oop_addr(addr) _verify_oop_addr_checked(addr, "broken oop addr " #addr, __FILE__, __LINE__)
425 #define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__)
426 #define verify_klass_ptr(reg) _verify_method_ptr(reg, "broken klass " #reg, __FILE__, __LINE__)
427
428 // A more convenient access to fence for our purposes
  // We use four bits to indicate the read and write bits of the predecessor and successor sets,
  // extending r with i and w with o when UseConservativeFence is enabled.
  // Encoding: bits [3:2] hold the predecessor access set and bits [1:0] the
  // successor set, with r = 0b10 and w = 0b01 inside each 2-bit field
  // (see membar_mask_to_pred_succ / pred_succ_to_membar_mask below).
  enum Membar_mask_bits {
    StoreStore = 0b0101, // (pred = w + succ = w)
    LoadStore = 0b1001, // (pred = r + succ = w)
    StoreLoad = 0b0110, // (pred = w + succ = r)
    LoadLoad = 0b1010, // (pred = r + succ = r)
    AnyAny = LoadStore | StoreLoad // (pred = rw + succ = rw)
  };
438
439 void membar(uint32_t order_constraint);
440
441 private:
442
  // Split a Membar_mask_bits value into the 2-bit predecessor and successor
  // fields used by FENCE. With UseConservativeFence, each r/w bit is widened
  // to also cover the corresponding device i/o bit (rw -> iorw).
  static void membar_mask_to_pred_succ(uint32_t order_constraint,
                                       uint32_t& predecessor, uint32_t& successor) {
    predecessor = (order_constraint >> 2) & 0x3;
    successor = order_constraint & 0x3;

    // extend rw -> iorw:
    // 01(w) -> 0101(ow)
    // 10(r) -> 1010(ir)
    // 11(rw)-> 1111(iorw)
    if (UseConservativeFence) {
      predecessor |= predecessor << 2;
      successor |= successor << 2;
    }
  }
457
458 static int pred_succ_to_membar_mask(uint32_t predecessor, uint32_t successor) {
459 return ((predecessor & 0x3) << 2) | (successor & 0x3);
460 }
461
462 public:
463
464 void cmodx_fence();
465
  // Emit a PAUSE spin-wait hint.
  void pause() {
    // Zihintpause
    // PAUSE is encoded as a FENCE instruction with pred=W, succ=0, fm=0, rd=x0, and rs1=x0.
    Assembler::fence(w, 0);
  }

  // prints msg, dumps registers and stops execution
  void stop(const char* msg);

  static void debug64(char* msg, int64_t pc, int64_t regs[]);

  void unimplemented(const char* what = "");

  // Trap for paths that must never execute at runtime.
  void should_not_reach_here() { stop("should not reach here"); }

  static address target_addr_for_insn(address insn_addr);

  // Required platform-specific helpers for Label::patch_instructions.
  // They _shadow_ the declarations in AbstractAssembler, which are undefined.
  static int pd_patch_instruction_size(address branch, address target);
  // Patch the branch at 'branch' to jump to 'target' (file/line are accepted
  // for interface compatibility but unused here).
  static void pd_patch_instruction(address branch, address target, const char* file = nullptr, int line = 0) {
    pd_patch_instruction_size(branch, target);
  }
  // Decode and return the destination of the call/branch at 'branch'.
  static address pd_call_destination(address branch) {
    return target_addr_for_insn(branch);
  }

  static int patch_oop(address insn_addr, address o);

  static address get_target_of_li32(address insn_addr);
  static int patch_imm_in_li32(address branch, int32_t target);

  // Return whether code is emitted to a scratch blob.
  virtual bool in_scratch_emit_size() {
    return false;
  }
502
503 address emit_reloc_call_address_stub(int insts_call_instruction_offset, address target);
504 static int max_reloc_call_address_stub_size();
505
506 void emit_static_call_stub();
507 static int static_call_stub_size();
508
509 // The following 4 methods return the offset of the appropriate move instruction
510
511 // Support for fast byte/short loading with zero extension (depending on particular CPU)
512 int load_unsigned_byte(Register dst, Address src);
513 int load_unsigned_short(Register dst, Address src);
514
515 // Support for fast byte/short loading with sign extension (depending on particular CPU)
516 int load_signed_byte(Register dst, Address src);
517 int load_signed_short(Register dst, Address src);
518
519 // Load and store values by size and signed-ness
520 void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed);
521 void store_sized_value(Address dst, Register src, size_t size_in_bytes);
522
523 // Misaligned loads, will use the best way, according to the AvoidUnalignedAccess flag
524 void load_short_misaligned(Register dst, Address src, Register tmp, bool is_signed, int granularity = 1);
525 void load_int_misaligned(Register dst, Address src, Register tmp, bool is_signed, int granularity = 1);
526 void load_long_misaligned(Register dst, Address src, Register tmp, int granularity = 1);
527
528 public:
529 // Standard pseudo instructions
  // nop: canonical no-op (addi x0, x0, 0).
  inline void nop() {
    addi(x0, x0, 0);
  }

  // mv: register copy; emits nothing when source and destination coincide.
  inline void mv(Register Rd, Register Rs) {
    if (Rd != Rs) {
      addi(Rd, Rs, 0);
    }
  }

  // notr: bitwise complement; uses the compressed Zcb form when applicable.
  inline void notr(Register Rd, Register Rs) {
    if (do_compress_zcb(Rd, Rs) && (Rd == Rs)) {
      c_not(Rd);
    } else {
      xori(Rd, Rs, -1);
    }
  }

  // neg: two's-complement negation (Rd = 0 - Rs).
  inline void neg(Register Rd, Register Rs) {
    sub(Rd, x0, Rs);
  }

  // negw: negate the low 32 bits, sign-extending the result.
  inline void negw(Register Rd, Register Rs) {
    subw(Rd, x0, Rs);
  }

  // sext_w: sign-extend the low 32 bits of Rs into Rd.
  inline void sext_w(Register Rd, Register Rs) {
    addiw(Rd, Rs, 0);
  }

  // zext_b: zero-extend the low byte; uses the compressed Zcb form when applicable.
  inline void zext_b(Register Rd, Register Rs) {
    if (do_compress_zcb(Rd, Rs) && (Rd == Rs)) {
      c_zext_b(Rd);
    } else {
      andi(Rd, Rs, 0xFF);
    }
  }

  // seqz: Rd = (Rs == 0) ? 1 : 0.
  inline void seqz(Register Rd, Register Rs) {
    sltiu(Rd, Rs, 1);
  }

  // snez: Rd = (Rs != 0) ? 1 : 0.
  inline void snez(Register Rd, Register Rs) {
    sltu(Rd, x0, Rs);
  }

  // sltz: Rd = (Rs < 0, signed) ? 1 : 0.
  inline void sltz(Register Rd, Register Rs) {
    slt(Rd, Rs, x0);
  }

  // sgtz: Rd = (Rs > 0, signed) ? 1 : 0.
  inline void sgtz(Register Rd, Register Rs) {
    slt(Rd, x0, Rs);
  }

  // Bit-manipulation extension pseudo instructions
  // zero extend word
  // zext_w: zero-extend the low 32 bits (requires Zba); uses the compressed
  // Zcb form when applicable.
  inline void zext_w(Register Rd, Register Rs) {
    assert(UseZba, "must be");
    if (do_compress_zcb(Rd, Rs) && (Rd == Rs)) {
      c_zext_w(Rd);
    } else {
      add_uw(Rd, Rs, zr);
    }
  }
594
595 // Floating-point data-processing pseudo instructions
  // fmv_s: single-precision register copy; emits nothing when Rd == Rs.
  inline void fmv_s(FloatRegister Rd, FloatRegister Rs) {
    if (Rd != Rs) {
      fsgnj_s(Rd, Rs, Rs);
    }
  }

  // fabs_s: single-precision absolute value (fsgnjx with itself clears the sign bit).
  inline void fabs_s(FloatRegister Rd, FloatRegister Rs) {
    fsgnjx_s(Rd, Rs, Rs);
  }

  // fneg_s: single-precision negation (fsgnjn with itself flips the sign bit).
  inline void fneg_s(FloatRegister Rd, FloatRegister Rs) {
    fsgnjn_s(Rd, Rs, Rs);
  }

  // fmv_d: double-precision register copy; emits nothing when Rd == Rs.
  inline void fmv_d(FloatRegister Rd, FloatRegister Rs) {
    if (Rd != Rs) {
      fsgnj_d(Rd, Rs, Rs);
    }
  }

  // fabs_d: double-precision absolute value.
  inline void fabs_d(FloatRegister Rd, FloatRegister Rs) {
    fsgnjx_d(Rd, Rs, Rs);
  }

  // fneg_d: double-precision negation.
  inline void fneg_d(FloatRegister Rd, FloatRegister Rs) {
    fsgnjn_d(Rd, Rs, Rs);
  }
623
  // Control and status pseudo instructions
  void csrr(Register Rd, unsigned csr);                   // read csr
  void csrw(unsigned csr, Register Rs);                   // write csr
  void csrs(unsigned csr, Register Rs);                   // set bits in csr
  void csrc(unsigned csr, Register Rs);                   // clear bits in csr
  void csrwi(unsigned csr, unsigned imm);                 // write immediate to csr
  void csrsi(unsigned csr, unsigned imm);                 // set immediate bits in csr
  void csrci(unsigned csr, unsigned imm);                 // clear immediate bits in csr
  void frcsr(Register Rd) { csrr(Rd, CSR_FCSR); };        // read floating-point csr
  void fscsr(Register Rd, Register Rs);                   // swap floating-point csr
  void fscsr(Register Rs);                                // write floating-point csr
  void frrm(Register Rd) { csrr(Rd, CSR_FRM); };          // read floating-point rounding mode
  void fsrm(Register Rd, Register Rs);                    // swap floating-point rounding mode
  void fsrm(Register Rs);                                 // write floating-point rounding mode
  void fsrmi(Register Rd, unsigned imm);                  // swap floating-point rounding mode (immediate)
  void fsrmi(unsigned imm);                               // write floating-point rounding mode (immediate)
  void frflags(Register Rd) { csrr(Rd, CSR_FFLAGS); };    // read floating-point exception flags
  void fsflags(Register Rd, Register Rs);                 // swap floating-point exception flags
  void fsflags(Register Rs);                              // write floating-point exception flags
  void fsflagsi(Register Rd, unsigned imm);               // swap floating-point exception flags (immediate)
  void fsflagsi(unsigned imm);                            // write floating-point exception flags (immediate)
  // Requires Zicntr
  void rdinstret(Register Rd) { csrr(Rd, CSR_INSTRET); }; // read instruction-retired counter
  void rdcycle(Register Rd) { csrr(Rd, CSR_CYCLE); };     // read cycle counter
  void rdtime(Register Rd) { csrr(Rd, CSR_TIME); };       // read time
649
650 // Restore cpu control state after JNI call
651 void restore_cpu_control_state_after_jni(Register tmp);
652
653 // Control transfer pseudo instructions
654 void beqz(Register Rs, const address dest);
655 void bnez(Register Rs, const address dest);
656 void blez(Register Rs, const address dest);
657 void bgez(Register Rs, const address dest);
658 void bltz(Register Rs, const address dest);
659 void bgtz(Register Rs, const address dest);
660
661 void cmov_eq(Register cmp1, Register cmp2, Register dst, Register src);
662 void cmov_ne(Register cmp1, Register cmp2, Register dst, Register src);
663 void cmov_le(Register cmp1, Register cmp2, Register dst, Register src);
664 void cmov_leu(Register cmp1, Register cmp2, Register dst, Register src);
665 void cmov_ge(Register cmp1, Register cmp2, Register dst, Register src);
666 void cmov_geu(Register cmp1, Register cmp2, Register dst, Register src);
667 void cmov_lt(Register cmp1, Register cmp2, Register dst, Register src);
668 void cmov_ltu(Register cmp1, Register cmp2, Register dst, Register src);
669 void cmov_gt(Register cmp1, Register cmp2, Register dst, Register src);
670 void cmov_gtu(Register cmp1, Register cmp2, Register dst, Register src);
671
672 void cmov_cmp_fp_eq(FloatRegister cmp1, FloatRegister cmp2, Register dst, Register src, bool is_single);
673 void cmov_cmp_fp_ne(FloatRegister cmp1, FloatRegister cmp2, Register dst, Register src, bool is_single);
674 void cmov_cmp_fp_le(FloatRegister cmp1, FloatRegister cmp2, Register dst, Register src, bool is_single);
675 void cmov_cmp_fp_ge(FloatRegister cmp1, FloatRegister cmp2, Register dst, Register src, bool is_single);
676 void cmov_cmp_fp_lt(FloatRegister cmp1, FloatRegister cmp2, Register dst, Register src, bool is_single);
677 void cmov_cmp_fp_gt(FloatRegister cmp1, FloatRegister cmp2, Register dst, Register src, bool is_single);
678
679 void cmov_fp_eq(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single);
680 void cmov_fp_ne(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single);
681 void cmov_fp_le(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single);
682 void cmov_fp_leu(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single);
683 void cmov_fp_ge(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single);
684 void cmov_fp_geu(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single);
685 void cmov_fp_lt(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single);
686 void cmov_fp_ltu(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single);
687 void cmov_fp_gt(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single);
688 void cmov_fp_gtu(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single);
689
690 void cmov_fp_cmp_fp_eq(FloatRegister cmp1, FloatRegister cmp2, FloatRegister dst, FloatRegister src, bool cmp_single, bool cmov_single);
691 void cmov_fp_cmp_fp_ne(FloatRegister cmp1, FloatRegister cmp2, FloatRegister dst, FloatRegister src, bool cmp_single, bool cmov_single);
692 void cmov_fp_cmp_fp_le(FloatRegister cmp1, FloatRegister cmp2, FloatRegister dst, FloatRegister src, bool cmp_single, bool cmov_single);
693 void cmov_fp_cmp_fp_ge(FloatRegister cmp1, FloatRegister cmp2, FloatRegister dst, FloatRegister src, bool cmp_single, bool cmov_single);
694 void cmov_fp_cmp_fp_lt(FloatRegister cmp1, FloatRegister cmp2, FloatRegister dst, FloatRegister src, bool cmp_single, bool cmov_single);
695 void cmov_fp_cmp_fp_gt(FloatRegister cmp1, FloatRegister cmp2, FloatRegister dst, FloatRegister src, bool cmp_single, bool cmov_single);
696
697 public:
  // We try to follow risc-v asm mnemonics.
699 // But as we don't layout a reachable GOT,
700 // we often need to resort to movptr, li <48imm>.
701 // https://github.com/riscv-non-isa/riscv-asm-manual/blob/main/src/asm-manual.adoc
702
703 // Hotspot only use the standard calling convention using x1/ra.
  // The alternative calling convention using x5/t0 is not used.
705 // Using x5 as a temp causes the CPU to mispredict returns.
706
707 // JALR, return address stack updates:
708 // | rd is x1/x5 | rs1 is x1/x5 | rd=rs1 | RAS action
709 // | ----------- | ------------ | ------ |-------------
710 // | No | No | - | None
711 // | No | Yes | - | Pop
712 // | Yes | No | - | Push
713 // | Yes | Yes | No | Pop, then push
714 // | Yes | Yes | Yes | Push
715 //
716 // JAL, return address stack updates:
717 // | rd is x1/x5 | RAS action
718 // | ----------- | ----------
719 // | Yes | Push
720 // | No | None
721 //
722 // JUMPs uses Rd = x0/zero and Rs = x6/t1 or imm
723 // CALLS uses Rd = x1/ra and Rs = x6/t1 or imm (or x1/ra*)
724 // RETURNS uses Rd = x0/zero and Rs = x1/ra
725 // *use of x1/ra should not normally be used, special case only.
726
727 // jump: jal x0, offset
728 // For long reach uses temp register for:
729 // la + jr
730 void j(const address dest, Register temp = t1);
731 void j(const Address &dest, Register temp = t1);
732 void j(Label &l, Register temp = noreg);
733
734 // jump register: jalr x0, offset(rs)
735 void jr(Register Rd, int32_t offset = 0);
736
737 // call: la + jalr x1
738 void call(const address dest, Register temp = t1);
739
740 // jalr: jalr x1, offset(rs)
741 void jalr(Register Rs, int32_t offset = 0);
742
743 // Emit a runtime call. Only invalidates the tmp register which
744 // is used to keep the entry address for jalr/movptr.
745 // Uses call() for intra code cache, else movptr + jalr.
  // Clobbers t1
747 void rt_call(address dest, Register tmp = t1);
748
749 // ret: jalr x0, 0(x1)
  // Return to caller via x1/ra; per the RAS table above this is predicted
  // as a return-address-stack pop.
  inline void ret() {
    Assembler::jalr(x0, x1, 0);
  }
753
754 //label
755 void beqz(Register Rs, Label &l, bool is_far = false);
756 void bnez(Register Rs, Label &l, bool is_far = false);
757 void blez(Register Rs, Label &l, bool is_far = false);
758 void bgez(Register Rs, Label &l, bool is_far = false);
759 void bltz(Register Rs, Label &l, bool is_far = false);
760 void bgtz(Register Rs, Label &l, bool is_far = false);
761
762 void beq (Register Rs1, Register Rs2, Label &L, bool is_far = false);
763 void bne (Register Rs1, Register Rs2, Label &L, bool is_far = false);
764 void blt (Register Rs1, Register Rs2, Label &L, bool is_far = false);
765 void bge (Register Rs1, Register Rs2, Label &L, bool is_far = false);
766 void bltu(Register Rs1, Register Rs2, Label &L, bool is_far = false);
767 void bgeu(Register Rs1, Register Rs2, Label &L, bool is_far = false);
768
769 void bgt (Register Rs, Register Rt, const address dest);
770 void ble (Register Rs, Register Rt, const address dest);
771 void bgtu(Register Rs, Register Rt, const address dest);
772 void bleu(Register Rs, Register Rt, const address dest);
773
774 void bgt (Register Rs, Register Rt, Label &l, bool is_far = false);
775 void ble (Register Rs, Register Rt, Label &l, bool is_far = false);
776 void bgtu(Register Rs, Register Rt, Label &l, bool is_far = false);
777 void bleu(Register Rs, Register Rt, Label &l, bool is_far = false);
778
// Helper macro: opens the relocated-branch overload defined by INSN below —
// checks the reloc type, records the relocation, and keeps the emitted
// branch incompressible so the relocation stays valid. Note it opens a
// brace that the INSN expansion closes.
#define INSN_ENTRY_RELOC(result_type, header) \
  result_type header { \
    guarantee(rtype == relocInfo::internal_word_type, \
              "only internal_word_type relocs make sense here"); \
    relocate(InternalAddress(dest).rspec()); \
    IncompressibleScope scope(this); /* relocations */

// For each base branch instruction, define two overloads taking an absolute
// destination: one plain (offset must be a valid even simm13) and one that
// additionally records an internal_word relocation.
#define INSN(NAME) \
  void NAME(Register Rs1, Register Rs2, const address dest) { \
    assert_cond(dest != nullptr); \
    int64_t offset = dest - pc(); \
    guarantee(is_simm13(offset) && is_even(offset), \
              "offset is invalid: is_simm_13: %s offset: " INT64_FORMAT, \
              BOOL_TO_STR(is_simm13(offset)), offset); \
    Assembler::NAME(Rs1, Rs2, offset); \
  } \
  INSN_ENTRY_RELOC(void, NAME(Register Rs1, Register Rs2, address dest, relocInfo::relocType rtype)) \
    NAME(Rs1, Rs2, dest); \
  }

  INSN(beq);
  INSN(bne);
  INSN(bge);
  INSN(bgeu);
  INSN(blt);
  INSN(bltu);

#undef INSN

#undef INSN_ENTRY_RELOC
809
  // Floating-point compare-and-branch pseudo ops. is_unordered controls
  // how unordered comparisons (i.e. a NaN operand) are treated.
  // Single-precision:
  void float_beq(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void float_bne(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void float_ble(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void float_bge(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void float_blt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void float_bgt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);

  // Double-precision:
  void double_beq(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void double_bne(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void double_ble(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void double_bge(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void double_blt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void double_bgt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
823
824 private:
  // The signed 20-bit upper immediate (lui/auipc) can materialize values down
  // to -2G (0xFFFFFFFF80000000). The subsequent signed 12-bit immediate can
  // subtract at most another 0x800 (2K) from that, so the reachable range of
  // the auipc + simm12 pair is [-(2G + 2K), 2G - 2K).
827 bool is_valid_32bit_offset(int64_t x) {
828 constexpr int64_t twoG = (2 * G);
829 constexpr int64_t twoK = (2 * K);
830 return x < (twoG - twoK) && x >= (-twoG - twoK);
831 }
832
833 // Ensure that the auipc can reach the destination at x from anywhere within
834 // the code cache so that if it is relocated we know it will still reach.
835 bool is_32bit_offset_from_codecache(int64_t x) {
836 int64_t low = (int64_t)CodeCache::low_bound();
837 int64_t high = (int64_t)CodeCache::high_bound();
838 return is_valid_32bit_offset(x - low) && is_valid_32bit_offset(x - high);
839 }
840
 public:
  // Stack push and pop individual 64 bit registers
  void push_reg(Register Rs);
  void pop_reg(Register Rd);

  // Bulk push/pop of a general-purpose register set using `stack` as the
  // stack pointer; the returned int is defined by the .cpp implementation.
  int push_reg(RegSet regset, Register stack);
  int pop_reg(RegSet regset, Register stack);

  // Same for floating-point register sets.
  int push_fp(FloatRegSet regset, Register stack);
  int pop_fp(FloatRegSet regset, Register stack);

#ifdef COMPILER2
  // Same for vector register sets (C2-compiled code only).
  int push_v(VectorRegSet regset, Register stack);
  int pop_v(VectorRegSet regset, Register stack);
#endif // COMPILER2

  // Push and pop everything that might be clobbered by a native
  // runtime call except t0 and t1. (They are always
  // temporary registers, so we don't have to protect them.)
  // Additional registers can be excluded in a passed RegSet.
  void push_call_clobbered_registers_except(RegSet exclude);
  void pop_call_clobbered_registers_except(RegSet exclude);

  // Convenience forms with an empty exclusion set.
  void push_call_clobbered_registers() {
    push_call_clobbered_registers_except(RegSet());
  }
  void pop_call_clobbered_registers() {
    pop_call_clobbered_registers_except(RegSet());
  }

  // Save/restore full CPU state, optionally including vector registers.
  void push_CPU_state(bool save_vectors = false, int vector_size_in_bytes = 0);
  void pop_CPU_state(bool restore_vectors = false, int vector_size_in_bytes = 0);

  // Loom continuation fast-path bookkeeping on the given thread register.
  void push_cont_fastpath(Register java_thread = xthread);
  void pop_cont_fastpath(Register java_thread = xthread);

  // if heap base register is used - reinit it with the correct value
  void reinit_heapbase();

  void bind(Label& L) {
    Assembler::bind(L);
    // fences across basic blocks should not be merged
    code()->clear_last_insn();
  }
885
  // Member-function-pointer types used by wrap_label to select between the
  // direct (raw address) and the label-bound form of a branch/jump emitter.
  typedef void (MacroAssembler::* compare_and_branch_insn)(Register Rs1, Register Rs2, const address dest);
  typedef void (MacroAssembler::* compare_and_branch_label_insn)(Register Rs1, Register Rs2, Label &L, bool is_far);
  typedef void (MacroAssembler::* jal_jalr_insn)(Register Rt, address dest);

  void wrap_label(Register r, Label &L, jal_jalr_insn insn);
  void wrap_label(Register r1, Register r2, Label &L,
                  compare_and_branch_insn insn,
                  compare_and_branch_label_insn neg_insn, bool is_far = false);

  // Load address into Rd. The three-argument form also yields a low part
  // in `offset` (presumably the split sign-extended simm12, mirroring
  // mv(Rd, addr, offset) below — see the .cpp definition).
  void la(Register Rd, Label &label);
  void la(Register Rd, const address addr);
  void la(Register Rd, const address addr, int32_t &offset);
  void la(Register Rd, const Address &adr);

  // Load-immediate helpers of increasing operand width.
  void li16u(Register Rd, uint16_t imm);
  void li32(Register Rd, int32_t imm);
  void li  (Register Rd, int64_t imm);  // optimized load immediate
903
  // mv: materialize values/addresses into a register (all funnel into li).
  void mv(Register Rd, address addr) { li(Rd, (int64_t)addr); }

  // Materialize `addr` as a high part in Rd plus a sign-extended low
  // 12-bit `offset` that the caller can fold into jalr or a load/store.
  void mv(Register Rd, address addr, int32_t &offset) {
    // Split address into a lower 12-bit sign-extended offset and the remainder,
    // so that the offset could be encoded in jalr or load/store instruction.
    offset = ((int32_t)(int64_t)addr << 20) >> 20;
    li(Rd, (int64_t)addr - offset);
  }

  // Any integral immediate: widen to int64_t and load.
  template<typename T, ENABLE_IF(std::is_integral<T>::value)>
  inline void mv(Register Rd, T o) { li(Rd, (int64_t)o); }

  // RegisterOrConstant: dispatch to the register- or constant-taking overload.
  void mv(Register Rd, RegisterOrConstant src) {
    if (src.is_register()) {
      mv(Rd, src.as_register());
    } else {
      mv(Rd, src.as_constant());
    }
  }
923
  // Generates a load of a 48-bit constant which can be
  // patched to any 48-bit constant, i.e. address.
  // If common case supply additional temp register
  // to shorten the instruction sequence.
  void movptr(Register Rd, const Address &addr, Register tmp = noreg);
  void movptr(Register Rd, address addr, Register tmp = noreg);
  void movptr(Register Rd, address addr, int32_t &offset, Register tmp = noreg);

 private:
  // Two alternative patchable-constant sequences; movptr() picks one.
  // movptr2 uses the extra tmp register for a shorter sequence.
  void movptr1(Register Rd, uintptr_t addr, int32_t &offset);
  void movptr2(Register Rd, uintptr_t addr, int32_t &offset, Register tmp);
 public:
  // float imm move
  // can_*_imm_load report whether `imm` is encodable by the matching
  // fli_* single-instruction float-immediate load below.
  static bool can_hf_imm_load(short imm);
  static bool can_fp_imm_load(float imm);
  static bool can_dp_imm_load(double imm);
  void fli_h(FloatRegister Rd, short imm);
  void fli_s(FloatRegister Rd, float imm);
  void fli_d(FloatRegister Rd, double imm);
943
  // arith: add/sub with a 64-bit immediate. tmp may be used to
  // materialize immediates that do not fit the instruction encoding.
  void add (Register Rd, Register Rn, int64_t increment, Register tmp = t0);
  void sub (Register Rd, Register Rn, int64_t decrement, Register tmp = t0);
  void addw(Register Rd, Register Rn, int64_t increment, Register tmp = t0);
  void subw(Register Rd, Register Rn, int64_t decrement, Register tmp = t0);

  // Subtract-immediate, emitted as addi of the negated value; the
  // negation itself must fit in simm12.
  void subi(Register Rd, Register Rn, int64_t decrement) {
    assert(is_simm12(-decrement), "Must be");
    addi(Rd, Rn, -decrement);
  }

  // 32-bit (sign-extending) variant of subi.
  void subiw(Register Rd, Register Rn, int64_t decrement) {
    assert(is_simm12(-decrement), "Must be");
    addiw(Rd, Rn, -decrement);
  }

  // Re-expose the plain three-register forms, which would otherwise be
  // hidden by the immediate-taking overloads above.
#define INSN(NAME)                                            \
  inline void NAME(Register Rd, Register Rs1, Register Rs2) { \
    Assembler::NAME(Rd, Rs1, Rs2);                            \
  }

  INSN(add);
  INSN(addw);
  INSN(sub);
  INSN(subw);

#undef INSN
971
  // logic — 32-bit ("w") variants of and/or/xor.
  void andrw(Register Rd, Register Rs1, Register Rs2);
  void orrw(Register Rd, Register Rs1, Register Rs2);
  void xorrw(Register Rd, Register Rs1, Register Rs2);

  // logic with negate: Rd = Rs1 op ~Rs2.
  void andn(Register Rd, Register Rs1, Register Rs2);
  void orn(Register Rd, Register Rs1, Register Rs2);

  // reverse bytes
  void revbw(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in lower word, sign-extend
  void revb(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in doubleword

  // Rotate right/left; shift given as a register or an immediate.
  void ror(Register dst, Register src, Register shift, Register tmp = t0);
  void ror(Register dst, Register src, uint32_t shift, Register tmp = t0);
  void rolw(Register dst, Register src, uint32_t shift, Register tmp = t0);

  // OR a register-or-constant into the value at a memory address.
  void orptr(Address adr, RegisterOrConstant src, Register tmp1 = t0, Register tmp2 = t1);
990
  // Load and Store Instructions
  // Same helper as the branch macro above: record an internal_word_type
  // relocation and keep the relocated instruction uncompressed. The open
  // brace is closed by the expansion that uses it.
#define INSN_ENTRY_RELOC(result_type, header)                     \
  result_type header {                                            \
    guarantee(rtype == relocInfo::internal_word_type,             \
              "only internal_word_type relocs make sense here");  \
    relocate(InternalAddress(dest).rspec());                      \
    IncompressibleScope scope(this); /* relocations */

  // Loads into a general-purpose register. In-code-cache targets use a
  // pc-relative auipc + load pair: adding 0x800 before taking the upper
  // 20 bits compensates for the sign-extension of the low 12 bits.
  // Out-of-cache targets materialize the address with movptr.
  // The Address form also handles base+offset with an out-of-simm12
  // offset, taking care not to clobber the base while Rd == base.
#define INSN(NAME)                                                                     \
  void NAME(Register Rd, address dest) {                                               \
    assert_cond(dest != nullptr);                                                      \
    if (CodeCache::contains(dest)) {                                                   \
      int64_t distance = dest - pc();                                                  \
      assert(is_valid_32bit_offset(distance), "Must be");                              \
      auipc(Rd, (int32_t)distance + 0x800);                                            \
      Assembler::NAME(Rd, Rd, ((int32_t)distance << 20) >> 20);                        \
    } else {                                                                           \
      int32_t offset = 0;                                                              \
      movptr(Rd, dest, offset);                                                        \
      Assembler::NAME(Rd, Rd, offset);                                                 \
    }                                                                                  \
  }                                                                                    \
  INSN_ENTRY_RELOC(void, NAME(Register Rd, address dest, relocInfo::relocType rtype))  \
    NAME(Rd, dest);                                                                    \
  }                                                                                    \
  void NAME(Register Rd, const Address &adr, Register temp = t0) {                     \
    switch (adr.getMode()) {                                                           \
      case Address::literal: {                                                         \
        relocate(adr.rspec(), [&] {                                                    \
          NAME(Rd, adr.target());                                                      \
        });                                                                            \
        break;                                                                         \
      }                                                                                \
      case Address::base_plus_offset: {                                                \
        if (is_simm12(adr.offset())) {                                                 \
          Assembler::NAME(Rd, adr.base(), adr.offset());                               \
        } else {                                                                       \
          int32_t offset = ((int32_t)adr.offset() << 20) >> 20;                        \
          if (Rd == adr.base()) {                                                      \
            la(temp, Address(adr.base(), adr.offset() - offset));                      \
            Assembler::NAME(Rd, temp, offset);                                         \
          } else {                                                                     \
            la(Rd, Address(adr.base(), adr.offset() - offset));                        \
            Assembler::NAME(Rd, Rd, offset);                                           \
          }                                                                            \
        }                                                                              \
        break;                                                                         \
      }                                                                                \
      default:                                                                         \
        ShouldNotReachHere();                                                          \
    }                                                                                  \
  }                                                                                    \
  void NAME(Register Rd, Label &L) {                                                   \
    wrap_label(Rd, L, &MacroAssembler::NAME);                                          \
  }

  INSN(lb);
  INSN(lbu);
  INSN(lh);
  INSN(lhu);
  INSN(lw);
  INSN(lwu);
  INSN(ld);

#undef INSN
1056
  // Floating-point loads. Same structure as the integer loads above, but
  // since the destination is an FP register, a GP temp carries the address.
#define INSN(NAME)                                                       \
  void NAME(FloatRegister Rd, address dest, Register temp = t0) {        \
    assert_cond(dest != nullptr);                                        \
    if (CodeCache::contains(dest)) {                                     \
      int64_t distance = dest - pc();                                    \
      assert(is_valid_32bit_offset(distance), "Must be");                \
      auipc(temp, (int32_t)distance + 0x800);                            \
      Assembler::NAME(Rd, temp, ((int32_t)distance << 20) >> 20);        \
    } else {                                                             \
      int32_t offset = 0;                                                \
      movptr(temp, dest, offset);                                        \
      Assembler::NAME(Rd, temp, offset);                                 \
    }                                                                    \
  }                                                                      \
  INSN_ENTRY_RELOC(void, NAME(FloatRegister Rd, address dest,            \
                              relocInfo::relocType rtype, Register temp = t0)) \
    NAME(Rd, dest, temp);                                                \
  }                                                                      \
  void NAME(FloatRegister Rd, const Address &adr, Register temp = t0) {  \
    switch (adr.getMode()) {                                             \
      case Address::literal: {                                           \
        relocate(adr.rspec(), [&] {                                      \
          NAME(Rd, adr.target(), temp);                                  \
        });                                                              \
        break;                                                           \
      }                                                                  \
      case Address::base_plus_offset: {                                  \
        if (is_simm12(adr.offset())) {                                   \
          Assembler::NAME(Rd, adr.base(), adr.offset());                 \
        } else {                                                         \
          int32_t offset = ((int32_t)adr.offset() << 20) >> 20;          \
          la(temp, Address(adr.base(), adr.offset() - offset));          \
          Assembler::NAME(Rd, temp, offset);                             \
        }                                                                \
        break;                                                           \
      }                                                                  \
      default:                                                           \
        ShouldNotReachHere();                                            \
    }                                                                    \
  }

  INSN(flh);
  INSN(flw);
  INSN(fld);

#undef INSN
1103
  // Relocated-entry wrappers for the store emitters defined further below
  // (the address-taking overloads these forward to follow).
#define INSN(NAME, REGISTER)                                   \
  INSN_ENTRY_RELOC(void, NAME(REGISTER Rs, address dest,       \
                              relocInfo::relocType rtype, Register temp = t0)) \
    NAME(Rs, dest, temp);                                      \
  }

  INSN(sb, Register);
  INSN(sh, Register);
  INSN(sw, Register);
  INSN(sd, Register);
  INSN(fsw, FloatRegister);
  INSN(fsd, FloatRegister);

#undef INSN
1118
  // Integer stores. temp carries the computed address, so the stored value
  // Rs must not alias temp whenever temp is actually used (asserted).
#define INSN(NAME)                                                      \
  void NAME(Register Rs, address dest, Register temp = t0) {            \
    assert_cond(dest != nullptr);                                       \
    assert_different_registers(Rs, temp);                               \
    if (CodeCache::contains(dest)) {                                    \
      int64_t distance = dest - pc();                                   \
      assert(is_valid_32bit_offset(distance), "Must be");               \
      auipc(temp, (int32_t)distance + 0x800);                           \
      Assembler::NAME(Rs, temp, ((int32_t)distance << 20) >> 20);       \
    } else {                                                            \
      int32_t offset = 0;                                               \
      movptr(temp, dest, offset);                                       \
      Assembler::NAME(Rs, temp, offset);                                \
    }                                                                   \
  }                                                                     \
  void NAME(Register Rs, const Address &adr, Register temp = t0) {      \
    switch (adr.getMode()) {                                            \
      case Address::literal: {                                          \
        assert_different_registers(Rs, temp);                           \
        relocate(adr.rspec(), [&] {                                     \
          NAME(Rs, adr.target(), temp);                                 \
        });                                                             \
        break;                                                          \
      }                                                                 \
      case Address::base_plus_offset: {                                 \
        if (is_simm12(adr.offset())) {                                  \
          Assembler::NAME(Rs, adr.base(), adr.offset());                \
        } else {                                                        \
          assert_different_registers(Rs, temp);                         \
          int32_t offset = ((int32_t)adr.offset() << 20) >> 20;         \
          la(temp, Address(adr.base(), adr.offset() - offset));         \
          Assembler::NAME(Rs, temp, offset);                            \
        }                                                               \
        break;                                                          \
      }                                                                 \
      default:                                                          \
        ShouldNotReachHere();                                           \
    }                                                                   \
  }

  INSN(sb);
  INSN(sh);
  INSN(sw);
  INSN(sd);

#undef INSN
1165
  // Floating-point stores. No aliasing assert is needed: the stored value
  // lives in an FP register and cannot collide with the GP temp.
#define INSN(NAME)                                                       \
  void NAME(FloatRegister Rs, address dest, Register temp = t0) {        \
    assert_cond(dest != nullptr);                                        \
    if (CodeCache::contains(dest)) {                                     \
      int64_t distance = dest - pc();                                    \
      assert(is_valid_32bit_offset(distance), "Must be");                \
      auipc(temp, (int32_t)distance + 0x800);                            \
      Assembler::NAME(Rs, temp, ((int32_t)distance << 20) >> 20);        \
    } else {                                                             \
      int32_t offset = 0;                                                \
      movptr(temp, dest, offset);                                        \
      Assembler::NAME(Rs, temp, offset);                                 \
    }                                                                    \
  }                                                                      \
  void NAME(FloatRegister Rs, const Address &adr, Register temp = t0) {  \
    switch (adr.getMode()) {                                             \
      case Address::literal: {                                           \
        relocate(adr.rspec(), [&] {                                      \
          NAME(Rs, adr.target(), temp);                                  \
        });                                                              \
        break;                                                           \
      }                                                                  \
      case Address::base_plus_offset: {                                  \
        if (is_simm12(adr.offset())) {                                   \
          Assembler::NAME(Rs, adr.base(), adr.offset());                 \
        } else {                                                         \
          int32_t offset = ((int32_t)adr.offset() << 20) >> 20;          \
          la(temp, Address(adr.base(), adr.offset() - offset));          \
          Assembler::NAME(Rs, temp, offset);                             \
        }                                                                \
        break;                                                           \
      }                                                                  \
      default:                                                           \
        ShouldNotReachHere();                                            \
    }                                                                    \
  }

  INSN(fsw);
  INSN(fsd);

#undef INSN

#undef INSN_ENTRY_RELOC
1209
  // Compare-and-exchange helpers.
  void cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp, Label &succeed, Label *fail);
  void cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, Label &succeed, Label *fail);
  // Strong CAS; with result_as_bool the result register holds a 0/1
  // success flag rather than the observed value.
  void cmpxchg(Register addr, Register expected,
               Register new_val,
               Assembler::operand_size size,
               Assembler::Aqrl acquire, Assembler::Aqrl release,
               Register result, bool result_as_bool = false);
  // Weak CAS: may fail spuriously; callers must loop or tolerate failure.
  void weak_cmpxchg(Register addr, Register expected,
                    Register new_val,
                    Assembler::operand_size size,
                    Assembler::Aqrl acquire, Assembler::Aqrl release,
                    Register result);
  // Sets up shift/mask/aligned_addr for sub-word CAS emulation.
  void cmpxchg_narrow_value_helper(Register addr, Register expected, Register new_val,
                                   Assembler::operand_size size,
                                   Register shift, Register mask, Register aligned_addr);
  // Sub-word (byte/halfword) CAS variants built on word-sized LR/SC.
  void cmpxchg_narrow_value(Register addr, Register expected,
                            Register new_val,
                            Assembler::operand_size size,
                            Assembler::Aqrl acquire, Assembler::Aqrl release,
                            Register result, bool result_as_bool,
                            Register tmp1, Register tmp2, Register tmp3);
  void weak_cmpxchg_narrow_value(Register addr, Register expected,
                                 Register new_val,
                                 Assembler::operand_size size,
                                 Assembler::Aqrl acquire, Assembler::Aqrl release,
                                 Register result,
                                 Register tmp1, Register tmp2, Register tmp3);

  // Atomic fetch-and-add ("w" = 32-bit, "al" = acquire+release ordering).
  void atomic_add(Register prev, RegisterOrConstant incr, Register addr);
  void atomic_addw(Register prev, RegisterOrConstant incr, Register addr);
  void atomic_addal(Register prev, RegisterOrConstant incr, Register addr);
  void atomic_addalw(Register prev, RegisterOrConstant incr, Register addr);

  // Atomic exchange ("wu" = 32-bit zero-extended result).
  void atomic_xchg(Register prev, Register newv, Register addr);
  void atomic_xchgw(Register prev, Register newv, Register addr);
  void atomic_xchgal(Register prev, Register newv, Register addr);
  void atomic_xchgalw(Register prev, Register newv, Register addr);
  void atomic_xchgwu(Register prev, Register newv, Register addr);
  void atomic_xchgalwu(Register prev, Register newv, Register addr);

  // Atomic compare-and-swap with explicit memory ordering.
  void atomic_cas(Register prev, Register newv, Register addr, Assembler::operand_size size,
                  Assembler::Aqrl acquire = Assembler::relaxed, Assembler::Aqrl release = Assembler::relaxed);
1252
  // Emit a far call/jump. Only invalidates the tmp register which
  // is used to keep the entry address for jalr.
  // The address must be inside the code cache.
  // Supported entry.rspec():
  // - relocInfo::external_word_type
  // - relocInfo::runtime_call_type
  // - relocInfo::none
  // Clobbers t1 by default.
  void far_call(const Address &entry, Register tmp = t1);
  void far_jump(const Address &entry, Register tmp = t1);

  // Code size of a far branch in bytes, for layout/patching arithmetic.
  static int far_branch_size() {
    return 2 * MacroAssembler::instruction_size; // auipc + jalr, see far_call() & far_jump()
  }
1267
  // Load the GC byte-map base into reg (see the .cpp definition).
  void load_byte_map_base(Register reg);

  // Stack bang: probe the page at sp - offset by storing zero there.
  // The caller passes a positive offset; the probed address is below sp
  // because the stack grows down. Clobbers t0.
  void bang_stack_with_offset(int offset) {
    // stack grows down, caller passes positive offset
    assert(offset > 0, "must bang with negative offset");
    sub(t0, sp, offset);
    sd(zr, Address(t0));
  }

  // Debugging hook for unimplemented paths: records the call site in t1.
  virtual void _call_Unimplemented(address call_site) {
    mv(t1, call_site);
  }

#define call_Unimplemented() _call_Unimplemented((address)__PRETTY_FUNCTION__)
1282
  // Frame creation and destruction shared between JITs.
  void build_frame(int framesize);
  void remove_frame(int framesize);

  // Probe for the reserved stack area (StackReservedPages support).
  void reserved_stack_check();

  // Safepoint polling-page access with the given relocation type.
  void get_polling_page(Register dest, relocInfo::relocType rtype);
  void read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype);
1291
  // RISCV64 OpenJDK uses three different types of calls:
  //
  //   - far call: auipc reg, pc_relative_offset; jalr ra, reg, offset
  //     The offset has the range [-(2G + 2K), 2G - 2K). Addresses out of the
  //     range in the code cache requires indirect call.
  //     If a jump is needed rather than a call, a far jump 'jalr x0, reg, offset'
  //     can be used instead.
  //     All instructions are embedded at a call site.
  //
  //   - indirect call: movptr + jalr
  //     This can reach anywhere in the address space, but it cannot be patched
  //     while code is running, so it must only be modified at a safepoint.
  //     This form of call is most suitable for targets at fixed addresses,
  //     which will never be patched.
  //
  //   - reloc call:
  //     This too can reach anywhere in the address space but is only available
  //     in C1/C2-generated code (nmethod).
  //
  //     [Main code section]
  //       auipc
  //       ld <address_from_stub_section>
  //       jalr
  //
  //     [Stub section]
  //     address stub:
  //       <64-bit destination address>
  //
  //    To change the destination we simply atomically store the new
  //    address in the stub section.
  //    There is a benign race in that the other thread might observe the old
  //    64-bit destination address before it observes the new address. That does
  //    not matter because the destination method has been invalidated, so there
  //    will be a trap at its start.

  // Emit a reloc call and create a stub to hold the entry point address.
  // Supported entry.rspec():
  // - relocInfo::runtime_call_type
  // - relocInfo::opt_virtual_call_type
  // - relocInfo::static_call_type
  // - relocInfo::virtual_call_type
  //
  // Return: the call PC or nullptr if CodeCache is full.
  address reloc_call(Address entry, Register tmp = t1);

  // Inline-cache call and the receiver-klass check that guards it.
  address ic_call(address entry, jint method_index = 0);
  static int ic_check_size();
  int ic_check(int end_alignment = MacroAssembler::instruction_size);
1340
  // Support for memory inc/dec
  // n.b. increment/decrement calls with an Address destination will
  // need to use a scratch register to load the value to be
  // incremented. increment/decrement calls which add or subtract a
  // constant value other than sign-extended 12-bit immediate will need
  // to use a 2nd scratch register to hold the constant. so, an address
  // increment/decrement may trash both t0 and t1.

  void increment(const Address dst, int64_t value = 1, Register tmp1 = t0, Register tmp2 = t1);
  void incrementw(const Address dst, int32_t value = 1, Register tmp1 = t0, Register tmp2 = t1);

  void decrement(const Address dst, int64_t value = 1, Register tmp1 = t0, Register tmp2 = t1);
  void decrementw(const Address dst, int32_t value = 1, Register tmp1 = t0, Register tmp2 = t1);

  // Class-initialization barrier: fast path when klass is initialized.
  void clinit_barrier(Register klass, Register tmp, Label* L_fast_path = nullptr, Label* L_slow_path = nullptr);

  // Resolve the holder class / CLD of a Method*.
  void load_method_holder_cld(Register result, Register method);
  void load_method_holder(Register holder, Register method);

  // String-intrinsic helpers (index computation and match-mask building).
  void compute_index(Register str1, Register trailing_zeros, Register match_mask,
                     Register result, Register char_tmp, Register tmp,
                     bool haystack_isL);
  void compute_match_mask(Register src, Register pattern, Register match_mask,
                          Register mask1, Register mask2);

  // CRC32 code for java.util.zip.CRC32::updateBytes() intrinsic.
  void kernel_crc32(Register crc, Register buf, Register len,
                    Register table0, Register table1, Register table2, Register table3,
                    Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5, Register tmp6);
  void update_word_crc32(Register crc, Register v, Register tmp1, Register tmp2, Register tmp3,
                         Register table0, Register table1, Register table2, Register table3,
                         bool upper);
  void update_byte_crc32(Register crc, Register val, Register table);
1374
#ifdef COMPILER2
  // Vectorized CRC32 (carry-less multiply folding) helpers, C2 only.
  void vector_update_crc32(Register crc, Register buf, Register len,
                           Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5,
                           Register table0, Register table3);
  void kernel_crc32_vclmul_fold(Register crc, Register buf, Register len,
                                Register table0, Register table1, Register table2, Register table3,
                                Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5);
  void crc32_vclmul_fold_to_16_bytes_vectorsize_32(VectorRegister vx, VectorRegister vy, VectorRegister vt,
                                                   VectorRegister vtmp1, VectorRegister vtmp2, VectorRegister vtmp3, VectorRegister vtmp4);
  void kernel_crc32_vclmul_fold_vectorsize_32(Register crc, Register buf, Register len,
                                              Register vclmul_table, Register tmp1, Register tmp2);
  void crc32_vclmul_fold_16_bytes_vectorsize_16(VectorRegister vx, VectorRegister vt,
                                                VectorRegister vtmp1, VectorRegister vtmp2, VectorRegister vtmp3, VectorRegister vtmp4,
                                                Register buf, Register tmp, const int STEP);
  void crc32_vclmul_fold_16_bytes_vectorsize_16_2(VectorRegister vx, VectorRegister vy, VectorRegister vt,
                                                  VectorRegister vtmp1, VectorRegister vtmp2, VectorRegister vtmp3, VectorRegister vtmp4,
                                                  Register tmp);
  void crc32_vclmul_fold_16_bytes_vectorsize_16_3(VectorRegister vx, VectorRegister vy, VectorRegister vt,
                                                  VectorRegister vtmp1, VectorRegister vtmp2, VectorRegister vtmp3, VectorRegister vtmp4,
                                                  Register tmp);
  void kernel_crc32_vclmul_fold_vectorsize_16(Register crc, Register buf, Register len,
                                              Register vclmul_table, Register tmp1, Register tmp2);

  // BigInteger multiply intrinsics and their carry-propagation helpers.
  void mul_add(Register out, Register in, Register offset,
               Register len, Register k, Register tmp);
  void wide_mul(Register prod_lo, Register prod_hi, Register n, Register m);
  void wide_madd(Register sum_lo, Register sum_hi, Register n,
                 Register m, Register tmp1, Register tmp2);
  void cad(Register dst, Register src1, Register src2, Register carry);
  void cadc(Register dst, Register src1, Register src2, Register carry);
  void adc(Register dst, Register src1, Register src2, Register carry);
  void add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo,
                       Register src1, Register src2, Register carry);
  void multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart,
                             Register y, Register y_idx, Register z,
                             Register carry, Register product,
                             Register idx, Register kdx);
  void multiply_128_x_128_loop(Register y, Register z,
                               Register carry, Register carry2,
                               Register idx, Register jdx,
                               Register yz_idx1, Register yz_idx2,
                               Register tmp, Register tmp3, Register tmp4,
                               Register tmp6, Register product_hi);
  void multiply_to_len(Register x, Register xlen, Register y, Register ylen,
                       Register z, Register tmp0,
                       Register tmp1, Register tmp2, Register tmp3, Register tmp4,
                       Register tmp5, Register tmp6, Register product_hi);

#endif // COMPILER2
1424
  // Inflate the low/high 32 bits of Rs (byte -> char widening helpers).
  void inflate_lo32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);
  void inflate_hi32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);

  // Count trailing zero chars/bytes (isLL selects Latin-1 byte elements).
  void ctzc_bits(Register Rd, Register Rs, bool isLL = false,
                 Register tmp1 = t0, Register tmp2 = t1);

  // Block zero/fill of word-sized memory regions.
  void zero_words(Register base, uint64_t cnt);
  address zero_words(Register ptr, Register cnt);
  void fill_words(Register base, Register cnt, Register value);
  void zero_memory(Register addr, Register len, Register tmp);
  void zero_dcache_blocks(Register base, Register cnt, Register tmp1, Register tmp2);

  // shift left by shamt and add
  void shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt);

  // test single bit in Rs, result is set to Rd
  void test_bit(Register Rd, Register Rs, uint32_t bit_pos);

  // Here the float instructions with safe deal with some exceptions.
  // e.g. convert from NaN, +Inf, -Inf to int, float, double
  // will trigger exception, we need to deal with these situations
  // to get correct results.
  void fcvt_w_s_safe(Register dst, FloatRegister src, Register tmp = t0);
  void fcvt_l_s_safe(Register dst, FloatRegister src, Register tmp = t0);
  void fcvt_w_d_safe(Register dst, FloatRegister src, Register tmp = t0);
  void fcvt_l_d_safe(Register dst, FloatRegister src, Register tmp = t0);

  // Math.round intrinsics for float and double.
  void java_round_float(Register dst, FloatRegister src, FloatRegister ftmp);
  void java_round_double(Register dst, FloatRegister src, FloatRegister ftmp);

  // Helper routine processing the slow path of NaN when converting float to float16
  void float_to_float16_NaN(Register dst, FloatRegister src, Register tmp1, Register tmp2);
1457
1458 // vector load/store unit-stride instructions
1459 void vlex_v(VectorRegister vd, Register base, Assembler::SEW sew, VectorMask vm = unmasked) {
1460 switch (sew) {
1461 case Assembler::e64:
1462 vle64_v(vd, base, vm);
1463 break;
1464 case Assembler::e32:
1465 vle32_v(vd, base, vm);
1466 break;
1467 case Assembler::e16:
1468 vle16_v(vd, base, vm);
1469 break;
1470 case Assembler::e8: // fall through
1471 default:
1472 vle8_v(vd, base, vm);
1473 break;
1474 }
1475 }
1476
1477 void vsex_v(VectorRegister store_data, Register base, Assembler::SEW sew, VectorMask vm = unmasked) {
1478 switch (sew) {
1479 case Assembler::e64:
1480 vse64_v(store_data, base, vm);
1481 break;
1482 case Assembler::e32:
1483 vse32_v(store_data, base, vm);
1484 break;
1485 case Assembler::e16:
1486 vse16_v(store_data, base, vm);
1487 break;
1488 case Assembler::e8: // fall through
1489 default:
1490 vse8_v(store_data, base, vm);
1491 break;
1492 }
1493 }
1494
  // vector pseudo instructions
  // rotate vector register left with shift bits, 32-bit version
  inline void vrole32_vi(VectorRegister vd, uint32_t shift, VectorRegister tmp_vr) {
    vsrl_vi(tmp_vr, vd, 32 - shift);   // tmp = vd >> (32 - shift)
    vsll_vi(vd, vd, shift);            // vd  = vd << shift
    vor_vv(vd, vd, tmp_vr);            // combine the two halves
  }

  // Whole-register load: one register group of byte elements.
  inline void vl1r_v(VectorRegister vd, Register rs) {
    vl1re8_v(vd, rs);
  }

  // Mask NOT via NAND of the mask with itself.
  inline void vmnot_m(VectorRegister vd, VectorRegister vs) {
    vmnand_mm(vd, vs, vs);
  }

  // Narrowing integer convert: narrowing shift-right by zero.
  inline void vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked) {
    vnsrl_wx(vd, vs, x0, vm);
  }

  // Integer negate: 0 - vs via reverse-subtract from x0.
  inline void vneg_v(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked) {
    vrsub_vx(vd, vs, x0, vm);
  }

  // Float negate: inject the inverted sign of vs into vs.
  inline void vfneg_v(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked) {
    vfsgnjn_vv(vd, vs, vs, vm);
  }

  // Float absolute value: sign-injection XOR of vs with itself clears the sign.
  inline void vfabs_v(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked) {
    vfsgnjx_vv(vd, vs, vs, vm);
  }

  // Reversed comparisons: a > b  <=>  b < a, etc. (operands swapped).
  inline void vmsgt_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
    vmslt_vv(vd, vs1, vs2, vm);
  }

  inline void vmsgtu_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
    vmsltu_vv(vd, vs1, vs2, vm);
  }

  inline void vmsge_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
    vmsle_vv(vd, vs1, vs2, vm);
  }

  inline void vmsgeu_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
    vmsleu_vv(vd, vs1, vs2, vm);
  }

  inline void vmfgt_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
    vmflt_vv(vd, vs1, vs2, vm);
  }

  inline void vmfge_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
    vmfle_vv(vd, vs1, vs2, vm);
  }

  // Immediate comparisons shifted by one: x < imm  <=>  x <= imm - 1.
  // Restricted to 1..16 so imm - 1 stays in the 0..15 immediate range.
  inline void vmsltu_vi(VectorRegister Vd, VectorRegister Vs2, uint32_t imm, VectorMask vm = unmasked) {
    guarantee(imm >= 1 && imm <= 16, "imm is invalid");
    vmsleu_vi(Vd, Vs2, imm-1, vm);
  }

  inline void vmsgeu_vi(VectorRegister Vd, VectorRegister Vs2, uint32_t imm, VectorMask vm = unmasked) {
    guarantee(imm >= 1 && imm <= 16, "imm is invalid");
    vmsgtu_vi(Vd, Vs2, imm-1, vm);
  }

  // Copy mask register (AND of the mask with itself).
  inline void vmmv_m(VectorRegister vd, VectorRegister vs) {
    vmand_mm(vd, vs, vs);
  }

  // Clear mask register (XOR with itself yields all zeros).
  inline void vmclr_m(VectorRegister vd) {
    vmxor_mm(vd, vd, vd);
  }

  // Set mask register (XNOR with itself yields all ones).
  inline void vmset_m(VectorRegister vd) {
    vmxnor_mm(vd, vd, vd);
  }

  // Bitwise NOT via XOR with the all-ones immediate.
  inline void vnot_v(VectorRegister Vd, VectorRegister Vs, VectorMask vm = unmasked) {
    vxor_vi(Vd, Vs, -1, vm);
  }
1579
  // Unit size used by the zero_words stubs (defined in the .cpp).
  static const int zero_words_block_size;

  // Canonicalize the integral value in Rt for the given Java primitive
  // type: booleans normalize to 0/1, sub-word integral types sign- or
  // zero-extend, and long/void/float/double are left untouched.
  void cast_primitive_type(BasicType type, Register Rt) {
    switch (type) {
      case T_BOOLEAN:
        sltu(Rt, zr, Rt);  // Rt = (Rt != 0) ? 1 : 0
        break;
      case T_CHAR   :
        zext(Rt, Rt, 16);  // chars are unsigned 16-bit
        break;
      case T_BYTE   :
        sext(Rt, Rt, 8);
        break;
      case T_SHORT  :
        sext(Rt, Rt, 16);
        break;
      case T_INT    :
        sext(Rt, Rt, 32);
        break;
      case T_LONG   : /* nothing to do */        break;
      case T_VOID   : /* nothing to do */        break;
      case T_FLOAT  : /* nothing to do */        break;
      case T_DOUBLE : /* nothing to do */        break;
      default: ShouldNotReachHere();
    }
  }
1606
1607 // float cmp with unordered_result
1608 void float_compare(Register result, FloatRegister Rs1, FloatRegister Rs2, int unordered_result);
1609 void double_compare(Register result, FloatRegister Rs1, FloatRegister Rs2, int unordered_result);
1610
1611 // Zero/Sign-extend
1612 void zext(Register dst, Register src, int bits);
1613 void sext(Register dst, Register src, int bits);
1614
1615 private:
1616 void cmp_x2i(Register dst, Register src1, Register src2, Register tmp, bool is_signed = true);
1617
1618 public:
1619 // compare src1 and src2 and get -1/0/1 in dst.
1620 // if [src1 > src2], dst = 1;
1621 // if [src1 == src2], dst = 0;
1622 // if [src1 < src2], dst = -1;
1623 void cmp_l2i(Register dst, Register src1, Register src2, Register tmp = t0);
1624 void cmp_ul2i(Register dst, Register src1, Register src2, Register tmp = t0);
1625 void cmp_uw2i(Register dst, Register src1, Register src2, Register tmp = t0);
1626
1627 // support for argument shuffling
1628 void move32_64(VMRegPair src, VMRegPair dst, Register tmp = t0);
1629 void float_move(VMRegPair src, VMRegPair dst, Register tmp = t0);
1630 void long_move(VMRegPair src, VMRegPair dst, Register tmp = t0);
1631 void double_move(VMRegPair src, VMRegPair dst, Register tmp = t0);
1632 void object_move(OopMap* map,
1633 int oop_handle_offset,
1634 int framesize_in_slots,
1635 VMRegPair src,
1636 VMRegPair dst,
1637 bool is_receiver,
1638 int* receiver_offset);
1639
#ifdef ASSERT
  // Template short-hand support to clean-up after a failed call to trampoline
  // call generation (see trampoline_call() below), when a set of Labels must
  // be reset (before returning).
  // Variadic case: reset the first label, then recurse on the remaining ones.
  template<typename Label, typename... More>
  void reset_labels(Label& lbl, More&... more) {
    lbl.reset(); reset_labels(more...);
  }
  // Base case: a single label terminates the recursion.
  template<typename Label>
  void reset_labels(Label& lbl) {
    lbl.reset();
  }
#endif
1653
1654 private:
1655
1656 void repne_scan(Register addr, Register value, Register count, Register tmp);
1657
1658 int bitset_to_regs(unsigned int bitset, unsigned char* regs);
1659 Address add_memory_helper(const Address dst, Register tmp);
1660
1661 void load_reserved(Register dst, Register addr, Assembler::operand_size size, Assembler::Aqrl acquire);
1662 void store_conditional(Register dst, Register new_val, Register addr, Assembler::operand_size size, Assembler::Aqrl release);
1663
1664 public:
1665 void fast_lock(Register basic_lock, Register obj, Register tmp1, Register tmp2, Register tmp3, Label& slow);
1666 void fast_unlock(Register obj, Register tmp1, Register tmp2, Register tmp3, Label& slow);
1667
1668 public:
  enum {
    // Sizes (in bytes) of the fixed-length instruction sequences emitted by
    // the corresponding macros, used when reserving space for and patching
    // those sequences.
    // movptr
    movptr1_instruction_size = 6 * MacroAssembler::instruction_size, // lui, addi, slli, addi, slli, addi. See movptr1().
    movptr2_instruction_size = 5 * MacroAssembler::instruction_size, // lui, lui, slli, add, addi. See movptr2().
    load_pc_relative_instruction_size = 2 * MacroAssembler::instruction_size // auipc, ld
  };
1675
1676 static bool is_load_pc_relative_at(address branch);
1677 static bool is_li16u_at(address instr);
1678
  // Instruction-class predicates: each one checks the 7-bit major opcode
  // (and, where the encoding requires it, the funct3 field) of the 32-bit
  // instruction at 'instr' against the RISC-V base ISA encodings.
  static bool is_jal_at(address instr) { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b1101111; }
  static bool is_jalr_at(address instr) { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b1100111 && extract_funct3(instr) == 0b000; }
  static bool is_branch_at(address instr) { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b1100011; }
  static bool is_ld_at(address instr) { assert_cond(instr != nullptr); return is_load_at(instr) && extract_funct3(instr) == 0b011; }
  static bool is_load_at(address instr) { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b0000011; }
  static bool is_float_load_at(address instr) { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b0000111; }
  static bool is_auipc_at(address instr) { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b0010111; }
  // True for any control transfer: conditional branch, jal or jalr.
  static bool is_jump_at(address instr) { assert_cond(instr != nullptr); return is_branch_at(instr) || is_jal_at(instr) || is_jalr_at(instr); }
  // NOTE(review): funct7 is not checked here, so other OP/funct3==000
  // encodings (e.g. sub, mul) also match - confirm callers only probe
  // code sequences they themselves emitted.
  static bool is_add_at(address instr) { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b0110011 && extract_funct3(instr) == 0b000; }
  static bool is_addi_at(address instr) { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b0010011 && extract_funct3(instr) == 0b000; }
  static bool is_addiw_at(address instr) { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b0011011 && extract_funct3(instr) == 0b000; }
  static bool is_addiw_to_zr_at(address instr){ assert_cond(instr != nullptr); return is_addiw_at(instr) && extract_rd(instr) == zr; }
  static bool is_lui_at(address instr) { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b0110111; }
  static bool is_lui_to_zr_at(address instr) { assert_cond(instr != nullptr); return is_lui_at(instr) && extract_rd(instr) == zr; }
1693
1694 static bool is_srli_at(address instr) {
1695 assert_cond(instr != nullptr);
1696 return extract_opcode(instr) == 0b0010011 &&
1697 extract_funct3(instr) == 0b101 &&
1698 Assembler::extract(((unsigned*)instr)[0], 31, 26) == 0b000000;
1699 }
1700
  // Return true if the instruction at 'instr' is a slli (shift left logical
  // immediate) whose shift amount equals 'shift'.
  static bool is_slli_shift_at(address instr, uint32_t shift) {
    assert_cond(instr != nullptr);
    return (extract_opcode(instr) == 0b0010011 && // opcode field
            extract_funct3(instr) == 0b001 && // funct3 field, select the type of operation
            Assembler::extract(Assembler::ld_instr(instr), 25, 20) == shift); // shamt field
  }
1707
1708 static bool is_movptr1_at(address instr);
1709 static bool is_movptr2_at(address instr);
1710
1711 static bool is_lwu_to_zr(address instr);
1712
1713 static Register extract_rs1(address instr);
1714 static Register extract_rs2(address instr);
1715 static Register extract_rd(address instr);
1716 static uint32_t extract_opcode(address instr);
1717 static uint32_t extract_funct3(address instr);
1718
1719 // the instruction sequence of movptr is as below:
1720 // lui
1721 // addi
1722 // slli
1723 // addi
1724 // slli
1725 // addi/jalr/load
1726 static bool check_movptr1_data_dependency(address instr) {
1727 address lui = instr;
1728 address addi1 = lui + MacroAssembler::instruction_size;
1729 address slli1 = addi1 + MacroAssembler::instruction_size;
1730 address addi2 = slli1 + MacroAssembler::instruction_size;
1731 address slli2 = addi2 + MacroAssembler::instruction_size;
1732 address last_instr = slli2 + MacroAssembler::instruction_size;
1733 return extract_rs1(addi1) == extract_rd(lui) &&
1734 extract_rs1(addi1) == extract_rd(addi1) &&
1735 extract_rs1(slli1) == extract_rd(addi1) &&
1736 extract_rs1(slli1) == extract_rd(slli1) &&
1737 extract_rs1(addi2) == extract_rd(slli1) &&
1738 extract_rs1(addi2) == extract_rd(addi2) &&
1739 extract_rs1(slli2) == extract_rd(addi2) &&
1740 extract_rs1(slli2) == extract_rd(slli2) &&
1741 extract_rs1(last_instr) == extract_rd(slli2);
1742 }
1743
1744 // the instruction sequence of movptr2 is as below:
1745 // lui
1746 // lui
1747 // slli
1748 // add
1749 // addi/jalr/load
1750 static bool check_movptr2_data_dependency(address instr) {
1751 address lui1 = instr;
1752 address lui2 = lui1 + MacroAssembler::instruction_size;
1753 address slli = lui2 + MacroAssembler::instruction_size;
1754 address add = slli + MacroAssembler::instruction_size;
1755 address last_instr = add + MacroAssembler::instruction_size;
1756 return extract_rd(add) == extract_rd(lui2) &&
1757 extract_rs1(add) == extract_rd(lui2) &&
1758 extract_rs2(add) == extract_rd(slli) &&
1759 extract_rs1(slli) == extract_rd(lui1) &&
1760 extract_rd(slli) == extract_rd(lui1) &&
1761 extract_rs1(last_instr) == extract_rd(add);
1762 }
1763
1764 // the instruction sequence of li16u is as below:
1765 // lui
1766 // srli
1767 static bool check_li16u_data_dependency(address instr) {
1768 address lui = instr;
1769 address srli = lui + MacroAssembler::instruction_size;
1770
1771 return extract_rs1(srli) == extract_rd(lui) &&
1772 extract_rs1(srli) == extract_rd(srli);
1773 }
1774
1775 // the instruction sequence of li32 is as below:
1776 // lui
1777 // addiw
1778 static bool check_li32_data_dependency(address instr) {
1779 address lui = instr;
1780 address addiw = lui + MacroAssembler::instruction_size;
1781
1782 return extract_rs1(addiw) == extract_rd(lui) &&
1783 extract_rs1(addiw) == extract_rd(addiw);
1784 }
1785
1786 // the instruction sequence of pc-relative is as below:
1787 // auipc
1788 // jalr/addi/load/float_load
1789 static bool check_pc_relative_data_dependency(address instr) {
1790 address auipc = instr;
1791 address last_instr = auipc + MacroAssembler::instruction_size;
1792
1793 return extract_rs1(last_instr) == extract_rd(auipc);
1794 }
1795
1796 // the instruction sequence of load_label is as below:
1797 // auipc
1798 // load
1799 static bool check_load_pc_relative_data_dependency(address instr) {
1800 address auipc = instr;
1801 address load = auipc + MacroAssembler::instruction_size;
1802
1803 return extract_rd(load) == extract_rd(auipc) &&
1804 extract_rs1(load) == extract_rd(load);
1805 }
1806
1807 static bool is_li32_at(address instr);
1808 static bool is_pc_relative_at(address branch);
1809
  static bool is_membar(address addr) {
    // The 0x7f mask extracts the 7-bit major opcode from the raw instruction
    // word; 0b1111 (MISC-MEM) with funct3 == 0 encodes a FENCE instruction.
    return (Bytes::get_native_u4(addr) & 0x7f) == 0b1111 && extract_funct3(addr) == 0;
  }
1813 static uint32_t get_membar_kind(address addr);
1814 static void set_membar_kind(address addr, uint32_t order_kind);
1815
1816 public:
1817 // Inline type specific methods
1818 #include "asm/macroAssembler_common.hpp"
1819 };
1820
#ifdef ASSERT
// Returning false disables AbstractAssembler's instruction-mark consistency
// checking on this platform.
inline bool AbstractAssembler::pd_check_instruction_mark() { return false; }
#endif
1824
1825 #endif // CPU_RISCV_MACROASSEMBLER_RISCV_HPP