1 /*
2 * Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved.
3 * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
4 * Copyright (c) 2020, 2024, Huawei Technologies Co., Ltd. All rights reserved.
5 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 *
7 * This code is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License version 2 only, as
9 * published by the Free Software Foundation.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 *
25 */
26
27 #ifndef CPU_RISCV_MACROASSEMBLER_RISCV_HPP
28 #define CPU_RISCV_MACROASSEMBLER_RISCV_HPP
29
30 #include "asm/assembler.inline.hpp"
31 #include "code/vmreg.hpp"
32 #include "metaprogramming/enableIf.hpp"
33 #include "oops/compressedOops.hpp"
34 #include "utilities/powerOfTwo.hpp"
35 #include "runtime/signature.hpp"
36
37 class ciInlineKlass;
38
39 // MacroAssembler extends Assembler by frequently used macros.
40 //
41 // Instructions for which a 'better' code sequence exists depending
42 // on arguments should also go in here.
43
44 class MacroAssembler: public Assembler {
45
46 public:
47
48 MacroAssembler(CodeBuffer* code) : Assembler(code) {}
49
50 void safepoint_poll(Label& slow_path, bool at_return, bool in_nmethod, Register tmp_reg = t0);
51
52 // Alignment
53 int align(int modulus, int extra_offset = 0);
54
55 static inline void assert_alignment(address pc, int alignment = MacroAssembler::instruction_size) {
56 assert(is_aligned(pc, alignment), "bad alignment");
57 }
58
59 // nop
60 void post_call_nop();
61
62 // Stack frame creation/removal
63 // Note that SP must be updated to the right place before saving/restoring RA and FP
64 // because signal based thread suspend/resume could happen asynchronously.
65 void enter() {
66 subi(sp, sp, 2 * wordSize);
67 sd(ra, Address(sp, wordSize));
68 sd(fp, Address(sp));
69 addi(fp, sp, 2 * wordSize);
70 }
71
72 void leave() {
73 subi(sp, fp, 2 * wordSize);
74 ld(fp, Address(sp));
75 ld(ra, Address(sp, wordSize));
76 addi(sp, sp, 2 * wordSize);
77 }
78
79
80 // Support for getting the JavaThread pointer (i.e.; a reference to thread-local information)
81 // The pointer will be loaded into the thread register.
82 void get_thread(Register thread);
83
84 // Support for VM calls
85 //
86 // It is imperative that all calls into the VM are handled via the call_VM macros.
87 // They make sure that the stack linkage is setup correctly. call_VM's correspond
88 // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points.
89
90 void call_VM(Register oop_result,
91 address entry_point,
92 bool check_exceptions = true);
93 void call_VM(Register oop_result,
94 address entry_point,
95 Register arg_1,
96 bool check_exceptions = true);
97 void call_VM(Register oop_result,
98 address entry_point,
99 Register arg_1, Register arg_2,
100 bool check_exceptions = true);
101 void call_VM(Register oop_result,
102 address entry_point,
103 Register arg_1, Register arg_2, Register arg_3,
104 bool check_exceptions = true);
105
106 // Overloadings with last_Java_sp
107 void call_VM(Register oop_result,
108 Register last_java_sp,
109 address entry_point,
110 int number_of_arguments = 0,
111 bool check_exceptions = true);
112 void call_VM(Register oop_result,
113 Register last_java_sp,
114 address entry_point,
115 Register arg_1,
116 bool check_exceptions = true);
117 void call_VM(Register oop_result,
118 Register last_java_sp,
119 address entry_point,
120 Register arg_1, Register arg_2,
121 bool check_exceptions = true);
122 void call_VM(Register oop_result,
123 Register last_java_sp,
124 address entry_point,
125 Register arg_1, Register arg_2, Register arg_3,
126 bool check_exceptions = true);
127
128 void get_vm_result_oop(Register oop_result, Register java_thread);
129 void get_vm_result_metadata(Register metadata_result, Register java_thread);
130
131 // These always tightly bind to MacroAssembler::call_VM_leaf_base
132 // bypassing the virtual implementation
133 void call_VM_leaf(address entry_point,
134 int number_of_arguments = 0);
135 void call_VM_leaf(address entry_point,
136 Register arg_0);
137 void call_VM_leaf(address entry_point,
138 Register arg_0, Register arg_1);
139 void call_VM_leaf(address entry_point,
140 Register arg_0, Register arg_1, Register arg_2);
141
142 // These always tightly bind to MacroAssembler::call_VM_base
143 // bypassing the virtual implementation
144 void super_call_VM_leaf(address entry_point);
145 void super_call_VM_leaf(address entry_point, Register arg_0);
146 void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1);
147 void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2);
148 void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3);
149
150 // last Java Frame (fills frame anchor)
151 void set_last_Java_frame(Register last_java_sp, Register last_java_fp, address last_java_pc, Register tmp);
152 void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Label &last_java_pc, Register tmp);
153 void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Register last_java_pc);
154
155 // thread in the default location (xthread)
156 void reset_last_Java_frame(bool clear_fp);
157
158 virtual void call_VM_leaf_base(
159 address entry_point, // the entry point
160 int number_of_arguments, // the number of arguments to pop after the call
161 Label* retaddr = nullptr
162 );
163
164 virtual void call_VM_leaf_base(
165 address entry_point, // the entry point
166 int number_of_arguments, // the number of arguments to pop after the call
167 Label& retaddr) {
168 call_VM_leaf_base(entry_point, number_of_arguments, &retaddr);
169 }
170
171 virtual void call_VM_base( // returns the register containing the thread upon return
172 Register oop_result, // where an oop-result ends up if any; use noreg otherwise
173 Register java_thread, // the thread if computed before ; use noreg otherwise
174 Register last_java_sp, // to set up last_Java_frame in stubs; use noreg otherwise
175 Label* return_pc, // to set up last_Java_frame; use nullptr otherwise
176 address entry_point, // the entry point
177 int number_of_arguments, // the number of arguments (w/o thread) to pop after the call
178 bool check_exceptions // whether to check for pending exceptions after return
179 );
180
181 void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions);
182
183 virtual void check_and_handle_earlyret(Register java_thread);
184 virtual void check_and_handle_popframe(Register java_thread);
185
186 void resolve_weak_handle(Register result, Register tmp1, Register tmp2);
187 void resolve_oop_handle(Register result, Register tmp1, Register tmp2);
188 void resolve_jobject(Register value, Register tmp1, Register tmp2);
189 void resolve_global_jobject(Register value, Register tmp1, Register tmp2);
190
191 void movoop(Register dst, jobject obj);
192 void mov_metadata(Register dst, Metadata* obj);
193 void bang_stack_size(Register size, Register tmp);
194 void set_narrow_oop(Register dst, jobject obj);
195 void set_narrow_klass(Register dst, Klass* k);
196
197 void load_mirror(Register dst, Register method, Register tmp1, Register tmp2);
198 void access_load_at(BasicType type, DecoratorSet decorators, Register dst,
199 Address src, Register tmp1, Register tmp2);
200 void access_store_at(BasicType type, DecoratorSet decorators, Address dst,
201 Register val, Register tmp1, Register tmp2, Register tmp3);
202 void load_klass(Register dst, Register src, Register tmp = t0);
203 void load_prototype_header(Register dst, Register src, Register tmp = t0);
204 void load_narrow_klass_compact(Register dst, Register src);
205 void store_klass(Register dst, Register src, Register tmp = t0);
206 void cmp_klass_beq(Register obj, Register klass,
207 Register tmp1, Register tmp2,
208 Label &L, bool is_far = false);
209 void cmp_klass_bne(Register obj, Register klass,
210 Register tmp1, Register tmp2,
211 Label &L, bool is_far = false);
212
213 void encode_klass_not_null(Register r, Register tmp = t0);
214 void decode_klass_not_null(Register r, Register tmp = t0);
215 void encode_klass_not_null(Register dst, Register src, Register tmp);
216 void decode_klass_not_null(Register dst, Register src, Register tmp);
217 void decode_heap_oop_not_null(Register r);
218 void decode_heap_oop_not_null(Register dst, Register src);
219 void decode_heap_oop(Register d, Register s);
220 void decode_heap_oop(Register r) { decode_heap_oop(r, r); }
221 void encode_heap_oop_not_null(Register r);
222 void encode_heap_oop_not_null(Register dst, Register src);
223 void encode_heap_oop(Register d, Register s);
224 void encode_heap_oop(Register r) { encode_heap_oop(r, r); };
225 void load_heap_oop(Register dst, Address src, Register tmp1,
226 Register tmp2, DecoratorSet decorators = 0);
227 void load_heap_oop_not_null(Register dst, Address src, Register tmp1,
228 Register tmp2, DecoratorSet decorators = 0);
229 void store_heap_oop(Address dst, Register val, Register tmp1,
230 Register tmp2, Register tmp3, DecoratorSet decorators = 0);
231
232 void store_klass_gap(Register dst, Register src);
233
234 // currently unimplemented
235 // Used for storing null. All other oop constants should be
236 // stored using routines that take a jobject.
237 void store_heap_oop_null(Address dst);
238
239 // This dummy is to prevent a call to store_heap_oop from
240 // converting a zero (linked null) into a Register by giving
241 // the compiler two choices it can't resolve
242
243 void store_heap_oop(Address dst, void* dummy);
244
245 // Support for null-checks
246 //
247 // Generates code that causes a null OS exception if the content of reg is null.
248 // If the accessed location is M[reg + offset] and the offset is known, provide the
249 // offset. No explicit code generation is needed if the offset is within a certain
250 // range (0 <= offset <= page_size).
251
252 virtual void null_check(Register reg, int offset = -1);
253 static bool needs_explicit_null_check(intptr_t offset);
254 static bool uses_implicit_null_check(void* address);
255
256 void test_field_is_null_free_inline_type(Register flags, Register temp_reg, Label& is_null_free);
257 void test_field_is_not_null_free_inline_type(Register flags, Register temp_reg, Label& not_null_free_inline_type);
258 void test_field_is_flat(Register flags, Register temp_reg, Label& is_flat);
259
260 void test_markword_is_inline_type(Register markword, Label& is_inline_type);
261 void test_oop_is_not_inline_type(Register object, Register tmp, Label& not_inline_type, bool can_be_null = true);
262 void test_oop_prototype_bit(Register oop, Register temp_reg, int32_t tst_bit, bool jmp_set, Label& jmp_label);
263 void test_flat_array_oop(Register klass, Register temp_reg, Label& is_flat_array);
264 void test_null_free_array_oop(Register oop, Register temp_reg, Label& is_null_free_array);
265 void test_non_flat_array_oop(Register oop, Register temp_reg, Label&is_non_flat_array);
266 void test_non_null_free_array_oop(Register oop, Register temp_reg, Label&is_non_null_free_array);
267
268 // Check array klass layout helper for flat or null-free arrays...
269 void test_flat_array_layout(Register lh, Label& is_flat_array);
270
271 void inline_layout_info(Register holder_klass, Register index, Register layout_info);
272
273 void flat_field_copy(DecoratorSet decorators, Register src, Register dst, Register inline_layout_info);
274
275 // inline type data payload offsets...
276 void payload_offset(Register inline_klass, Register offset);
277 void payload_address(Register oop, Register data, Register inline_klass);
278
279 // interface method calling
280 void lookup_interface_method(Register recv_klass,
281 Register intf_klass,
282 RegisterOrConstant itable_index,
283 Register method_result,
284 Register scan_tmp,
285 Label& no_such_interface,
286 bool return_method = true);
287
288 void lookup_interface_method_stub(Register recv_klass,
289 Register holder_klass,
290 Register resolved_klass,
291 Register method_result,
292 Register temp_reg,
293 Register temp_reg2,
294 int itable_index,
295 Label& L_no_such_interface);
296
297 // virtual method calling
298 // n.b. x86 allows RegisterOrConstant for vtable_index
299 void lookup_virtual_method(Register recv_klass,
300 RegisterOrConstant vtable_index,
301 Register method_result);
302
303 // Form an address from base + offset in Rd. Rd may or may not
304 // actually be used: you must use the Address that is returned. It
305 // is up to you to ensure that the shift provided matches the size
306 // of your data.
307 Address form_address(Register Rd, Register base, int64_t byte_offset);
308
309 // Sometimes we get misaligned loads and stores, usually from Unsafe
310 // accesses, and these can exceed the offset range.
311 Address legitimize_address(Register Rd, const Address &adr) {
312 if (adr.getMode() == Address::base_plus_offset) {
313 if (!is_simm12(adr.offset())) {
314 return form_address(Rd, adr.base(), adr.offset());
315 }
316 }
317 return adr;
318 }
319
320 // allocation
321
322 void tlab_allocate(
323 Register obj, // result: pointer to object after successful allocation
324 Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise
325 int con_size_in_bytes, // object size in bytes if known at compile time
326 Register tmp1, // temp register
327 Register tmp2, // temp register
328 Label& slow_case, // continuation point if fast allocation fails
329 bool is_far = false
330 );
331
332 // Test sub_klass against super_klass, with fast and slow paths.
333
334 // The fast path produces a tri-state answer: yes / no / maybe-slow.
335 // One of the three labels can be null, meaning take the fall-through.
336 // If super_check_offset is noreg, the value is loaded up from super_klass.
337 // No registers are killed, except tmp_reg
338 void check_klass_subtype_fast_path(Register sub_klass,
339 Register super_klass,
340 Register tmp_reg,
341 Label* L_success,
342 Label* L_failure,
343 Label* L_slow_path,
344 Register super_check_offset = noreg);
345
346 // The rest of the type check; must be wired to a corresponding fast path.
347 // It does not repeat the fast path logic, so don't use it standalone.
348 // The tmp1_reg and tmp2_reg can be noreg, if no temps are available.
349 // Updates the sub's secondary super cache as necessary.
350 void check_klass_subtype_slow_path(Register sub_klass,
351 Register super_klass,
352 Register tmp1_reg,
353 Register tmp2_reg,
354 Label* L_success,
355 Label* L_failure,
356 bool set_cond_codes = false);
357
358 void check_klass_subtype_slow_path_linear(Register sub_klass,
359 Register super_klass,
360 Register tmp1_reg,
361 Register tmp2_reg,
362 Label* L_success,
363 Label* L_failure,
364 bool set_cond_codes = false);
365
366 void check_klass_subtype_slow_path_table(Register sub_klass,
367 Register super_klass,
368 Register tmp1_reg,
369 Register tmp2_reg,
370 Label* L_success,
371 Label* L_failure,
372 bool set_cond_codes = false);
373
374 // If r is valid, return r.
375 // If r is invalid, remove a register r2 from available_regs, add r2
376 // to regs_to_push, then return r2.
377 Register allocate_if_noreg(const Register r,
378 RegSetIterator<Register> &available_regs,
379 RegSet ®s_to_push);
380
381 // Secondary subtype checking
382 void lookup_secondary_supers_table_var(Register sub_klass,
383 Register r_super_klass,
384 Register result,
385 Register tmp1,
386 Register tmp2,
387 Register tmp3,
388 Register tmp4,
389 Label *L_success);
390
391 void population_count(Register dst, Register src, Register tmp1, Register tmp2);
392
393 // As above, but with a constant super_klass.
394 // The result is in Register result, not the condition codes.
395 bool lookup_secondary_supers_table_const(Register r_sub_klass,
396 Register r_super_klass,
397 Register result,
398 Register tmp1,
399 Register tmp2,
400 Register tmp3,
401 Register tmp4,
402 u1 super_klass_slot,
403 bool stub_is_near = false);
404
405 void verify_secondary_supers_table(Register r_sub_klass,
406 Register r_super_klass,
407 Register result,
408 Register tmp1,
409 Register tmp2,
410 Register tmp3);
411
412 void lookup_secondary_supers_table_slow_path(Register r_super_klass,
413 Register r_array_base,
414 Register r_array_index,
415 Register r_bitmap,
416 Register result,
417 Register tmp,
418 bool is_stub = true);
419
420 void check_klass_subtype(Register sub_klass,
421 Register super_klass,
422 Register tmp_reg,
423 Label& L_success);
424
425 Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0);
426
427 void profile_receiver_type(Register recv, Register mdp, int mdp_offset);
428
429 // only if +VerifyOops
430 void _verify_oop(Register reg, const char* s, const char* file, int line);
431 void _verify_oop_addr(Address addr, const char* s, const char* file, int line);
432
433 void _verify_oop_checked(Register reg, const char* s, const char* file, int line) {
434 if (VerifyOops) {
435 _verify_oop(reg, s, file, line);
436 }
437 }
438 void _verify_oop_addr_checked(Address reg, const char* s, const char* file, int line) {
439 if (VerifyOops) {
440 _verify_oop_addr(reg, s, file, line);
441 }
442 }
443
444 void _verify_method_ptr(Register reg, const char* msg, const char* file, int line) {}
445 void _verify_klass_ptr(Register reg, const char* msg, const char* file, int line) {}
446
// Verification wrappers. Each macro stringifies the checked expression into
// the diagnostic message and passes the call site (__FILE__/__LINE__) along.
// The oop variants go through the *_checked helpers; the method and klass
// variants forward to _verify_method_ptr and _verify_klass_ptr respectively.
#define verify_oop(reg) _verify_oop_checked(reg, "broken oop " #reg, __FILE__, __LINE__)
#define verify_oop_msg(reg, msg) _verify_oop_checked(reg, "broken oop " #reg ", " #msg, __FILE__, __LINE__)
#define verify_oop_addr(addr) _verify_oop_addr_checked(addr, "broken oop addr " #addr, __FILE__, __LINE__)
#define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__)
#define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__)
452
453 // A more convenient access to fence for our purposes
454 // We use four bits to indicate the read and write bits in the predecessors and successors,
455 // and extend them with i for r and o for w if UseConservativeFence is enabled.
// Each value is (predecessor << 2) | successor, where 0b10 = r and 0b01 = w.
enum Membar_mask_bits {
  StoreStore = (0b01 << 2) | 0b01,      // pred = w,  succ = w
  LoadStore  = (0b10 << 2) | 0b01,      // pred = r,  succ = w
  StoreLoad  = (0b01 << 2) | 0b10,      // pred = w,  succ = r
  LoadLoad   = (0b10 << 2) | 0b10,      // pred = r,  succ = r
  AnyAny     = LoadStore | StoreLoad    // pred = rw, succ = rw
};
463
464 void membar(uint32_t order_constraint);
465
466 private:
467
468 static void membar_mask_to_pred_succ(uint32_t order_constraint,
469 uint32_t& predecessor, uint32_t& successor) {
470 predecessor = (order_constraint >> 2) & 0x3;
471 successor = order_constraint & 0x3;
472
473 // extend rw -> iorw:
474 // 01(w) -> 0101(ow)
475 // 10(r) -> 1010(ir)
476 // 11(rw)-> 1111(iorw)
477 if (UseConservativeFence) {
478 predecessor |= predecessor << 2;
479 successor |= successor << 2;
480 }
481 }
482
// Packs a predecessor/successor pair into a 4-bit ordering mask:
// predecessor r/w bits in the upper two bits, successor r/w bits in the
// lower two. Any bits above the low two of each input are discarded.
static int pred_succ_to_membar_mask(uint32_t predecessor, uint32_t successor) {
  const uint32_t pred_rw = predecessor & 0x3;
  const uint32_t succ_rw = successor & 0x3;
  return (pred_rw << 2) | succ_rw;
}
486
487 public:
488
489 void cmodx_fence();
490
491 void pause() {
492 // Zihintpause
493 // PAUSE is encoded as a FENCE instruction with pred=W, succ=0, fm=0, rd=x0, and rs1=x0.
494 Assembler::fence(w, 0);
495 }
496
497 // prints msg, dumps registers and stops execution
498 void stop(const char* msg);
499
500 static void debug64(char* msg, int64_t pc, int64_t regs[]);
501
502 void unimplemented(const char* what = "");
503
504 void should_not_reach_here() { stop("should not reach here"); }
505
506 static address target_addr_for_insn(address insn_addr);
507
508 // Required platform-specific helpers for Label::patch_instructions.
509 // They _shadow_ the declarations in AbstractAssembler, which are undefined.
510 static int pd_patch_instruction_size(address branch, address target);
511 static void pd_patch_instruction(address branch, address target, const char* file = nullptr, int line = 0) {
512 pd_patch_instruction_size(branch, target);
513 }
514 static address pd_call_destination(address branch) {
515 return target_addr_for_insn(branch);
516 }
517
518 static int patch_oop(address insn_addr, address o);
519
520 static address get_target_of_li32(address insn_addr);
521 static int patch_imm_in_li32(address branch, int32_t target);
522
523 // Return whether code is emitted to a scratch blob.
524 virtual bool in_scratch_emit_size() {
525 return false;
526 }
527
528 address emit_reloc_call_address_stub(int insts_call_instruction_offset, address target);
529 static int max_reloc_call_address_stub_size();
530
531 void emit_static_call_stub();
532 static int static_call_stub_size();
533
534 // The following 4 methods return the offset of the appropriate move instruction
535
536 // Support for fast byte/short loading with zero extension (depending on particular CPU)
537 int load_unsigned_byte(Register dst, Address src);
538 int load_unsigned_short(Register dst, Address src);
539
540 // Support for fast byte/short loading with sign extension (depending on particular CPU)
541 int load_signed_byte(Register dst, Address src);
542 int load_signed_short(Register dst, Address src);
543
544 // Load and store values by size and signed-ness
545 void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed);
546 void store_sized_value(Address dst, Register src, size_t size_in_bytes);
547
548 // Misaligned loads, will use the best way, according to the AvoidUnalignedAccess flag
549 void load_short_misaligned(Register dst, Address src, Register tmp, bool is_signed, int granularity = 1);
550 void load_int_misaligned(Register dst, Address src, Register tmp, bool is_signed, int granularity = 1);
551 void load_long_misaligned(Register dst, Address src, Register tmp, int granularity = 1);
552
553 public:
554 // Standard pseudo instructions
555 inline void nop() {
556 addi(x0, x0, 0);
557 }
558
559 inline void mv(Register Rd, Register Rs) {
560 if (Rd != Rs) {
561 addi(Rd, Rs, 0);
562 }
563 }
564
565 inline void notr(Register Rd, Register Rs) {
566 if (do_compress_zcb(Rd, Rs) && (Rd == Rs)) {
567 c_not(Rd);
568 } else {
569 xori(Rd, Rs, -1);
570 }
571 }
572
573 inline void neg(Register Rd, Register Rs) {
574 sub(Rd, x0, Rs);
575 }
576
577 inline void negw(Register Rd, Register Rs) {
578 subw(Rd, x0, Rs);
579 }
580
581 inline void sext_w(Register Rd, Register Rs) {
582 addiw(Rd, Rs, 0);
583 }
584
585 inline void zext_b(Register Rd, Register Rs) {
586 if (do_compress_zcb(Rd, Rs) && (Rd == Rs)) {
587 c_zext_b(Rd);
588 } else {
589 andi(Rd, Rs, 0xFF);
590 }
591 }
592
593 inline void seqz(Register Rd, Register Rs) {
594 sltiu(Rd, Rs, 1);
595 }
596
597 inline void snez(Register Rd, Register Rs) {
598 sltu(Rd, x0, Rs);
599 }
600
601 inline void sltz(Register Rd, Register Rs) {
602 slt(Rd, Rs, x0);
603 }
604
605 inline void sgtz(Register Rd, Register Rs) {
606 slt(Rd, x0, Rs);
607 }
608
609 // Bit-manipulation extension pseudo instructions
610 // zero extend word
611 inline void zext_w(Register Rd, Register Rs) {
612 assert(UseZba, "must be");
613 if (do_compress_zcb(Rd, Rs) && (Rd == Rs)) {
614 c_zext_w(Rd);
615 } else {
616 add_uw(Rd, Rs, zr);
617 }
618 }
619
620 // Floating-point data-processing pseudo instructions
621 inline void fmv_s(FloatRegister Rd, FloatRegister Rs) {
622 if (Rd != Rs) {
623 fsgnj_s(Rd, Rs, Rs);
624 }
625 }
626
627 inline void fabs_s(FloatRegister Rd, FloatRegister Rs) {
628 fsgnjx_s(Rd, Rs, Rs);
629 }
630
631 inline void fneg_s(FloatRegister Rd, FloatRegister Rs) {
632 fsgnjn_s(Rd, Rs, Rs);
633 }
634
635 inline void fmv_d(FloatRegister Rd, FloatRegister Rs) {
636 if (Rd != Rs) {
637 fsgnj_d(Rd, Rs, Rs);
638 }
639 }
640
641 inline void fabs_d(FloatRegister Rd, FloatRegister Rs) {
642 fsgnjx_d(Rd, Rs, Rs);
643 }
644
645 inline void fneg_d(FloatRegister Rd, FloatRegister Rs) {
646 fsgnjn_d(Rd, Rs, Rs);
647 }
648
649 // Control and status pseudo instructions
650 void csrr(Register Rd, unsigned csr); // read csr
651 void csrw(unsigned csr, Register Rs); // write csr
652 void csrs(unsigned csr, Register Rs); // set bits in csr
653 void csrc(unsigned csr, Register Rs); // clear bits in csr
654 void csrwi(unsigned csr, unsigned imm);
655 void csrsi(unsigned csr, unsigned imm);
656 void csrci(unsigned csr, unsigned imm);
657 void frcsr(Register Rd) { csrr(Rd, CSR_FCSR); }; // read float-point csr
658 void fscsr(Register Rd, Register Rs); // swap float-point csr
659 void fscsr(Register Rs); // write float-point csr
660 void frrm(Register Rd) { csrr(Rd, CSR_FRM); }; // read float-point rounding mode
661 void fsrm(Register Rd, Register Rs); // swap float-point rounding mode
662 void fsrm(Register Rs); // write float-point rounding mode
663 void fsrmi(Register Rd, unsigned imm);
664 void fsrmi(unsigned imm);
665 void frflags(Register Rd) { csrr(Rd, CSR_FFLAGS); }; // read float-point exception flags
666 void fsflags(Register Rd, Register Rs); // swap float-point exception flags
667 void fsflags(Register Rs); // write float-point exception flags
668 void fsflagsi(Register Rd, unsigned imm);
669 void fsflagsi(unsigned imm);
670 // Requires Zicntr
671 void rdinstret(Register Rd) { csrr(Rd, CSR_INSTRET); }; // read instruction-retired counter
672 void rdcycle(Register Rd) { csrr(Rd, CSR_CYCLE); }; // read cycle counter
673 void rdtime(Register Rd) { csrr(Rd, CSR_TIME); }; // read time
674
675 // Restore cpu control state after JNI call
676 void restore_cpu_control_state_after_jni(Register tmp);
677
678 // Control transfer pseudo instructions
679 void beqz(Register Rs, const address dest);
680 void bnez(Register Rs, const address dest);
681 void blez(Register Rs, const address dest);
682 void bgez(Register Rs, const address dest);
683 void bltz(Register Rs, const address dest);
684 void bgtz(Register Rs, const address dest);
685
686 void cmov_eq(Register cmp1, Register cmp2, Register dst, Register src);
687 void cmov_ne(Register cmp1, Register cmp2, Register dst, Register src);
688 void cmov_le(Register cmp1, Register cmp2, Register dst, Register src);
689 void cmov_leu(Register cmp1, Register cmp2, Register dst, Register src);
690 void cmov_ge(Register cmp1, Register cmp2, Register dst, Register src);
691 void cmov_geu(Register cmp1, Register cmp2, Register dst, Register src);
692 void cmov_lt(Register cmp1, Register cmp2, Register dst, Register src);
693 void cmov_ltu(Register cmp1, Register cmp2, Register dst, Register src);
694 void cmov_gt(Register cmp1, Register cmp2, Register dst, Register src);
695 void cmov_gtu(Register cmp1, Register cmp2, Register dst, Register src);
696
697 void cmov_cmp_fp_eq(FloatRegister cmp1, FloatRegister cmp2, Register dst, Register src, bool is_single);
698 void cmov_cmp_fp_ne(FloatRegister cmp1, FloatRegister cmp2, Register dst, Register src, bool is_single);
699 void cmov_cmp_fp_le(FloatRegister cmp1, FloatRegister cmp2, Register dst, Register src, bool is_single);
700 void cmov_cmp_fp_ge(FloatRegister cmp1, FloatRegister cmp2, Register dst, Register src, bool is_single);
701 void cmov_cmp_fp_lt(FloatRegister cmp1, FloatRegister cmp2, Register dst, Register src, bool is_single);
702 void cmov_cmp_fp_gt(FloatRegister cmp1, FloatRegister cmp2, Register dst, Register src, bool is_single);
703
704 void cmov_fp_eq(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single);
705 void cmov_fp_ne(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single);
706 void cmov_fp_le(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single);
707 void cmov_fp_leu(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single);
708 void cmov_fp_ge(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single);
709 void cmov_fp_geu(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single);
710 void cmov_fp_lt(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single);
711 void cmov_fp_ltu(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single);
712 void cmov_fp_gt(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single);
713 void cmov_fp_gtu(Register cmp1, Register cmp2, FloatRegister dst, FloatRegister src, bool is_single);
714
715 void cmov_fp_cmp_fp_eq(FloatRegister cmp1, FloatRegister cmp2, FloatRegister dst, FloatRegister src, bool cmp_single, bool cmov_single);
716 void cmov_fp_cmp_fp_ne(FloatRegister cmp1, FloatRegister cmp2, FloatRegister dst, FloatRegister src, bool cmp_single, bool cmov_single);
717 void cmov_fp_cmp_fp_le(FloatRegister cmp1, FloatRegister cmp2, FloatRegister dst, FloatRegister src, bool cmp_single, bool cmov_single);
718 void cmov_fp_cmp_fp_ge(FloatRegister cmp1, FloatRegister cmp2, FloatRegister dst, FloatRegister src, bool cmp_single, bool cmov_single);
719 void cmov_fp_cmp_fp_lt(FloatRegister cmp1, FloatRegister cmp2, FloatRegister dst, FloatRegister src, bool cmp_single, bool cmov_single);
720 void cmov_fp_cmp_fp_gt(FloatRegister cmp1, FloatRegister cmp2, FloatRegister dst, FloatRegister src, bool cmp_single, bool cmov_single);
721
722 public:
723 // We try to follow risc-v asm mnemonics.
724 // But as we don't layout a reachable GOT,
725 // we often need to resort to movptr, li <48imm>.
726 // https://github.com/riscv-non-isa/riscv-asm-manual/blob/main/src/asm-manual.adoc
727
728 // Hotspot only uses the standard calling convention using x1/ra.
729 // The alternative calling convention using x5/t0 is not used.
730 // Using x5 as a temp causes the CPU to mispredict returns.
731
732 // JALR, return address stack updates:
733 // | rd is x1/x5 | rs1 is x1/x5 | rd=rs1 | RAS action
734 // | ----------- | ------------ | ------ |-------------
735 // | No | No | - | None
736 // | No | Yes | - | Pop
737 // | Yes | No | - | Push
738 // | Yes | Yes | No | Pop, then push
739 // | Yes | Yes | Yes | Push
740 //
741 // JAL, return address stack updates:
742 // | rd is x1/x5 | RAS action
743 // | ----------- | ----------
744 // | Yes | Push
745 // | No | None
746 //
747 // JUMPs uses Rd = x0/zero and Rs = x6/t1 or imm
748 // CALLS uses Rd = x1/ra and Rs = x6/t1 or imm (or x1/ra*)
749 // RETURNS uses Rd = x0/zero and Rs = x1/ra
750 // *using x1/ra as Rs is a special case and should normally be avoided.
751
752 // jump: jal x0, offset
753 // For long reach uses temp register for:
754 // la + jr
755 void j(const address dest, Register temp = t1);
756 void j(const Address &dest, Register temp = t1);
757 void j(Label &l, Register temp = noreg);
758
759 // jump register: jalr x0, offset(rs)
760 void jr(Register Rd, int32_t offset = 0);
761
762 // call: la + jalr x1
763 void call(const address dest, Register temp = t1);
764
765 // jalr: jalr x1, offset(rs)
766 void jalr(Register Rs, int32_t offset = 0);
767
768 // Emit a runtime call. Only invalidates the tmp register which
769 // is used to keep the entry address for jalr/movptr.
770 // Uses call() for intra code cache, else movptr + jalr.
771 // Clobbers t1
772 void rt_call(address dest, Register tmp = t1);
773
774 // ret: jalr x0, 0(x1)
775 inline void ret() {
776 Assembler::jalr(x0, x1, 0);
777 }
778
779 // Compare-and-branch helpers (declarations only; defined elsewhere).
// One-register forms compare Rs as their mnemonic says (e.g. beqz, bnez).
// Every Label overload takes is_far, which defaults to false.
// NOTE(review): is_far presumably selects a longer sequence for targets
// beyond the conditional-branch reach - confirm against the definitions.
780 void beqz(Register Rs, Label &l, bool is_far = false);
781 void bnez(Register Rs, Label &l, bool is_far = false);
782 void blez(Register Rs, Label &l, bool is_far = false);
783 void bgez(Register Rs, Label &l, bool is_far = false);
784 void bltz(Register Rs, Label &l, bool is_far = false);
785 void bgtz(Register Rs, Label &l, bool is_far = false);
786
// Two-register branches to a Label.
787 void beq (Register Rs1, Register Rs2, Label &L, bool is_far = false);
788 void bne (Register Rs1, Register Rs2, Label &L, bool is_far = false);
789 void blt (Register Rs1, Register Rs2, Label &L, bool is_far = false);
790 void bge (Register Rs1, Register Rs2, Label &L, bool is_far = false);
791 void bltu(Register Rs1, Register Rs2, Label &L, bool is_far = false);
792 void bgeu(Register Rs1, Register Rs2, Label &L, bool is_far = false);
793
// Greater-than / less-or-equal branches to a raw address (no is_far).
// NOTE(review): presumably built from blt/bge with swapped operands -
// confirm against the definitions.
794 void bgt (Register Rs, Register Rt, const address dest);
795 void ble (Register Rs, Register Rt, const address dest);
796 void bgtu(Register Rs, Register Rt, const address dest);
797 void bleu(Register Rs, Register Rt, const address dest);
798
// Label counterparts of the four address-based branches above.
799 void bgt (Register Rs, Register Rt, Label &l, bool is_far = false);
800 void ble (Register Rs, Register Rt, Label &l, bool is_far = false);
801 void bgtu(Register Rs, Register Rt, Label &l, bool is_far = false);
802 void bleu(Register Rs, Register Rt, Label &l, bool is_far = false);
803
// INSN_ENTRY_RELOC opens the definition of a relocating overload: it emits
// `result_type header {`, guarantees that the `rtype` parameter named in
// `header` is relocInfo::internal_word_type, records the relocation spec of
// InternalAddress(dest) and opens an IncompressibleScope. The macro user
// supplies the remaining body and the closing brace, and `header` must
// declare parameters named `dest` and `rtype`.
804 #define INSN_ENTRY_RELOC(result_type, header) \
805 result_type header { \
806 guarantee(rtype == relocInfo::internal_word_type, \
807 "only internal_word_type relocs make sense here"); \
808 relocate(InternalAddress(dest).rspec()); \
809 IncompressibleScope scope(this); /* relocations */
810
// INSN(NAME) generates two address-based overloads of a conditional branch:
//  - NAME(Rs1, Rs2, dest): asserts dest is non-null, computes the byte
//    offset dest - pc(), guarantees that it passes is_simm13() and is even,
//    then emits Assembler::NAME with that offset.
//  - NAME(Rs1, Rs2, dest, rtype): the relocating form built from
//    INSN_ENTRY_RELOC, which then forwards to the overload above.
811 #define INSN(NAME) \
812 void NAME(Register Rs1, Register Rs2, const address dest) { \
813 assert_cond(dest != nullptr); \
814 int64_t offset = dest - pc(); \
815 guarantee(is_simm13(offset) && is_even(offset), \
816 "offset is invalid: is_simm_13: %s offset: " INT64_FORMAT, \
817 BOOL_TO_STR(is_simm13(offset)), offset); \
818 Assembler::NAME(Rs1, Rs2, offset); \
819 } \
820 INSN_ENTRY_RELOC(void, NAME(Register Rs1, Register Rs2, address dest, relocInfo::relocType rtype)) \
821 NAME(Rs1, Rs2, dest); \
822 }
823
824 INSN(beq);
825 INSN(bne);
826 INSN(bge);
827 INSN(bgeu);
828 INSN(blt);
829 INSN(bltu);
830
831 #undef INSN
832
833 #undef INSN_ENTRY_RELOC
834
// Single-precision compare-and-branch helpers (declarations only; defined
// elsewhere). is_far and is_unordered both default to false.
// NOTE(review): is_unordered presumably decides whether the branch is taken
// when an operand is NaN - confirm against the definitions.
835 void float_beq(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
836 void float_bne(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
837 void float_ble(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
838 void float_bge(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
839 void float_blt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
840 void float_bgt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
841
// Double-precision counterparts with the same parameter list.
842 void double_beq(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
843 void double_bne(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
844 void double_ble(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
845 void double_bge(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
846 void double_blt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
847 void double_bgt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
848
849 private:
850 // The signed 20-bit upper imm can materialize at most negative 0xF...F80000000, two G.
851 // The following signed 12-bit imm can at max subtract 0x800, two K, from that previously loaded two G.
852 bool is_valid_32bit_offset(int64_t x) {
853 constexpr int64_t twoG = (2 * G);
854 constexpr int64_t twoK = (2 * K);
855 return x < (twoG - twoK) && x >= (-twoG - twoK);
856 }
857
858 // Ensure that the auipc can reach the destination at x from anywhere within
859 // the code cache so that if it is relocated we know it will still reach.
860 bool is_32bit_offset_from_codecache(int64_t x) {
861 int64_t low = (int64_t)CodeCache::low_bound();
862 int64_t high = (int64_t)CodeCache::high_bound();
863 return is_valid_32bit_offset(x - low) && is_valid_32bit_offset(x - high);
864 }
865
866 public:
867 // Stack push and pop individual 64 bit registers
868 void push_reg(Register Rs);
869 void pop_reg(Register Rd);
870
// Register-set variants; `stack` names the register used as the stack
// pointer for the transfer.
// NOTE(review): the int result is presumably a count of what was pushed or
// popped - confirm the unit against the definitions.
871 int push_reg(RegSet regset, Register stack);
872 int pop_reg(RegSet regset, Register stack);
873
// Same shape for floating-point register sets.
874 int push_fp(FloatRegSet regset, Register stack);
875 int pop_fp(FloatRegSet regset, Register stack);
876
// Vector register sets; only declared when COMPILER2 is defined.
877 #ifdef COMPILER2
878 int push_v(VectorRegSet regset, Register stack);
879 int pop_v(VectorRegSet regset, Register stack);
880 #endif // COMPILER2
881
882 // Push and pop everything that might be clobbered by a native
883 // runtime call except t0 and t1. (They are always
884 // temporary registers, so we don't have to protect them.)
885 // Additional registers can be excluded in a passed RegSet.
886 void push_call_clobbered_registers_except(RegSet exclude);
887 void pop_call_clobbered_registers_except(RegSet exclude);
888
889 void push_call_clobbered_registers() {
890 push_call_clobbered_registers_except(RegSet());
891 }
892 void pop_call_clobbered_registers() {
893 pop_call_clobbered_registers_except(RegSet());
894 }
895
// Save / restore of CPU state (declarations only). Vector handling is off
// by default: save_vectors / restore_vectors default to false and
// vector_size_in_bytes to 0.
896 void push_CPU_state(bool save_vectors = false, int vector_size_in_bytes = 0);
897 void pop_CPU_state(bool restore_vectors = false, int vector_size_in_bytes = 0);
898
// Continuation fast-path bookkeeping; the thread register defaults to
// xthread.
899 void push_cont_fastpath(Register java_thread = xthread);
900 void pop_cont_fastpath(Register java_thread = xthread);
901
902 // if heap base register is used - reinit it with the correct value
903 void reinit_heapbase();
904
905 void bind(Label& L) {
906 Assembler::bind(L);
907 // fences across basic blocks should not be merged
908 code()->clear_last_merge_candidate();
909 }
910
// Pointer-to-member types accepted by wrap_label():
//  - compare_and_branch_insn:       (Rs1, Rs2, address) branch emitters
//  - compare_and_branch_label_insn: (Rs1, Rs2, Label&, is_far) branch emitters
//  - jal_jalr_insn:                 (Register, address) emitters
911 typedef void (MacroAssembler::* compare_and_branch_insn)(Register Rs1, Register Rs2, const address dest);
912 typedef void (MacroAssembler::* compare_and_branch_label_insn)(Register Rs1, Register Rs2, Label &L, bool is_far);
913 typedef void (MacroAssembler::* jal_jalr_insn)(Register Rt, address dest);
914
// Adapters that let an address-based emitter be used with a Label
// (declarations only).
// NOTE(review): neg_insn is presumably the inverted branch used when
// is_far is set - confirm against the definition.
915 void wrap_label(Register r, Label &L, jal_jalr_insn insn);
916 void wrap_label(Register r1, Register r2, Label &L,
917 compare_and_branch_insn insn,
918 compare_and_branch_label_insn neg_insn, bool is_far = false);
919
// Load-address helpers (declarations only). The overload with an int32_t&
// parameter reports an offset back to the caller through that reference.
// NOTE(review): presumably the low 12-bit part to fold into the following
// instruction, as mv(Rd, addr, offset) does - confirm against the definition.
920 void la(Register Rd, Label &label);
921 void la(Register Rd, const address addr);
922 void la(Register Rd, const address addr, int32_t &offset);
923 void la(Register Rd, const Address &adr);
924
// Load-immediate helpers for 16-bit unsigned, 32-bit and 64-bit immediates.
925 void li16u(Register Rd, uint16_t imm);
926 void li32(Register Rd, int32_t imm);
927 void li (Register Rd, int64_t imm); // optimized load immediate
928
929 // mv
930 void mv(Register Rd, address addr) { li(Rd, (int64_t)addr); }
931 void mv(Register Rd, address addr, int32_t &offset) {
932 // Split address into a lower 12-bit sign-extended offset and the remainder,
933 // so that the offset could be encoded in jalr or load/store instruction.
934 offset = ((int32_t)(int64_t)addr << 20) >> 20;
935 li(Rd, (int64_t)addr - offset);
936 }
937
938 template<typename T, ENABLE_IF(std::is_integral<T>::value)>
939 inline void mv(Register Rd, T o) { li(Rd, (int64_t)o); }
940
941 void mv(Register Rd, RegisterOrConstant src) {
942 if (src.is_register()) {
943 mv(Rd, src.as_register());
944 } else {
945 mv(Rd, src.as_constant());
946 }
947 }
948
949 // Generates a load of a 48-bit constant which can be
950 // patched to any 48-bit constant, i.e. address.
951 // In the common case, supply an additional temp register
952 // to shorten the instruction sequence.
// The overload with an int32_t& parameter reports an offset back to the
// caller; the load/store macros in this class pass that offset to the
// memory instruction they emit next.
953 void movptr(Register Rd, const Address &addr, Register tmp = noreg);
954 void movptr(Register Rd, address addr, Register tmp = noreg);
955 void movptr(Register Rd, address addr, int32_t &offset, Register tmp = noreg);
956
957 private:
// NOTE(review): presumably the two concrete sequences behind movptr(),
// without and with a temp register - confirm against the definitions.
958 void movptr1(Register Rd, uintptr_t addr, int32_t &offset);
959 void movptr2(Register Rd, uintptr_t addr, int32_t &offset, Register tmp);
960 public:
961 // float imm move
// The can_*_imm_load predicates are static queries for half, single and
// double precision values; fli_h / fli_s / fli_d are the matching emitters.
// NOTE(review): presumably the fli_* emitters require the corresponding
// predicate to hold - confirm against the definitions.
962 static bool can_hf_imm_load(short imm);
963 static bool can_fp_imm_load(float imm);
964 static bool can_dp_imm_load(double imm);
965 void fli_h(FloatRegister Rd, short imm);
966 void fli_s(FloatRegister Rd, float imm);
967 void fli_d(FloatRegister Rd, double imm);
968
969 // arith
// Immediate forms taking a full int64_t amount and a scratch register that
// defaults to t0.
// NOTE(review): tmp is presumably only used when the amount does not fit a
// 12-bit immediate - confirm against the definitions.
970 void add (Register Rd, Register Rn, int64_t increment, Register tmp = t0);
971 void sub (Register Rd, Register Rn, int64_t decrement, Register tmp = t0);
972 void addw(Register Rd, Register Rn, int64_t increment, Register tmp = t0);
973 void subw(Register Rd, Register Rn, int64_t decrement, Register tmp = t0);
974
975 void subi(Register Rd, Register Rn, int64_t decrement) {
976 assert(is_simm12(-decrement), "Must be");
977 addi(Rd, Rn, -decrement);
978 }
979
980 void subiw(Register Rd, Register Rn, int64_t decrement) {
981 assert(is_simm12(-decrement), "Must be");
982 addiw(Rd, Rn, -decrement);
983 }
984
// INSN(NAME) defines a three-register overload NAME(Rd, Rs1, Rs2) that
// forwards unchanged to Assembler::NAME. It is instantiated for add, addw,
// sub and subw, the same names this class also declares with an immediate
// operand.
985 #define INSN(NAME) \
986 inline void NAME(Register Rd, Register Rs1, Register Rs2) { \
987 Assembler::NAME(Rd, Rs1, Rs2); \
988 }
989
990 INSN(add);
991 INSN(addw);
992 INSN(sub);
993 INSN(subw);
994
995 #undef INSN
996
997 // logic
// NOTE(review): the "w" suffix presumably marks 32-bit (word) variants of
// and/or/xor - confirm against the definitions.
998 void andrw(Register Rd, Register Rs1, Register Rs2);
999 void orrw(Register Rd, Register Rs1, Register Rs2);
1000 void xorrw(Register Rd, Register Rs1, Register Rs2);
1001
1002 // logic with negate
1003 void andn(Register Rd, Register Rs1, Register Rs2);
1004 void orn(Register Rd, Register Rs1, Register Rs2);
1005
1006 // reverse bytes
// Both default their scratch registers to t0 and t1.
1007 void revbw(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in lower word, sign-extend
1008 void revb(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in doubleword
1009
// Rotates, with the amount given in a register or as an immediate; the
// scratch register defaults to t0.
1010 void ror(Register dst, Register src, Register shift, Register tmp = t0);
1011 void ror(Register dst, Register src, uint32_t shift, Register tmp = t0);
1012 void rolw(Register dst, Register src, uint32_t shift, Register tmp = t0);
1013
// NOTE(review): presumably ORs src into the word at adr, using tmp1/tmp2
// (t0/t1 by default) as scratch - confirm against the definition.
1014 void orptr(Address adr, RegisterOrConstant src, Register tmp1 = t0, Register tmp2 = t1);
1015
1016 // Load and Store Instructions
// INSN_ENTRY_RELOC opens the definition of a relocating overload: it emits
// `result_type header {`, guarantees that the `rtype` parameter named in
// `header` is relocInfo::internal_word_type, records the relocation spec of
// InternalAddress(dest) and opens an IncompressibleScope. The macro user
// supplies the rest of the body and the closing brace, and `header` must
// declare parameters named `dest` and `rtype`.
1017 #define INSN_ENTRY_RELOC(result_type, header) \
1018 result_type header { \
1019 guarantee(rtype == relocInfo::internal_word_type, \
1020 "only internal_word_type relocs make sense here"); \
1021 relocate(InternalAddress(dest).rspec()); \
1022 IncompressibleScope scope(this); /* relocations */
1023
// INSN(NAME) generates four overloads of an integer load:
//  - NAME(Rd, dest): dest must be non-null. If CodeCache::contains(dest),
//    the load is pc-relative: auipc into Rd followed by the load from Rd.
//    0x800 is added before the upper part is taken and the low 12 bits are
//    sign-extended by the shift pair; the distance is asserted to pass
//    is_valid_32bit_offset(). Otherwise movptr() materializes the address
//    in Rd and hands back the offset to use in the load.
//  - NAME(Rd, dest, rtype): relocating form built from INSN_ENTRY_RELOC,
//    forwarding to the overload above.
//  - NAME(Rd, adr, temp): a literal Address is relocated with adr.rspec()
//    and loaded from adr.target(). For base_plus_offset, an offset passing
//    is_simm12() is encoded directly; otherwise the offset is split and the
//    upper part is formed with la(). That uses temp only when Rd is also the
//    base register, and Rd itself as the scratch otherwise. Any other mode
//    hits ShouldNotReachHere().
//  - NAME(Rd, L): routes the address overload through wrap_label().
1024 #define INSN(NAME) \
1025 void NAME(Register Rd, address dest) { \
1026 assert_cond(dest != nullptr); \
1027 if (CodeCache::contains(dest)) { \
1028 int64_t distance = dest - pc(); \
1029 assert(is_valid_32bit_offset(distance), "Must be"); \
1030 auipc(Rd, (int32_t)distance + 0x800); \
1031 Assembler::NAME(Rd, Rd, ((int32_t)distance << 20) >> 20); \
1032 } else { \
1033 int32_t offset = 0; \
1034 movptr(Rd, dest, offset); \
1035 Assembler::NAME(Rd, Rd, offset); \
1036 } \
1037 } \
1038 INSN_ENTRY_RELOC(void, NAME(Register Rd, address dest, relocInfo::relocType rtype)) \
1039 NAME(Rd, dest); \
1040 } \
1041 void NAME(Register Rd, const Address &adr, Register temp = t0) { \
1042 switch (adr.getMode()) { \
1043 case Address::literal: { \
1044 relocate(adr.rspec(), [&] { \
1045 NAME(Rd, adr.target()); \
1046 }); \
1047 break; \
1048 } \
1049 case Address::base_plus_offset: { \
1050 if (is_simm12(adr.offset())) { \
1051 Assembler::NAME(Rd, adr.base(), adr.offset()); \
1052 } else { \
1053 int32_t offset = ((int32_t)adr.offset() << 20) >> 20; \
1054 if (Rd == adr.base()) { \
1055 la(temp, Address(adr.base(), adr.offset() - offset)); \
1056 Assembler::NAME(Rd, temp, offset); \
1057 } else { \
1058 la(Rd, Address(adr.base(), adr.offset() - offset)); \
1059 Assembler::NAME(Rd, Rd, offset); \
1060 } \
1061 } \
1062 break; \
1063 } \
1064 default: \
1065 ShouldNotReachHere(); \
1066 } \
1067 } \
1068 void NAME(Register Rd, Label &L) { \
1069 wrap_label(Rd, L, &MacroAssembler::NAME); \
1070 }
1071
1072 INSN(lb);
1073 INSN(lbu);
1074 INSN(lh);
1075 INSN(lhu);
1076 INSN(lw);
1077 INSN(lwu);
1078 INSN(ld);
1079
1080 #undef INSN
1081
// INSN(NAME) generates three overloads of a floating-point load. The
// destination is a FloatRegister and cannot hold an address, so every
// address computation goes through the integer register temp (t0 by default):
//  - NAME(Rd, dest, temp): dest must be non-null. Inside the code cache the
//    address is formed pc-relatively (auipc into temp, then the load with
//    the sign-extended low 12 bits); otherwise movptr() fills temp and
//    returns the offset for the load.
//  - NAME(Rd, dest, rtype, temp): relocating form built from
//    INSN_ENTRY_RELOC, forwarding to the overload above.
//  - NAME(Rd, adr, temp): a literal Address is relocated with adr.rspec()
//    and loaded from adr.target(). For base_plus_offset, an offset passing
//    is_simm12() is encoded directly; otherwise the upper part is formed in
//    temp with la(). Any other mode hits ShouldNotReachHere().
1082 #define INSN(NAME) \
1083 void NAME(FloatRegister Rd, address dest, Register temp = t0) { \
1084 assert_cond(dest != nullptr); \
1085 if (CodeCache::contains(dest)) { \
1086 int64_t distance = dest - pc(); \
1087 assert(is_valid_32bit_offset(distance), "Must be"); \
1088 auipc(temp, (int32_t)distance + 0x800); \
1089 Assembler::NAME(Rd, temp, ((int32_t)distance << 20) >> 20); \
1090 } else { \
1091 int32_t offset = 0; \
1092 movptr(temp, dest, offset); \
1093 Assembler::NAME(Rd, temp, offset); \
1094 } \
1095 } \
1096 INSN_ENTRY_RELOC(void, NAME(FloatRegister Rd, address dest, \
1097 relocInfo::relocType rtype, Register temp = t0)) \
1098 NAME(Rd, dest, temp); \
1099 } \
1100 void NAME(FloatRegister Rd, const Address &adr, Register temp = t0) { \
1101 switch (adr.getMode()) { \
1102 case Address::literal: { \
1103 relocate(adr.rspec(), [&] { \
1104 NAME(Rd, adr.target(), temp); \
1105 }); \
1106 break; \
1107 } \
1108 case Address::base_plus_offset: { \
1109 if (is_simm12(adr.offset())) { \
1110 Assembler::NAME(Rd, adr.base(), adr.offset()); \
1111 } else { \
1112 int32_t offset = ((int32_t)adr.offset() << 20) >> 20; \
1113 la(temp, Address(adr.base(), adr.offset() - offset)); \
1114 Assembler::NAME(Rd, temp, offset); \
1115 } \
1116 break; \
1117 } \
1118 default: \
1119 ShouldNotReachHere(); \
1120 } \
1121 }
1122
1123 INSN(flh);
1124 INSN(flw);
1125 INSN(fld);
1126
1127 #undef INSN
1128
// INSN(NAME, REGISTER) generates the relocating store overload
// NAME(Rs, dest, rtype, temp) for a source register of type REGISTER. The
// prologue comes from INSN_ENTRY_RELOC and the body forwards to the
// NAME(Rs, dest, temp) overload, which the later INSN(NAME) macros in this
// class define.
1129 #define INSN(NAME, REGISTER) \
1130 INSN_ENTRY_RELOC(void, NAME(REGISTER Rs, address dest, \
1131 relocInfo::relocType rtype, Register temp = t0)) \
1132 NAME(Rs, dest, temp); \
1133 }
1134
1135 INSN(sb, Register);
1136 INSN(sh, Register);
1137 INSN(sw, Register);
1138 INSN(sd, Register);
1139 INSN(fsw, FloatRegister);
1140 INSN(fsd, FloatRegister);
1141
1142 #undef INSN
1143
// INSN(NAME) generates two overloads of an integer store. Whenever the
// address has to be formed in temp (t0 by default), Rs and temp are asserted
// to be different registers, since temp is overwritten before Rs is stored:
//  - NAME(Rs, dest, temp): dest must be non-null. Inside the code cache the
//    address is formed pc-relatively (auipc into temp, then the store with
//    the sign-extended low 12 bits); otherwise movptr() fills temp and
//    returns the offset for the store.
//  - NAME(Rs, adr, temp): a literal Address is relocated with adr.rspec()
//    and stored to adr.target(). For base_plus_offset, an offset passing
//    is_simm12() is encoded directly without touching temp; otherwise the
//    upper part is formed in temp with la(). Any other mode hits
//    ShouldNotReachHere().
1144 #define INSN(NAME) \
1145 void NAME(Register Rs, address dest, Register temp = t0) { \
1146 assert_cond(dest != nullptr); \
1147 assert_different_registers(Rs, temp); \
1148 if (CodeCache::contains(dest)) { \
1149 int64_t distance = dest - pc(); \
1150 assert(is_valid_32bit_offset(distance), "Must be"); \
1151 auipc(temp, (int32_t)distance + 0x800); \
1152 Assembler::NAME(Rs, temp, ((int32_t)distance << 20) >> 20); \
1153 } else { \
1154 int32_t offset = 0; \
1155 movptr(temp, dest, offset); \
1156 Assembler::NAME(Rs, temp, offset); \
1157 } \
1158 } \
1159 void NAME(Register Rs, const Address &adr, Register temp = t0) { \
1160 switch (adr.getMode()) { \
1161 case Address::literal: { \
1162 assert_different_registers(Rs, temp); \
1163 relocate(adr.rspec(), [&] { \
1164 NAME(Rs, adr.target(), temp); \
1165 }); \
1166 break; \
1167 } \
1168 case Address::base_plus_offset: { \
1169 if (is_simm12(adr.offset())) { \
1170 Assembler::NAME(Rs, adr.base(), adr.offset()); \
1171 } else { \
1172 assert_different_registers(Rs, temp); \
1173 int32_t offset = ((int32_t)adr.offset() << 20) >> 20; \
1174 la(temp, Address(adr.base(), adr.offset() - offset)); \
1175 Assembler::NAME(Rs, temp, offset); \
1176 } \
1177 break; \
1178 } \
1179 default: \
1180 ShouldNotReachHere(); \
1181 } \
1182 }
1183
1184 INSN(sb);
1185 INSN(sh);
1186 INSN(sw);
1187 INSN(sd);
1188
1189 #undef INSN
1190
// INSN(NAME) generates two overloads of a floating-point store. Address
// computation goes through the integer register temp (t0 by default):
//  - NAME(Rs, dest, temp): dest must be non-null. Inside the code cache the
//    address is formed pc-relatively (auipc into temp, then the store with
//    the sign-extended low 12 bits); otherwise movptr() fills temp and
//    returns the offset for the store.
//  - NAME(Rs, adr, temp): a literal Address is relocated with adr.rspec()
//    and stored to adr.target(). For base_plus_offset, an offset passing
//    is_simm12() is encoded directly; otherwise the upper part is formed in
//    temp with la(). Any other mode hits ShouldNotReachHere().
1191 #define INSN(NAME) \
1192 void NAME(FloatRegister Rs, address dest, Register temp = t0) { \
1193 assert_cond(dest != nullptr); \
1194 if (CodeCache::contains(dest)) { \
1195 int64_t distance = dest - pc(); \
1196 assert(is_valid_32bit_offset(distance), "Must be"); \
1197 auipc(temp, (int32_t)distance + 0x800); \
1198 Assembler::NAME(Rs, temp, ((int32_t)distance << 20) >> 20); \
1199 } else { \
1200 int32_t offset = 0; \
1201 movptr(temp, dest, offset); \
1202 Assembler::NAME(Rs, temp, offset); \
1203 } \
1204 } \
1205 void NAME(FloatRegister Rs, const Address &adr, Register temp = t0) { \
1206 switch (adr.getMode()) { \
1207 case Address::literal: { \
1208 relocate(adr.rspec(), [&] { \
1209 NAME(Rs, adr.target(), temp); \
1210 }); \
1211 break; \
1212 } \
1213 case Address::base_plus_offset: { \
1214 if (is_simm12(adr.offset())) { \
1215 Assembler::NAME(Rs, adr.base(), adr.offset()); \
1216 } else { \
1217 int32_t offset = ((int32_t)adr.offset() << 20) >> 20; \
1218 la(temp, Address(adr.base(), adr.offset() - offset)); \
1219 Assembler::NAME(Rs, temp, offset); \
1220 } \
1221 break; \
1222 } \
1223 default: \
1224 ShouldNotReachHere(); \
1225 } \
1226 }
1227
1228 INSN(fsw);
1229 INSN(fsd);
1230
1231 #undef INSN
1232
// End of the load/store macro family: the relocation prologue helper is no
// longer needed past this point.
1233 #undef INSN_ENTRY_RELOC
1234
// Compare-and-exchange on the memory at addr (declarations only). size
// selects the operand width and acquire/release the ordering bits.
// result_as_bool defaults to false.
// NOTE(review): result presumably receives either the old value or a
// success flag depending on result_as_bool - confirm against the definition.
1235 void cmpxchg(Register addr, Register expected,
1236 Register new_val,
1237 Assembler::operand_size size,
1238 Assembler::Aqrl acquire, Assembler::Aqrl release,
1239 Register result, bool result_as_bool = false);
// Weak variant with the same operands but no result_as_bool.
// NOTE(review): presumably allowed to fail spuriously - confirm.
1240 void weak_cmpxchg(Register addr, Register expected,
1241 Register new_val,
1242 Assembler::operand_size size,
1243 Assembler::Aqrl acquire, Assembler::Aqrl release,
1244 Register result);
// Helpers for operand sizes that need shift/mask handling on an aligned
// address; the *_narrow_value forms take three extra temp registers.
// NOTE(review): presumably for sub-word (byte/halfword) CAS - confirm.
1245 void cmpxchg_narrow_value_helper(Register addr, Register expected, Register new_val,
1246 Assembler::operand_size size,
1247 Register shift, Register mask, Register aligned_addr);
1248 void cmpxchg_narrow_value(Register addr, Register expected,
1249 Register new_val,
1250 Assembler::operand_size size,
1251 Assembler::Aqrl acquire, Assembler::Aqrl release,
1252 Register result, bool result_as_bool,
1253 Register tmp1, Register tmp2, Register tmp3);
1254 void weak_cmpxchg_narrow_value(Register addr, Register expected,
1255 Register new_val,
1256 Assembler::operand_size size,
1257 Assembler::Aqrl acquire, Assembler::Aqrl release,
1258 Register result,
1259 Register tmp1, Register tmp2, Register tmp3);
1260
// Atomic add; incr may be a register or a constant.
// NOTE(review): suffixes presumably mean w = 32-bit, al = acquire+release,
// u = zero-extended result - confirm against the definitions.
1261 void atomic_add(Register prev, RegisterOrConstant incr, Register addr);
1262 void atomic_addw(Register prev, RegisterOrConstant incr, Register addr);
1263 void atomic_addal(Register prev, RegisterOrConstant incr, Register addr);
1264 void atomic_addalw(Register prev, RegisterOrConstant incr, Register addr);
1265
// Atomic exchange of newv with the memory at addr.
1266 void atomic_xchg(Register prev, Register newv, Register addr);
1267 void atomic_xchgw(Register prev, Register newv, Register addr);
1268 void atomic_xchgal(Register prev, Register newv, Register addr);
1269 void atomic_xchgalw(Register prev, Register newv, Register addr);
1270 void atomic_xchgwu(Register prev, Register newv, Register addr);
1271 void atomic_xchgalwu(Register prev, Register newv, Register addr);
1272
// Atomic compare-and-swap; both ordering arguments default to
// Assembler::relaxed.
1273 void atomic_cas(Register prev, Register newv, Register addr, Assembler::operand_size size,
1274 Assembler::Aqrl acquire = Assembler::relaxed, Assembler::Aqrl release = Assembler::relaxed);
1275
1276 // Emit a far call/jump. Only invalidates the tmp register which
1277 // is used to keep the entry address for jalr.
1278 // The address must be inside the code cache.
1279 // Supported entry.rspec():
1280 // - relocInfo::external_word_type
1281 // - relocInfo::runtime_call_type
1282 // - relocInfo::none
1283 // Clobbers tmp, which is t1 by default.
1284 void far_call(const Address &entry, Register tmp = t1);
1285 void far_jump(const Address &entry, Register tmp = t1);
1286
1287 static int far_branch_size() {
1288 return 2 * MacroAssembler::instruction_size; // auipc + jalr, see far_call() & far_jump()
1289 }
1290
// Declaration only.
// NOTE(review): presumably loads the card table's byte map base into reg -
// confirm against the definition.
1291 void load_byte_map_base(Register reg);
1292
1293 void bang_stack_with_offset(int offset) {
1294 // stack grows down, caller passes positive offset
1295 assert(offset > 0, "must bang with negative offset");
1296 sub(t0, sp, offset);
1297 sd(zr, Address(t0));
1298 }
1299
1300 virtual void _call_Unimplemented(address call_site) {
1301 mv(t1, call_site);
1302 }
1303
1304 #define call_Unimplemented() _call_Unimplemented((address)__PRETTY_FUNCTION__)
1305
1306 // Frame creation and destruction shared between JITs.
// framesize is passed to both calls.
// NOTE(review): presumably in bytes and expected to match between
// build_frame() and remove_frame() - confirm against the definitions.
1307 void build_frame(int framesize);
1308 void remove_frame(int framesize);
1309
// NOTE(review): sp_inc is presumably a stack-pointer adjustment applied at
// the verified entry - confirm against the definition.
1310 void verified_entry(Compile* C, int sp_inc);
1311
1312 void reserved_stack_check();
1313
// Safepoint polling page access; rtype is the relocation type to record.
1314 void get_polling_page(Register dest, relocInfo::relocType rtype);
1315 void read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype);
1316
1317 // RISCV64 OpenJDK uses three different types of calls:
1318 //
1319 // - far call: auipc reg, pc_relative_offset; jalr ra, reg, offset
1320 // The offset has the range [-(2G + 2K), 2G - 2K). Addresses in the code
1321 // cache that are out of this range require an indirect call.
1322 // If a jump is needed rather than a call, a far jump 'jalr x0, reg, offset'
1323 // can be used instead.
1324 // All instructions are embedded at a call site.
1325 //
1326 // - indirect call: movptr + jalr
1327 // This can reach anywhere in the address space, but it cannot be patched
1328 // while code is running, so it must only be modified at a safepoint.
1329 // This form of call is most suitable for targets at fixed addresses,
1330 // which will never be patched.
1331 //
1332 // - reloc call:
1333 // This too can reach anywhere in the address space but is only available
1334 // in C1/C2-generated code (nmethod).
1335 //
1336 // [Main code section]
1337 // auipc
1338 // ld <address_from_stub_section>
1339 // jalr
1340 //
1341 // [Stub section]
1342 // address stub:
1343 // <64-bit destination address>
1344 //
1345 // To change the destination we simply atomically store the new
1346 // address in the stub section.
1347 // There is a benign race in that the other thread might observe the old
1348 // 64-bit destination address before it observes the new address. That does
1349 // not matter because the destination method has been invalidated, so there
1350 // will be a trap at its start.
1351
1352 // Emit a reloc call and create a stub to hold the entry point address.
1353 // Supported entry.rspec():
1354 // - relocInfo::runtime_call_type
1355 // - relocInfo::opt_virtual_call_type
1356 // - relocInfo::static_call_type
1357 // - relocInfo::virtual_call_type
1358 //
1359 // Return: the call PC or nullptr if CodeCache is full.
// tmp defaults to t1.
1360 address reloc_call(Address entry, Register tmp = t1);
1361
// Inline-cache call and check helpers (declarations only).
// NOTE(review): ic_call presumably shares reloc_call's nullptr-on-full
// contract, and ic_check presumably returns a code offset - confirm
// against the definitions.
1362 address ic_call(address entry, jint method_index = 0);
1363 static int ic_check_size();
1364 int ic_check(int end_alignment = MacroAssembler::instruction_size);
1365
1366 // Support for memory inc/dec
1367 // n.b. increment/decrement calls with an Address destination will
1368 // need to use a scratch register to load the value to be
1369 // incremented. increment/decrement calls which add or subtract a
1370 // constant value other than sign-extended 12-bit immediate will need
1371 // to use a 2nd scratch register to hold the constant. so, an address
1372 // increment/decrement may trash both t0 and t1.
// The scratch registers can be overridden through tmp1/tmp2; value
// defaults to 1. The "w" variants take a 32-bit value.
1373
1374 void increment(const Address dst, int64_t value = 1, Register tmp1 = t0, Register tmp2 = t1);
1375 void incrementw(const Address dst, int32_t value = 1, Register tmp1 = t0, Register tmp2 = t1);
1376
1377 void decrement(const Address dst, int64_t value = 1, Register tmp1 = t0, Register tmp2 = t1);
1378 void decrementw(const Address dst, int32_t value = 1, Register tmp1 = t0, Register tmp2 = t1);
1379
// Class-initialization barrier for klass. Both labels are optional and
// default to nullptr.
// NOTE(review): presumably a null label means "fall through" for that
// outcome - confirm against the definition.
1380 void clinit_barrier(Register klass, Register tmp, Label* L_fast_path = nullptr, Label* L_slow_path = nullptr);
1381
// Loads derived from a Method* held in a register (declarations only).
1382 void load_method_holder_cld(Register result, Register method);
1383 void load_method_holder(Register holder, Register method);
1384 void load_metadata(Register dst, Register src);
1385
// NOTE(review): these look like string-search (indexOf) helpers judging by
// haystack_isL and match_mask - confirm against the callers.
1386 void compute_index(Register str1, Register trailing_zeros, Register match_mask,
1387 Register result, Register char_tmp, Register tmp,
1388 bool haystack_isL);
1389 void compute_match_mask(Register src, Register pattern, Register match_mask,
1390 Register mask1, Register mask2);
1391
1392 // CRC32 code for java.util.zip.CRC32::updateBytes() intrinsic.
// crc/buf/len are the running checksum, the input buffer and its length;
// table0..table3 and tmp1..tmp6 are working registers.
1393 void kernel_crc32(Register crc, Register buf, Register len,
1394 Register table0, Register table1, Register table2, Register table3,
1395 Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5, Register tmp6);
// NOTE(review): `upper` presumably selects the upper half of v -
// confirm against the definition.
1396 void update_word_crc32(Register crc, Register v, Register tmp1, Register tmp2, Register tmp3,
1397 Register table0, Register table1, Register table2, Register table3,
1398 bool upper);
1399 void update_byte_crc32(Register crc, Register val, Register table);
1400
// Everything up to the matching #endif is only declared when COMPILER2 is
// defined: vector CRC32 kernels and the multi-word multiply helpers.
1401 #ifdef COMPILER2
1402 void vector_update_crc32(Register crc, Register buf, Register len,
1403 Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5,
1404 Register table0, Register table3);
// NOTE(review): the *_vectorsize_16 / *_vectorsize_32 variants are
// presumably specialised for 16- and 32-byte vector registers - confirm.
1405 void kernel_crc32_vclmul_fold(Register crc, Register buf, Register len,
1406 Register table0, Register table1, Register table2, Register table3,
1407 Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5);
1408 void crc32_vclmul_fold_to_16_bytes_vectorsize_32(VectorRegister vx, VectorRegister vy, VectorRegister vt,
1409 VectorRegister vtmp1, VectorRegister vtmp2, VectorRegister vtmp3, VectorRegister vtmp4);
1410 void kernel_crc32_vclmul_fold_vectorsize_32(Register crc, Register buf, Register len,
1411 Register vclmul_table, Register tmp1, Register tmp2);
1412 void crc32_vclmul_fold_16_bytes_vectorsize_16(VectorRegister vx, VectorRegister vt,
1413 VectorRegister vtmp1, VectorRegister vtmp2, VectorRegister vtmp3, VectorRegister vtmp4,
1414 Register buf, Register tmp, const int STEP);
1415 void crc32_vclmul_fold_16_bytes_vectorsize_16_2(VectorRegister vx, VectorRegister vy, VectorRegister vt,
1416 VectorRegister vtmp1, VectorRegister vtmp2, VectorRegister vtmp3, VectorRegister vtmp4,
1417 Register tmp);
1418 void crc32_vclmul_fold_16_bytes_vectorsize_16_3(VectorRegister vx, VectorRegister vy, VectorRegister vt,
1419 VectorRegister vtmp1, VectorRegister vtmp2, VectorRegister vtmp3, VectorRegister vtmp4,
1420 Register tmp);
1421 void kernel_crc32_vclmul_fold_vectorsize_16(Register crc, Register buf, Register len,
1422 Register vclmul_table, Register tmp1, Register tmp2);
1423
// Multi-precision arithmetic helpers (declarations only).
// NOTE(review): presumably the building blocks of the BigInteger
// multiplyToLen / mulAdd intrinsics - confirm against the callers.
1424 void mul_add(Register out, Register in, Register offset,
1425 Register len, Register k, Register tmp);
1426 void wide_mul(Register prod_lo, Register prod_hi, Register n, Register m);
1427 void wide_madd(Register sum_lo, Register sum_hi, Register n,
1428 Register m, Register tmp1, Register tmp2);
1429 void cad(Register dst, Register src1, Register src2, Register carry);
1430 void cadc(Register dst, Register src1, Register src2, Register carry);
1431 void adc(Register dst, Register src1, Register src2, Register carry);
1432 void add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo,
1433 Register src1, Register src2, Register carry);
1434 void multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart,
1435 Register y, Register y_idx, Register z,
1436 Register carry, Register product,
1437 Register idx, Register kdx);
1438 void multiply_128_x_128_loop(Register y, Register z,
1439 Register carry, Register carry2,
1440 Register idx, Register jdx,
1441 Register yz_idx1, Register yz_idx2,
1442 Register tmp, Register tmp3, Register tmp4,
1443 Register tmp6, Register product_hi);
1444 void multiply_to_len(Register x, Register xlen, Register y, Register ylen,
1445 Register z, Register tmp0,
1446 Register tmp1, Register tmp2, Register tmp3, Register tmp4,
1447 Register tmp5, Register tmp6, Register product_hi);
1448
1449 #endif // COMPILER2
1450
// NOTE(review): presumably widen the low / high 32 bits of Rs into Rd -
// confirm the exact layout against the definitions. Scratch registers
// default to t0 and t1.
1451 void inflate_lo32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);
1452 void inflate_hi32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);
1453
// isLL defaults to false; scratch registers default to t0 and t1.
1454 void ctzc_bits(Register Rd, Register Rs, bool isLL = false,
1455 Register tmp1 = t0, Register tmp2 = t1);
1456
// Memory zeroing / filling helpers. zero_words comes in a constant-count
// form and a register-count form; the latter returns an address.
// NOTE(review): presumably nullptr on failure, like reloc_call - confirm.
1457 void zero_words(Register base, uint64_t cnt);
1458 address zero_words(Register ptr, Register cnt);
1459 void fill_words(Register base, Register cnt, Register value);
1460 void zero_memory(Register addr, Register len, Register tmp);
1461 void zero_dcache_blocks(Register base, Register cnt, Register tmp1, Register tmp2);
1462
1463 // shift left by shamt and add
1464 void shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt);
1465
1466 // test single bit in Rs, result is set to Rd
1467 void test_bit(Register Rd, Register Rs, uint32_t bit_pos);
1468
1469 // The "_safe" float conversion helpers below handle exceptional inputs.
1470 // E.g. converting NaN, +Inf or -Inf to int, float or double
1471 // will trigger an exception, so these situations need to be dealt with
1472 // to get correct results.
1473 void fcvt_w_s_safe(Register dst, FloatRegister src, Register tmp = t0);
1474 void fcvt_l_s_safe(Register dst, FloatRegister src, Register tmp = t0);
1475 void fcvt_w_d_safe(Register dst, FloatRegister src, Register tmp = t0);
1476 void fcvt_l_d_safe(Register dst, FloatRegister src, Register tmp = t0);
1477
// NOTE(review): presumably implement Math.round semantics for float and
// double - confirm against the definitions.
1478 void java_round_float(Register dst, FloatRegister src, FloatRegister ftmp);
1479 void java_round_double(Register dst, FloatRegister src, FloatRegister ftmp);
1480
1481 // Helper routine processing the slow path of NaN when converting float to float16
1482 void float_to_float16_NaN(Register dst, FloatRegister src, Register tmp1, Register tmp2);
1483
1484 // vector load/store unit-stride instructions
1485 void vlex_v(VectorRegister vd, Register base, Assembler::SEW sew, VectorMask vm = unmasked) {
1486 switch (sew) {
1487 case Assembler::e64:
1488 vle64_v(vd, base, vm);
1489 break;
1490 case Assembler::e32:
1491 vle32_v(vd, base, vm);
1492 break;
1493 case Assembler::e16:
1494 vle16_v(vd, base, vm);
1495 break;
1496 case Assembler::e8: // fall through
1497 default:
1498 vle8_v(vd, base, vm);
1499 break;
1500 }
1501 }
1502
1503 void vsex_v(VectorRegister store_data, Register base, Assembler::SEW sew, VectorMask vm = unmasked) {
1504 switch (sew) {
1505 case Assembler::e64:
1506 vse64_v(store_data, base, vm);
1507 break;
1508 case Assembler::e32:
1509 vse32_v(store_data, base, vm);
1510 break;
1511 case Assembler::e16:
1512 vse16_v(store_data, base, vm);
1513 break;
1514 case Assembler::e8: // fall through
1515 default:
1516 vse8_v(store_data, base, vm);
1517 break;
1518 }
1519 }
1520
1521 // vector pseudo instructions
1522 // rotate vector register left with shift bits, 32-bit version
1523 inline void vrole32_vi(VectorRegister vd, uint32_t shift, VectorRegister tmp_vr) {
1524 vsrl_vi(tmp_vr, vd, 32 - shift);
1525 vsll_vi(vd, vd, shift);
1526 vor_vv(vd, vd, tmp_vr);
1527 }
1528
1529 inline void vl1r_v(VectorRegister vd, Register rs) {
1530 vl1re8_v(vd, rs);
1531 }
1532
1533 inline void vmnot_m(VectorRegister vd, VectorRegister vs) {
1534 vmnand_mm(vd, vs, vs);
1535 }
1536
1537 inline void vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked) {
1538 vnsrl_wx(vd, vs, x0, vm);
1539 }
1540
1541 inline void vneg_v(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked) {
1542 vrsub_vx(vd, vs, x0, vm);
1543 }
1544
1545 inline void vfneg_v(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked) {
1546 vfsgnjn_vv(vd, vs, vs, vm);
1547 }
1548
1549 inline void vfabs_v(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked) {
1550 vfsgnjx_vv(vd, vs, vs, vm);
1551 }
1552
1553 inline void vmsgt_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
1554 vmslt_vv(vd, vs1, vs2, vm);
1555 }
1556
1557 inline void vmsgtu_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
1558 vmsltu_vv(vd, vs1, vs2, vm);
1559 }
1560
1561 inline void vmsge_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
1562 vmsle_vv(vd, vs1, vs2, vm);
1563 }
1564
1565 inline void vmsgeu_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
1566 vmsleu_vv(vd, vs1, vs2, vm);
1567 }
1568
// Pseudo-instruction vmfgt.vv: produced by vmflt_vv with the two source
// operands exchanged. vm is passed through unchanged and defaults to unmasked.
inline void vmfgt_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
  vmflt_vv(vd, vs1, vs2, vm);
}
1572
// Pseudo-instruction vmfge.vv: produced by vmfle_vv with the two source
// operands exchanged. vm is passed through unchanged and defaults to unmasked.
inline void vmfge_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
  vmfle_vv(vd, vs1, vs2, vm);
}
1576
1577 inline void vmsltu_vi(VectorRegister Vd, VectorRegister Vs2, uint32_t imm, VectorMask vm = unmasked) {
1578 guarantee(imm >= 1 && imm <= 16, "imm is invalid");
1579 vmsleu_vi(Vd, Vs2, imm-1, vm);
1580 }
1581
1582 inline void vmsgeu_vi(VectorRegister Vd, VectorRegister Vs2, uint32_t imm, VectorMask vm = unmasked) {
1583 guarantee(imm >= 1 && imm <= 16, "imm is invalid");
1584 vmsgtu_vi(Vd, Vs2, imm-1, vm);
1585 }
1586
// Copy mask register vs into vd (pseudo-instruction vmmv.m).
// Emitted as vmand_mm with vs supplied as both source operands.
inline void vmmv_m(VectorRegister vd, VectorRegister vs) {
  vmand_mm(vd, vs, vs);
}
1591
// Clear mask register vd (pseudo-instruction vmclr.m).
// Emitted as vmxor_mm of vd with itself.
inline void vmclr_m(VectorRegister vd) {
  vmxor_mm(vd, vd, vd);
}
1596
// Set mask register vd (pseudo-instruction vmset.m).
// Emitted as vmxnor_mm of vd with itself.
inline void vmset_m(VectorRegister vd) {
  vmxnor_mm(vd, vd, vd);
}
1601
// Pseudo-instruction vnot.v: emitted as vxor_vi with an immediate of -1.
// vm is passed through unchanged and defaults to unmasked.
inline void vnot_v(VectorRegister Vd, VectorRegister Vs, VectorMask vm = unmasked) {
  vxor_vi(Vd, Vs, -1, vm);
}
1605
// Declaration only: no initializer is given here, so the value comes from the
// out-of-class definition.
// NOTE(review): presumably the block size, in words, used by the zero_words
// code — confirm at the definition.
static const int zero_words_block_size;
1607
1608 void cast_primitive_type(BasicType type, Register Rt) {
1609 switch (type) {
1610 case T_BOOLEAN:
1611 sltu(Rt, zr, Rt);
1612 break;
1613 case T_CHAR :
1614 zext(Rt, Rt, 16);
1615 break;
1616 case T_BYTE :
1617 sext(Rt, Rt, 8);
1618 break;
1619 case T_SHORT :
1620 sext(Rt, Rt, 16);
1621 break;
1622 case T_INT :
1623 sext(Rt, Rt, 32);
1624 break;
1625 case T_LONG : /* nothing to do */ break;
1626 case T_VOID : /* nothing to do */ break;
1627 case T_FLOAT : /* nothing to do */ break;
1628 case T_DOUBLE : /* nothing to do */ break;
1629 default: ShouldNotReachHere();
1630 }
1631 }
1632
// float cmp with unordered_result
// Both functions share one signature: result is a general-purpose register,
// Rs1 and Rs2 are the floating-point operands.
// NOTE(review): unordered_result presumably selects the outcome reported when
// the operands are unordered (a NaN is involved) — confirm in the definition.
void float_compare(Register result, FloatRegister Rs1, FloatRegister Rs2, int unordered_result);
void double_compare(Register result, FloatRegister Rs1, FloatRegister Rs2, int unordered_result);
1636
// Zero/Sign-extend
// Each takes a destination register, a source register and a bit count.
// cast_primitive_type() calls them with dst == src and bits of 8, 16 or 32.
// NOTE(review): bits is presumably the width of the value being extended —
// confirm the accepted range in the definition.
void zext(Register dst, Register src, int bits);
void sext(Register dst, Register src, int bits);
1640
private:
// Private helper with the same operands as the public cmp_* entry points plus
// an is_signed flag (default: true). Unlike them, tmp has no default here.
// NOTE(review): presumably the shared implementation behind the public
// variants — confirm in the .cpp file.
void cmp_x2i(Register dst, Register src1, Register src2, Register tmp, bool is_signed = true);

public:
// compare src1 and src2 and get -1/0/1 in dst.
// if [src1 > src2], dst = 1;
// if [src1 == src2], dst = 0;
// if [src1 < src2], dst = -1;
// tmp defaults to t0.
// NOTE(review): the l / ul / uw suffixes presumably stand for signed 64-bit,
// unsigned 64-bit and unsigned 32-bit operands — confirm in the definitions.
void cmp_l2i(Register dst, Register src1, Register src2, Register tmp = t0);
void cmp_ul2i(Register dst, Register src1, Register src2, Register tmp = t0);
void cmp_uw2i(Register dst, Register src1, Register src2, Register tmp = t0);
1652
// support for argument shuffling
// Each *_move takes a source and a destination VMRegPair; tmp defaults to t0.
// NOTE(review): src/dst may presumably name either registers or stack slots,
// with tmp used when a value has to pass through a register — confirm in the
// definitions.
void move32_64(VMRegPair src, VMRegPair dst, Register tmp = t0);
void float_move(VMRegPair src, VMRegPair dst, Register tmp = t0);
void long_move(VMRegPair src, VMRegPair dst, Register tmp = t0);
void double_move(VMRegPair src, VMRegPair dst, Register tmp = t0);
// object_move additionally takes the OopMap, an oop handle offset, the frame
// size in slots, an is_receiver flag and a receiver_offset out-pointer.
// NOTE(review): receiver_offset is presumably written only when is_receiver is
// true — confirm in the definition.
void object_move(OopMap* map,
                 int oop_handle_offset,
                 int framesize_in_slots,
                 VMRegPair src,
                 VMRegPair dst,
                 bool is_receiver,
                 int* receiver_offset);
1665
#ifdef ASSERT
// Convenience templates for cleaning up after trampoline call generation has
// failed (see trampoline_call()): every label passed in has reset() called on
// it, in argument order, before the caller returns.
// Only available when ASSERT is defined.

// Base case: a single label.
template<typename L>
void reset_labels(L& only) {
  only.reset();
}

// General case: reset the first label, then recurse on the remainder.
template<typename L, typename... Rest>
void reset_labels(L& first, Rest&... rest) {
  first.reset();
  reset_labels(rest...);
}
#endif
1679
private:

// NOTE(review): presumably scans 'count' entries starting at addr for 'value',
// using tmp as scratch — confirm in the definition.
void repne_scan(Register addr, Register value, Register count, Register tmp);

// Takes a register bit set and a caller-provided byte array; returns an int.
// NOTE(review): presumably fills regs with the register numbers present in
// bitset and returns how many were written — confirm in the definition.
int bitset_to_regs(unsigned int bitset, unsigned char* regs);
// NOTE(review): presumably turns dst into an Address usable by a memory
// instruction, materializing it via tmp when needed — confirm in the definition.
Address add_memory_helper(const Address dst, Register tmp);

// Load-reserved / store-conditional helpers parameterized by operand size and
// by an Assembler::Aqrl ordering argument (named acquire and release here).
void load_reserved(Register dst, Register addr, Assembler::operand_size size, Assembler::Aqrl acquire);
void store_conditional(Register dst, Register new_val, Register addr, Assembler::operand_size size, Assembler::Aqrl release);
1689
public:
// Fast-path lock / unlock entry points. obj is the object register, tmp1..tmp3
// are temporaries; fast_lock additionally takes basic_lock.
// NOTE(review): 'slow' is presumably the label branched to when the fast path
// cannot complete — confirm in the definitions.
void fast_lock(Register basic_lock, Register obj, Register tmp1, Register tmp2, Register tmp3, Label& slow);
void fast_unlock(Register obj, Register tmp1, Register tmp2, Register tmp3, Label& slow);
1693
public:
// Sizes of fixed-length instruction sequences, each expressed as a multiple of
// MacroAssembler::instruction_size.
enum {
  // movptr
  movptr1_instruction_size = 6 * MacroAssembler::instruction_size, // six instructions: lui, addi, slli, addi, slli, addi. See movptr1().
  movptr2_instruction_size = 5 * MacroAssembler::instruction_size, // five instructions: lui, lui, slli, add, addi. See movptr2().
  load_pc_relative_instruction_size = 2 * MacroAssembler::instruction_size // two instructions: auipc, ld
};
1701
// Sequence recognizers; declared here and defined out of line.
// NOTE(review): presumably match the auipc + load and lui + srli sequences
// described at check_load_pc_relative_data_dependency() and
// check_li16u_data_dependency() — confirm in the definitions.
static bool is_load_pc_relative_at(address branch);
static bool is_li16u_at(address instr);
1704
1705 static bool is_jal_at(address instr) { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b1101111; }
1706 static bool is_jalr_at(address instr) { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b1100111 && extract_funct3(instr) == 0b000; }
1707 static bool is_branch_at(address instr) { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b1100011; }
1708 static bool is_ld_at(address instr) { assert_cond(instr != nullptr); return is_load_at(instr) && extract_funct3(instr) == 0b011; }
1709 static bool is_load_at(address instr) { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b0000011; }
1710 static bool is_float_load_at(address instr) { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b0000111; }
1711 static bool is_auipc_at(address instr) { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b0010111; }
1712 static bool is_jump_at(address instr) { assert_cond(instr != nullptr); return is_branch_at(instr) || is_jal_at(instr) || is_jalr_at(instr); }
1713 static bool is_add_at(address instr) { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b0110011 && extract_funct3(instr) == 0b000; }
1714 static bool is_addi_at(address instr) { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b0010011 && extract_funct3(instr) == 0b000; }
1715 static bool is_addiw_at(address instr) { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b0011011 && extract_funct3(instr) == 0b000; }
1716 static bool is_addiw_to_zr_at(address instr){ assert_cond(instr != nullptr); return is_addiw_at(instr) && extract_rd(instr) == zr; }
1717 static bool is_lui_at(address instr) { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b0110111; }
1718 static bool is_lui_to_zr_at(address instr) { assert_cond(instr != nullptr); return is_lui_at(instr) && extract_rd(instr) == zr; }
1719
1720 static bool is_srli_at(address instr) {
1721 assert_cond(instr != nullptr);
1722 return extract_opcode(instr) == 0b0010011 &&
1723 extract_funct3(instr) == 0b101 &&
1724 Assembler::extract(((unsigned*)instr)[0], 31, 26) == 0b000000;
1725 }
1726
1727 static bool is_slli_shift_at(address instr, uint32_t shift) {
1728 assert_cond(instr != nullptr);
1729 return (extract_opcode(instr) == 0b0010011 && // opcode field
1730 extract_funct3(instr) == 0b001 && // funct3 field, select the type of operation
1731 Assembler::extract(Assembler::ld_instr(instr), 25, 20) == shift); // shamt field
1732 }
1733
// Sequence recognizers; declared here and defined out of line.
// NOTE(review): presumably match the movptr1 / movptr2 sequences described at
// check_movptr1_data_dependency() / check_movptr2_data_dependency() — confirm
// in the definitions.
static bool is_movptr1_at(address instr);
static bool is_movptr2_at(address instr);

// NOTE(review): presumably true for an lwu whose destination is zr — confirm
// in the definition.
static bool is_lwu_to_zr(address instr);

// Field extractors for the instruction at instr; defined out of line. They are
// used by the is_*_at() predicates and check_*_data_dependency() helpers of
// this class. The register extractors return a Register, the opcode / funct3
// extractors a uint32_t.
static Register extract_rs1(address instr);
static Register extract_rs2(address instr);
static Register extract_rd(address instr);
static uint32_t extract_opcode(address instr);
static uint32_t extract_funct3(address instr);
1744
1745 // the instruction sequence of movptr is as below:
1746 // lui
1747 // addi
1748 // slli
1749 // addi
1750 // slli
1751 // addi/jalr/load
1752 static bool check_movptr1_data_dependency(address instr) {
1753 address lui = instr;
1754 address addi1 = lui + MacroAssembler::instruction_size;
1755 address slli1 = addi1 + MacroAssembler::instruction_size;
1756 address addi2 = slli1 + MacroAssembler::instruction_size;
1757 address slli2 = addi2 + MacroAssembler::instruction_size;
1758 address last_instr = slli2 + MacroAssembler::instruction_size;
1759 return extract_rs1(addi1) == extract_rd(lui) &&
1760 extract_rs1(addi1) == extract_rd(addi1) &&
1761 extract_rs1(slli1) == extract_rd(addi1) &&
1762 extract_rs1(slli1) == extract_rd(slli1) &&
1763 extract_rs1(addi2) == extract_rd(slli1) &&
1764 extract_rs1(addi2) == extract_rd(addi2) &&
1765 extract_rs1(slli2) == extract_rd(addi2) &&
1766 extract_rs1(slli2) == extract_rd(slli2) &&
1767 extract_rs1(last_instr) == extract_rd(slli2);
1768 }
1769
1770 // the instruction sequence of movptr2 is as below:
1771 // lui
1772 // lui
1773 // slli
1774 // add
1775 // addi/jalr/load
1776 static bool check_movptr2_data_dependency(address instr) {
1777 address lui1 = instr;
1778 address lui2 = lui1 + MacroAssembler::instruction_size;
1779 address slli = lui2 + MacroAssembler::instruction_size;
1780 address add = slli + MacroAssembler::instruction_size;
1781 address last_instr = add + MacroAssembler::instruction_size;
1782 return extract_rd(add) == extract_rd(lui2) &&
1783 extract_rs1(add) == extract_rd(lui2) &&
1784 extract_rs2(add) == extract_rd(slli) &&
1785 extract_rs1(slli) == extract_rd(lui1) &&
1786 extract_rd(slli) == extract_rd(lui1) &&
1787 extract_rs1(last_instr) == extract_rd(add);
1788 }
1789
1790 // the instruction sequence of li16u is as below:
1791 // lui
1792 // srli
1793 static bool check_li16u_data_dependency(address instr) {
1794 address lui = instr;
1795 address srli = lui + MacroAssembler::instruction_size;
1796
1797 return extract_rs1(srli) == extract_rd(lui) &&
1798 extract_rs1(srli) == extract_rd(srli);
1799 }
1800
1801 // the instruction sequence of li32 is as below:
1802 // lui
1803 // addiw
1804 static bool check_li32_data_dependency(address instr) {
1805 address lui = instr;
1806 address addiw = lui + MacroAssembler::instruction_size;
1807
1808 return extract_rs1(addiw) == extract_rd(lui) &&
1809 extract_rs1(addiw) == extract_rd(addiw);
1810 }
1811
1812 // the instruction sequence of pc-relative is as below:
1813 // auipc
1814 // jalr/addi/load/float_load
1815 static bool check_pc_relative_data_dependency(address instr) {
1816 address auipc = instr;
1817 address last_instr = auipc + MacroAssembler::instruction_size;
1818
1819 return extract_rs1(last_instr) == extract_rd(auipc);
1820 }
1821
1822 // the instruction sequence of load_label is as below:
1823 // auipc
1824 // load
1825 static bool check_load_pc_relative_data_dependency(address instr) {
1826 address auipc = instr;
1827 address load = auipc + MacroAssembler::instruction_size;
1828
1829 return extract_rd(load) == extract_rd(auipc) &&
1830 extract_rs1(load) == extract_rd(load);
1831 }
1832
// Sequence recognizers; declared here and defined out of line.
// NOTE(review): presumably match the lui + addiw and auipc-based sequences
// described at check_li32_data_dependency() and
// check_pc_relative_data_dependency() — confirm in the definitions.
static bool is_li32_at(address instr);
static bool is_pc_relative_at(address branch);
1835
1836 static bool is_membar(address addr) {
1837 return (Bytes::get_native_u4(addr) & 0x7f) == 0b1111 && extract_funct3(addr) == 0;
1838 }
// Accessors for the ordering kind of the instruction at addr; defined out of line.
// NOTE(review): addr is presumably expected to satisfy is_membar(), with
// set_membar_kind() patching the instruction in place — confirm in the definitions.
static uint32_t get_membar_kind(address addr);
static void set_membar_kind(address addr, uint32_t order_kind);
1841
1842 public:
1843 // Inline type specific methods
1844 #include "asm/macroAssembler_common.hpp"
1845 };
1846
#ifdef ASSERT
// Platform-dependent hook of AbstractAssembler, compiled only when ASSERT is
// defined. On this platform it unconditionally returns false.
inline bool AbstractAssembler::pd_check_instruction_mark() { return false; }
#endif
1850
1851 #endif // CPU_RISCV_MACROASSEMBLER_RISCV_HPP