1 /*
2 * Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved.
3 * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
4 * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved.
5 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 *
7 * This code is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License version 2 only, as
9 * published by the Free Software Foundation.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 *
25 */
26
27 #ifndef CPU_RISCV_MACROASSEMBLER_RISCV_HPP
28 #define CPU_RISCV_MACROASSEMBLER_RISCV_HPP
29
30 #include "asm/assembler.inline.hpp"
31 #include "code/vmreg.hpp"
32 #include "metaprogramming/enableIf.hpp"
33 #include "nativeInst_riscv.hpp"
34 #include "oops/compressedOops.hpp"
35 #include "utilities/powerOfTwo.hpp"
36
37 // MacroAssembler extends Assembler by frequently used macros.
38 //
39 // Instructions for which a 'better' code sequence exists depending
40 // on arguments should also go in here.
41
42 class MacroAssembler: public Assembler {
43
44 public:
45 MacroAssembler(CodeBuffer* code) : Assembler(code) {}
46
47 void safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod);
48
49 // Alignment
50 int align(int modulus, int extra_offset = 0);
51
52 static inline void assert_alignment(address pc, int alignment = NativeInstruction::instruction_size) {
53 assert(is_aligned(pc, alignment), "bad alignment");
54 }
55
56 // nop
57 void post_call_nop();
58
59 // Stack frame creation/removal
60 // Note that SP must be updated to the right place before saving/restoring RA and FP
  // because signal-based thread suspend/resume could happen asynchronously.
62 void enter() {
63 addi(sp, sp, - 2 * wordSize);
64 sd(ra, Address(sp, wordSize));
65 sd(fp, Address(sp));
66 addi(fp, sp, 2 * wordSize);
67 }
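  // The resulting frame layout (a sketch derived from the code above; wordSize is 8 on RV64):
  //   fp              -> caller's sp on entry
  //   fp - wordSize    : saved ra
  //   fp - 2*wordSize  : saved fp    <- sp after enter()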
68
69 void leave() {
70 addi(sp, fp, - 2 * wordSize);
71 ld(fp, Address(sp));
72 ld(ra, Address(sp, wordSize));
73 addi(sp, sp, 2 * wordSize);
74 }
75
76
  // Support for getting the JavaThread pointer (i.e., a reference to thread-local information)
78 // The pointer will be loaded into the thread register.
79 void get_thread(Register thread);
80
81 // Support for VM calls
82 //
83 // It is imperative that all calls into the VM are handled via the call_VM macros.
  // They make sure that the stack linkage is set up correctly. call_VM's correspond
85 // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points.
86
87 void call_VM(Register oop_result,
88 address entry_point,
89 bool check_exceptions = true);
90 void call_VM(Register oop_result,
91 address entry_point,
92 Register arg_1,
93 bool check_exceptions = true);
94 void call_VM(Register oop_result,
95 address entry_point,
96 Register arg_1, Register arg_2,
97 bool check_exceptions = true);
98 void call_VM(Register oop_result,
99 address entry_point,
100 Register arg_1, Register arg_2, Register arg_3,
101 bool check_exceptions = true);
102
103 // Overloadings with last_Java_sp
104 void call_VM(Register oop_result,
105 Register last_java_sp,
106 address entry_point,
107 int number_of_arguments = 0,
108 bool check_exceptions = true);
109 void call_VM(Register oop_result,
110 Register last_java_sp,
111 address entry_point,
112 Register arg_1,
113 bool check_exceptions = true);
114 void call_VM(Register oop_result,
115 Register last_java_sp,
116 address entry_point,
117 Register arg_1, Register arg_2,
118 bool check_exceptions = true);
119 void call_VM(Register oop_result,
120 Register last_java_sp,
121 address entry_point,
122 Register arg_1, Register arg_2, Register arg_3,
123 bool check_exceptions = true);
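  // A minimal usage sketch (illustrative only; the entry point and argument register are
  // merely examples of the kind of thing that gets passed here):
  //   call_VM(noreg,
  //           CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_ClassCastException),
  //           c_rarg1);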
124
125 void get_vm_result(Register oop_result, Register java_thread);
126 void get_vm_result_2(Register metadata_result, Register java_thread);
127
128 // These always tightly bind to MacroAssembler::call_VM_leaf_base
129 // bypassing the virtual implementation
130 void call_VM_leaf(address entry_point,
131 int number_of_arguments = 0);
132 void call_VM_leaf(address entry_point,
133 Register arg_0);
134 void call_VM_leaf(address entry_point,
135 Register arg_0, Register arg_1);
136 void call_VM_leaf(address entry_point,
137 Register arg_0, Register arg_1, Register arg_2);
138
139 // These always tightly bind to MacroAssembler::call_VM_base
140 // bypassing the virtual implementation
141 void super_call_VM_leaf(address entry_point, Register arg_0);
142 void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1);
143 void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2);
144 void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3);
145
146 // last Java Frame (fills frame anchor)
147 void set_last_Java_frame(Register last_java_sp, Register last_java_fp, address last_java_pc, Register tmp);
148 void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Label &last_java_pc, Register tmp);
149 void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Register last_java_pc, Register tmp);
150
151 // thread in the default location (xthread)
152 void reset_last_Java_frame(bool clear_fp);
153
154 virtual void call_VM_leaf_base(
155 address entry_point, // the entry point
156 int number_of_arguments, // the number of arguments to pop after the call
157 Label* retaddr = nullptr
158 );
159
160 virtual void call_VM_leaf_base(
161 address entry_point, // the entry point
162 int number_of_arguments, // the number of arguments to pop after the call
163 Label& retaddr) {
164 call_VM_leaf_base(entry_point, number_of_arguments, &retaddr);
165 }
166
167 virtual void call_VM_base( // returns the register containing the thread upon return
168 Register oop_result, // where an oop-result ends up if any; use noreg otherwise
169 Register java_thread, // the thread if computed before ; use noreg otherwise
170 Register last_java_sp, // to set up last_Java_frame in stubs; use noreg otherwise
171 address entry_point, // the entry point
172 int number_of_arguments, // the number of arguments (w/o thread) to pop after the call
173 bool check_exceptions // whether to check for pending exceptions after return
174 );
175
176 void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions);
177
178 virtual void check_and_handle_earlyret(Register java_thread);
179 virtual void check_and_handle_popframe(Register java_thread);
180
181 void resolve_weak_handle(Register result, Register tmp1, Register tmp2);
182 void resolve_oop_handle(Register result, Register tmp1, Register tmp2);
183 void resolve_jobject(Register value, Register tmp1, Register tmp2);
184 void resolve_global_jobject(Register value, Register tmp1, Register tmp2);
185
186 void movoop(Register dst, jobject obj);
187 void mov_metadata(Register dst, Metadata* obj);
188 void bang_stack_size(Register size, Register tmp);
189 void set_narrow_oop(Register dst, jobject obj);
190 void set_narrow_klass(Register dst, Klass* k);
191
192 void load_mirror(Register dst, Register method, Register tmp1, Register tmp2);
193 void access_load_at(BasicType type, DecoratorSet decorators, Register dst,
194 Address src, Register tmp1, Register tmp2);
195 void access_store_at(BasicType type, DecoratorSet decorators, Address dst,
196 Register val, Register tmp1, Register tmp2, Register tmp3);
197 void load_klass(Register dst, Register src, Register tmp = t0);
198 void store_klass(Register dst, Register src, Register tmp = t0);
199 void cmp_klass(Register oop, Register trial_klass, Register tmp1, Register tmp2, Label &L);
200
201 void encode_klass_not_null(Register r, Register tmp = t0);
202 void decode_klass_not_null(Register r, Register tmp = t0);
203 void encode_klass_not_null(Register dst, Register src, Register tmp);
204 void decode_klass_not_null(Register dst, Register src, Register tmp);
205 void decode_heap_oop_not_null(Register r);
206 void decode_heap_oop_not_null(Register dst, Register src);
207 void decode_heap_oop(Register d, Register s);
208 void decode_heap_oop(Register r) { decode_heap_oop(r, r); }
209 void encode_heap_oop(Register d, Register s);
210 void encode_heap_oop(Register r) { encode_heap_oop(r, r); };
211 void load_heap_oop(Register dst, Address src, Register tmp1,
212 Register tmp2, DecoratorSet decorators = 0);
213 void load_heap_oop_not_null(Register dst, Address src, Register tmp1,
214 Register tmp2, DecoratorSet decorators = 0);
215 void store_heap_oop(Address dst, Register val, Register tmp1,
216 Register tmp2, Register tmp3, DecoratorSet decorators = 0);
217
218 void store_klass_gap(Register dst, Register src);
219
220 // currently unimplemented
221 // Used for storing null. All other oop constants should be
222 // stored using routines that take a jobject.
223 void store_heap_oop_null(Address dst);
224
225 // This dummy is to prevent a call to store_heap_oop from
  // converting a zero (like null) into a Register by giving
227 // the compiler two choices it can't resolve
228
229 void store_heap_oop(Address dst, void* dummy);
230
231 // Support for null-checks
232 //
233 // Generates code that causes a null OS exception if the content of reg is null.
234 // If the accessed location is M[reg + offset] and the offset is known, provide the
  // offset. No explicit code generation is needed if the offset is within a certain
236 // range (0 <= offset <= page_size).
237
238 virtual void null_check(Register reg, int offset = -1);
239 static bool needs_explicit_null_check(intptr_t offset);
240 static bool uses_implicit_null_check(void* address);
241
242 // idiv variant which deals with MINLONG as dividend and -1 as divisor
243 int corrected_idivl(Register result, Register rs1, Register rs2,
244 bool want_remainder);
245 int corrected_idivq(Register result, Register rs1, Register rs2,
246 bool want_remainder);
247
248 // interface method calling
249 void lookup_interface_method(Register recv_klass,
250 Register intf_klass,
251 RegisterOrConstant itable_index,
252 Register method_result,
253 Register scan_tmp,
254 Label& no_such_interface,
255 bool return_method = true);
256
257 // virtual method calling
  // n.b. x86 allows RegisterOrConstant for vtable_index
259 void lookup_virtual_method(Register recv_klass,
260 RegisterOrConstant vtable_index,
261 Register method_result);
262
  // Form an address from base + offset in Rd. Rd may or may not
264 // actually be used: you must use the Address that is returned. It
265 // is up to you to ensure that the shift provided matches the size
266 // of your data.
267 Address form_address(Register Rd, Register base, int64_t byte_offset);
268
269 // Sometimes we get misaligned loads and stores, usually from Unsafe
270 // accesses, and these can exceed the offset range.
271 Address legitimize_address(Register Rd, const Address &adr) {
272 if (adr.getMode() == Address::base_plus_offset) {
273 if (!is_simm12(adr.offset())) {
274 return form_address(Rd, adr.base(), adr.offset());
275 }
276 }
277 return adr;
278 }
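  // For example, an Unsafe access such as Address(x28, 1 << 20) has an offset that does
  // not fit in a signed 12-bit immediate; legitimize_address() folds base + offset into
  // Rd via form_address() and returns an Address the plain load/store encodings can use.
  // (The register x28 and the offset shown are illustrative only.)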
279
280 // allocation
281 void tlab_allocate(
282 Register obj, // result: pointer to object after successful allocation
283 Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise
284 int con_size_in_bytes, // object size in bytes if known at compile time
285 Register tmp1, // temp register
286 Register tmp2, // temp register
    Label&   slow_case,                // continuation point if fast allocation fails
288 bool is_far = false
289 );
290
291 // Test sub_klass against super_klass, with fast and slow paths.
292
293 // The fast path produces a tri-state answer: yes / no / maybe-slow.
294 // One of the three labels can be null, meaning take the fall-through.
295 // If super_check_offset is -1, the value is loaded up from super_klass.
296 // No registers are killed, except tmp_reg
297 void check_klass_subtype_fast_path(Register sub_klass,
298 Register super_klass,
299 Register tmp_reg,
300 Label* L_success,
301 Label* L_failure,
302 Label* L_slow_path,
303 Register super_check_offset = noreg);
304
  // The rest of the type check; must be wired to a corresponding fast path.
306 // It does not repeat the fast path logic, so don't use it standalone.
307 // The tmp1_reg and tmp2_reg can be noreg, if no temps are available.
308 // Updates the sub's secondary super cache as necessary.
309 void check_klass_subtype_slow_path(Register sub_klass,
310 Register super_klass,
311 Register tmp1_reg,
312 Register tmp2_reg,
313 Label* L_success,
314 Label* L_failure);
315
316 void check_klass_subtype(Register sub_klass,
317 Register super_klass,
318 Register tmp_reg,
319 Label& L_success);
320
321 Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0);
322
323 // only if +VerifyOops
324 void _verify_oop(Register reg, const char* s, const char* file, int line);
325 void _verify_oop_addr(Address addr, const char* s, const char* file, int line);
326
327 void _verify_oop_checked(Register reg, const char* s, const char* file, int line) {
328 if (VerifyOops) {
329 _verify_oop(reg, s, file, line);
330 }
331 }
332 void _verify_oop_addr_checked(Address reg, const char* s, const char* file, int line) {
333 if (VerifyOops) {
334 _verify_oop_addr(reg, s, file, line);
335 }
336 }
337
338 void _verify_method_ptr(Register reg, const char* msg, const char* file, int line) {}
339 void _verify_klass_ptr(Register reg, const char* msg, const char* file, int line) {}
340
341 #define verify_oop(reg) _verify_oop_checked(reg, "broken oop " #reg, __FILE__, __LINE__)
342 #define verify_oop_msg(reg, msg) _verify_oop_checked(reg, "broken oop " #reg ", " #msg, __FILE__, __LINE__)
343 #define verify_oop_addr(addr) _verify_oop_addr_checked(addr, "broken oop addr " #addr, __FILE__, __LINE__)
344 #define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__)
345 #define verify_klass_ptr(reg) _verify_method_ptr(reg, "broken klass " #reg, __FILE__, __LINE__)
346
  // A more convenient access to fence for our purposes.
  // We use four bits to encode the read and write bits of the predecessor and successor,
  // and extend i for r, o for w when UseConservativeFence is enabled.
350 enum Membar_mask_bits {
351 StoreStore = 0b0101, // (pred = ow + succ = ow)
352 LoadStore = 0b1001, // (pred = ir + succ = ow)
353 StoreLoad = 0b0110, // (pred = ow + succ = ir)
354 LoadLoad = 0b1010, // (pred = ir + succ = ir)
355 AnyAny = LoadStore | StoreLoad // (pred = iorw + succ = iorw)
356 };
357
358 void membar(uint32_t order_constraint);
359
360 static void membar_mask_to_pred_succ(uint32_t order_constraint,
361 uint32_t& predecessor, uint32_t& successor) {
362 predecessor = (order_constraint >> 2) & 0x3;
363 successor = order_constraint & 0x3;
364
365 // extend rw -> iorw:
366 // 01(w) -> 0101(ow)
367 // 10(r) -> 1010(ir)
368 // 11(rw)-> 1111(iorw)
369 if (UseConservativeFence) {
370 predecessor |= predecessor << 2;
371 successor |= successor << 2;
372 }
373 }
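  // A worked example of the mapping above: membar(StoreLoad) passes 0b0110, so
  // predecessor = 0b01 (w) and successor = 0b10 (r); with UseConservativeFence these
  // widen to 0b0101 (ow) and 0b1010 (ir), i.e. roughly 'fence ow, ir'.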
374
375 static int pred_succ_to_membar_mask(uint32_t predecessor, uint32_t successor) {
376 return ((predecessor & 0x3) << 2) | (successor & 0x3);
377 }
378
379 void pause() {
380 fence(w, 0);
381 }
382
383 // prints msg, dumps registers and stops execution
384 void stop(const char* msg);
385
386 static void debug64(char* msg, int64_t pc, int64_t regs[]);
387
388 void unimplemented(const char* what = "");
389
390 void should_not_reach_here() { stop("should not reach here"); }
391
392 static address target_addr_for_insn(address insn_addr);
393
394 // Required platform-specific helpers for Label::patch_instructions.
395 // They _shadow_ the declarations in AbstractAssembler, which are undefined.
396 static int pd_patch_instruction_size(address branch, address target);
397 static void pd_patch_instruction(address branch, address target, const char* file = nullptr, int line = 0) {
398 pd_patch_instruction_size(branch, target);
399 }
400 static address pd_call_destination(address branch) {
401 return target_addr_for_insn(branch);
402 }
403
404 static int patch_oop(address insn_addr, address o);
405
406 static address get_target_of_li32(address insn_addr);
407 static int patch_imm_in_li32(address branch, int32_t target);
408
409 // Return whether code is emitted to a scratch blob.
410 virtual bool in_scratch_emit_size() {
411 return false;
412 }
413
414 address emit_trampoline_stub(int insts_call_instruction_offset, address target);
415 static int max_trampoline_stub_size();
416 void emit_static_call_stub();
417 static int static_call_stub_size();
418
419 // The following 4 methods return the offset of the appropriate move instruction
420
421 // Support for fast byte/short loading with zero extension (depending on particular CPU)
422 int load_unsigned_byte(Register dst, Address src);
423 int load_unsigned_short(Register dst, Address src);
424
425 // Support for fast byte/short loading with sign extension (depending on particular CPU)
426 int load_signed_byte(Register dst, Address src);
427 int load_signed_short(Register dst, Address src);
428
429 // Load and store values by size and signed-ness
430 void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed);
431 void store_sized_value(Address dst, Register src, size_t size_in_bytes);
432
  // Misaligned loads: the best access sequence is chosen according to the AvoidUnalignedAccess flag
434 void load_short_misaligned(Register dst, Address src, Register tmp, bool is_signed, int granularity = 1);
435 void load_int_misaligned(Register dst, Address src, Register tmp, bool is_signed, int granularity = 1);
436 void load_long_misaligned(Register dst, Address src, Register tmp, int granularity = 1);
437
438 public:
439 // Standard pseudo instructions
440 inline void nop() {
441 addi(x0, x0, 0);
442 }
443
444 inline void mv(Register Rd, Register Rs) {
445 if (Rd != Rs) {
446 addi(Rd, Rs, 0);
447 }
448 }
449
450 inline void notr(Register Rd, Register Rs) {
451 xori(Rd, Rs, -1);
452 }
453
454 inline void neg(Register Rd, Register Rs) {
455 sub(Rd, x0, Rs);
456 }
457
458 inline void negw(Register Rd, Register Rs) {
459 subw(Rd, x0, Rs);
460 }
461
462 inline void sext_w(Register Rd, Register Rs) {
463 addiw(Rd, Rs, 0);
464 }
465
466 inline void zext_b(Register Rd, Register Rs) {
467 andi(Rd, Rs, 0xFF);
468 }
469
470 inline void seqz(Register Rd, Register Rs) {
471 sltiu(Rd, Rs, 1);
472 }
473
474 inline void snez(Register Rd, Register Rs) {
475 sltu(Rd, x0, Rs);
476 }
477
478 inline void sltz(Register Rd, Register Rs) {
479 slt(Rd, Rs, x0);
480 }
481
482 inline void sgtz(Register Rd, Register Rs) {
483 slt(Rd, x0, Rs);
484 }
485
486 // Bit-manipulation extension pseudo instructions
487 // zero extend word
488 inline void zext_w(Register Rd, Register Rs) {
489 add_uw(Rd, Rs, zr);
490 }
491
492 // Floating-point data-processing pseudo instructions
493 inline void fmv_s(FloatRegister Rd, FloatRegister Rs) {
494 if (Rd != Rs) {
495 fsgnj_s(Rd, Rs, Rs);
496 }
497 }
498
499 inline void fabs_s(FloatRegister Rd, FloatRegister Rs) {
500 fsgnjx_s(Rd, Rs, Rs);
501 }
502
503 inline void fneg_s(FloatRegister Rd, FloatRegister Rs) {
504 fsgnjn_s(Rd, Rs, Rs);
505 }
506
507 inline void fmv_d(FloatRegister Rd, FloatRegister Rs) {
508 if (Rd != Rs) {
509 fsgnj_d(Rd, Rs, Rs);
510 }
511 }
512
513 inline void fabs_d(FloatRegister Rd, FloatRegister Rs) {
514 fsgnjx_d(Rd, Rs, Rs);
515 }
516
517 inline void fneg_d(FloatRegister Rd, FloatRegister Rs) {
518 fsgnjn_d(Rd, Rs, Rs);
519 }
520
521 // Control and status pseudo instructions
522 void rdinstret(Register Rd); // read instruction-retired counter
523 void rdcycle(Register Rd); // read cycle counter
524 void rdtime(Register Rd); // read time
525 void csrr(Register Rd, unsigned csr); // read csr
526 void csrw(unsigned csr, Register Rs); // write csr
527 void csrs(unsigned csr, Register Rs); // set bits in csr
528 void csrc(unsigned csr, Register Rs); // clear bits in csr
529 void csrwi(unsigned csr, unsigned imm);
530 void csrsi(unsigned csr, unsigned imm);
531 void csrci(unsigned csr, unsigned imm);
  void frcsr(Register Rd);                       // read floating-point csr
  void fscsr(Register Rd, Register Rs);          // swap floating-point csr
  void fscsr(Register Rs);                       // write floating-point csr
  void frrm(Register Rd);                        // read floating-point rounding mode
  void fsrm(Register Rd, Register Rs);           // swap floating-point rounding mode
  void fsrm(Register Rs);                        // write floating-point rounding mode
  void fsrmi(Register Rd, unsigned imm);
  void fsrmi(unsigned imm);
  void frflags(Register Rd);                     // read floating-point exception flags
  void fsflags(Register Rd, Register Rs);        // swap floating-point exception flags
  void fsflags(Register Rs);                     // write floating-point exception flags
543 void fsflagsi(Register Rd, unsigned imm);
544 void fsflagsi(unsigned imm);
545
546 // Control transfer pseudo instructions
547 void beqz(Register Rs, const address dest);
548 void bnez(Register Rs, const address dest);
549 void blez(Register Rs, const address dest);
550 void bgez(Register Rs, const address dest);
551 void bltz(Register Rs, const address dest);
552 void bgtz(Register Rs, const address dest);
553
554 void j(Label &l, Register temp = t0);
555 void j(const address dest, Register temp = t0);
556 void j(const Address &adr, Register temp = t0);
557 void jal(Label &l, Register temp = t0);
558 void jal(const address dest, Register temp = t0);
559 void jal(const Address &adr, Register temp = t0);
560 void jal(Register Rd, Label &L, Register temp = t0);
561 void jal(Register Rd, const address dest, Register temp = t0);
562
  // label variants of the conditional branches above
564 void beqz(Register Rs, Label &l, bool is_far = false);
565 void bnez(Register Rs, Label &l, bool is_far = false);
566 void blez(Register Rs, Label &l, bool is_far = false);
567 void bgez(Register Rs, Label &l, bool is_far = false);
568 void bltz(Register Rs, Label &l, bool is_far = false);
569 void bgtz(Register Rs, Label &l, bool is_far = false);
570
571 void beq (Register Rs1, Register Rs2, Label &L, bool is_far = false);
572 void bne (Register Rs1, Register Rs2, Label &L, bool is_far = false);
573 void blt (Register Rs1, Register Rs2, Label &L, bool is_far = false);
574 void bge (Register Rs1, Register Rs2, Label &L, bool is_far = false);
575 void bltu(Register Rs1, Register Rs2, Label &L, bool is_far = false);
576 void bgeu(Register Rs1, Register Rs2, Label &L, bool is_far = false);
577
578 void bgt (Register Rs, Register Rt, const address dest);
579 void ble (Register Rs, Register Rt, const address dest);
580 void bgtu(Register Rs, Register Rt, const address dest);
581 void bleu(Register Rs, Register Rt, const address dest);
582
583 void bgt (Register Rs, Register Rt, Label &l, bool is_far = false);
584 void ble (Register Rs, Register Rt, Label &l, bool is_far = false);
585 void bgtu(Register Rs, Register Rt, Label &l, bool is_far = false);
586 void bleu(Register Rs, Register Rt, Label &l, bool is_far = false);
587
588 #define INSN_ENTRY_RELOC(result_type, header) \
589 result_type header { \
590 guarantee(rtype == relocInfo::internal_word_type, \
591 "only internal_word_type relocs make sense here"); \
592 relocate(InternalAddress(dest).rspec()); \
593 IncompressibleRegion ir(this); /* relocations */
594
595 #define INSN(NAME) \
596 void NAME(Register Rs1, Register Rs2, const address dest) { \
597 assert_cond(dest != nullptr); \
598 int64_t offset = dest - pc(); \
599 guarantee(is_simm13(offset) && is_even(offset), \
600 "offset is invalid: is_simm_13: %s offset: " INT64_FORMAT, \
601 BOOL_TO_STR(is_simm13(offset)), offset); \
602 Assembler::NAME(Rs1, Rs2, offset); \
603 } \
604 INSN_ENTRY_RELOC(void, NAME(Register Rs1, Register Rs2, address dest, relocInfo::relocType rtype)) \
605 NAME(Rs1, Rs2, dest); \
606 }
607
608 INSN(beq);
609 INSN(bne);
610 INSN(bge);
611 INSN(bgeu);
612 INSN(blt);
613 INSN(bltu);
614
615 #undef INSN
616
617 #undef INSN_ENTRY_RELOC
618
619 void float_beq(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
620 void float_bne(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
621 void float_ble(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
622 void float_bge(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
623 void float_blt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
624 void float_bgt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
625
626 void double_beq(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
627 void double_bne(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
628 void double_ble(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
629 void double_bge(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
630 void double_blt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
631 void double_bgt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
632
633 private:
634 int push_reg(unsigned int bitset, Register stack);
635 int pop_reg(unsigned int bitset, Register stack);
636 int push_fp(unsigned int bitset, Register stack);
637 int pop_fp(unsigned int bitset, Register stack);
638 #ifdef COMPILER2
639 int push_v(unsigned int bitset, Register stack);
640 int pop_v(unsigned int bitset, Register stack);
641 #endif // COMPILER2
642
  // The signed 20-bit upper immediate (auipc) can materialize at most negative 0xF...F80000000,
  // i.e. minus two G. The following signed 12-bit immediate can subtract at most another 0x800
  // (two K) from that, which gives the lower bound of -(2G + 2K); by the same reasoning the
  // largest reachable positive offset is 2G - 2K - 1, hence the bounds checked below.
645 bool is_valid_32bit_offset(int64_t x) {
646 constexpr int64_t twoG = (2 * G);
647 constexpr int64_t twoK = (2 * K);
648 return x < (twoG - twoK) && x >= (-twoG - twoK);
649 }
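  // For example, x = 2G - 2K - 1 is accepted: auipc contributes at most 2G - 4K and the
  // 12-bit immediate covers the remaining 2K - 1. x = 2G - 2K is rejected because the
  // required low part (0x800) no longer fits in a signed 12-bit immediate.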
650
651 public:
652 void push_reg(Register Rs);
653 void pop_reg(Register Rd);
654 void push_reg(RegSet regs, Register stack) { if (regs.bits()) push_reg(regs.bits(), stack); }
655 void pop_reg(RegSet regs, Register stack) { if (regs.bits()) pop_reg(regs.bits(), stack); }
656 void push_fp(FloatRegSet regs, Register stack) { if (regs.bits()) push_fp(regs.bits(), stack); }
657 void pop_fp(FloatRegSet regs, Register stack) { if (regs.bits()) pop_fp(regs.bits(), stack); }
658 #ifdef COMPILER2
659 void push_v(VectorRegSet regs, Register stack) { if (regs.bits()) push_v(regs.bits(), stack); }
660 void pop_v(VectorRegSet regs, Register stack) { if (regs.bits()) pop_v(regs.bits(), stack); }
661 #endif // COMPILER2
662
663 // Push and pop everything that might be clobbered by a native
664 // runtime call except t0 and t1. (They are always
665 // temporary registers, so we don't have to protect them.)
666 // Additional registers can be excluded in a passed RegSet.
667 void push_call_clobbered_registers_except(RegSet exclude);
668 void pop_call_clobbered_registers_except(RegSet exclude);
669
670 void push_call_clobbered_registers() {
671 push_call_clobbered_registers_except(RegSet());
672 }
673 void pop_call_clobbered_registers() {
674 pop_call_clobbered_registers_except(RegSet());
675 }
676
677 void push_CPU_state(bool save_vectors = false, int vector_size_in_bytes = 0);
678 void pop_CPU_state(bool restore_vectors = false, int vector_size_in_bytes = 0);
679
680 void push_cont_fastpath(Register java_thread);
681 void pop_cont_fastpath(Register java_thread);
682
683 // if heap base register is used - reinit it with the correct value
684 void reinit_heapbase();
685
686 void bind(Label& L) {
687 Assembler::bind(L);
688 // fences across basic blocks should not be merged
689 code()->clear_last_insn();
690 }
691
692 typedef void (MacroAssembler::* compare_and_branch_insn)(Register Rs1, Register Rs2, const address dest);
693 typedef void (MacroAssembler::* compare_and_branch_label_insn)(Register Rs1, Register Rs2, Label &L, bool is_far);
694 typedef void (MacroAssembler::* jal_jalr_insn)(Register Rt, address dest);
695 typedef void (MacroAssembler::* load_insn_by_temp)(Register Rt, address dest, Register temp);
696
697 void wrap_label(Register r, Label &L, Register t, load_insn_by_temp insn);
698 void wrap_label(Register r, Label &L, jal_jalr_insn insn);
699 void wrap_label(Register r1, Register r2, Label &L,
700 compare_and_branch_insn insn,
701 compare_and_branch_label_insn neg_insn, bool is_far = false);
702
703 void la(Register Rd, Label &label);
704 void la(Register Rd, const address dest);
705 void la(Register Rd, const Address &adr);
706
707 void li16u(Register Rd, uint16_t imm);
708 void li32(Register Rd, int32_t imm);
709 void li64(Register Rd, int64_t imm);
710 void li (Register Rd, int64_t imm); // optimized load immediate
711
712 // mv
713 void mv(Register Rd, address addr) { li(Rd, (int64_t)addr); }
714 void mv(Register Rd, address addr, int32_t &offset) {
    // Split the address into a lower 12-bit sign-extended offset and the remainder,
    // so that the offset can be encoded in a jalr or load/store instruction.
717 offset = ((int32_t)(int64_t)addr << 20) >> 20;
718 li(Rd, (int64_t)addr - offset);
719 }
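  // A worked example of the split above: if addr ends in 0x801, offset becomes
  // 0x801 - 0x1000 = -0x7ff and Rd is loaded with addr + 0x7ff (low 12 bits zero),
  // so a later jalr/load/store of the form (Rd + offset) reconstructs the exact address.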
720
721 template<typename T, ENABLE_IF(std::is_integral<T>::value)>
722 inline void mv(Register Rd, T o) { li(Rd, (int64_t)o); }
723
724 void mv(Register Rd, Address dest) {
725 assert(dest.getMode() == Address::literal, "Address mode should be Address::literal");
726 relocate(dest.rspec(), [&] {
727 movptr(Rd, dest.target());
728 });
729 }
730
731 void mv(Register Rd, RegisterOrConstant src) {
732 if (src.is_register()) {
733 mv(Rd, src.as_register());
734 } else {
735 mv(Rd, src.as_constant());
736 }
737 }
738
739 void movptr(Register Rd, address addr, int32_t &offset);
740
741 void movptr(Register Rd, address addr) {
742 int offset = 0;
743 movptr(Rd, addr, offset);
744 addi(Rd, Rd, offset);
745 }
746
747 inline void movptr(Register Rd, uintptr_t imm64) {
748 movptr(Rd, (address)imm64);
749 }
750
751 // arith
752 void add (Register Rd, Register Rn, int64_t increment, Register temp = t0);
753 void addw(Register Rd, Register Rn, int32_t increment, Register temp = t0);
754 void sub (Register Rd, Register Rn, int64_t decrement, Register temp = t0);
755 void subw(Register Rd, Register Rn, int32_t decrement, Register temp = t0);
756
757 #define INSN(NAME) \
758 inline void NAME(Register Rd, Register Rs1, Register Rs2) { \
759 Assembler::NAME(Rd, Rs1, Rs2); \
760 }
761
762 INSN(add);
763 INSN(addw);
764 INSN(sub);
765 INSN(subw);
766
767 #undef INSN
768
769 // logic
770 void andrw(Register Rd, Register Rs1, Register Rs2);
771 void orrw(Register Rd, Register Rs1, Register Rs2);
772 void xorrw(Register Rd, Register Rs1, Register Rs2);
773
774 // logic with negate
775 void andn(Register Rd, Register Rs1, Register Rs2);
776 void orn(Register Rd, Register Rs1, Register Rs2);
777
778 // revb
779 void revb_h_h(Register Rd, Register Rs, Register tmp = t0); // reverse bytes in halfword in lower 16 bits, sign-extend
780 void revb_w_w(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in lower word, sign-extend
781 void revb_h_h_u(Register Rd, Register Rs, Register tmp = t0); // reverse bytes in halfword in lower 16 bits, zero-extend
782 void revb_h_w_u(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in halfwords in lower 32 bits, zero-extend
783 void revb_h_helper(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in upper 16 bits (48:63) and move to lower
784 void revb_h(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in each halfword
785 void revb_w(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in each word
786 void revb(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in doubleword
787
788 void ror_imm(Register dst, Register src, uint32_t shift, Register tmp = t0);
  void rolw_imm(Register dst, Register src, uint32_t shift, Register tmp = t0);
790 void andi(Register Rd, Register Rn, int64_t imm, Register tmp = t0);
791 void orptr(Address adr, RegisterOrConstant src, Register tmp1 = t0, Register tmp2 = t1);
792
793 // Load and Store Instructions
794 #define INSN_ENTRY_RELOC(result_type, header) \
795 result_type header { \
796 guarantee(rtype == relocInfo::internal_word_type, \
797 "only internal_word_type relocs make sense here"); \
798 relocate(InternalAddress(dest).rspec()); \
799 IncompressibleRegion ir(this); /* relocations */
800
801 #define INSN(NAME) \
802 void NAME(Register Rd, address dest) { \
803 assert_cond(dest != nullptr); \
804 int64_t distance = dest - pc(); \
805 if (is_valid_32bit_offset(distance)) { \
806 auipc(Rd, (int32_t)distance + 0x800); \
807 Assembler::NAME(Rd, Rd, ((int32_t)distance << 20) >> 20); \
808 } else { \
809 int32_t offset = 0; \
810 movptr(Rd, dest, offset); \
811 Assembler::NAME(Rd, Rd, offset); \
812 } \
813 } \
814 INSN_ENTRY_RELOC(void, NAME(Register Rd, address dest, relocInfo::relocType rtype)) \
815 NAME(Rd, dest); \
816 } \
817 void NAME(Register Rd, const Address &adr, Register temp = t0) { \
818 switch (adr.getMode()) { \
819 case Address::literal: { \
820 relocate(adr.rspec(), [&] { \
821 NAME(Rd, adr.target()); \
822 }); \
823 break; \
824 } \
825 case Address::base_plus_offset: { \
826 if (is_simm12(adr.offset())) { \
827 Assembler::NAME(Rd, adr.base(), adr.offset()); \
828 } else { \
829 int32_t offset = ((int32_t)adr.offset() << 20) >> 20; \
830 if (Rd == adr.base()) { \
831 la(temp, Address(adr.base(), adr.offset() - offset)); \
832 Assembler::NAME(Rd, temp, offset); \
833 } else { \
834 la(Rd, Address(adr.base(), adr.offset() - offset)); \
835 Assembler::NAME(Rd, Rd, offset); \
836 } \
837 } \
838 break; \
839 } \
840 default: \
841 ShouldNotReachHere(); \
842 } \
843 } \
844 void NAME(Register Rd, Label &L) { \
845 wrap_label(Rd, L, &MacroAssembler::NAME); \
846 }
847
848 INSN(lb);
849 INSN(lbu);
850 INSN(lh);
851 INSN(lhu);
852 INSN(lw);
853 INSN(lwu);
854 INSN(ld);
855
856 #undef INSN
857
858 #define INSN(NAME) \
859 void NAME(FloatRegister Rd, address dest, Register temp = t0) { \
860 assert_cond(dest != nullptr); \
861 int64_t distance = dest - pc(); \
862 if (is_valid_32bit_offset(distance)) { \
863 auipc(temp, (int32_t)distance + 0x800); \
864 Assembler::NAME(Rd, temp, ((int32_t)distance << 20) >> 20); \
865 } else { \
866 int32_t offset = 0; \
867 movptr(temp, dest, offset); \
868 Assembler::NAME(Rd, temp, offset); \
869 } \
870 } \
871 INSN_ENTRY_RELOC(void, NAME(FloatRegister Rd, address dest, \
872 relocInfo::relocType rtype, Register temp = t0)) \
873 NAME(Rd, dest, temp); \
874 } \
875 void NAME(FloatRegister Rd, const Address &adr, Register temp = t0) { \
876 switch (adr.getMode()) { \
877 case Address::literal: { \
878 relocate(adr.rspec(), [&] { \
879 NAME(Rd, adr.target(), temp); \
880 }); \
881 break; \
882 } \
883 case Address::base_plus_offset: { \
884 if (is_simm12(adr.offset())) { \
885 Assembler::NAME(Rd, adr.base(), adr.offset()); \
886 } else { \
887 int32_t offset = ((int32_t)adr.offset() << 20) >> 20; \
888 la(temp, Address(adr.base(), adr.offset() - offset)); \
889 Assembler::NAME(Rd, temp, offset); \
890 } \
891 break; \
892 } \
893 default: \
894 ShouldNotReachHere(); \
895 } \
896 }
897
898 INSN(flw);
899 INSN(fld);
900
901 #undef INSN
902
903 #define INSN(NAME, REGISTER) \
904 INSN_ENTRY_RELOC(void, NAME(REGISTER Rs, address dest, \
905 relocInfo::relocType rtype, Register temp = t0)) \
906 NAME(Rs, dest, temp); \
907 }
908
909 INSN(sb, Register);
910 INSN(sh, Register);
911 INSN(sw, Register);
912 INSN(sd, Register);
913 INSN(fsw, FloatRegister);
914 INSN(fsd, FloatRegister);
915
916 #undef INSN
917
918 #define INSN(NAME) \
919 void NAME(Register Rs, address dest, Register temp = t0) { \
920 assert_cond(dest != nullptr); \
921 assert_different_registers(Rs, temp); \
922 int64_t distance = dest - pc(); \
923 if (is_valid_32bit_offset(distance)) { \
924 auipc(temp, (int32_t)distance + 0x800); \
925 Assembler::NAME(Rs, temp, ((int32_t)distance << 20) >> 20); \
926 } else { \
927 int32_t offset = 0; \
928 movptr(temp, dest, offset); \
929 Assembler::NAME(Rs, temp, offset); \
930 } \
931 } \
932 void NAME(Register Rs, const Address &adr, Register temp = t0) { \
933 switch (adr.getMode()) { \
934 case Address::literal: { \
935 assert_different_registers(Rs, temp); \
936 relocate(adr.rspec(), [&] { \
937 NAME(Rs, adr.target(), temp); \
938 }); \
939 break; \
940 } \
941 case Address::base_plus_offset: { \
942 if (is_simm12(adr.offset())) { \
943 Assembler::NAME(Rs, adr.base(), adr.offset()); \
944 } else { \
945 assert_different_registers(Rs, temp); \
946 int32_t offset = ((int32_t)adr.offset() << 20) >> 20; \
947 la(temp, Address(adr.base(), adr.offset() - offset)); \
948 Assembler::NAME(Rs, temp, offset); \
949 } \
950 break; \
951 } \
952 default: \
953 ShouldNotReachHere(); \
954 } \
955 }
956
957 INSN(sb);
958 INSN(sh);
959 INSN(sw);
960 INSN(sd);
961
962 #undef INSN
963
964 #define INSN(NAME) \
965 void NAME(FloatRegister Rs, address dest, Register temp = t0) { \
966 assert_cond(dest != nullptr); \
967 int64_t distance = dest - pc(); \
968 if (is_valid_32bit_offset(distance)) { \
969 auipc(temp, (int32_t)distance + 0x800); \
970 Assembler::NAME(Rs, temp, ((int32_t)distance << 20) >> 20); \
971 } else { \
972 int32_t offset = 0; \
973 movptr(temp, dest, offset); \
974 Assembler::NAME(Rs, temp, offset); \
975 } \
976 } \
977 void NAME(FloatRegister Rs, const Address &adr, Register temp = t0) { \
978 switch (adr.getMode()) { \
979 case Address::literal: { \
980 relocate(adr.rspec(), [&] { \
981 NAME(Rs, adr.target(), temp); \
982 }); \
983 break; \
984 } \
985 case Address::base_plus_offset: { \
986 if (is_simm12(adr.offset())) { \
987 Assembler::NAME(Rs, adr.base(), adr.offset()); \
988 } else { \
989 int32_t offset = ((int32_t)adr.offset() << 20) >> 20; \
990 la(temp, Address(adr.base(), adr.offset() - offset)); \
991 Assembler::NAME(Rs, temp, offset); \
992 } \
993 break; \
994 } \
995 default: \
996 ShouldNotReachHere(); \
997 } \
998 }
999
1000 INSN(fsw);
1001 INSN(fsd);
1002
1003 #undef INSN
1004
1005 #undef INSN_ENTRY_RELOC
1006
1007 void cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp, Label &succeed, Label *fail);
1008 void cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, Label &succeed, Label *fail);
1009 void cmpxchg(Register addr, Register expected,
1010 Register new_val,
1011 enum operand_size size,
1012 Assembler::Aqrl acquire, Assembler::Aqrl release,
1013 Register result, bool result_as_bool = false);
1014 void cmpxchg_weak(Register addr, Register expected,
1015 Register new_val,
1016 enum operand_size size,
1017 Assembler::Aqrl acquire, Assembler::Aqrl release,
1018 Register result);
1019 void cmpxchg_narrow_value_helper(Register addr, Register expected,
1020 Register new_val,
1021 enum operand_size size,
1022 Register tmp1, Register tmp2, Register tmp3);
1023 void cmpxchg_narrow_value(Register addr, Register expected,
1024 Register new_val,
1025 enum operand_size size,
1026 Assembler::Aqrl acquire, Assembler::Aqrl release,
1027 Register result, bool result_as_bool,
1028 Register tmp1, Register tmp2, Register tmp3);
1029 void weak_cmpxchg_narrow_value(Register addr, Register expected,
1030 Register new_val,
1031 enum operand_size size,
1032 Assembler::Aqrl acquire, Assembler::Aqrl release,
1033 Register result,
1034 Register tmp1, Register tmp2, Register tmp3);
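  // A minimal usage sketch for cmpxchg() (illustrative only; the register choices are
  // arbitrary and the enum values shown are the ones typically used elsewhere in the port):
  //   cmpxchg(/*addr*/ x10, /*expected*/ x11, /*new_val*/ x12,
  //           Assembler::int64, Assembler::aq, Assembler::rl,
  //           /*result*/ x13, /*result_as_bool*/ true);
  //   // with result_as_bool == true, x13 is 1 if the swap succeeded and 0 otherwise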
1035
1036 void atomic_add(Register prev, RegisterOrConstant incr, Register addr);
1037 void atomic_addw(Register prev, RegisterOrConstant incr, Register addr);
1038 void atomic_addal(Register prev, RegisterOrConstant incr, Register addr);
1039 void atomic_addalw(Register prev, RegisterOrConstant incr, Register addr);
1040
1041 void atomic_xchg(Register prev, Register newv, Register addr);
1042 void atomic_xchgw(Register prev, Register newv, Register addr);
1043 void atomic_xchgal(Register prev, Register newv, Register addr);
1044 void atomic_xchgalw(Register prev, Register newv, Register addr);
1045 void atomic_xchgwu(Register prev, Register newv, Register addr);
1046 void atomic_xchgalwu(Register prev, Register newv, Register addr);
1047
1048 static bool far_branches() {
1049 return ReservedCodeCacheSize > branch_range;
1050 }
1051
1052 // Emit a direct call/jump if the entry address will always be in range,
1053 // otherwise a far call/jump.
1054 // The address must be inside the code cache.
1055 // Supported entry.rspec():
1056 // - relocInfo::external_word_type
1057 // - relocInfo::runtime_call_type
1058 // - relocInfo::none
1059 // In the case of a far call/jump, the entry address is put in the tmp register.
1060 // The tmp register is invalidated.
1061 void far_call(Address entry, Register tmp = t0);
1062 void far_jump(Address entry, Register tmp = t0);
1063
1064 static int far_branch_size() {
1065 if (far_branches()) {
1066 return 2 * 4; // auipc + jalr, see far_call() & far_jump()
1067 } else {
1068 return 4;
1069 }
1070 }
1071
1072 void load_byte_map_base(Register reg);
1073
1074 void bang_stack_with_offset(int offset) {
1075 // stack grows down, caller passes positive offset
1076 assert(offset > 0, "must bang with negative offset");
1077 sub(t0, sp, offset);
1078 sd(zr, Address(t0));
1079 }
1080
1081 void la_patchable(Register reg1, const Address &dest, int32_t &offset);
1082
1083 virtual void _call_Unimplemented(address call_site) {
1084 mv(t1, call_site);
1085 }
1086
1087 #define call_Unimplemented() _call_Unimplemented((address)__PRETTY_FUNCTION__)
1088
1089 // Frame creation and destruction shared between JITs.
1090 void build_frame(int framesize);
1091 void remove_frame(int framesize);
1092
1093 void reserved_stack_check();
1094
1095 void get_polling_page(Register dest, relocInfo::relocType rtype);
1096 void read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype);
1097
1098 // RISCV64 OpenJDK uses four different types of calls:
1099 // - direct call: jal pc_relative_offset
1100 // This is the shortest and the fastest, but the offset has the range: +/-1MB.
1101 //
1102 // - far call: auipc reg, pc_relative_offset; jalr ra, reg, offset
1103 // This is longer than a direct call. The offset has
  //   the range [-(2G + 2K), 2G - 2K). Addresses out of this range in the code cache
  //   require an indirect call.
1106 // If a jump is needed rather than a call, a far jump 'jalr x0, reg, offset' can
1107 // be used instead.
1108 // All instructions are embedded at a call site.
1109 //
1110 // - trampoline call:
1111 // This is only available in C1/C2-generated code (nmethod). It is a combination
1112 // of a direct call, which is used if the destination of a call is in range,
1113 // and a register-indirect call. It has the advantages of reaching anywhere in
1114 // the RISCV address space and being patchable at runtime when the generated
1115 // code is being executed by other threads.
1116 //
1117 // [Main code section]
1118 // jal trampoline
1119 // [Stub code section]
1120 // trampoline:
1121 // ld reg, pc + 8 (auipc + ld)
1122 // jr reg
1123 // <64-bit destination address>
1124 //
1125 // If the destination is in range when the generated code is moved to the code
1126 // cache, 'jal trampoline' is replaced with 'jal destination' and the trampoline
1127 // is not used.
1128 // The optimization does not remove the trampoline from the stub section.
1129
1130 // This is necessary because the trampoline may well be redirected later when
1131 // code is patched, and the new destination may not be reachable by a simple JAL
1132 // instruction.
1133 //
1134 // - indirect call: movptr + jalr
1135 // This too can reach anywhere in the address space, but it cannot be
1136 // patched while code is running, so it must only be modified at a safepoint.
1137 // This form of call is most suitable for targets at fixed addresses, which
1138 // will never be patched.
1139 //
1140 //
1141 // To patch a trampoline call when the JAL can't reach, we first modify
1142 // the 64-bit destination address in the trampoline, then modify the
1143 // JAL to point to the trampoline, then flush the instruction cache to
1144 // broadcast the change to all executing threads. See
1145 // NativeCall::set_destination_mt_safe for the details.
1146 //
1147 // There is a benign race in that the other thread might observe the
1148 // modified JAL before it observes the modified 64-bit destination
1149 // address. That does not matter because the destination method has been
1150 // invalidated, so there will be a trap at its start.
1151 // For this to work, the destination address in the trampoline is
1152 // always updated, even if we're not using the trampoline.
1153
1154 // Emit a direct call if the entry address will always be in range,
1155 // otherwise a trampoline call.
1156 // Supported entry.rspec():
1157 // - relocInfo::runtime_call_type
1158 // - relocInfo::opt_virtual_call_type
1159 // - relocInfo::static_call_type
1160 // - relocInfo::virtual_call_type
1161 //
1162 // Return: the call PC or null if CodeCache is full.
1163 address trampoline_call(Address entry);
1164 address ic_call(address entry, jint method_index = 0);
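  // A minimal usage sketch of the null-return contract (illustrative only; the way the
  // Address is constructed here is just an example):
  //   address call_pc = trampoline_call(Address(dest, relocInfo::runtime_call_type));
  //   if (call_pc == nullptr) {
  //     // CodeCache is full: the caller must bail out, e.g. by recording a compile failure.
  //     return;
  //   }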
1165
1166 // Support for memory inc/dec
1167 // n.b. increment/decrement calls with an Address destination will
1168 // need to use a scratch register to load the value to be
1169 // incremented. increment/decrement calls which add or subtract a
1170 // constant value other than sign-extended 12-bit immediate will need
1171 // to use a 2nd scratch register to hold the constant. so, an address
1172 // increment/decrement may trash both t0 and t1.
1173
1174 void increment(const Address dst, int64_t value = 1, Register tmp1 = t0, Register tmp2 = t1);
1175 void incrementw(const Address dst, int32_t value = 1, Register tmp1 = t0, Register tmp2 = t1);
1176
1177 void decrement(const Address dst, int64_t value = 1, Register tmp1 = t0, Register tmp2 = t1);
1178 void decrementw(const Address dst, int32_t value = 1, Register tmp1 = t0, Register tmp2 = t1);
1179
1180 void cmpptr(Register src1, Address src2, Label& equal);
1181
1182 void clinit_barrier(Register klass, Register tmp, Label* L_fast_path = nullptr, Label* L_slow_path = nullptr);
1183 void load_method_holder_cld(Register result, Register method);
1184 void load_method_holder(Register holder, Register method);
1185
1186 void compute_index(Register str1, Register trailing_zeros, Register match_mask,
1187 Register result, Register char_tmp, Register tmp,
1188 bool haystack_isL);
1189 void compute_match_mask(Register src, Register pattern, Register match_mask,
1190 Register mask1, Register mask2);
1191
1192 #ifdef COMPILER2
1193 void mul_add(Register out, Register in, Register offset,
1194 Register len, Register k, Register tmp);
1195 void cad(Register dst, Register src1, Register src2, Register carry);
1196 void cadc(Register dst, Register src1, Register src2, Register carry);
1197 void adc(Register dst, Register src1, Register src2, Register carry);
1198 void add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo,
1199 Register src1, Register src2, Register carry);
1200 void multiply_32_x_32_loop(Register x, Register xstart, Register x_xstart,
1201 Register y, Register y_idx, Register z,
1202 Register carry, Register product,
1203 Register idx, Register kdx);
1204 void multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart,
1205 Register y, Register y_idx, Register z,
1206 Register carry, Register product,
1207 Register idx, Register kdx);
1208 void multiply_128_x_128_loop(Register y, Register z,
1209 Register carry, Register carry2,
1210 Register idx, Register jdx,
1211 Register yz_idx1, Register yz_idx2,
1212 Register tmp, Register tmp3, Register tmp4,
1213 Register tmp6, Register product_hi);
1214 void multiply_to_len(Register x, Register xlen, Register y, Register ylen,
1215 Register z, Register zlen,
1216 Register tmp1, Register tmp2, Register tmp3, Register tmp4,
1217 Register tmp5, Register tmp6, Register product_hi);
1218 #endif
1219
1220 void inflate_lo32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);
1221 void inflate_hi32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);
1222
1223 void ctzc_bit(Register Rd, Register Rs, bool isLL = false, Register tmp1 = t0, Register tmp2 = t1);
1224
1225 void zero_words(Register base, uint64_t cnt);
1226 address zero_words(Register ptr, Register cnt);
1227 void fill_words(Register base, Register cnt, Register value);
1228 void zero_memory(Register addr, Register len, Register tmp);
1229 void zero_dcache_blocks(Register base, Register cnt, Register tmp1, Register tmp2);
1230
1231 // shift left by shamt and add
1232 void shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt);
1233
1234 // test single bit in Rs, result is set to Rd
1235 void test_bit(Register Rd, Register Rs, uint32_t bit_pos);
1236
  // The float conversion instructions with the '_safe' suffix handle special inputs.
  // e.g. converting a NaN, +Inf or -Inf float or double to int or long would otherwise
  // produce results that do not match Java semantics, so these situations are handled
  // explicitly to get correct results.
1241 void fcvt_w_s_safe(Register dst, FloatRegister src, Register tmp = t0);
1242 void fcvt_l_s_safe(Register dst, FloatRegister src, Register tmp = t0);
1243 void fcvt_w_d_safe(Register dst, FloatRegister src, Register tmp = t0);
1244 void fcvt_l_d_safe(Register dst, FloatRegister src, Register tmp = t0);
1245
1246 void java_round_float(Register dst, FloatRegister src, FloatRegister ftmp);
1247 void java_round_double(Register dst, FloatRegister src, FloatRegister ftmp);
1248
1249 // vector load/store unit-stride instructions
1250 void vlex_v(VectorRegister vd, Register base, Assembler::SEW sew, VectorMask vm = unmasked) {
1251 switch (sew) {
1252 case Assembler::e64:
1253 vle64_v(vd, base, vm);
1254 break;
1255 case Assembler::e32:
1256 vle32_v(vd, base, vm);
1257 break;
1258 case Assembler::e16:
1259 vle16_v(vd, base, vm);
1260 break;
1261 case Assembler::e8: // fall through
1262 default:
1263 vle8_v(vd, base, vm);
1264 break;
1265 }
1266 }
1267
1268 void vsex_v(VectorRegister store_data, Register base, Assembler::SEW sew, VectorMask vm = unmasked) {
1269 switch (sew) {
1270 case Assembler::e64:
1271 vse64_v(store_data, base, vm);
1272 break;
1273 case Assembler::e32:
1274 vse32_v(store_data, base, vm);
1275 break;
1276 case Assembler::e16:
1277 vse16_v(store_data, base, vm);
1278 break;
1279 case Assembler::e8: // fall through
1280 default:
1281 vse8_v(store_data, base, vm);
1282 break;
1283 }
1284 }
1285
1286 // vector pseudo instructions
1287 inline void vl1r_v(VectorRegister vd, Register rs) {
1288 vl1re8_v(vd, rs);
1289 }
1290
1291 inline void vmnot_m(VectorRegister vd, VectorRegister vs) {
1292 vmnand_mm(vd, vs, vs);
1293 }
1294
1295 inline void vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked) {
1296 vnsrl_wx(vd, vs, x0, vm);
1297 }
1298
1299 inline void vneg_v(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked) {
1300 vrsub_vx(vd, vs, x0, vm);
1301 }
1302
1303 inline void vfneg_v(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked) {
1304 vfsgnjn_vv(vd, vs, vs, vm);
1305 }
1306
1307 inline void vfabs_v(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked) {
1308 vfsgnjx_vv(vd, vs, vs, vm);
1309 }
1310
1311 inline void vmsgt_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
1312 vmslt_vv(vd, vs1, vs2, vm);
1313 }
1314
1315 inline void vmsgtu_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
1316 vmsltu_vv(vd, vs1, vs2, vm);
1317 }
1318
1319 inline void vmsge_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
1320 vmsle_vv(vd, vs1, vs2, vm);
1321 }
1322
1323 inline void vmsgeu_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
1324 vmsleu_vv(vd, vs1, vs2, vm);
1325 }
1326
1327 inline void vmfgt_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
1328 vmflt_vv(vd, vs1, vs2, vm);
1329 }
1330
1331 inline void vmfge_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
1332 vmfle_vv(vd, vs1, vs2, vm);
1333 }
1334
1335 // Copy mask register
1336 inline void vmmv_m(VectorRegister vd, VectorRegister vs) {
1337 vmand_mm(vd, vs, vs);
1338 }
1339
1340 // Clear mask register
1341 inline void vmclr_m(VectorRegister vd) {
1342 vmxor_mm(vd, vd, vd);
1343 }
1344
1345 // Set mask register
1346 inline void vmset_m(VectorRegister vd) {
1347 vmxnor_mm(vd, vd, vd);
1348 }
1349
1350 static const int zero_words_block_size;
1351
1352 void cast_primitive_type(BasicType type, Register Rt) {
1353 switch (type) {
1354 case T_BOOLEAN:
1355 sltu(Rt, zr, Rt);
1356 break;
1357 case T_CHAR :
1358 zero_extend(Rt, Rt, 16);
1359 break;
1360 case T_BYTE :
1361 sign_extend(Rt, Rt, 8);
1362 break;
1363 case T_SHORT :
1364 sign_extend(Rt, Rt, 16);
1365 break;
1366 case T_INT :
1367 sign_extend(Rt, Rt, 32);
1368 break;
1369 case T_LONG : /* nothing to do */ break;
1370 case T_VOID : /* nothing to do */ break;
1371 case T_FLOAT : /* nothing to do */ break;
1372 case T_DOUBLE : /* nothing to do */ break;
1373 default: ShouldNotReachHere();
1374 }
1375 }
1376
1377 // float cmp with unordered_result
1378 void float_compare(Register result, FloatRegister Rs1, FloatRegister Rs2, int unordered_result);
1379 void double_compare(Register result, FloatRegister Rs1, FloatRegister Rs2, int unordered_result);
1380
1381 // Zero/Sign-extend
1382 void zero_extend(Register dst, Register src, int bits);
1383 void sign_extend(Register dst, Register src, int bits);
1384
1385 // compare src1 and src2 and get -1/0/1 in dst.
1386 // if [src1 > src2], dst = 1;
1387 // if [src1 == src2], dst = 0;
1388 // if [src1 < src2], dst = -1;
1389 void cmp_l2i(Register dst, Register src1, Register src2, Register tmp = t0);
1390
1391 // support for argument shuffling
1392 void move32_64(VMRegPair src, VMRegPair dst, Register tmp = t0);
1393 void float_move(VMRegPair src, VMRegPair dst, Register tmp = t0);
1394 void long_move(VMRegPair src, VMRegPair dst, Register tmp = t0);
1395 void double_move(VMRegPair src, VMRegPair dst, Register tmp = t0);
1396 void object_move(OopMap* map,
1397 int oop_handle_offset,
1398 int framesize_in_slots,
1399 VMRegPair src,
1400 VMRegPair dst,
1401 bool is_receiver,
1402 int* receiver_offset);
1403 void rt_call(address dest, Register tmp = t0);
1404
1405 void call(const address dest, Register temp = t0) {
1406 assert_cond(dest != nullptr);
1407 assert(temp != noreg, "expecting a register");
1408 int32_t offset = 0;
1409 mv(temp, dest, offset);
1410 jalr(x1, temp, offset);
1411 }
1412
1413 inline void ret() {
1414 jalr(x0, x1, 0);
1415 }
1416
1417 #ifdef ASSERT
  // Template short-hand support to clean up after a failed call to trampoline
  // call generation (see trampoline_call() above), when a set of Labels must
  // be reset (before returning).
1421 template<typename Label, typename... More>
1422 void reset_labels(Label& lbl, More&... more) {
1423 lbl.reset(); reset_labels(more...);
1424 }
1425 template<typename Label>
1426 void reset_labels(Label& lbl) {
1427 lbl.reset();
1428 }
1429 #endif
1430
1431 private:
1432
1433 void repne_scan(Register addr, Register value, Register count, Register tmp);
1434
1435 void ld_constant(Register dest, const Address &const_addr) {
1436 if (NearCpool) {
1437 ld(dest, const_addr);
1438 } else {
1439 InternalAddress target(const_addr.target());
1440 relocate(target.rspec(), [&] {
1441 int32_t offset;
1442 la_patchable(dest, target, offset);
1443 ld(dest, Address(dest, offset));
1444 });
1445 }
1446 }
1447
1448 int bitset_to_regs(unsigned int bitset, unsigned char* regs);
1449 Address add_memory_helper(const Address dst, Register tmp);
1450
1451 void load_reserved(Register addr, enum operand_size size, Assembler::Aqrl acquire);
1452 void store_conditional(Register addr, Register new_val, enum operand_size size, Assembler::Aqrl release);
1453
1454 public:
1455 void lightweight_lock(Register obj, Register hdr, Register tmp1, Register tmp2, Label& slow);
1456 void lightweight_unlock(Register obj, Register hdr, Register tmp1, Register tmp2, Label& slow);
1457 };
1458
1459 #ifdef ASSERT
1460 inline bool AbstractAssembler::pd_check_instruction_mark() { return false; }
1461 #endif
1462
1463 /**
1464 * class SkipIfEqual:
1465 *
1466 * Instantiating this class will result in assembly code being output that will
 * jump around any code emitted between the creation of the instance and its
1468 * automatic destruction at the end of a scope block, depending on the value of
1469 * the flag passed to the constructor, which will be checked at run-time.
1470 */
1471 class SkipIfEqual {
1472 private:
1473 MacroAssembler* _masm;
1474 Label _label;
1475
1476 public:
1477 SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value);
1478 ~SkipIfEqual();
1479 };
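// A minimal usage sketch (illustrative only; DTraceMethodProbes is just an example of a
// run-time checked bool flag):
//   {
//     SkipIfEqual skip(masm, &DTraceMethodProbes, false);
//     // code emitted here is skipped at run-time when DTraceMethodProbes == false
//   }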
1480
1481 #endif // CPU_RISCV_MACROASSEMBLER_RISCV_HPP