1 /* 2 * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved. 3 * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. 4 * Copyright (c) 2020, 2024, Huawei Technologies Co., Ltd. All rights reserved. 5 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 6 * 7 * This code is free software; you can redistribute it and/or modify it 8 * under the terms of the GNU General Public License version 2 only, as 9 * published by the Free Software Foundation. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 * 25 */ 26 27 #ifndef CPU_RISCV_MACROASSEMBLER_RISCV_HPP 28 #define CPU_RISCV_MACROASSEMBLER_RISCV_HPP 29 30 #include "asm/assembler.inline.hpp" 31 #include "code/vmreg.hpp" 32 #include "metaprogramming/enableIf.hpp" 33 #include "oops/compressedOops.hpp" 34 #include "utilities/powerOfTwo.hpp" 35 36 // MacroAssembler extends Assembler by frequently used macros. 37 // 38 // Instructions for which a 'better' code sequence exists depending 39 // on arguments should also go in here. 40 41 class MacroAssembler: public Assembler { 42 43 public: 44 45 MacroAssembler(CodeBuffer* code) : Assembler(code) {} 46 47 void safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod); 48 49 // Alignment 50 int align(int modulus, int extra_offset = 0); 51 52 static inline void assert_alignment(address pc, int alignment = MacroAssembler::instruction_size) { 53 assert(is_aligned(pc, alignment), "bad alignment"); 54 } 55 56 // nop 57 void post_call_nop(); 58 59 // Stack frame creation/removal 60 // Note that SP must be updated to the right place before saving/restoring RA and FP 61 // because signal based thread suspend/resume could happen asynchronously. 62 void enter() { 63 addi(sp, sp, - 2 * wordSize); 64 sd(ra, Address(sp, wordSize)); 65 sd(fp, Address(sp)); 66 addi(fp, sp, 2 * wordSize); 67 } 68 69 void leave() { 70 addi(sp, fp, - 2 * wordSize); 71 ld(fp, Address(sp)); 72 ld(ra, Address(sp, wordSize)); 73 addi(sp, sp, 2 * wordSize); 74 } 75 76 77 // Support for getting the JavaThread pointer (i.e.; a reference to thread-local information) 78 // The pointer will be loaded into the thread register. 79 void get_thread(Register thread); 80 81 // Support for VM calls 82 // 83 // It is imperative that all calls into the VM are handled via the call_VM macros. 84 // They make sure that the stack linkage is setup correctly. call_VM's correspond 85 // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points. 
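  // For example (an illustrative sketch only; the runtime entry point named
  // below is hypothetical), a stub that needs an oop result back from the VM
  // might emit:
  //
  //   __ call_VM(x10 /* oop_result */,
  //              CAST_FROM_FN_PTR(address, SomeRuntime::entry),
  //              c_rarg1 /* arg_1 */);
  //
  // while a call to a leaf runtime routine that does not need the
  // last_Java_frame set up would use one of the call_VM_leaf variants below.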
86 87 void call_VM(Register oop_result, 88 address entry_point, 89 bool check_exceptions = true); 90 void call_VM(Register oop_result, 91 address entry_point, 92 Register arg_1, 93 bool check_exceptions = true); 94 void call_VM(Register oop_result, 95 address entry_point, 96 Register arg_1, Register arg_2, 97 bool check_exceptions = true); 98 void call_VM(Register oop_result, 99 address entry_point, 100 Register arg_1, Register arg_2, Register arg_3, 101 bool check_exceptions = true); 102 103 // Overloadings with last_Java_sp 104 void call_VM(Register oop_result, 105 Register last_java_sp, 106 address entry_point, 107 int number_of_arguments = 0, 108 bool check_exceptions = true); 109 void call_VM(Register oop_result, 110 Register last_java_sp, 111 address entry_point, 112 Register arg_1, 113 bool check_exceptions = true); 114 void call_VM(Register oop_result, 115 Register last_java_sp, 116 address entry_point, 117 Register arg_1, Register arg_2, 118 bool check_exceptions = true); 119 void call_VM(Register oop_result, 120 Register last_java_sp, 121 address entry_point, 122 Register arg_1, Register arg_2, Register arg_3, 123 bool check_exceptions = true); 124 125 void get_vm_result(Register oop_result, Register java_thread); 126 void get_vm_result_2(Register metadata_result, Register java_thread); 127 128 // These always tightly bind to MacroAssembler::call_VM_leaf_base 129 // bypassing the virtual implementation 130 void call_VM_leaf(address entry_point, 131 int number_of_arguments = 0); 132 void call_VM_leaf(address entry_point, 133 Register arg_0); 134 void call_VM_leaf(address entry_point, 135 Register arg_0, Register arg_1); 136 void call_VM_leaf(address entry_point, 137 Register arg_0, Register arg_1, Register arg_2); 138 139 // These always tightly bind to MacroAssembler::call_VM_base 140 // bypassing the virtual implementation 141 void super_call_VM_leaf(address entry_point, Register arg_0); 142 void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1); 143 void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2); 144 void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3); 145 146 // last Java Frame (fills frame anchor) 147 void set_last_Java_frame(Register last_java_sp, Register last_java_fp, address last_java_pc, Register tmp); 148 void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Label &last_java_pc, Register tmp); 149 void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Register last_java_pc); 150 151 // thread in the default location (xthread) 152 void reset_last_Java_frame(bool clear_fp); 153 154 virtual void call_VM_leaf_base( 155 address entry_point, // the entry point 156 int number_of_arguments, // the number of arguments to pop after the call 157 Label* retaddr = nullptr 158 ); 159 160 virtual void call_VM_leaf_base( 161 address entry_point, // the entry point 162 int number_of_arguments, // the number of arguments to pop after the call 163 Label& retaddr) { 164 call_VM_leaf_base(entry_point, number_of_arguments, &retaddr); 165 } 166 167 virtual void call_VM_base( // returns the register containing the thread upon return 168 Register oop_result, // where an oop-result ends up if any; use noreg otherwise 169 Register java_thread, // the thread if computed before ; use noreg otherwise 170 Register last_java_sp, // to set up last_Java_frame in stubs; use noreg otherwise 171 address entry_point, // the entry point 172 int 
                     number_of_arguments,  // the number of arguments (w/o thread) to pop after the call
    bool check_exceptions                  // whether to check for pending exceptions after return
  );

  void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions);

  virtual void check_and_handle_earlyret(Register java_thread);
  virtual void check_and_handle_popframe(Register java_thread);

  void resolve_weak_handle(Register result, Register tmp1, Register tmp2);
  void resolve_oop_handle(Register result, Register tmp1, Register tmp2);
  void resolve_jobject(Register value, Register tmp1, Register tmp2);
  void resolve_global_jobject(Register value, Register tmp1, Register tmp2);

  void movoop(Register dst, jobject obj);
  void mov_metadata(Register dst, Metadata* obj);
  void bang_stack_size(Register size, Register tmp);
  void set_narrow_oop(Register dst, jobject obj);
  void set_narrow_klass(Register dst, Klass* k);

  void load_mirror(Register dst, Register method, Register tmp1, Register tmp2);
  void access_load_at(BasicType type, DecoratorSet decorators, Register dst,
                      Address src, Register tmp1, Register tmp2);
  void access_store_at(BasicType type, DecoratorSet decorators, Address dst,
                       Register val, Register tmp1, Register tmp2, Register tmp3);
  void load_klass(Register dst, Register src, Register tmp = t0);
  void store_klass(Register dst, Register src, Register tmp = t0);
  void cmp_klass(Register oop, Register trial_klass, Register tmp1, Register tmp2, Label &L);

  void encode_klass_not_null(Register r, Register tmp = t0);
  void decode_klass_not_null(Register r, Register tmp = t0);
  void encode_klass_not_null(Register dst, Register src, Register tmp);
  void decode_klass_not_null(Register dst, Register src, Register tmp);
  void decode_heap_oop_not_null(Register r);
  void decode_heap_oop_not_null(Register dst, Register src);
  void decode_heap_oop(Register d, Register s);
  void decode_heap_oop(Register r) { decode_heap_oop(r, r); }
  void encode_heap_oop_not_null(Register r);
  void encode_heap_oop_not_null(Register dst, Register src);
  void encode_heap_oop(Register d, Register s);
  void encode_heap_oop(Register r) { encode_heap_oop(r, r); };
  void load_heap_oop(Register dst, Address src, Register tmp1,
                     Register tmp2, DecoratorSet decorators = 0);
  void load_heap_oop_not_null(Register dst, Address src, Register tmp1,
                              Register tmp2, DecoratorSet decorators = 0);
  void store_heap_oop(Address dst, Register val, Register tmp1,
                      Register tmp2, Register tmp3, DecoratorSet decorators = 0);

  void store_klass_gap(Register dst, Register src);

  // currently unimplemented
  // Used for storing null. All other oop constants should be
  // stored using routines that take a jobject.
  void store_heap_oop_null(Address dst);

  // This dummy is to prevent a call to store_heap_oop from
  // converting a zero (linked null) into a Register by giving
  // the compiler two choices it can't resolve

  void store_heap_oop(Address dst, void* dummy);

  // Support for null-checks
  //
  // Generates code that causes a null OS exception if the content of reg is null.
  // If the accessed location is M[reg + offset] and the offset is known, provide the
  // offset. No explicit code generation is needed if the offset is within a certain
  // range (0 <= offset <= page_size).
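  // For example (sketch; 'obj' and 'field_off' are illustrative): with a small
  // constant offset the later access itself can fault, so nothing is emitted,
  // while an unknown offset makes null_check emit an explicit probe of M[reg]:
  //
  //   __ null_check(obj, field_off);  // implicit: relies on the later access
  //   __ null_check(obj);             // explicit faulting probe is emitted
  //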
  virtual void null_check(Register reg, int offset = -1);
  static bool needs_explicit_null_check(intptr_t offset);
  static bool uses_implicit_null_check(void* address);

  // idiv variant which deals with MINLONG as dividend and -1 as divisor
  int corrected_idivl(Register result, Register rs1, Register rs2,
                      bool want_remainder, bool is_signed);
  int corrected_idivq(Register result, Register rs1, Register rs2,
                      bool want_remainder, bool is_signed);

  // interface method calling
  void lookup_interface_method(Register recv_klass,
                               Register intf_klass,
                               RegisterOrConstant itable_index,
                               Register method_result,
                               Register scan_tmp,
                               Label& no_such_interface,
                               bool return_method = true);

  void lookup_interface_method_stub(Register recv_klass,
                                    Register holder_klass,
                                    Register resolved_klass,
                                    Register method_result,
                                    Register temp_reg,
                                    Register temp_reg2,
                                    int itable_index,
                                    Label& L_no_such_interface);

  // virtual method calling
  // n.b. x86 allows RegisterOrConstant for vtable_index
  void lookup_virtual_method(Register recv_klass,
                             RegisterOrConstant vtable_index,
                             Register method_result);

  // Form an address from base + offset in Rd. Rd may or may not
  // actually be used: you must use the Address that is returned. It
  // is up to you to ensure that the shift provided matches the size
  // of your data.
  Address form_address(Register Rd, Register base, int64_t byte_offset);

  // Sometimes we get misaligned loads and stores, usually from Unsafe
  // accesses, and these can exceed the offset range.
  Address legitimize_address(Register Rd, const Address &adr) {
    if (adr.getMode() == Address::base_plus_offset) {
      if (!is_simm12(adr.offset())) {
        return form_address(Rd, adr.base(), adr.offset());
      }
    }
    return adr;
  }

  // allocation
  void tlab_allocate(
    Register obj,               // result: pointer to object after successful allocation
    Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise
    int con_size_in_bytes,      // object size in bytes if known at compile time
    Register tmp1,              // temp register
    Register tmp2,              // temp register
    Label& slow_case,           // continuation point if fast allocation fails
    bool is_far = false
  );

  // Test sub_klass against super_klass, with fast and slow paths.

  // The fast path produces a tri-state answer: yes / no / maybe-slow.
  // One of the three labels can be null, meaning take the fall-through.
  // If super_check_offset is -1, the value is loaded up from super_klass.
  // No registers are killed, except tmp_reg
  void check_klass_subtype_fast_path(Register sub_klass,
                                     Register super_klass,
                                     Register tmp_reg,
                                     Label* L_success,
                                     Label* L_failure,
                                     Label* L_slow_path,
                                     Register super_check_offset = noreg);

  // The rest of the type check; must be wired to a corresponding fast path.
  // It does not repeat the fast path logic, so don't use it standalone.
  // The tmp1_reg and tmp2_reg can be noreg, if no temps are available.
  // Updates the sub's secondary super cache as necessary.
  void check_klass_subtype_slow_path(Register sub_klass,
                                     Register super_klass,
                                     Register tmp1_reg,
                                     Register tmp2_reg,
                                     Label* L_success,
                                     Label* L_failure);

  void population_count(Register dst, Register src, Register tmp1, Register tmp2);

  // As above, but with a constant super_klass.
  // The result is in Register result, not the condition codes.
  bool lookup_secondary_supers_table(Register r_sub_klass,
                                     Register r_super_klass,
                                     Register result,
                                     Register tmp1,
                                     Register tmp2,
                                     Register tmp3,
                                     Register tmp4,
                                     u1 super_klass_slot,
                                     bool stub_is_near = false);

  void verify_secondary_supers_table(Register r_sub_klass,
                                     Register r_super_klass,
                                     Register result,
                                     Register tmp1,
                                     Register tmp2,
                                     Register tmp3);

  void lookup_secondary_supers_table_slow_path(Register r_super_klass,
                                               Register r_array_base,
                                               Register r_array_index,
                                               Register r_bitmap,
                                               Register result,
                                               Register tmp1);

  void check_klass_subtype(Register sub_klass,
                           Register super_klass,
                           Register tmp_reg,
                           Label& L_success);

  Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0);

  // only if +VerifyOops
  void _verify_oop(Register reg, const char* s, const char* file, int line);
  void _verify_oop_addr(Address addr, const char* s, const char* file, int line);

  void _verify_oop_checked(Register reg, const char* s, const char* file, int line) {
    if (VerifyOops) {
      _verify_oop(reg, s, file, line);
    }
  }
  void _verify_oop_addr_checked(Address reg, const char* s, const char* file, int line) {
    if (VerifyOops) {
      _verify_oop_addr(reg, s, file, line);
    }
  }

  void _verify_method_ptr(Register reg, const char* msg, const char* file, int line) {}
  void _verify_klass_ptr(Register reg, const char* msg, const char* file, int line) {}

#define verify_oop(reg) _verify_oop_checked(reg, "broken oop " #reg, __FILE__, __LINE__)
#define verify_oop_msg(reg, msg) _verify_oop_checked(reg, "broken oop " #reg ", " #msg, __FILE__, __LINE__)
#define verify_oop_addr(addr) _verify_oop_addr_checked(addr, "broken oop addr " #addr, __FILE__, __LINE__)
#define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__)
#define verify_klass_ptr(reg) _verify_method_ptr(reg, "broken klass " #reg, __FILE__, __LINE__)

  // A more convenient access to fence for our purposes.
  // We use four bits to indicate the read and write bits of the predecessor and
  // successor sets, and extend r to ir and w to ow when UseConservativeFence is enabled.
  enum Membar_mask_bits {
    StoreStore = 0b0101,               // (pred = ow   + succ = ow)
    LoadStore  = 0b1001,               // (pred = ir   + succ = ow)
    StoreLoad  = 0b0110,               // (pred = ow   + succ = ir)
    LoadLoad   = 0b1010,               // (pred = ir   + succ = ir)
    AnyAny     = LoadStore | StoreLoad // (pred = iorw + succ = iorw)
  };

  void membar(uint32_t order_constraint);

  static void membar_mask_to_pred_succ(uint32_t order_constraint,
                                       uint32_t& predecessor, uint32_t& successor) {
    predecessor = (order_constraint >> 2) & 0x3;
    successor = order_constraint & 0x3;

    // extend rw -> iorw:
    // 01(w)  -> 0101(ow)
    // 10(r)  -> 1010(ir)
    // 11(rw) -> 1111(iorw)
    if (UseConservativeFence) {
      predecessor |= predecessor << 2;
      successor |= successor << 2;
    }
  }

  static int pred_succ_to_membar_mask(uint32_t predecessor, uint32_t successor) {
    return ((predecessor & 0x3) << 2) | (successor & 0x3);
  }

  void fence(uint32_t predecessor, uint32_t successor) {
    if (UseZtso) {
      if ((pred_succ_to_membar_mask(predecessor, successor) & StoreLoad) == StoreLoad) {
        // TSO allows for stores to be reordered after loads.
When the compiler 422 // generates a fence to disallow that, we are required to generate the 423 // fence for correctness. 424 Assembler::fence(predecessor, successor); 425 } else { 426 // TSO guarantees other fences already. 427 } 428 } else { 429 // always generate fence for RVWMO 430 Assembler::fence(predecessor, successor); 431 } 432 } 433 434 void cmodx_fence(); 435 436 void pause() { 437 Assembler::fence(w, 0); 438 } 439 440 // prints msg, dumps registers and stops execution 441 void stop(const char* msg); 442 443 static void debug64(char* msg, int64_t pc, int64_t regs[]); 444 445 void unimplemented(const char* what = ""); 446 447 void should_not_reach_here() { stop("should not reach here"); } 448 449 static address target_addr_for_insn(address insn_addr); 450 451 // Required platform-specific helpers for Label::patch_instructions. 452 // They _shadow_ the declarations in AbstractAssembler, which are undefined. 453 static int pd_patch_instruction_size(address branch, address target); 454 static void pd_patch_instruction(address branch, address target, const char* file = nullptr, int line = 0) { 455 pd_patch_instruction_size(branch, target); 456 } 457 static address pd_call_destination(address branch) { 458 return target_addr_for_insn(branch); 459 } 460 461 static int patch_oop(address insn_addr, address o); 462 463 static address get_target_of_li32(address insn_addr); 464 static int patch_imm_in_li32(address branch, int32_t target); 465 466 // Return whether code is emitted to a scratch blob. 467 virtual bool in_scratch_emit_size() { 468 return false; 469 } 470 471 address emit_address_stub(int insts_call_instruction_offset, address target); 472 address emit_trampoline_stub(int insts_call_instruction_offset, address target); 473 static int max_reloc_call_stub_size(); 474 475 void emit_static_call_stub(); 476 static int static_call_stub_size(); 477 478 // The following 4 methods return the offset of the appropriate move instruction 479 480 // Support for fast byte/short loading with zero extension (depending on particular CPU) 481 int load_unsigned_byte(Register dst, Address src); 482 int load_unsigned_short(Register dst, Address src); 483 484 // Support for fast byte/short loading with sign extension (depending on particular CPU) 485 int load_signed_byte(Register dst, Address src); 486 int load_signed_short(Register dst, Address src); 487 488 // Load and store values by size and signed-ness 489 void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed); 490 void store_sized_value(Address dst, Register src, size_t size_in_bytes); 491 492 // Misaligned loads, will use the best way, according to the AvoidUnalignedAccess flag 493 void load_short_misaligned(Register dst, Address src, Register tmp, bool is_signed, int granularity = 1); 494 void load_int_misaligned(Register dst, Address src, Register tmp, bool is_signed, int granularity = 1); 495 void load_long_misaligned(Register dst, Address src, Register tmp, int granularity = 1); 496 497 public: 498 // Standard pseudo instructions 499 inline void nop() { 500 addi(x0, x0, 0); 501 } 502 503 inline void mv(Register Rd, Register Rs) { 504 if (Rd != Rs) { 505 addi(Rd, Rs, 0); 506 } 507 } 508 509 inline void notr(Register Rd, Register Rs) { 510 if (do_compress_zcb(Rd, Rs) && (Rd == Rs)) { 511 c_not(Rd); 512 } else { 513 xori(Rd, Rs, -1); 514 } 515 } 516 517 inline void neg(Register Rd, Register Rs) { 518 sub(Rd, x0, Rs); 519 } 520 521 inline void negw(Register Rd, Register Rs) { 522 subw(Rd, x0, Rs); 523 } 524 525 
  inline void sext_w(Register Rd, Register Rs) {
    addiw(Rd, Rs, 0);
  }

  inline void zext_b(Register Rd, Register Rs) {
    if (do_compress_zcb(Rd, Rs) && (Rd == Rs)) {
      c_zext_b(Rd);
    } else {
      andi(Rd, Rs, 0xFF);
    }
  }

  inline void seqz(Register Rd, Register Rs) {
    sltiu(Rd, Rs, 1);
  }

  inline void snez(Register Rd, Register Rs) {
    sltu(Rd, x0, Rs);
  }

  inline void sltz(Register Rd, Register Rs) {
    slt(Rd, Rs, x0);
  }

  inline void sgtz(Register Rd, Register Rs) {
    slt(Rd, x0, Rs);
  }

  // Bit-manipulation extension pseudo instructions
  // zero extend word
  inline void zext_w(Register Rd, Register Rs) {
    assert(UseZba, "must be");
    if (do_compress_zcb(Rd, Rs) && (Rd == Rs)) {
      c_zext_w(Rd);
    } else {
      add_uw(Rd, Rs, zr);
    }
  }

  // Floating-point data-processing pseudo instructions
  inline void fmv_s(FloatRegister Rd, FloatRegister Rs) {
    if (Rd != Rs) {
      fsgnj_s(Rd, Rs, Rs);
    }
  }

  inline void fabs_s(FloatRegister Rd, FloatRegister Rs) {
    fsgnjx_s(Rd, Rs, Rs);
  }

  inline void fneg_s(FloatRegister Rd, FloatRegister Rs) {
    fsgnjn_s(Rd, Rs, Rs);
  }

  inline void fmv_d(FloatRegister Rd, FloatRegister Rs) {
    if (Rd != Rs) {
      fsgnj_d(Rd, Rs, Rs);
    }
  }

  inline void fabs_d(FloatRegister Rd, FloatRegister Rs) {
    fsgnjx_d(Rd, Rs, Rs);
  }

  inline void fneg_d(FloatRegister Rd, FloatRegister Rs) {
    fsgnjn_d(Rd, Rs, Rs);
  }

  // Control and status pseudo instructions
  void rdinstret(Register Rd);             // read instruction-retired counter
  void rdcycle(Register Rd);               // read cycle counter
  void rdtime(Register Rd);                // read time
  void csrr(Register Rd, unsigned csr);    // read csr
  void csrw(unsigned csr, Register Rs);    // write csr
  void csrs(unsigned csr, Register Rs);    // set bits in csr
  void csrc(unsigned csr, Register Rs);    // clear bits in csr
  void csrwi(unsigned csr, unsigned imm);
  void csrsi(unsigned csr, unsigned imm);
  void csrci(unsigned csr, unsigned imm);
  void frcsr(Register Rd);                 // read floating-point csr
  void fscsr(Register Rd, Register Rs);    // swap floating-point csr
  void fscsr(Register Rs);                 // write floating-point csr
  void frrm(Register Rd);                  // read floating-point rounding mode
  void fsrm(Register Rd, Register Rs);     // swap floating-point rounding mode
  void fsrm(Register Rs);                  // write floating-point rounding mode
  void fsrmi(Register Rd, unsigned imm);
  void fsrmi(unsigned imm);
  void frflags(Register Rd);               // read floating-point exception flags
  void fsflags(Register Rd, Register Rs);  // swap floating-point exception flags
  void fsflags(Register Rs);               // write floating-point exception flags
  void fsflagsi(Register Rd, unsigned imm);
  void fsflagsi(unsigned imm);

  // Restore cpu control state after JNI call
  void restore_cpu_control_state_after_jni(Register tmp);

  // Control transfer pseudo instructions
  void beqz(Register Rs, const address dest);
  void bnez(Register Rs, const address dest);
  void blez(Register Rs, const address dest);
  void bgez(Register Rs, const address dest);
  void bltz(Register Rs, const address dest);
  void bgtz(Register Rs, const address dest);

private:
  void load_link_jump(const address source, Register temp = t0);
  void jump_link(const address dest, Register temp);
public:
  // We try to follow risc-v asm mnemonics.
634 // But as we don't layout a reachable GOT, 635 // we often need to resort to movptr, li <48imm>. 636 // https://github.com/riscv-non-isa/riscv-asm-manual/blob/master/riscv-asm.md 637 638 // jump: jal x0, offset 639 // For long reach uses temp register for: 640 // la + jr 641 void j(const address dest, Register temp = t0); 642 void j(const Address &adr, Register temp = t0); 643 void j(Label &l, Register temp = t0); 644 645 // jump register: jalr x0, offset(rs) 646 void jr(Register Rd, int32_t offset = 0); 647 648 // call: la + jalr x1 649 void call(const address dest, Register temp = t0); 650 651 // jalr: jalr x1, offset(rs) 652 void jalr(Register Rs, int32_t offset = 0); 653 654 // Emit a runtime call. Only invalidates the tmp register which 655 // is used to keep the entry address for jalr/movptr. 656 // Uses call() for intra code cache, else movptr + jalr. 657 void rt_call(address dest, Register tmp = t0); 658 659 // ret: jalr x0, 0(x1) 660 inline void ret() { 661 Assembler::jalr(x0, x1, 0); 662 } 663 664 //label 665 void beqz(Register Rs, Label &l, bool is_far = false); 666 void bnez(Register Rs, Label &l, bool is_far = false); 667 void blez(Register Rs, Label &l, bool is_far = false); 668 void bgez(Register Rs, Label &l, bool is_far = false); 669 void bltz(Register Rs, Label &l, bool is_far = false); 670 void bgtz(Register Rs, Label &l, bool is_far = false); 671 672 void beq (Register Rs1, Register Rs2, Label &L, bool is_far = false); 673 void bne (Register Rs1, Register Rs2, Label &L, bool is_far = false); 674 void blt (Register Rs1, Register Rs2, Label &L, bool is_far = false); 675 void bge (Register Rs1, Register Rs2, Label &L, bool is_far = false); 676 void bltu(Register Rs1, Register Rs2, Label &L, bool is_far = false); 677 void bgeu(Register Rs1, Register Rs2, Label &L, bool is_far = false); 678 679 void bgt (Register Rs, Register Rt, const address dest); 680 void ble (Register Rs, Register Rt, const address dest); 681 void bgtu(Register Rs, Register Rt, const address dest); 682 void bleu(Register Rs, Register Rt, const address dest); 683 684 void bgt (Register Rs, Register Rt, Label &l, bool is_far = false); 685 void ble (Register Rs, Register Rt, Label &l, bool is_far = false); 686 void bgtu(Register Rs, Register Rt, Label &l, bool is_far = false); 687 void bleu(Register Rs, Register Rt, Label &l, bool is_far = false); 688 689 #define INSN_ENTRY_RELOC(result_type, header) \ 690 result_type header { \ 691 guarantee(rtype == relocInfo::internal_word_type, \ 692 "only internal_word_type relocs make sense here"); \ 693 relocate(InternalAddress(dest).rspec()); \ 694 IncompressibleRegion ir(this); /* relocations */ 695 696 #define INSN(NAME) \ 697 void NAME(Register Rs1, Register Rs2, const address dest) { \ 698 assert_cond(dest != nullptr); \ 699 int64_t offset = dest - pc(); \ 700 guarantee(is_simm13(offset) && is_even(offset), \ 701 "offset is invalid: is_simm_13: %s offset: " INT64_FORMAT, \ 702 BOOL_TO_STR(is_simm13(offset)), offset); \ 703 Assembler::NAME(Rs1, Rs2, offset); \ 704 } \ 705 INSN_ENTRY_RELOC(void, NAME(Register Rs1, Register Rs2, address dest, relocInfo::relocType rtype)) \ 706 NAME(Rs1, Rs2, dest); \ 707 } 708 709 INSN(beq); 710 INSN(bne); 711 INSN(bge); 712 INSN(bgeu); 713 INSN(blt); 714 INSN(bltu); 715 716 #undef INSN 717 718 #undef INSN_ENTRY_RELOC 719 720 void float_beq(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); 721 void float_bne(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool 
is_unordered = false); 722 void float_ble(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); 723 void float_bge(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); 724 void float_blt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); 725 void float_bgt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); 726 727 void double_beq(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); 728 void double_bne(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); 729 void double_ble(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); 730 void double_bge(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); 731 void double_blt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); 732 void double_bgt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false); 733 734 private: 735 int push_reg(unsigned int bitset, Register stack); 736 int pop_reg(unsigned int bitset, Register stack); 737 int push_fp(unsigned int bitset, Register stack); 738 int pop_fp(unsigned int bitset, Register stack); 739 #ifdef COMPILER2 740 int push_v(unsigned int bitset, Register stack); 741 int pop_v(unsigned int bitset, Register stack); 742 #endif // COMPILER2 743 744 // The signed 20-bit upper imm can materialize at most negative 0xF...F80000000, two G. 745 // The following signed 12-bit imm can at max subtract 0x800, two K, from that previously loaded two G. 746 bool is_valid_32bit_offset(int64_t x) { 747 constexpr int64_t twoG = (2 * G); 748 constexpr int64_t twoK = (2 * K); 749 return x < (twoG - twoK) && x >= (-twoG - twoK); 750 } 751 752 // Ensure that the auipc can reach the destination at x from anywhere within 753 // the code cache so that if it is relocated we know it will still reach. 754 bool is_32bit_offset_from_codecache(int64_t x) { 755 int64_t low = (int64_t)CodeCache::low_bound(); 756 int64_t high = (int64_t)CodeCache::high_bound(); 757 return is_valid_32bit_offset(x - low) && is_valid_32bit_offset(x - high); 758 } 759 760 public: 761 void push_reg(Register Rs); 762 void pop_reg(Register Rd); 763 void push_reg(RegSet regs, Register stack) { if (regs.bits()) push_reg(regs.bits(), stack); } 764 void pop_reg(RegSet regs, Register stack) { if (regs.bits()) pop_reg(regs.bits(), stack); } 765 void push_fp(FloatRegSet regs, Register stack) { if (regs.bits()) push_fp(regs.bits(), stack); } 766 void pop_fp(FloatRegSet regs, Register stack) { if (regs.bits()) pop_fp(regs.bits(), stack); } 767 #ifdef COMPILER2 768 void push_v(VectorRegSet regs, Register stack) { if (regs.bits()) push_v(regs.bits(), stack); } 769 void pop_v(VectorRegSet regs, Register stack) { if (regs.bits()) pop_v(regs.bits(), stack); } 770 #endif // COMPILER2 771 772 // Push and pop everything that might be clobbered by a native 773 // runtime call except t0 and t1. (They are always 774 // temporary registers, so we don't have to protect them.) 775 // Additional registers can be excluded in a passed RegSet. 
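  // e.g. (sketch; 'some_helper' is illustrative): preserve everything except
  // the intended result register around a native helper call:
  //
  //   __ push_call_clobbered_registers_except(RegSet::of(x10));
  //   __ rt_call(CAST_FROM_FN_PTR(address, some_helper));
  //   __ pop_call_clobbered_registers_except(RegSet::of(x10));
  //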
  void push_call_clobbered_registers_except(RegSet exclude);
  void pop_call_clobbered_registers_except(RegSet exclude);

  void push_call_clobbered_registers() {
    push_call_clobbered_registers_except(RegSet());
  }
  void pop_call_clobbered_registers() {
    pop_call_clobbered_registers_except(RegSet());
  }

  void push_CPU_state(bool save_vectors = false, int vector_size_in_bytes = 0);
  void pop_CPU_state(bool restore_vectors = false, int vector_size_in_bytes = 0);

  void push_cont_fastpath(Register java_thread = xthread);
  void pop_cont_fastpath(Register java_thread = xthread);

  void inc_held_monitor_count(Register tmp = t0);
  void dec_held_monitor_count(Register tmp = t0);

  // if heap base register is used - reinit it with the correct value
  void reinit_heapbase();

  void bind(Label& L) {
    Assembler::bind(L);
    // fences across basic blocks should not be merged
    code()->clear_last_insn();
  }

  typedef void (MacroAssembler::* compare_and_branch_insn)(Register Rs1, Register Rs2, const address dest);
  typedef void (MacroAssembler::* compare_and_branch_label_insn)(Register Rs1, Register Rs2, Label &L, bool is_far);
  typedef void (MacroAssembler::* jal_jalr_insn)(Register Rt, address dest);

  void wrap_label(Register r, Label &L, jal_jalr_insn insn);
  void wrap_label(Register r1, Register r2, Label &L,
                  compare_and_branch_insn insn,
                  compare_and_branch_label_insn neg_insn, bool is_far = false);

  // la will use movptr instead of GOT when the target is not reachable via auipc.
  void la(Register Rd, Label &label);
  void la(Register Rd, const address addr);
  void la(Register Rd, const address addr, int32_t &offset);
  void la(Register Rd, const Address &adr);

  void li16u(Register Rd, uint16_t imm);
  void li32(Register Rd, int32_t imm);
  void li (Register Rd, int64_t imm);  // optimized load immediate

  // mv
  void mv(Register Rd, address addr) { li(Rd, (int64_t)addr); }
  void mv(Register Rd, address addr, int32_t &offset) {
    // Split the address into a lower 12-bit sign-extended offset and the remainder,
    // so that the offset can be encoded in a jalr or load/store instruction.
    offset = ((int32_t)(int64_t)addr << 20) >> 20;
    li(Rd, (int64_t)addr - offset);
  }

  template<typename T, ENABLE_IF(std::is_integral<T>::value)>
  inline void mv(Register Rd, T o) { li(Rd, (int64_t)o); }

  void mv(Register Rd, RegisterOrConstant src) {
    if (src.is_register()) {
      mv(Rd, src.as_register());
    } else {
      mv(Rd, src.as_constant());
    }
  }

  // Generates a load of a 48-bit constant which can be
  // patched to any 48-bit constant, i.e. address.
  // In the common case, supply an additional temp register
  // to shorten the instruction sequence.
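  // e.g. (sketch; 'target' is an illustrative address): materialize a patchable
  // pointer and fold its low 12 bits into a following load:
  //
  //   int32_t offset = 0;
  //   __ movptr(t0, target, offset, t1);
  //   __ ld(t0, Address(t0, offset));
  //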
847 void movptr(Register Rd, address addr, Register tmp = noreg); 848 void movptr(Register Rd, address addr, int32_t &offset, Register tmp = noreg); 849 private: 850 void movptr1(Register Rd, uintptr_t addr, int32_t &offset); 851 void movptr2(Register Rd, uintptr_t addr, int32_t &offset, Register tmp); 852 public: 853 854 // arith 855 void add (Register Rd, Register Rn, int64_t increment, Register temp = t0); 856 void addw(Register Rd, Register Rn, int32_t increment, Register temp = t0); 857 void sub (Register Rd, Register Rn, int64_t decrement, Register temp = t0); 858 void subw(Register Rd, Register Rn, int32_t decrement, Register temp = t0); 859 860 #define INSN(NAME) \ 861 inline void NAME(Register Rd, Register Rs1, Register Rs2) { \ 862 Assembler::NAME(Rd, Rs1, Rs2); \ 863 } 864 865 INSN(add); 866 INSN(addw); 867 INSN(sub); 868 INSN(subw); 869 870 #undef INSN 871 872 // logic 873 void andrw(Register Rd, Register Rs1, Register Rs2); 874 void orrw(Register Rd, Register Rs1, Register Rs2); 875 void xorrw(Register Rd, Register Rs1, Register Rs2); 876 877 // logic with negate 878 void andn(Register Rd, Register Rs1, Register Rs2); 879 void orn(Register Rd, Register Rs1, Register Rs2); 880 881 // revb 882 void revb_h_h(Register Rd, Register Rs, Register tmp = t0); // reverse bytes in halfword in lower 16 bits, sign-extend 883 void revb_w_w(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in lower word, sign-extend 884 void revb_h_h_u(Register Rd, Register Rs, Register tmp = t0); // reverse bytes in halfword in lower 16 bits, zero-extend 885 void revb_h_w_u(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in halfwords in lower 32 bits, zero-extend 886 void revb_h_helper(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in upper 16 bits (48:63) and move to lower 887 void revb_h(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in each halfword 888 void revb_w(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2= t1); // reverse bytes in each word 889 void revb(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in doubleword 890 891 void ror_imm(Register dst, Register src, uint32_t shift, Register tmp = t0); 892 void rolw_imm(Register dst, Register src, uint32_t, Register tmp = t0); 893 void andi(Register Rd, Register Rn, int64_t imm, Register tmp = t0); 894 void orptr(Address adr, RegisterOrConstant src, Register tmp1 = t0, Register tmp2 = t1); 895 896 // Load and Store Instructions 897 #define INSN_ENTRY_RELOC(result_type, header) \ 898 result_type header { \ 899 guarantee(rtype == relocInfo::internal_word_type, \ 900 "only internal_word_type relocs make sense here"); \ 901 relocate(InternalAddress(dest).rspec()); \ 902 IncompressibleRegion ir(this); /* relocations */ 903 904 #define INSN(NAME) \ 905 void NAME(Register Rd, address dest) { \ 906 assert_cond(dest != nullptr); \ 907 int64_t distance = dest - pc(); \ 908 if (is_valid_32bit_offset(distance)) { \ 909 auipc(Rd, (int32_t)distance + 0x800); \ 910 Assembler::NAME(Rd, Rd, ((int32_t)distance << 20) >> 20); \ 911 } else { \ 912 int32_t offset = 0; \ 913 movptr(Rd, dest, offset); \ 914 Assembler::NAME(Rd, Rd, offset); \ 915 } \ 916 } \ 917 INSN_ENTRY_RELOC(void, NAME(Register Rd, address dest, relocInfo::relocType rtype)) \ 918 NAME(Rd, dest); \ 919 } \ 920 void NAME(Register Rd, const Address &adr, Register temp = t0) { \ 921 switch (adr.getMode()) { \ 922 case 
Address::literal: { \ 923 relocate(adr.rspec(), [&] { \ 924 NAME(Rd, adr.target()); \ 925 }); \ 926 break; \ 927 } \ 928 case Address::base_plus_offset: { \ 929 if (is_simm12(adr.offset())) { \ 930 Assembler::NAME(Rd, adr.base(), adr.offset()); \ 931 } else { \ 932 int32_t offset = ((int32_t)adr.offset() << 20) >> 20; \ 933 if (Rd == adr.base()) { \ 934 la(temp, Address(adr.base(), adr.offset() - offset)); \ 935 Assembler::NAME(Rd, temp, offset); \ 936 } else { \ 937 la(Rd, Address(adr.base(), adr.offset() - offset)); \ 938 Assembler::NAME(Rd, Rd, offset); \ 939 } \ 940 } \ 941 break; \ 942 } \ 943 default: \ 944 ShouldNotReachHere(); \ 945 } \ 946 } \ 947 void NAME(Register Rd, Label &L) { \ 948 wrap_label(Rd, L, &MacroAssembler::NAME); \ 949 } 950 951 INSN(lb); 952 INSN(lbu); 953 INSN(lh); 954 INSN(lhu); 955 INSN(lw); 956 INSN(lwu); 957 INSN(ld); 958 959 #undef INSN 960 961 #define INSN(NAME) \ 962 void NAME(FloatRegister Rd, address dest, Register temp = t0) { \ 963 assert_cond(dest != nullptr); \ 964 int64_t distance = dest - pc(); \ 965 if (is_valid_32bit_offset(distance)) { \ 966 auipc(temp, (int32_t)distance + 0x800); \ 967 Assembler::NAME(Rd, temp, ((int32_t)distance << 20) >> 20); \ 968 } else { \ 969 int32_t offset = 0; \ 970 movptr(temp, dest, offset); \ 971 Assembler::NAME(Rd, temp, offset); \ 972 } \ 973 } \ 974 INSN_ENTRY_RELOC(void, NAME(FloatRegister Rd, address dest, \ 975 relocInfo::relocType rtype, Register temp = t0)) \ 976 NAME(Rd, dest, temp); \ 977 } \ 978 void NAME(FloatRegister Rd, const Address &adr, Register temp = t0) { \ 979 switch (adr.getMode()) { \ 980 case Address::literal: { \ 981 relocate(adr.rspec(), [&] { \ 982 NAME(Rd, adr.target(), temp); \ 983 }); \ 984 break; \ 985 } \ 986 case Address::base_plus_offset: { \ 987 if (is_simm12(adr.offset())) { \ 988 Assembler::NAME(Rd, adr.base(), adr.offset()); \ 989 } else { \ 990 int32_t offset = ((int32_t)adr.offset() << 20) >> 20; \ 991 la(temp, Address(adr.base(), adr.offset() - offset)); \ 992 Assembler::NAME(Rd, temp, offset); \ 993 } \ 994 break; \ 995 } \ 996 default: \ 997 ShouldNotReachHere(); \ 998 } \ 999 } 1000 1001 INSN(flw); 1002 INSN(fld); 1003 1004 #undef INSN 1005 1006 #define INSN(NAME, REGISTER) \ 1007 INSN_ENTRY_RELOC(void, NAME(REGISTER Rs, address dest, \ 1008 relocInfo::relocType rtype, Register temp = t0)) \ 1009 NAME(Rs, dest, temp); \ 1010 } 1011 1012 INSN(sb, Register); 1013 INSN(sh, Register); 1014 INSN(sw, Register); 1015 INSN(sd, Register); 1016 INSN(fsw, FloatRegister); 1017 INSN(fsd, FloatRegister); 1018 1019 #undef INSN 1020 1021 #define INSN(NAME) \ 1022 void NAME(Register Rs, address dest, Register temp = t0) { \ 1023 assert_cond(dest != nullptr); \ 1024 assert_different_registers(Rs, temp); \ 1025 int64_t distance = dest - pc(); \ 1026 if (is_valid_32bit_offset(distance)) { \ 1027 auipc(temp, (int32_t)distance + 0x800); \ 1028 Assembler::NAME(Rs, temp, ((int32_t)distance << 20) >> 20); \ 1029 } else { \ 1030 int32_t offset = 0; \ 1031 movptr(temp, dest, offset); \ 1032 Assembler::NAME(Rs, temp, offset); \ 1033 } \ 1034 } \ 1035 void NAME(Register Rs, const Address &adr, Register temp = t0) { \ 1036 switch (adr.getMode()) { \ 1037 case Address::literal: { \ 1038 assert_different_registers(Rs, temp); \ 1039 relocate(adr.rspec(), [&] { \ 1040 NAME(Rs, adr.target(), temp); \ 1041 }); \ 1042 break; \ 1043 } \ 1044 case Address::base_plus_offset: { \ 1045 if (is_simm12(adr.offset())) { \ 1046 Assembler::NAME(Rs, adr.base(), adr.offset()); \ 1047 } else { \ 1048 
assert_different_registers(Rs, temp); \ 1049 int32_t offset = ((int32_t)adr.offset() << 20) >> 20; \ 1050 la(temp, Address(adr.base(), adr.offset() - offset)); \ 1051 Assembler::NAME(Rs, temp, offset); \ 1052 } \ 1053 break; \ 1054 } \ 1055 default: \ 1056 ShouldNotReachHere(); \ 1057 } \ 1058 } 1059 1060 INSN(sb); 1061 INSN(sh); 1062 INSN(sw); 1063 INSN(sd); 1064 1065 #undef INSN 1066 1067 #define INSN(NAME) \ 1068 void NAME(FloatRegister Rs, address dest, Register temp = t0) { \ 1069 assert_cond(dest != nullptr); \ 1070 int64_t distance = dest - pc(); \ 1071 if (is_valid_32bit_offset(distance)) { \ 1072 auipc(temp, (int32_t)distance + 0x800); \ 1073 Assembler::NAME(Rs, temp, ((int32_t)distance << 20) >> 20); \ 1074 } else { \ 1075 int32_t offset = 0; \ 1076 movptr(temp, dest, offset); \ 1077 Assembler::NAME(Rs, temp, offset); \ 1078 } \ 1079 } \ 1080 void NAME(FloatRegister Rs, const Address &adr, Register temp = t0) { \ 1081 switch (adr.getMode()) { \ 1082 case Address::literal: { \ 1083 relocate(adr.rspec(), [&] { \ 1084 NAME(Rs, adr.target(), temp); \ 1085 }); \ 1086 break; \ 1087 } \ 1088 case Address::base_plus_offset: { \ 1089 if (is_simm12(adr.offset())) { \ 1090 Assembler::NAME(Rs, adr.base(), adr.offset()); \ 1091 } else { \ 1092 int32_t offset = ((int32_t)adr.offset() << 20) >> 20; \ 1093 la(temp, Address(adr.base(), adr.offset() - offset)); \ 1094 Assembler::NAME(Rs, temp, offset); \ 1095 } \ 1096 break; \ 1097 } \ 1098 default: \ 1099 ShouldNotReachHere(); \ 1100 } \ 1101 } 1102 1103 INSN(fsw); 1104 INSN(fsd); 1105 1106 #undef INSN 1107 1108 #undef INSN_ENTRY_RELOC 1109 1110 void cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp, Label &succeed, Label *fail); 1111 void cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, Label &succeed, Label *fail); 1112 void cmpxchg(Register addr, Register expected, 1113 Register new_val, 1114 enum operand_size size, 1115 Assembler::Aqrl acquire, Assembler::Aqrl release, 1116 Register result, bool result_as_bool = false); 1117 void cmpxchg_weak(Register addr, Register expected, 1118 Register new_val, 1119 enum operand_size size, 1120 Assembler::Aqrl acquire, Assembler::Aqrl release, 1121 Register result); 1122 void cmpxchg_narrow_value_helper(Register addr, Register expected, 1123 Register new_val, 1124 enum operand_size size, 1125 Register tmp1, Register tmp2, Register tmp3); 1126 void cmpxchg_narrow_value(Register addr, Register expected, 1127 Register new_val, 1128 enum operand_size size, 1129 Assembler::Aqrl acquire, Assembler::Aqrl release, 1130 Register result, bool result_as_bool, 1131 Register tmp1, Register tmp2, Register tmp3); 1132 void weak_cmpxchg_narrow_value(Register addr, Register expected, 1133 Register new_val, 1134 enum operand_size size, 1135 Assembler::Aqrl acquire, Assembler::Aqrl release, 1136 Register result, 1137 Register tmp1, Register tmp2, Register tmp3); 1138 1139 void atomic_add(Register prev, RegisterOrConstant incr, Register addr); 1140 void atomic_addw(Register prev, RegisterOrConstant incr, Register addr); 1141 void atomic_addal(Register prev, RegisterOrConstant incr, Register addr); 1142 void atomic_addalw(Register prev, RegisterOrConstant incr, Register addr); 1143 1144 void atomic_xchg(Register prev, Register newv, Register addr); 1145 void atomic_xchgw(Register prev, Register newv, Register addr); 1146 void atomic_xchgal(Register prev, Register newv, Register addr); 1147 void atomic_xchgalw(Register prev, Register newv, Register addr); 1148 void 
atomic_xchgwu(Register prev, Register newv, Register addr); 1149 void atomic_xchgalwu(Register prev, Register newv, Register addr); 1150 1151 void atomic_cas(Register prev, Register newv, Register addr); 1152 void atomic_casw(Register prev, Register newv, Register addr); 1153 void atomic_casl(Register prev, Register newv, Register addr); 1154 void atomic_caslw(Register prev, Register newv, Register addr); 1155 void atomic_casal(Register prev, Register newv, Register addr); 1156 void atomic_casalw(Register prev, Register newv, Register addr); 1157 void atomic_caswu(Register prev, Register newv, Register addr); 1158 void atomic_caslwu(Register prev, Register newv, Register addr); 1159 void atomic_casalwu(Register prev, Register newv, Register addr); 1160 1161 void atomic_cas(Register prev, Register newv, Register addr, enum operand_size size, 1162 Assembler::Aqrl acquire = Assembler::relaxed, Assembler::Aqrl release = Assembler::relaxed); 1163 1164 // Emit a far call/jump. Only invalidates the tmp register which 1165 // is used to keep the entry address for jalr. 1166 // The address must be inside the code cache. 1167 // Supported entry.rspec(): 1168 // - relocInfo::external_word_type 1169 // - relocInfo::runtime_call_type 1170 // - relocInfo::none 1171 void far_call(const Address &entry, Register tmp = t0); 1172 void far_jump(const Address &entry, Register tmp = t0); 1173 1174 static int far_branch_size() { 1175 return 2 * 4; // auipc + jalr, see far_call() & far_jump() 1176 } 1177 1178 void load_byte_map_base(Register reg); 1179 1180 void bang_stack_with_offset(int offset) { 1181 // stack grows down, caller passes positive offset 1182 assert(offset > 0, "must bang with negative offset"); 1183 sub(t0, sp, offset); 1184 sd(zr, Address(t0)); 1185 } 1186 1187 virtual void _call_Unimplemented(address call_site) { 1188 mv(t1, call_site); 1189 } 1190 1191 #define call_Unimplemented() _call_Unimplemented((address)__PRETTY_FUNCTION__) 1192 1193 // Frame creation and destruction shared between JITs. 1194 void build_frame(int framesize); 1195 void remove_frame(int framesize); 1196 1197 void reserved_stack_check(); 1198 1199 void get_polling_page(Register dest, relocInfo::relocType rtype); 1200 void read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype); 1201 1202 // RISCV64 OpenJDK uses four different types of calls: 1203 // - direct call: jal pc_relative_offset 1204 // This is the shortest and the fastest, but the offset has the range: +/-1MB. 1205 // 1206 // - far call: auipc reg, pc_relative_offset; jalr ra, reg, offset 1207 // This is longer than a direct call. The offset has 1208 // the range [-(2G + 2K), 2G - 2K). Addresses out of the range in the code cache 1209 // requires indirect call. 1210 // If a jump is needed rather than a call, a far jump 'jalr x0, reg, offset' can 1211 // be used instead. 1212 // All instructions are embedded at a call site. 1213 // 1214 // - indirect call: movptr + jalr 1215 // This too can reach anywhere in the address space, but it cannot be 1216 // patched while code is running, so it must only be modified at a safepoint. 1217 // This form of call is most suitable for targets at fixed addresses, which 1218 // will never be patched. 1219 // 1220 // - reloc call: 1221 // This is only available in C1/C2-generated code (nmethod). 
1222 // 1223 // [Main code section] 1224 // auipc 1225 // ld <address_from_stub_section> 1226 // jalr 1227 // [Stub section] 1228 // trampoline: 1229 // <64-bit destination address> 1230 // 1231 // To change the destination we simply atomically store the new 1232 // address in the stub section. 1233 // 1234 // - trampoline call (old reloc call / -XX:+UseTrampolines): 1235 // This is only available in C1/C2-generated code (nmethod). It is a combination 1236 // of a direct call, which is used if the destination of a call is in range, 1237 // and a register-indirect call. It has the advantages of reaching anywhere in 1238 // the RISCV address space and being patchable at runtime when the generated 1239 // code is being executed by other threads. 1240 // 1241 // [Main code section] 1242 // jal trampoline 1243 // [Stub code section] 1244 // trampoline: 1245 // ld reg, pc + 8 (auipc + ld) 1246 // jr reg 1247 // <64-bit destination address> 1248 // 1249 // If the destination is in range when the generated code is moved to the code 1250 // cache, 'jal trampoline' is replaced with 'jal destination' and the trampoline 1251 // is not used. 1252 // The optimization does not remove the trampoline from the stub section. 1253 // 1254 // This is necessary because the trampoline may well be redirected later when 1255 // code is patched, and the new destination may not be reachable by a simple JAL 1256 // instruction. 1257 // 1258 // To patch a trampoline call when the JAL can't reach, we first modify 1259 // the 64-bit destination address in the trampoline, then modify the 1260 // JAL to point to the trampoline, then flush the instruction cache to 1261 // broadcast the change to all executing threads. See 1262 // NativeCall::set_destination_mt_safe for the details. 1263 // 1264 // There is a benign race in that the other thread might observe the 1265 // modified JAL before it observes the modified 64-bit destination 1266 // address. That does not matter because the destination method has been 1267 // invalidated, so there will be a trap at its start. 1268 // For this to work, the destination address in the trampoline is 1269 // always updated, even if we're not using the trampoline. 1270 // -- 1271 1272 // Emit a direct call if the entry address will always be in range, 1273 // otherwise a reloc call. 1274 // Supported entry.rspec(): 1275 // - relocInfo::runtime_call_type 1276 // - relocInfo::opt_virtual_call_type 1277 // - relocInfo::static_call_type 1278 // - relocInfo::virtual_call_type 1279 // 1280 // Return: the call PC or null if CodeCache is full. 1281 address reloc_call(Address entry) { 1282 return UseTrampolines ? trampoline_call(entry) : load_and_call(entry); 1283 } 1284 private: 1285 address trampoline_call(Address entry); 1286 address load_and_call(Address entry); 1287 public: 1288 1289 address ic_call(address entry, jint method_index = 0); 1290 static int ic_check_size(); 1291 int ic_check(int end_alignment = MacroAssembler::instruction_size); 1292 1293 // Support for memory inc/dec 1294 // n.b. increment/decrement calls with an Address destination will 1295 // need to use a scratch register to load the value to be 1296 // incremented. increment/decrement calls which add or subtract a 1297 // constant value other than sign-extended 12-bit immediate will need 1298 // to use a 2nd scratch register to hold the constant. so, an address 1299 // increment/decrement may trash both t0 and t1. 
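  // e.g. (sketch; 'counter_off' is an illustrative offset): add a constant that
  // does not fit in a simm12 to a word in memory, possibly using both t0 and t1:
  //
  //   __ incrementw(Address(x7, counter_off), 1 << 12);
  //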
1300 1301 void increment(const Address dst, int64_t value = 1, Register tmp1 = t0, Register tmp2 = t1); 1302 void incrementw(const Address dst, int32_t value = 1, Register tmp1 = t0, Register tmp2 = t1); 1303 1304 void decrement(const Address dst, int64_t value = 1, Register tmp1 = t0, Register tmp2 = t1); 1305 void decrementw(const Address dst, int32_t value = 1, Register tmp1 = t0, Register tmp2 = t1); 1306 1307 void cmpptr(Register src1, Address src2, Label& equal); 1308 1309 void clinit_barrier(Register klass, Register tmp, Label* L_fast_path = nullptr, Label* L_slow_path = nullptr); 1310 void load_method_holder_cld(Register result, Register method); 1311 void load_method_holder(Register holder, Register method); 1312 1313 void compute_index(Register str1, Register trailing_zeros, Register match_mask, 1314 Register result, Register char_tmp, Register tmp, 1315 bool haystack_isL); 1316 void compute_match_mask(Register src, Register pattern, Register match_mask, 1317 Register mask1, Register mask2); 1318 1319 // CRC32 code for java.util.zip.CRC32::updateBytes() intrinsic. 1320 void kernel_crc32(Register crc, Register buf, Register len, 1321 Register table0, Register table1, Register table2, Register table3, 1322 Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5, Register tmp6); 1323 void update_word_crc32(Register crc, Register v, Register tmp1, Register tmp2, Register tmp3, 1324 Register table0, Register table1, Register table2, Register table3, 1325 bool upper); 1326 void update_byte_crc32(Register crc, Register val, Register table); 1327 1328 #ifdef COMPILER2 1329 void vector_update_crc32(Register crc, Register buf, Register len, 1330 Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register tmp5, 1331 Register table0, Register table3); 1332 1333 void mul_add(Register out, Register in, Register offset, 1334 Register len, Register k, Register tmp); 1335 void wide_mul(Register prod_lo, Register prod_hi, Register n, Register m); 1336 void wide_madd(Register sum_lo, Register sum_hi, Register n, 1337 Register m, Register tmp1, Register tmp2); 1338 void cad(Register dst, Register src1, Register src2, Register carry); 1339 void cadc(Register dst, Register src1, Register src2, Register carry); 1340 void adc(Register dst, Register src1, Register src2, Register carry); 1341 void add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo, 1342 Register src1, Register src2, Register carry); 1343 void multiply_32_x_32_loop(Register x, Register xstart, Register x_xstart, 1344 Register y, Register y_idx, Register z, 1345 Register carry, Register product, 1346 Register idx, Register kdx); 1347 void multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart, 1348 Register y, Register y_idx, Register z, 1349 Register carry, Register product, 1350 Register idx, Register kdx); 1351 void multiply_128_x_128_loop(Register y, Register z, 1352 Register carry, Register carry2, 1353 Register idx, Register jdx, 1354 Register yz_idx1, Register yz_idx2, 1355 Register tmp, Register tmp3, Register tmp4, 1356 Register tmp6, Register product_hi); 1357 void multiply_to_len(Register x, Register xlen, Register y, Register ylen, 1358 Register z, Register tmp0, 1359 Register tmp1, Register tmp2, Register tmp3, Register tmp4, 1360 Register tmp5, Register tmp6, Register product_hi); 1361 1362 #endif // COMPILER2 1363 1364 void inflate_lo32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); 1365 void inflate_hi32(Register Rd, Register Rs, Register 
                   tmp1 = t0, Register tmp2 = t1);

  void ctzc_bit(Register Rd, Register Rs, bool isLL = false, Register tmp1 = t0, Register tmp2 = t1);

  void zero_words(Register base, uint64_t cnt);
  address zero_words(Register ptr, Register cnt);
  void fill_words(Register base, Register cnt, Register value);
  void zero_memory(Register addr, Register len, Register tmp);
  void zero_dcache_blocks(Register base, Register cnt, Register tmp1, Register tmp2);

  // shift left by shamt and add
  void shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt);

  // test single bit in Rs, result is set to Rd
  void test_bit(Register Rd, Register Rs, uint32_t bit_pos);

  // The '_safe' variants of the float conversion instructions below deal with
  // special inputs: e.g. converting NaN, +Inf or -Inf to int or long will
  // trigger an exception, so we handle these situations explicitly to get
  // correct results.
  void fcvt_w_s_safe(Register dst, FloatRegister src, Register tmp = t0);
  void fcvt_l_s_safe(Register dst, FloatRegister src, Register tmp = t0);
  void fcvt_w_d_safe(Register dst, FloatRegister src, Register tmp = t0);
  void fcvt_l_d_safe(Register dst, FloatRegister src, Register tmp = t0);

  void java_round_float(Register dst, FloatRegister src, FloatRegister ftmp);
  void java_round_double(Register dst, FloatRegister src, FloatRegister ftmp);

  // vector load/store unit-stride instructions
  void vlex_v(VectorRegister vd, Register base, Assembler::SEW sew, VectorMask vm = unmasked) {
    switch (sew) {
      case Assembler::e64:
        vle64_v(vd, base, vm);
        break;
      case Assembler::e32:
        vle32_v(vd, base, vm);
        break;
      case Assembler::e16:
        vle16_v(vd, base, vm);
        break;
      case Assembler::e8: // fall through
      default:
        vle8_v(vd, base, vm);
        break;
    }
  }

  void vsex_v(VectorRegister store_data, Register base, Assembler::SEW sew, VectorMask vm = unmasked) {
    switch (sew) {
      case Assembler::e64:
        vse64_v(store_data, base, vm);
        break;
      case Assembler::e32:
        vse32_v(store_data, base, vm);
        break;
      case Assembler::e16:
        vse16_v(store_data, base, vm);
        break;
      case Assembler::e8: // fall through
      default:
        vse8_v(store_data, base, vm);
        break;
    }
  }

  // vector pseudo instructions
  // rotate vector register left by shift bits, 32-bit version
  inline void vrole32_vi(VectorRegister vd, uint32_t shift, VectorRegister tmp_vr) {
    vsrl_vi(tmp_vr, vd, 32 - shift);
    vsll_vi(vd, vd, shift);
    vor_vv(vd, vd, tmp_vr);
  }

  inline void vl1r_v(VectorRegister vd, Register rs) {
    vl1re8_v(vd, rs);
  }

  inline void vmnot_m(VectorRegister vd, VectorRegister vs) {
    vmnand_mm(vd, vs, vs);
  }

  inline void vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked) {
    vnsrl_wx(vd, vs, x0, vm);
  }

  inline void vneg_v(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked) {
    vrsub_vx(vd, vs, x0, vm);
  }

  inline void vfneg_v(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked) {
    vfsgnjn_vv(vd, vs, vs, vm);
  }

  inline void vfabs_v(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked) {
    vfsgnjx_vv(vd, vs, vs, vm);
  }

  inline void vmsgt_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
  inline void vmsgt_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
    vmslt_vv(vd, vs1, vs2, vm);
  }

  inline void vmsgtu_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
    vmsltu_vv(vd, vs1, vs2, vm);
  }

  inline void vmsge_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
    vmsle_vv(vd, vs1, vs2, vm);
  }

  inline void vmsgeu_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
    vmsleu_vv(vd, vs1, vs2, vm);
  }

  inline void vmfgt_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
    vmflt_vv(vd, vs1, vs2, vm);
  }

  inline void vmfge_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
    vmfle_vv(vd, vs1, vs2, vm);
  }

  inline void vmsltu_vi(VectorRegister Vd, VectorRegister Vs2, uint32_t imm, VectorMask vm = unmasked) {
    guarantee(imm >= 1 && imm <= 16, "imm is invalid");
    vmsleu_vi(Vd, Vs2, imm - 1, vm);
  }

  inline void vmsgeu_vi(VectorRegister Vd, VectorRegister Vs2, uint32_t imm, VectorMask vm = unmasked) {
    guarantee(imm >= 1 && imm <= 16, "imm is invalid");
    vmsgtu_vi(Vd, Vs2, imm - 1, vm);
  }

  // Copy mask register
  inline void vmmv_m(VectorRegister vd, VectorRegister vs) {
    vmand_mm(vd, vs, vs);
  }

  // Clear mask register
  inline void vmclr_m(VectorRegister vd) {
    vmxor_mm(vd, vd, vd);
  }

  // Set mask register
  inline void vmset_m(VectorRegister vd) {
    vmxnor_mm(vd, vd, vd);
  }

  inline void vnot_v(VectorRegister Vd, VectorRegister Vs, VectorMask vm = unmasked) {
    vxor_vi(Vd, Vs, -1, vm);
  }

  static const int zero_words_block_size;

  void cast_primitive_type(BasicType type, Register Rt) {
    switch (type) {
      case T_BOOLEAN:
        sltu(Rt, zr, Rt);
        break;
      case T_CHAR   :
        zero_extend(Rt, Rt, 16);
        break;
      case T_BYTE   :
        sign_extend(Rt, Rt, 8);
        break;
      case T_SHORT  :
        sign_extend(Rt, Rt, 16);
        break;
      case T_INT    :
        sign_extend(Rt, Rt, 32);
        break;
      case T_LONG   : /* nothing to do */ break;
      case T_VOID   : /* nothing to do */ break;
      case T_FLOAT  : /* nothing to do */ break;
      case T_DOUBLE : /* nothing to do */ break;
      default: ShouldNotReachHere();
    }
  }

  // float cmp with unordered_result
  void float_compare(Register result, FloatRegister Rs1, FloatRegister Rs2, int unordered_result);
  void double_compare(Register result, FloatRegister Rs1, FloatRegister Rs2, int unordered_result);

  // Zero/Sign-extend
  void zero_extend(Register dst, Register src, int bits);
  void sign_extend(Register dst, Register src, int bits);
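  // For example, sign_extend(dst, src, 32) leaves the sign-extended low 32 bits
  // of src in dst (on RV64 typically a single addiw/sext.w), while
  // zero_extend(dst, src, 16) clears everything above bit 15.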
private:
  void cmp_x2i(Register dst, Register src1, Register src2, Register tmp, bool is_signed = true);

public:
  // compare src1 and src2 and get -1/0/1 in dst.
  // if [src1 > src2], dst = 1;
  // if [src1 == src2], dst = 0;
  // if [src1 < src2], dst = -1;
  void cmp_l2i(Register dst, Register src1, Register src2, Register tmp = t0);
  void cmp_ul2i(Register dst, Register src1, Register src2, Register tmp = t0);
  void cmp_uw2i(Register dst, Register src1, Register src2, Register tmp = t0);

  // support for argument shuffling
  void move32_64(VMRegPair src, VMRegPair dst, Register tmp = t0);
  void float_move(VMRegPair src, VMRegPair dst, Register tmp = t0);
  void long_move(VMRegPair src, VMRegPair dst, Register tmp = t0);
  void double_move(VMRegPair src, VMRegPair dst, Register tmp = t0);
  void object_move(OopMap* map,
                   int oop_handle_offset,
                   int framesize_in_slots,
                   VMRegPair src,
                   VMRegPair dst,
                   bool is_receiver,
                   int* receiver_offset);

#ifdef ASSERT
  // Template short-hand support to clean up after a failed call to trampoline
  // call generation (see trampoline_call() below), when a set of Labels must
  // be reset (before returning).
  template<typename Label, typename... More>
  void reset_labels(Label& lbl, More&... more) {
    lbl.reset(); reset_labels(more...);
  }
  template<typename Label>
  void reset_labels(Label& lbl) {
    lbl.reset();
  }
#endif

private:

  void repne_scan(Register addr, Register value, Register count, Register tmp);

  void ld_constant(Register dest, const Address &const_addr) {
    if (NearCpool) {
      ld(dest, const_addr);
    } else {
      InternalAddress target(const_addr.target());
      relocate(target.rspec(), [&] {
        int32_t offset;
        la(dest, target.target(), offset);
        ld(dest, Address(dest, offset));
      });
    }
  }

  int bitset_to_regs(unsigned int bitset, unsigned char* regs);
  Address add_memory_helper(const Address dst, Register tmp);

  void load_reserved(Register dst, Register addr, enum operand_size size, Assembler::Aqrl acquire);
  void store_conditional(Register dst, Register new_val, Register addr, enum operand_size size, Assembler::Aqrl release);

public:
  void lightweight_lock(Register basic_lock, Register obj, Register tmp1, Register tmp2, Register tmp3, Label& slow);
  void lightweight_unlock(Register obj, Register tmp1, Register tmp2, Register tmp3, Label& slow);
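  // Both helpers above emit only the inline (uncontended) locking path and
  // branch to 'slow' whenever the lock cannot be acquired or released there;
  // the caller is expected to provide the fall-back code at that label.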
public:
  enum {
    // movptr
    movptr1_instruction_size = 6 * instruction_size, // lui, addi, slli, addi, slli, addi. See movptr1().
    movptr2_instruction_size = 5 * instruction_size, // lui, lui, slli, add, addi. See movptr2().
    load_pc_relative_instruction_size = 2 * instruction_size // auipc, ld
  };

  enum NativeShortCall {
    trampoline_size        = 3 * instruction_size + wordSize,
    trampoline_data_offset = 3 * instruction_size
  };

  static bool is_load_pc_relative_at(address branch);
  static bool is_li16u_at(address instr);

  static bool is_jal_at(address instr)        { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b1101111; }
  static bool is_jalr_at(address instr)       { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b1100111 && extract_funct3(instr) == 0b000; }
  static bool is_branch_at(address instr)     { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b1100011; }
  static bool is_ld_at(address instr)         { assert_cond(instr != nullptr); return is_load_at(instr) && extract_funct3(instr) == 0b011; }
  static bool is_load_at(address instr)       { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b0000011; }
  static bool is_float_load_at(address instr) { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b0000111; }
  static bool is_auipc_at(address instr)      { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b0010111; }
  static bool is_jump_at(address instr)       { assert_cond(instr != nullptr); return is_branch_at(instr) || is_jal_at(instr) || is_jalr_at(instr); }
  static bool is_add_at(address instr)        { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b0110011 && extract_funct3(instr) == 0b000; }
  static bool is_addi_at(address instr)       { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b0010011 && extract_funct3(instr) == 0b000; }
  static bool is_addiw_at(address instr)      { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b0011011 && extract_funct3(instr) == 0b000; }
  static bool is_addiw_to_zr_at(address instr){ assert_cond(instr != nullptr); return is_addiw_at(instr) && extract_rd(instr) == zr; }
  static bool is_lui_at(address instr)        { assert_cond(instr != nullptr); return extract_opcode(instr) == 0b0110111; }
  static bool is_lui_to_zr_at(address instr)  { assert_cond(instr != nullptr); return is_lui_at(instr) && extract_rd(instr) == zr; }

  static bool is_srli_at(address instr) {
    assert_cond(instr != nullptr);
    return extract_opcode(instr) == 0b0010011 &&
           extract_funct3(instr) == 0b101 &&
           Assembler::extract(((unsigned*)instr)[0], 31, 26) == 0b000000;
  }

  static bool is_slli_shift_at(address instr, uint32_t shift) {
    assert_cond(instr != nullptr);
    return (extract_opcode(instr) == 0b0010011 && // opcode field
            extract_funct3(instr) == 0b001 &&     // funct3 field, select the type of operation
            Assembler::extract(Assembler::ld_instr(instr), 25, 20) == shift); // shamt field
  }

  static bool is_movptr1_at(address instr);
  static bool is_movptr2_at(address instr);

  static bool is_lwu_to_zr(address instr);

  static Register extract_rs1(address instr);
  static Register extract_rs2(address instr);
  static Register extract_rd(address instr);
  static uint32_t extract_opcode(address instr);
  static uint32_t extract_funct3(address instr);
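  // The check_*_data_dependency() helpers below recognize the multi-instruction
  // patterns emitted by this macro assembler by verifying that each instruction
  // in the candidate sequence consumes the destination register of its
  // predecessor (e.g. for movptr1 the first addi must read the register written
  // by the leading lui, and so on down the chain).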
  // the instruction sequence of movptr1 is as below:
  //     lui
  //     addi
  //     slli
  //     addi
  //     slli
  //     addi/jalr/load
  static bool check_movptr1_data_dependency(address instr) {
    address lui = instr;
    address addi1 = lui + instruction_size;
    address slli1 = addi1 + instruction_size;
    address addi2 = slli1 + instruction_size;
    address slli2 = addi2 + instruction_size;
    address last_instr = slli2 + instruction_size;
    return extract_rs1(addi1) == extract_rd(lui) &&
           extract_rs1(addi1) == extract_rd(addi1) &&
           extract_rs1(slli1) == extract_rd(addi1) &&
           extract_rs1(slli1) == extract_rd(slli1) &&
           extract_rs1(addi2) == extract_rd(slli1) &&
           extract_rs1(addi2) == extract_rd(addi2) &&
           extract_rs1(slli2) == extract_rd(addi2) &&
           extract_rs1(slli2) == extract_rd(slli2) &&
           extract_rs1(last_instr) == extract_rd(slli2);
  }

  // the instruction sequence of movptr2 is as below:
  //     lui
  //     lui
  //     slli
  //     add
  //     addi/jalr/load
  static bool check_movptr2_data_dependency(address instr) {
    address lui1 = instr;
    address lui2 = lui1 + instruction_size;
    address slli = lui2 + instruction_size;
    address add  = slli + instruction_size;
    address last_instr = add + instruction_size;
    return extract_rd(add) == extract_rd(lui2) &&
           extract_rs1(add) == extract_rd(lui2) &&
           extract_rs2(add) == extract_rd(slli) &&
           extract_rs1(slli) == extract_rd(lui1) &&
           extract_rd(slli) == extract_rd(lui1) &&
           extract_rs1(last_instr) == extract_rd(add);
  }

  // the instruction sequence of li16u is as below:
  //     lui
  //     srli
  static bool check_li16u_data_dependency(address instr) {
    address lui = instr;
    address srli = lui + instruction_size;

    return extract_rs1(srli) == extract_rd(lui) &&
           extract_rs1(srli) == extract_rd(srli);
  }

  // the instruction sequence of li32 is as below:
  //     lui
  //     addiw
  static bool check_li32_data_dependency(address instr) {
    address lui = instr;
    address addiw = lui + instruction_size;

    return extract_rs1(addiw) == extract_rd(lui) &&
           extract_rs1(addiw) == extract_rd(addiw);
  }

  // the instruction sequence of pc-relative is as below:
  //     auipc
  //     jalr/addi/load/float_load
  static bool check_pc_relative_data_dependency(address instr) {
    address auipc = instr;
    address last_instr = auipc + instruction_size;

    return extract_rs1(last_instr) == extract_rd(auipc);
  }

  // the instruction sequence of load_label is as below:
  //     auipc
  //     load
  static bool check_load_pc_relative_data_dependency(address instr) {
    address auipc = instr;
    address load = auipc + instruction_size;

    return extract_rd(load) == extract_rd(auipc) &&
           extract_rs1(load) == extract_rd(load);
  }

  static bool is_li32_at(address instr);
  static bool is_pc_relative_at(address branch);
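  // is_membar recognizes an already-emitted fence instruction; get/set_membar_kind
  // read and rewrite its predecessor/successor ordering bits in place (presumably
  // so that adjacent barriers can be merged or strengthened without re-emitting
  // code).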
  static bool is_membar(address addr) {
    return (Bytes::get_native_u4(addr) & 0x7f) == 0b1111 && extract_funct3(addr) == 0;
  }
  static uint32_t get_membar_kind(address addr);
  static void set_membar_kind(address addr, uint32_t order_kind);
};

#ifdef ASSERT
inline bool AbstractAssembler::pd_check_instruction_mark() { return false; }
#endif

/**
 * class SkipIfEqual:
 *
 * Instantiating this class emits assembly code that, depending on the run-time
 * value of the flag passed to the constructor, jumps around any code emitted
 * between the creation of the instance and its automatic destruction at the
 * end of the enclosing scope.
 */
class SkipIfEqual {
 private:
  MacroAssembler* _masm;
  Label _label;

 public:
  SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value);
  ~SkipIfEqual();
};

#endif // CPU_RISCV_MACROASSEMBLER_RISCV_HPP