/*
 * Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
 * Copyright (c) 2020, 2023, Huawei Technologies Co., Ltd. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef CPU_RISCV_MACROASSEMBLER_RISCV_HPP
#define CPU_RISCV_MACROASSEMBLER_RISCV_HPP

#include "asm/assembler.inline.hpp"
#include "code/vmreg.hpp"
#include "metaprogramming/enableIf.hpp"
#include "nativeInst_riscv.hpp"
#include "oops/compressedOops.hpp"
#include "utilities/powerOfTwo.hpp"

// MacroAssembler extends Assembler by frequently used macros.
//
// Instructions for which a 'better' code sequence exists depending
// on arguments should also go in here.

class MacroAssembler: public Assembler {

 public:
  MacroAssembler(CodeBuffer* code) : Assembler(code) {}

  void safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod);

  // Alignment
  int align(int modulus, int extra_offset = 0);

  static inline void assert_alignment(address pc, int alignment = NativeInstruction::instruction_size) {
    assert(is_aligned(pc, alignment), "bad alignment");
  }

  // nop
  void post_call_nop();

  // Stack frame creation/removal
  // Note that SP must be updated to the right place before saving/restoring RA and FP
  // because signal based thread suspend/resume could happen asynchronously.
  void enter() {
    addi(sp, sp, - 2 * wordSize);
    sd(ra, Address(sp, wordSize));
    sd(fp, Address(sp));
    addi(fp, sp, 2 * wordSize);
  }

  void leave() {
    addi(sp, fp, - 2 * wordSize);
    ld(fp, Address(sp));
    ld(ra, Address(sp, wordSize));
    addi(sp, sp, 2 * wordSize);
  }


  // Support for getting the JavaThread pointer (i.e., a reference to thread-local information)
  // The pointer will be loaded into the thread register.
  void get_thread(Register thread);

  // Support for VM calls
  //
  // It is imperative that all calls into the VM are handled via the call_VM macros.
  // They make sure that the stack linkage is setup correctly. call_VM's correspond
  // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points.
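  //
  // Illustrative sketch (hypothetical entry point and registers, not part of the
  // declarations below): a typical one-argument VM call from generated code looks like
  //
  //   __ call_VM(x10 /* oop_result */,
  //              CAST_FROM_FN_PTR(address, SomeRuntime::some_entry),
  //              c_rarg1 /* arg_1 */);
  //
  // which sets up the last Java frame, transfers control to the VM entry point and,
  // by default, checks for pending exceptions on return.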

  void call_VM(Register oop_result,
               address entry_point,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               address entry_point,
               Register arg_1,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               address entry_point,
               Register arg_1, Register arg_2,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               address entry_point,
               Register arg_1, Register arg_2, Register arg_3,
               bool check_exceptions = true);

  // Overloadings with last_Java_sp
  void call_VM(Register oop_result,
               Register last_java_sp,
               address entry_point,
               int number_of_arguments = 0,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               Register last_java_sp,
               address entry_point,
               Register arg_1,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               Register last_java_sp,
               address entry_point,
               Register arg_1, Register arg_2,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               Register last_java_sp,
               address entry_point,
               Register arg_1, Register arg_2, Register arg_3,
               bool check_exceptions = true);

  void get_vm_result(Register oop_result, Register java_thread);
  void get_vm_result_2(Register metadata_result, Register java_thread);

  // These always tightly bind to MacroAssembler::call_VM_leaf_base
  // bypassing the virtual implementation
  void call_VM_leaf(address entry_point,
                    int number_of_arguments = 0);
  void call_VM_leaf(address entry_point,
                    Register arg_0);
  void call_VM_leaf(address entry_point,
                    Register arg_0, Register arg_1);
  void call_VM_leaf(address entry_point,
                    Register arg_0, Register arg_1, Register arg_2);

  // These always tightly bind to MacroAssembler::call_VM_base
  // bypassing the virtual implementation
  void super_call_VM_leaf(address entry_point, Register arg_0);
  void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1);
  void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2);
  void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3);

  // last Java Frame (fills frame anchor)
  void set_last_Java_frame(Register last_java_sp, Register last_java_fp, address last_java_pc, Register tmp);
  void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Label &last_java_pc, Register tmp);
  void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Register last_java_pc, Register tmp);

  // thread in the default location (xthread)
  void reset_last_Java_frame(bool clear_fp);

  virtual void call_VM_leaf_base(
    address entry_point,           // the entry point
    int number_of_arguments,       // the number of arguments to pop after the call
    Label* retaddr = nullptr
  );

  virtual void call_VM_leaf_base(
    address entry_point,           // the entry point
    int number_of_arguments,       // the number of arguments to pop after the call
    Label& retaddr) {
    call_VM_leaf_base(entry_point, number_of_arguments, &retaddr);
  }

  virtual void call_VM_base(       // returns the register containing the thread upon return
    Register oop_result,           // where an oop-result ends up if any; use noreg otherwise
    Register java_thread,          // the thread if computed before; use noreg otherwise
    Register last_java_sp,         // to set up last_Java_frame in stubs; use noreg otherwise
    address entry_point,           // the entry point
    int number_of_arguments,       // the number of arguments (w/o thread) to pop after the call
    bool check_exceptions          // whether to check for pending exceptions after return
  );

  void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions);

  virtual void check_and_handle_earlyret(Register java_thread);
  virtual void check_and_handle_popframe(Register java_thread);

  void resolve_weak_handle(Register result, Register tmp1, Register tmp2);
  void resolve_oop_handle(Register result, Register tmp1, Register tmp2);
  void resolve_jobject(Register value, Register tmp1, Register tmp2);
  void resolve_global_jobject(Register value, Register tmp1, Register tmp2);

  void movoop(Register dst, jobject obj);
  void mov_metadata(Register dst, Metadata* obj);
  void bang_stack_size(Register size, Register tmp);
  void set_narrow_oop(Register dst, jobject obj);
  void set_narrow_klass(Register dst, Klass* k);

  void load_mirror(Register dst, Register method, Register tmp1, Register tmp2);
  void access_load_at(BasicType type, DecoratorSet decorators, Register dst,
                      Address src, Register tmp1, Register tmp2);
  void access_store_at(BasicType type, DecoratorSet decorators, Address dst,
                       Register val, Register tmp1, Register tmp2, Register tmp3);
  void load_klass(Register dst, Register src, Register tmp = t0);
  void store_klass(Register dst, Register src, Register tmp = t0);
  void cmp_klass(Register oop, Register trial_klass, Register tmp1, Register tmp2, Label &L);

  void encode_klass_not_null(Register r, Register tmp = t0);
  void decode_klass_not_null(Register r, Register tmp = t0);
  void encode_klass_not_null(Register dst, Register src, Register tmp);
  void decode_klass_not_null(Register dst, Register src, Register tmp);
  void decode_heap_oop_not_null(Register r);
  void decode_heap_oop_not_null(Register dst, Register src);
  void decode_heap_oop(Register d, Register s);
  void decode_heap_oop(Register r) { decode_heap_oop(r, r); }
  void encode_heap_oop(Register d, Register s);
  void encode_heap_oop(Register r) { encode_heap_oop(r, r); };
  void load_heap_oop(Register dst, Address src, Register tmp1,
                     Register tmp2, DecoratorSet decorators = 0);
  void load_heap_oop_not_null(Register dst, Address src, Register tmp1,
                              Register tmp2, DecoratorSet decorators = 0);
  void store_heap_oop(Address dst, Register val, Register tmp1,
                      Register tmp2, Register tmp3, DecoratorSet decorators = 0);

  void store_klass_gap(Register dst, Register src);

  // currently unimplemented
  // Used for storing null. All other oop constants should be
  // stored using routines that take a jobject.
  void store_heap_oop_null(Address dst);

  // This dummy is to prevent a call to store_heap_oop from
  // converting a zero (like null) into a Register by giving
  // the compiler two choices it can't resolve

  void store_heap_oop(Address dst, void* dummy);

  // Support for null-checks
  //
  // Generates code that causes a null OS exception if the content of reg is null.
  // If the accessed location is M[reg + offset] and the offset is known, provide the
  // offset. No explicit code generation is needed if the offset is within a certain
  // range (0 <= offset <= page_size).
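  //
  // For example (a sketch of the intent, not extra emitted code): with a 4K page,
  // null_check(reg, 8) can rely on the implicit fault raised by the subsequent
  // access to M[reg + 8], while null_check(reg) with an unknown offset (-1) must
  // emit an explicit access of M[reg] to provoke the fault.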

  virtual void null_check(Register reg, int offset = -1);
  static bool needs_explicit_null_check(intptr_t offset);
  static bool uses_implicit_null_check(void* address);

  // idiv variant which deals with MINLONG as dividend and -1 as divisor
  int corrected_idivl(Register result, Register rs1, Register rs2,
                      bool want_remainder);
  int corrected_idivq(Register result, Register rs1, Register rs2,
                      bool want_remainder);

  // interface method calling
  void lookup_interface_method(Register recv_klass,
                               Register intf_klass,
                               RegisterOrConstant itable_index,
                               Register method_result,
                               Register scan_tmp,
                               Label& no_such_interface,
                               bool return_method = true);

  // virtual method calling
  // n.b. x86 allows RegisterOrConstant for vtable_index
  void lookup_virtual_method(Register recv_klass,
                             RegisterOrConstant vtable_index,
                             Register method_result);

  // Form an address from base + offset in Rd. Rd may or may not
  // actually be used: you must use the Address that is returned. It
  // is up to you to ensure that the shift provided matches the size
  // of your data.
  Address form_address(Register Rd, Register base, int64_t byte_offset);

  // Sometimes we get misaligned loads and stores, usually from Unsafe
  // accesses, and these can exceed the offset range.
  Address legitimize_address(Register Rd, const Address &adr) {
    if (adr.getMode() == Address::base_plus_offset) {
      if (!is_simm12(adr.offset())) {
        return form_address(Rd, adr.base(), adr.offset());
      }
    }
    return adr;
  }

  // allocation
  void tlab_allocate(
    Register obj,               // result: pointer to object after successful allocation
    Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise
    int con_size_in_bytes,      // object size in bytes if known at compile time
    Register tmp1,              // temp register
    Register tmp2,              // temp register
    Label& slow_case,           // continuation point if fast allocation fails
    bool is_far = false
  );

  // Test sub_klass against super_klass, with fast and slow paths.

  // The fast path produces a tri-state answer: yes / no / maybe-slow.
  // One of the three labels can be null, meaning take the fall-through.
  // If super_check_offset is -1, the value is loaded up from super_klass.
  // No registers are killed, except tmp_reg
  void check_klass_subtype_fast_path(Register sub_klass,
                                     Register super_klass,
                                     Register tmp_reg,
                                     Label* L_success,
                                     Label* L_failure,
                                     Label* L_slow_path,
                                     Register super_check_offset = noreg);

  // The rest of the type check; must be wired to a corresponding fast path.
  // It does not repeat the fast path logic, so don't use it standalone.
  // The tmp1_reg and tmp2_reg can be noreg, if no temps are available.
  // Updates the sub's secondary super cache as necessary.
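  //
  // A typical pairing of the two halves (a sketch; label and register names are
  // illustrative only):
  //
  //   Label L_ok, L_fail;
  //   check_klass_subtype_fast_path(sub, super, tmp, &L_ok, &L_fail, nullptr);
  //   // fall-through: the fast path could not decide
  //   check_klass_subtype_slow_path(sub, super, tmp1, noreg, &L_ok, nullptr);
  //   // fall-through here, and L_fail, both mean failure
  //   bind(L_fail);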
  void check_klass_subtype_slow_path(Register sub_klass,
                                     Register super_klass,
                                     Register tmp1_reg,
                                     Register tmp2_reg,
                                     Label* L_success,
                                     Label* L_failure);

  void check_klass_subtype(Register sub_klass,
                           Register super_klass,
                           Register tmp_reg,
                           Label& L_success);

  Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0);

  // only if +VerifyOops
  void _verify_oop(Register reg, const char* s, const char* file, int line);
  void _verify_oop_addr(Address addr, const char* s, const char* file, int line);

  void _verify_oop_checked(Register reg, const char* s, const char* file, int line) {
    if (VerifyOops) {
      _verify_oop(reg, s, file, line);
    }
  }
  void _verify_oop_addr_checked(Address reg, const char* s, const char* file, int line) {
    if (VerifyOops) {
      _verify_oop_addr(reg, s, file, line);
    }
  }

  void _verify_method_ptr(Register reg, const char* msg, const char* file, int line) {}
  void _verify_klass_ptr(Register reg, const char* msg, const char* file, int line) {}

#define verify_oop(reg) _verify_oop_checked(reg, "broken oop " #reg, __FILE__, __LINE__)
#define verify_oop_msg(reg, msg) _verify_oop_checked(reg, "broken oop " #reg ", " #msg, __FILE__, __LINE__)
#define verify_oop_addr(addr) _verify_oop_addr_checked(addr, "broken oop addr " #addr, __FILE__, __LINE__)
#define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__)
#define verify_klass_ptr(reg) _verify_method_ptr(reg, "broken klass " #reg, __FILE__, __LINE__)

  // A more convenient access to fence for our purposes.
  // We use four bits to indicate the read and write bits of the predecessor and successor,
  // and extend r to ir and w to ow when UseConservativeFence is enabled.
  enum Membar_mask_bits {
    StoreStore = 0b0101,               // (pred = ow   + succ = ow)
    LoadStore  = 0b1001,               // (pred = ir   + succ = ow)
    StoreLoad  = 0b0110,               // (pred = ow   + succ = ir)
    LoadLoad   = 0b1010,               // (pred = ir   + succ = ir)
    AnyAny     = LoadStore | StoreLoad // (pred = iorw + succ = iorw)
  };

  void membar(uint32_t order_constraint);

  static void membar_mask_to_pred_succ(uint32_t order_constraint,
                                       uint32_t& predecessor, uint32_t& successor) {
    predecessor = (order_constraint >> 2) & 0x3;
    successor = order_constraint & 0x3;

    // extend rw -> iorw:
    // 01(w) -> 0101(ow)
    // 10(r) -> 1010(ir)
    // 11(rw)-> 1111(iorw)
    if (UseConservativeFence) {
      predecessor |= predecessor << 2;
      successor |= successor << 2;
    }
  }

  static int pred_succ_to_membar_mask(uint32_t predecessor, uint32_t successor) {
    return ((predecessor & 0x3) << 2) | (successor & 0x3);
  }

  void pause() {
    fence(w, 0);
  }

  // prints msg, dumps registers and stops execution
  void stop(const char* msg);

  static void debug64(char* msg, int64_t pc, int64_t regs[]);

  void unimplemented(const char* what = "");

  void should_not_reach_here() { stop("should not reach here"); }

  static address target_addr_for_insn(address insn_addr);

  // Required platform-specific helpers for Label::patch_instructions.
  // They _shadow_ the declarations in AbstractAssembler, which are undefined.
  static int pd_patch_instruction_size(address branch, address target);
  static void pd_patch_instruction(address branch, address target, const char* file = nullptr, int line = 0) {
    pd_patch_instruction_size(branch, target);
  }
  static address pd_call_destination(address branch) {
    return target_addr_for_insn(branch);
  }

  static int patch_oop(address insn_addr, address o);

  static address get_target_of_li32(address insn_addr);
  static int patch_imm_in_li32(address branch, int32_t target);

  // Return whether code is emitted to a scratch blob.
  virtual bool in_scratch_emit_size() {
    return false;
  }

  address emit_trampoline_stub(int insts_call_instruction_offset, address target);
  static int max_trampoline_stub_size();
  void emit_static_call_stub();
  static int static_call_stub_size();

  // The following 4 methods return the offset of the appropriate move instruction

  // Support for fast byte/short loading with zero extension (depending on particular CPU)
  int load_unsigned_byte(Register dst, Address src);
  int load_unsigned_short(Register dst, Address src);

  // Support for fast byte/short loading with sign extension (depending on particular CPU)
  int load_signed_byte(Register dst, Address src);
  int load_signed_short(Register dst, Address src);

  // Load and store values by size and signed-ness
  void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed);
  void store_sized_value(Address dst, Register src, size_t size_in_bytes);

  // Misaligned loads, will use the best way, according to the AvoidUnalignedAccess flag
  void load_short_misaligned(Register dst, Address src, Register tmp, bool is_signed, int granularity = 1);
  void load_int_misaligned(Register dst, Address src, Register tmp, bool is_signed, int granularity = 1);
  void load_long_misaligned(Register dst, Address src, Register tmp, int granularity = 1);

 public:
  // Standard pseudo instructions
  inline void nop() {
    addi(x0, x0, 0);
  }

  inline void mv(Register Rd, Register Rs) {
    if (Rd != Rs) {
      addi(Rd, Rs, 0);
    }
  }

  inline void notr(Register Rd, Register Rs) {
    xori(Rd, Rs, -1);
  }

  inline void neg(Register Rd, Register Rs) {
    sub(Rd, x0, Rs);
  }

  inline void negw(Register Rd, Register Rs) {
    subw(Rd, x0, Rs);
  }

  inline void sext_w(Register Rd, Register Rs) {
    addiw(Rd, Rs, 0);
  }

  inline void zext_b(Register Rd, Register Rs) {
    andi(Rd, Rs, 0xFF);
  }

  inline void seqz(Register Rd, Register Rs) {
    sltiu(Rd, Rs, 1);
  }

  inline void snez(Register Rd, Register Rs) {
    sltu(Rd, x0, Rs);
  }

  inline void sltz(Register Rd, Register Rs) {
    slt(Rd, Rs, x0);
  }

  inline void sgtz(Register Rd, Register Rs) {
    slt(Rd, x0, Rs);
  }

  // Bit-manipulation extension pseudo instructions
  // zero extend word
  inline void zext_w(Register Rd, Register Rs) {
    add_uw(Rd, Rs, zr);
  }

  // Floating-point data-processing pseudo instructions
  inline void fmv_s(FloatRegister Rd, FloatRegister Rs) {
    if (Rd != Rs) {
      fsgnj_s(Rd, Rs, Rs);
    }
  }

  inline void fabs_s(FloatRegister Rd, FloatRegister Rs) {
    fsgnjx_s(Rd, Rs, Rs);
  }

  inline void fneg_s(FloatRegister Rd, FloatRegister Rs) {
    fsgnjn_s(Rd, Rs, Rs);
  }

  inline void fmv_d(FloatRegister Rd, FloatRegister Rs) {
    if (Rd != Rs) {
      fsgnj_d(Rd, Rs, Rs);
    }
  }

  inline void fabs_d(FloatRegister Rd, FloatRegister Rs) {
    fsgnjx_d(Rd, Rs, Rs);
  }

  inline void fneg_d(FloatRegister Rd, FloatRegister Rs) {
    fsgnjn_d(Rd, Rs, Rs);
  }

  // Control and status pseudo instructions
  void rdinstret(Register Rd);                  // read instruction-retired counter
  void rdcycle(Register Rd);                    // read cycle counter
  void rdtime(Register Rd);                     // read time
  void csrr(Register Rd, unsigned csr);         // read csr
  void csrw(unsigned csr, Register Rs);         // write csr
  void csrs(unsigned csr, Register Rs);         // set bits in csr
  void csrc(unsigned csr, Register Rs);         // clear bits in csr
  void csrwi(unsigned csr, unsigned imm);
  void csrsi(unsigned csr, unsigned imm);
  void csrci(unsigned csr, unsigned imm);
  void frcsr(Register Rd);                      // read floating-point csr
  void fscsr(Register Rd, Register Rs);         // swap floating-point csr
  void fscsr(Register Rs);                      // write floating-point csr
  void frrm(Register Rd);                       // read floating-point rounding mode
  void fsrm(Register Rd, Register Rs);          // swap floating-point rounding mode
  void fsrm(Register Rs);                       // write floating-point rounding mode
  void fsrmi(Register Rd, unsigned imm);
  void fsrmi(unsigned imm);
  void frflags(Register Rd);                    // read floating-point exception flags
  void fsflags(Register Rd, Register Rs);       // swap floating-point exception flags
  void fsflags(Register Rs);                    // write floating-point exception flags
  void fsflagsi(Register Rd, unsigned imm);
  void fsflagsi(unsigned imm);

  // Control transfer pseudo instructions
  void beqz(Register Rs, const address dest);
  void bnez(Register Rs, const address dest);
  void blez(Register Rs, const address dest);
  void bgez(Register Rs, const address dest);
  void bltz(Register Rs, const address dest);
  void bgtz(Register Rs, const address dest);

  void j(Label &l, Register temp = t0);
  void j(const address dest, Register temp = t0);
  void j(const Address &adr, Register temp = t0);
  void jal(Label &l, Register temp = t0);
  void jal(const address dest, Register temp = t0);
  void jal(const Address &adr, Register temp = t0);
  void jal(Register Rd, Label &L, Register temp = t0);
  void jal(Register Rd, const address dest, Register temp = t0);

  //label
  void beqz(Register Rs, Label &l, bool is_far = false);
  void bnez(Register Rs, Label &l, bool is_far = false);
  void blez(Register Rs, Label &l, bool is_far = false);
  void bgez(Register Rs, Label &l, bool is_far = false);
  void bltz(Register Rs, Label &l, bool is_far = false);
  void bgtz(Register Rs, Label &l, bool is_far = false);

  void beq (Register Rs1, Register Rs2, Label &L, bool is_far = false);
  void bne (Register Rs1, Register Rs2, Label &L, bool is_far = false);
  void blt (Register Rs1, Register Rs2, Label &L, bool is_far = false);
  void bge (Register Rs1, Register Rs2, Label &L, bool is_far = false);
  void bltu(Register Rs1, Register Rs2, Label &L, bool is_far = false);
  void bgeu(Register Rs1, Register Rs2, Label &L, bool is_far = false);

  void bgt (Register Rs, Register Rt, const address dest);
  void ble (Register Rs, Register Rt, const address dest);
  void bgtu(Register Rs, Register Rt, const address dest);
  void bleu(Register Rs, Register Rt, const address dest);

  void bgt (Register Rs, Register Rt, Label &l, bool is_far = false);
  void ble (Register Rs, Register Rt, Label &l, bool is_far = false);
  void bgtu(Register Rs, Register Rt, Label &l, bool is_far = false);
  void bleu(Register Rs, Register Rt, Label &l, bool is_far = false);

#define INSN_ENTRY_RELOC(result_type, header)                              \
  result_type header {                                                     \
    guarantee(rtype == relocInfo::internal_word_type,                      \
              "only internal_word_type relocs make sense here");           \
    relocate(InternalAddress(dest).rspec());                               \
    IncompressibleRegion ir(this);  /* relocations */

#define INSN(NAME)                                                         \
  void NAME(Register Rs1, Register Rs2, const address dest) {              \
    assert_cond(dest != nullptr);                                          \
    int64_t offset = dest - pc();                                          \
    guarantee(is_simm13(offset) && is_even(offset),                        \
              "offset is invalid: is_simm_13: %s offset: " INT64_FORMAT,   \
              BOOL_TO_STR(is_simm13(offset)), offset);                     \
    Assembler::NAME(Rs1, Rs2, offset);                                     \
  }                                                                        \
  INSN_ENTRY_RELOC(void, NAME(Register Rs1, Register Rs2, address dest, relocInfo::relocType rtype)) \
    NAME(Rs1, Rs2, dest);                                                  \
  }

  INSN(beq);
  INSN(bne);
  INSN(bge);
  INSN(bgeu);
  INSN(blt);
  INSN(bltu);

#undef INSN

#undef INSN_ENTRY_RELOC

  void float_beq(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void float_bne(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void float_ble(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void float_bge(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void float_blt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void float_bgt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);

  void double_beq(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void double_bne(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void double_ble(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void double_bge(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void double_blt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void double_bgt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);

 private:
  int push_reg(unsigned int bitset, Register stack);
  int pop_reg(unsigned int bitset, Register stack);
  int push_fp(unsigned int bitset, Register stack);
  int pop_fp(unsigned int bitset, Register stack);
#ifdef COMPILER2
  int push_v(unsigned int bitset, Register stack);
  int pop_v(unsigned int bitset, Register stack);
#endif // COMPILER2

  // The signed 20-bit upper immediate can materialize at most negative 0xF...F80000000,
  // that is, minus two G. The following signed 12-bit immediate can at most subtract
  // 0x800, two K, from that previously loaded two G.
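  //
  // In other words (a worked example, not normative): an offset is encodable as
  // auipc + 12-bit immediate iff it lies in [-2G - 2K, 2G - 2K). For instance,
  // 0x7FFFF7FF (2G - 2K - 1) splits into hi = 0x7FFFF000 and lo = 0x7FF, whereas
  // 0x7FFFF800 (2G - 2K) would need hi = 0x80000000, which the signed 20-bit
  // upper immediate cannot represent.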
  bool is_valid_32bit_offset(int64_t x) {
    constexpr int64_t twoG = (2 * G);
    constexpr int64_t twoK = (2 * K);
    return x < (twoG - twoK) && x >= (-twoG - twoK);
  }

 public:
  void push_reg(Register Rs);
  void pop_reg(Register Rd);
  void push_reg(RegSet regs, Register stack) { if (regs.bits()) push_reg(regs.bits(), stack); }
  void pop_reg(RegSet regs, Register stack)  { if (regs.bits()) pop_reg(regs.bits(), stack); }
  void push_fp(FloatRegSet regs, Register stack) { if (regs.bits()) push_fp(regs.bits(), stack); }
  void pop_fp(FloatRegSet regs, Register stack)  { if (regs.bits()) pop_fp(regs.bits(), stack); }
#ifdef COMPILER2
  void push_v(VectorRegSet regs, Register stack) { if (regs.bits()) push_v(regs.bits(), stack); }
  void pop_v(VectorRegSet regs, Register stack)  { if (regs.bits()) pop_v(regs.bits(), stack); }
#endif // COMPILER2

  // Push and pop everything that might be clobbered by a native
  // runtime call except t0 and t1. (They are always
  // temporary registers, so we don't have to protect them.)
  // Additional registers can be excluded in a passed RegSet.
  void push_call_clobbered_registers_except(RegSet exclude);
  void pop_call_clobbered_registers_except(RegSet exclude);

  void push_call_clobbered_registers() {
    push_call_clobbered_registers_except(RegSet());
  }
  void pop_call_clobbered_registers() {
    pop_call_clobbered_registers_except(RegSet());
  }

  void push_CPU_state(bool save_vectors = false, int vector_size_in_bytes = 0);
  void pop_CPU_state(bool restore_vectors = false, int vector_size_in_bytes = 0);

  void push_cont_fastpath(Register java_thread);
  void pop_cont_fastpath(Register java_thread);

  // if heap base register is used - reinit it with the correct value
  void reinit_heapbase();

  void bind(Label& L) {
    Assembler::bind(L);
    // fences across basic blocks should not be merged
    code()->clear_last_insn();
  }

  typedef void (MacroAssembler::* compare_and_branch_insn)(Register Rs1, Register Rs2, const address dest);
  typedef void (MacroAssembler::* compare_and_branch_label_insn)(Register Rs1, Register Rs2, Label &L, bool is_far);
  typedef void (MacroAssembler::* jal_jalr_insn)(Register Rt, address dest);
  typedef void (MacroAssembler::* load_insn_by_temp)(Register Rt, address dest, Register temp);

  void wrap_label(Register r, Label &L, Register t, load_insn_by_temp insn);
  void wrap_label(Register r, Label &L, jal_jalr_insn insn);
  void wrap_label(Register r1, Register r2, Label &L,
                  compare_and_branch_insn insn,
                  compare_and_branch_label_insn neg_insn, bool is_far = false);

  void la(Register Rd, Label &label);
  void la(Register Rd, const address dest);
  void la(Register Rd, const Address &adr);

  void li16u(Register Rd, uint16_t imm);
  void li32(Register Rd, int32_t imm);
  void li64(Register Rd, int64_t imm);
  void li (Register Rd, int64_t imm);  // optimized load immediate

  // mv
  void mv(Register Rd, address addr) { li(Rd, (int64_t)addr); }
  void mv(Register Rd, address addr, int32_t &offset) {
    // Split address into a lower 12-bit sign-extended offset and the remainder,
    // so that the offset could be encoded in jalr or load/store instruction.
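    //
    // A worked example (illustrative values only): for an addr ending in ...0x12345FFF
    // the low 12 bits sign-extend to offset = -1, so li() materializes ...0x12346000
    // and the consuming jalr/load/store supplies the -1 to land back on addr.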
    offset = ((int32_t)(int64_t)addr << 20) >> 20;
    li(Rd, (int64_t)addr - offset);
  }

  template<typename T, ENABLE_IF(std::is_integral<T>::value)>
  inline void mv(Register Rd, T o) { li(Rd, (int64_t)o); }

  void mv(Register Rd, Address dest) {
    assert(dest.getMode() == Address::literal, "Address mode should be Address::literal");
    relocate(dest.rspec(), [&] {
      movptr(Rd, dest.target());
    });
  }

  void mv(Register Rd, RegisterOrConstant src) {
    if (src.is_register()) {
      mv(Rd, src.as_register());
    } else {
      mv(Rd, src.as_constant());
    }
  }

  void movptr(Register Rd, address addr, int32_t &offset);

  void movptr(Register Rd, address addr) {
    int offset = 0;
    movptr(Rd, addr, offset);
    addi(Rd, Rd, offset);
  }

  inline void movptr(Register Rd, uintptr_t imm64) {
    movptr(Rd, (address)imm64);
  }

  // arith
  void add (Register Rd, Register Rn, int64_t increment, Register temp = t0);
  void addw(Register Rd, Register Rn, int32_t increment, Register temp = t0);
  void sub (Register Rd, Register Rn, int64_t decrement, Register temp = t0);
  void subw(Register Rd, Register Rn, int32_t decrement, Register temp = t0);

#define INSN(NAME)                                              \
  inline void NAME(Register Rd, Register Rs1, Register Rs2) {  \
    Assembler::NAME(Rd, Rs1, Rs2);                             \
  }

  INSN(add);
  INSN(addw);
  INSN(sub);
  INSN(subw);

#undef INSN

  // logic
  void andrw(Register Rd, Register Rs1, Register Rs2);
  void orrw(Register Rd, Register Rs1, Register Rs2);
  void xorrw(Register Rd, Register Rs1, Register Rs2);

  // logic with negate
  void andn(Register Rd, Register Rs1, Register Rs2);
  void orn(Register Rd, Register Rs1, Register Rs2);

  // revb
  void revb_h_h(Register Rd, Register Rs, Register tmp = t0);                           // reverse bytes in halfword in lower 16 bits, sign-extend
  void revb_w_w(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);      // reverse bytes in lower word, sign-extend
  void revb_h_h_u(Register Rd, Register Rs, Register tmp = t0);                         // reverse bytes in halfword in lower 16 bits, zero-extend
  void revb_h_w_u(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);    // reverse bytes in halfwords in lower 32 bits, zero-extend
  void revb_h_helper(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in upper 16 bits (48:63) and move to lower
  void revb_h(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);        // reverse bytes in each halfword
  void revb_w(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);        // reverse bytes in each word
  void revb(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);          // reverse bytes in doubleword

  void ror_imm(Register dst, Register src, uint32_t shift, Register tmp = t0);
  void rolw_imm(Register dst, Register src, uint32_t, Register tmp = t0);
  void andi(Register Rd, Register Rn, int64_t imm, Register tmp = t0);
  void orptr(Address adr, RegisterOrConstant src, Register tmp1 = t0, Register tmp2 = t1);

  // Load and Store Instructions
#define INSN_ENTRY_RELOC(result_type, header)                              \
  result_type header {                                                     \
    guarantee(rtype == relocInfo::internal_word_type,                      \
              "only internal_word_type relocs make sense here");           \
    relocate(InternalAddress(dest).rspec());                               \
    IncompressibleRegion ir(this);  /* relocations */

#define INSN(NAME)                                                         \
  void NAME(Register Rd, address dest) {                                   \
    assert_cond(dest != nullptr);                                          \
    int64_t distance = dest - pc();                                        \
    if (is_valid_32bit_offset(distance)) {                                 \
      auipc(Rd, (int32_t)distance + 0x800);                                \
      Assembler::NAME(Rd, Rd, ((int32_t)distance << 20) >> 20);            \
    } else {                                                               \
      int32_t offset = 0;                                                  \
      movptr(Rd, dest, offset);                                            \
      Assembler::NAME(Rd, Rd, offset);                                     \
    }                                                                      \
  }                                                                        \
  INSN_ENTRY_RELOC(void, NAME(Register Rd, address dest, relocInfo::relocType rtype)) \
    NAME(Rd, dest);                                                        \
  }                                                                        \
  void NAME(Register Rd, const Address &adr, Register temp = t0) {         \
    switch (adr.getMode()) {                                               \
      case Address::literal: {                                             \
        relocate(adr.rspec(), [&] {                                        \
          NAME(Rd, adr.target());                                          \
        });                                                                \
        break;                                                             \
      }                                                                    \
      case Address::base_plus_offset: {                                    \
        if (is_simm12(adr.offset())) {                                     \
          Assembler::NAME(Rd, adr.base(), adr.offset());                   \
        } else {                                                           \
          int32_t offset = ((int32_t)adr.offset() << 20) >> 20;            \
          if (Rd == adr.base()) {                                          \
            la(temp, Address(adr.base(), adr.offset() - offset));          \
            Assembler::NAME(Rd, temp, offset);                             \
          } else {                                                         \
            la(Rd, Address(adr.base(), adr.offset() - offset));            \
            Assembler::NAME(Rd, Rd, offset);                               \
          }                                                                \
        }                                                                  \
        break;                                                             \
      }                                                                    \
      default:                                                             \
        ShouldNotReachHere();                                              \
    }                                                                      \
  }                                                                        \
  void NAME(Register Rd, Label &L) {                                       \
    wrap_label(Rd, L, &MacroAssembler::NAME);                              \
  }

  INSN(lb);
  INSN(lbu);
  INSN(lh);
  INSN(lhu);
  INSN(lw);
  INSN(lwu);
  INSN(ld);

#undef INSN

#define INSN(NAME)                                                         \
  void NAME(FloatRegister Rd, address dest, Register temp = t0) {          \
    assert_cond(dest != nullptr);                                          \
    int64_t distance = dest - pc();                                        \
    if (is_valid_32bit_offset(distance)) {                                 \
      auipc(temp, (int32_t)distance + 0x800);                              \
      Assembler::NAME(Rd, temp, ((int32_t)distance << 20) >> 20);          \
    } else {                                                               \
      int32_t offset = 0;                                                  \
      movptr(temp, dest, offset);                                          \
      Assembler::NAME(Rd, temp, offset);                                   \
    }                                                                      \
  }                                                                        \
  INSN_ENTRY_RELOC(void, NAME(FloatRegister Rd, address dest,              \
                              relocInfo::relocType rtype, Register temp = t0)) \
    NAME(Rd, dest, temp);                                                  \
  }                                                                        \
  void NAME(FloatRegister Rd, const Address &adr, Register temp = t0) {    \
    switch (adr.getMode()) {                                               \
      case Address::literal: {                                             \
        relocate(adr.rspec(), [&] {                                        \
          NAME(Rd, adr.target(), temp);                                    \
        });                                                                \
        break;                                                             \
      }                                                                    \
      case Address::base_plus_offset: {                                    \
        if (is_simm12(adr.offset())) {                                     \
          Assembler::NAME(Rd, adr.base(), adr.offset());                   \
        } else {                                                           \
          int32_t offset = ((int32_t)adr.offset() << 20) >> 20;            \
          la(temp, Address(adr.base(), adr.offset() - offset));            \
          Assembler::NAME(Rd, temp, offset);                               \
        }                                                                  \
        break;                                                             \
      }                                                                    \
      default:                                                             \
        ShouldNotReachHere();                                              \
    }                                                                      \
  }

  INSN(flw);
  INSN(fld);

#undef INSN

#define INSN(NAME, REGISTER)                                               \
  INSN_ENTRY_RELOC(void, NAME(REGISTER Rs, address dest,                   \
                              relocInfo::relocType rtype, Register temp = t0)) \
    NAME(Rs, dest, temp);                                                  \
  }

  INSN(sb, Register);
  INSN(sh, Register);
  INSN(sw, Register);
  INSN(sd, Register);
  INSN(fsw, FloatRegister);
  INSN(fsd, FloatRegister);

#undef INSN

#define INSN(NAME)                                                         \
  void NAME(Register Rs, address dest, Register temp = t0) {               \
    assert_cond(dest != nullptr);                                          \
    assert_different_registers(Rs, temp);                                  \
    int64_t distance = dest - pc();                                        \
    if (is_valid_32bit_offset(distance)) {                                 \
      auipc(temp, (int32_t)distance + 0x800);                              \
      Assembler::NAME(Rs, temp, ((int32_t)distance << 20) >> 20);          \
    } else {                                                               \
      int32_t offset = 0;                                                  \
      movptr(temp, dest, offset);                                          \
      Assembler::NAME(Rs, temp, offset);                                   \
    }                                                                      \
  }                                                                        \
  void NAME(Register Rs, const Address &adr, Register temp = t0) {         \
    switch (adr.getMode()) {                                               \
      case Address::literal: {                                             \
        assert_different_registers(Rs, temp);                              \
        relocate(adr.rspec(), [&] {                                        \
          NAME(Rs, adr.target(), temp);                                    \
        });                                                                \
        break;                                                             \
      }                                                                    \
      case Address::base_plus_offset: {                                    \
        if (is_simm12(adr.offset())) {                                     \
          Assembler::NAME(Rs, adr.base(), adr.offset());                   \
        } else {                                                           \
          assert_different_registers(Rs, temp);                            \
          int32_t offset = ((int32_t)adr.offset() << 20) >> 20;            \
          la(temp, Address(adr.base(), adr.offset() - offset));            \
          Assembler::NAME(Rs, temp, offset);                               \
        }                                                                  \
        break;                                                             \
      }                                                                    \
      default:                                                             \
        ShouldNotReachHere();                                              \
    }                                                                      \
  }

  INSN(sb);
  INSN(sh);
  INSN(sw);
  INSN(sd);

#undef INSN

#define INSN(NAME)                                                         \
  void NAME(FloatRegister Rs, address dest, Register temp = t0) {          \
    assert_cond(dest != nullptr);                                          \
    int64_t distance = dest - pc();                                        \
    if (is_valid_32bit_offset(distance)) {                                 \
      auipc(temp, (int32_t)distance + 0x800);                              \
      Assembler::NAME(Rs, temp, ((int32_t)distance << 20) >> 20);          \
    } else {                                                               \
      int32_t offset = 0;                                                  \
      movptr(temp, dest, offset);                                          \
      Assembler::NAME(Rs, temp, offset);                                   \
    }                                                                      \
  }                                                                        \
  void NAME(FloatRegister Rs, const Address &adr, Register temp = t0) {    \
    switch (adr.getMode()) {                                               \
      case Address::literal: {                                             \
        relocate(adr.rspec(), [&] {                                        \
          NAME(Rs, adr.target(), temp);                                    \
        });                                                                \
        break;                                                             \
      }                                                                    \
      case Address::base_plus_offset: {                                    \
        if (is_simm12(adr.offset())) {                                     \
          Assembler::NAME(Rs, adr.base(), adr.offset());                   \
        } else {                                                           \
          int32_t offset = ((int32_t)adr.offset() << 20) >> 20;            \
          la(temp, Address(adr.base(), adr.offset() - offset));            \
          Assembler::NAME(Rs, temp, offset);                               \
        }                                                                  \
        break;                                                             \
      }                                                                    \
      default:                                                             \
        ShouldNotReachHere();                                              \
    }                                                                      \
  }

  INSN(fsw);
  INSN(fsd);

#undef INSN

#undef INSN_ENTRY_RELOC

  void cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp, Label &succeed, Label *fail);
  void cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, Label &succeed, Label *fail);
  void cmpxchg(Register addr, Register expected,
               Register new_val,
               enum operand_size size,
               Assembler::Aqrl acquire, Assembler::Aqrl release,
               Register result, bool result_as_bool = false);
  void cmpxchg_weak(Register addr, Register expected,
                    Register new_val,
                    enum operand_size size,
                    Assembler::Aqrl acquire, Assembler::Aqrl release,
                    Register result);
  void cmpxchg_narrow_value_helper(Register addr, Register expected,
                                   Register new_val,
                                   enum operand_size size,
                                   Register tmp1, Register tmp2, Register tmp3);
  void cmpxchg_narrow_value(Register addr, Register expected,
                            Register new_val,
                            enum operand_size size,
                            Assembler::Aqrl acquire, Assembler::Aqrl release,
                            Register result, bool result_as_bool,
                            Register tmp1, Register tmp2, Register tmp3);
  void weak_cmpxchg_narrow_value(Register addr, Register expected,
                                 Register new_val,
                                 enum operand_size size,
                                 Assembler::Aqrl acquire, Assembler::Aqrl release,
                                 Register result,
                                 Register tmp1, Register tmp2, Register tmp3);

  void atomic_add(Register prev, RegisterOrConstant incr, Register addr);
  void atomic_addw(Register prev, RegisterOrConstant incr, Register addr);
  void atomic_addal(Register prev, RegisterOrConstant incr, Register addr);
  void atomic_addalw(Register prev, RegisterOrConstant incr, Register addr);

  void atomic_xchg(Register prev, Register newv, Register addr);
  void atomic_xchgw(Register prev, Register newv, Register addr);
  void atomic_xchgal(Register prev, Register newv, Register addr);
  void atomic_xchgalw(Register prev, Register newv, Register addr);
  void atomic_xchgwu(Register prev, Register newv, Register addr);
  void atomic_xchgalwu(Register prev, Register newv, Register addr);

  static bool far_branches() {
    return ReservedCodeCacheSize > branch_range;
  }

  // Emit a direct call/jump if the entry address will always be in range,
  // otherwise a far call/jump.
  // The address must be inside the code cache.
  // Supported entry.rspec():
  // - relocInfo::external_word_type
  // - relocInfo::runtime_call_type
  // - relocInfo::none
  // In the case of a far call/jump, the entry address is put in the tmp register.
  // The tmp register is invalidated.
  void far_call(Address entry, Register tmp = t0);
  void far_jump(Address entry, Register tmp = t0);

  static int far_branch_size() {
    if (far_branches()) {
      return 2 * 4;  // auipc + jalr, see far_call() & far_jump()
    } else {
      return 4;
    }
  }

  void load_byte_map_base(Register reg);

  void bang_stack_with_offset(int offset) {
    // stack grows down, caller passes positive offset
    assert(offset > 0, "must bang with negative offset");
    sub(t0, sp, offset);
    sd(zr, Address(t0));
  }

  void la_patchable(Register reg1, const Address &dest, int32_t &offset);

  virtual void _call_Unimplemented(address call_site) {
    mv(t1, call_site);
  }

#define call_Unimplemented() _call_Unimplemented((address)__PRETTY_FUNCTION__)

  // Frame creation and destruction shared between JITs.
  void build_frame(int framesize);
  void remove_frame(int framesize);

  void reserved_stack_check();

  void get_polling_page(Register dest, relocInfo::relocType rtype);
  void read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype);

  // RISCV64 OpenJDK uses four different types of calls:
  //   - direct call: jal pc_relative_offset
  //     This is the shortest and the fastest, but the offset has the range: +/-1MB.
  //
  //   - far call: auipc reg, pc_relative_offset; jalr ra, reg, offset
  //     This is longer than a direct call. The offset has
  //     the range [-(2G + 2K), 2G - 2K). Addresses outside that range in the code cache
  //     require an indirect call.
  //     If a jump is needed rather than a call, a far jump 'jalr x0, reg, offset' can
  //     be used instead.
  //     All instructions are embedded at a call site.
  //
  //   - trampoline call:
  //     This is only available in C1/C2-generated code (nmethod). It is a combination
  //     of a direct call, which is used if the destination of a call is in range,
  //     and a register-indirect call. It has the advantages of reaching anywhere in
  //     the RISCV address space and being patchable at runtime when the generated
  //     code is being executed by other threads.
  //
  //     [Main code section]
  //       jal trampoline
  //     [Stub code section]
  //     trampoline:
  //       ld    reg, pc + 8 (auipc + ld)
  //       jr    reg
  //       <64-bit destination address>
  //
  //     If the destination is in range when the generated code is moved to the code
  //     cache, 'jal trampoline' is replaced with 'jal destination' and the trampoline
  //     is not used.
  //     The optimization does not remove the trampoline from the stub section.

  //     This is necessary because the trampoline may well be redirected later when
  //     code is patched, and the new destination may not be reachable by a simple JAL
  //     instruction.
  //
  //   - indirect call: movptr + jalr
  //     This too can reach anywhere in the address space, but it cannot be
  //     patched while code is running, so it must only be modified at a safepoint.
  //     This form of call is most suitable for targets at fixed addresses, which
  //     will never be patched.
  //
  //
  // To patch a trampoline call when the JAL can't reach, we first modify
  // the 64-bit destination address in the trampoline, then modify the
  // JAL to point to the trampoline, then flush the instruction cache to
  // broadcast the change to all executing threads. See
  // NativeCall::set_destination_mt_safe for the details.
  //
  // There is a benign race in that the other thread might observe the
  // modified JAL before it observes the modified 64-bit destination
  // address. That does not matter because the destination method has been
  // invalidated, so there will be a trap at its start.
  // For this to work, the destination address in the trampoline is
  // always updated, even if we're not using the trampoline.

  // Emit a direct call if the entry address will always be in range,
  // otherwise a trampoline call.
  // Supported entry.rspec():
  // - relocInfo::runtime_call_type
  // - relocInfo::opt_virtual_call_type
  // - relocInfo::static_call_type
  // - relocInfo::virtual_call_type
  //
  // Return: the call PC or null if CodeCache is full.
  address trampoline_call(Address entry);
  address ic_call(address entry, jint method_index = 0);

  // Support for memory inc/dec
  // n.b. increment/decrement calls with an Address destination will
  // need to use a scratch register to load the value to be
  // incremented. increment/decrement calls which add or subtract a
  // constant value other than sign-extended 12-bit immediate will need
  // to use a 2nd scratch register to hold the constant. so, an address
  // increment/decrement may trash both t0 and t1.
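  //
  // For example (illustrative values only): incrementw(Address(xthread, off), 1)
  // needs just t0 for the load/add/store, whereas increment(Address(base, off),
  // 0x12345) must also materialize the constant, which is why t1 may be used too.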

  void increment(const Address dst, int64_t value = 1, Register tmp1 = t0, Register tmp2 = t1);
  void incrementw(const Address dst, int32_t value = 1, Register tmp1 = t0, Register tmp2 = t1);

  void decrement(const Address dst, int64_t value = 1, Register tmp1 = t0, Register tmp2 = t1);
  void decrementw(const Address dst, int32_t value = 1, Register tmp1 = t0, Register tmp2 = t1);

  void cmpptr(Register src1, Address src2, Label& equal);

  void clinit_barrier(Register klass, Register tmp, Label* L_fast_path = nullptr, Label* L_slow_path = nullptr);
  void load_method_holder_cld(Register result, Register method);
  void load_method_holder(Register holder, Register method);

  void compute_index(Register str1, Register trailing_zeros, Register match_mask,
                     Register result, Register char_tmp, Register tmp,
                     bool haystack_isL);
  void compute_match_mask(Register src, Register pattern, Register match_mask,
                          Register mask1, Register mask2);

#ifdef COMPILER2
  void mul_add(Register out, Register in, Register offset,
               Register len, Register k, Register tmp);
  void cad(Register dst, Register src1, Register src2, Register carry);
  void cadc(Register dst, Register src1, Register src2, Register carry);
  void adc(Register dst, Register src1, Register src2, Register carry);
  void add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo,
                       Register src1, Register src2, Register carry);
  void multiply_32_x_32_loop(Register x, Register xstart, Register x_xstart,
                             Register y, Register y_idx, Register z,
                             Register carry, Register product,
                             Register idx, Register kdx);
  void multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart,
                             Register y, Register y_idx, Register z,
                             Register carry, Register product,
                             Register idx, Register kdx);
  void multiply_128_x_128_loop(Register y, Register z,
                               Register carry, Register carry2,
                               Register idx, Register jdx,
                               Register yz_idx1, Register yz_idx2,
                               Register tmp, Register tmp3, Register tmp4,
                               Register tmp6, Register product_hi);
  void multiply_to_len(Register x, Register xlen, Register y, Register ylen,
                       Register z, Register zlen,
                       Register tmp1, Register tmp2, Register tmp3, Register tmp4,
                       Register tmp5, Register tmp6, Register product_hi);
#endif

  void inflate_lo32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);
  void inflate_hi32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);

  void ctzc_bit(Register Rd, Register Rs, bool isLL = false, Register tmp1 = t0, Register tmp2 = t1);

  void zero_words(Register base, uint64_t cnt);
  address zero_words(Register ptr, Register cnt);
  void fill_words(Register base, Register cnt, Register value);
  void zero_memory(Register addr, Register len, Register tmp);
  void zero_dcache_blocks(Register base, Register cnt, Register tmp1, Register tmp2);

  // shift left by shamt and add
  void shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt);

  // test single bit in Rs, result is set to Rd
  void test_bit(Register Rd, Register Rs, uint32_t bit_pos);

  // The float conversion instructions with a _safe suffix below deal with exceptional
  // inputs: converting NaN, +Inf or -Inf from float/double to int/long triggers the
  // invalid exception and does not give the result Java expects, so these helpers
  // handle such situations explicitly to get correct results.
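  //
  // For example (a sketch of the intended behaviour): fcvt_w_s_safe(x10, f10) leaves
  // 0 in x10 when f10 holds NaN, matching Java's (int) cast semantics, instead of the
  // value the bare fcvt.w.s instruction would produce.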
  void fcvt_w_s_safe(Register dst, FloatRegister src, Register tmp = t0);
  void fcvt_l_s_safe(Register dst, FloatRegister src, Register tmp = t0);
  void fcvt_w_d_safe(Register dst, FloatRegister src, Register tmp = t0);
  void fcvt_l_d_safe(Register dst, FloatRegister src, Register tmp = t0);

  void java_round_float(Register dst, FloatRegister src, FloatRegister ftmp);
  void java_round_double(Register dst, FloatRegister src, FloatRegister ftmp);

  // vector load/store unit-stride instructions
  void vlex_v(VectorRegister vd, Register base, Assembler::SEW sew, VectorMask vm = unmasked) {
    switch (sew) {
      case Assembler::e64:
        vle64_v(vd, base, vm);
        break;
      case Assembler::e32:
        vle32_v(vd, base, vm);
        break;
      case Assembler::e16:
        vle16_v(vd, base, vm);
        break;
      case Assembler::e8: // fall through
      default:
        vle8_v(vd, base, vm);
        break;
    }
  }

  void vsex_v(VectorRegister store_data, Register base, Assembler::SEW sew, VectorMask vm = unmasked) {
    switch (sew) {
      case Assembler::e64:
        vse64_v(store_data, base, vm);
        break;
      case Assembler::e32:
        vse32_v(store_data, base, vm);
        break;
      case Assembler::e16:
        vse16_v(store_data, base, vm);
        break;
      case Assembler::e8: // fall through
      default:
        vse8_v(store_data, base, vm);
        break;
    }
  }

  // vector pseudo instructions
  inline void vl1r_v(VectorRegister vd, Register rs) {
    vl1re8_v(vd, rs);
  }

  inline void vmnot_m(VectorRegister vd, VectorRegister vs) {
    vmnand_mm(vd, vs, vs);
  }

  inline void vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked) {
    vnsrl_wx(vd, vs, x0, vm);
  }

  inline void vneg_v(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked) {
    vrsub_vx(vd, vs, x0, vm);
  }

  inline void vfneg_v(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked) {
    vfsgnjn_vv(vd, vs, vs, vm);
  }

  inline void vfabs_v(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked) {
    vfsgnjx_vv(vd, vs, vs, vm);
  }

  inline void vmsgt_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
    vmslt_vv(vd, vs1, vs2, vm);
  }

  inline void vmsgtu_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
    vmsltu_vv(vd, vs1, vs2, vm);
  }

  inline void vmsge_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
    vmsle_vv(vd, vs1, vs2, vm);
  }

  inline void vmsgeu_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
    vmsleu_vv(vd, vs1, vs2, vm);
  }

  inline void vmfgt_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
    vmflt_vv(vd, vs1, vs2, vm);
  }

  inline void vmfge_vv(VectorRegister vd, VectorRegister vs2, VectorRegister vs1, VectorMask vm = unmasked) {
    vmfle_vv(vd, vs1, vs2, vm);
  }

  // Copy mask register
  inline void vmmv_m(VectorRegister vd, VectorRegister vs) {
    vmand_mm(vd, vs, vs);
  }

  // Clear mask register
  inline void vmclr_m(VectorRegister vd) {
    vmxor_mm(vd, vd, vd);
  }

  // Set mask register
  inline void vmset_m(VectorRegister vd) {
    vmxnor_mm(vd, vd, vd);
  }

  static const int zero_words_block_size;

  void cast_primitive_type(BasicType type, Register Rt) {
    switch (type) {
      case T_BOOLEAN:
        sltu(Rt, zr, Rt);
        break;
      case T_CHAR   :
        zero_extend(Rt, Rt, 16);
        break;
      case T_BYTE   :
        sign_extend(Rt, Rt, 8);
        break;
      case T_SHORT  :
        sign_extend(Rt, Rt, 16);
        break;
      case T_INT    :
        sign_extend(Rt, Rt, 32);
        break;
      case T_LONG   : /* nothing to do */ break;
      case T_VOID   : /* nothing to do */ break;
      case T_FLOAT  : /* nothing to do */ break;
      case T_DOUBLE : /* nothing to do */ break;
      default: ShouldNotReachHere();
    }
  }

  // float cmp with unordered_result
  void float_compare(Register result, FloatRegister Rs1, FloatRegister Rs2, int unordered_result);
  void double_compare(Register result, FloatRegister Rs1, FloatRegister Rs2, int unordered_result);

  // Zero/Sign-extend
  void zero_extend(Register dst, Register src, int bits);
  void sign_extend(Register dst, Register src, int bits);

  // compare src1 and src2 and get -1/0/1 in dst.
  // if [src1 > src2], dst = 1;
  // if [src1 == src2], dst = 0;
  // if [src1 < src2], dst = -1;
  void cmp_l2i(Register dst, Register src1, Register src2, Register tmp = t0);

  // support for argument shuffling
  void move32_64(VMRegPair src, VMRegPair dst, Register tmp = t0);
  void float_move(VMRegPair src, VMRegPair dst, Register tmp = t0);
  void long_move(VMRegPair src, VMRegPair dst, Register tmp = t0);
  void double_move(VMRegPair src, VMRegPair dst, Register tmp = t0);
  void object_move(OopMap* map,
                   int oop_handle_offset,
                   int framesize_in_slots,
                   VMRegPair src,
                   VMRegPair dst,
                   bool is_receiver,
                   int* receiver_offset);
  void rt_call(address dest, Register tmp = t0);

  void call(const address dest, Register temp = t0) {
    assert_cond(dest != nullptr);
    assert(temp != noreg, "expecting a register");
    int32_t offset = 0;
    mv(temp, dest, offset);
    jalr(x1, temp, offset);
  }

  inline void ret() {
    jalr(x0, x1, 0);
  }

#ifdef ASSERT
  // Template short-hand support to clean up after a failed call to trampoline
  // call generation (see trampoline_call() above), when a set of Labels must
  // be reset (before returning).
  template<typename Label, typename... More>
  void reset_labels(Label& lbl, More&... more) {
    lbl.reset(); reset_labels(more...);
  }
  template<typename Label>
  void reset_labels(Label& lbl) {
    lbl.reset();
  }
#endif

 private:

  void repne_scan(Register addr, Register value, Register count, Register tmp);

  void ld_constant(Register dest, const Address &const_addr) {
    if (NearCpool) {
      ld(dest, const_addr);
    } else {
      InternalAddress target(const_addr.target());
      relocate(target.rspec(), [&] {
        int32_t offset;
        la_patchable(dest, target, offset);
        ld(dest, Address(dest, offset));
      });
    }
  }

  int bitset_to_regs(unsigned int bitset, unsigned char* regs);
  Address add_memory_helper(const Address dst, Register tmp);

  void load_reserved(Register addr, enum operand_size size, Assembler::Aqrl acquire);
  void store_conditional(Register addr, Register new_val, enum operand_size size, Assembler::Aqrl release);

 public:
  void lightweight_lock(Register obj, Register hdr, Register tmp1, Register tmp2, Label& slow);
  void lightweight_unlock(Register obj, Register hdr, Register tmp1, Register tmp2, Label& slow);
};

#ifdef ASSERT
inline bool AbstractAssembler::pd_check_instruction_mark() { return false; }
#endif

/**
 * class SkipIfEqual:
 *
 * Instantiating this class will result in assembly code being output that will
 * jump around any code emitted between the creation of the instance and its
 * automatic destruction at the end of a scope block, depending on the value of
 * the flag passed to the constructor, which will be checked at run-time.
 */
class SkipIfEqual {
 private:
  MacroAssembler* _masm;
  Label _label;

 public:
  SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value);
  ~SkipIfEqual();
};

#endif // CPU_RISCV_MACROASSEMBLER_RISCV_HPP