/*
 * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
 * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef CPU_RISCV_MACROASSEMBLER_RISCV_HPP
#define CPU_RISCV_MACROASSEMBLER_RISCV_HPP

#include "asm/assembler.hpp"
#include "metaprogramming/enableIf.hpp"
#include "oops/compressedOops.hpp"
#include "utilities/powerOfTwo.hpp"

// MacroAssembler extends Assembler by frequently used macros.
//
// Instructions for which a 'better' code sequence exists depending
// on arguments should also go in here.

class MacroAssembler: public Assembler {

 public:
  MacroAssembler(CodeBuffer* code) : Assembler(code) {
  }
  virtual ~MacroAssembler() {}

  void safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod);

  // Place a fence.i after code that may have been modified due to a safepoint.
  void safepoint_ifence();

  // Alignment
  void align(int modulus, int extra_offset = 0);

  // Stack frame creation/removal
  // Note that SP must be updated to the right place before saving/restoring RA and FP
  // because signal-based thread suspend/resume could happen asynchronously.
  void enter() {
    addi(sp, sp, - 2 * wordSize);
    sd(ra, Address(sp, wordSize));
    sd(fp, Address(sp));
    addi(fp, sp, 2 * wordSize);
  }

  void leave() {
    addi(sp, fp, - 2 * wordSize);
    ld(fp, Address(sp));
    ld(ra, Address(sp, wordSize));
    addi(sp, sp, 2 * wordSize);
  }


  // Support for getting the JavaThread pointer (i.e., a reference to thread-local information)
  // The pointer will be loaded into the thread register.
  void get_thread(Register thread);

  // Support for VM calls
  //
  // It is imperative that all calls into the VM are handled via the call_VM macros.
  // They make sure that the stack linkage is set up correctly. call_VM's correspond
  // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points.
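  //
  // Usage sketch (illustrative only; SomeRuntime::some_entry stands in for a
  // real ENTRY-style runtime routine, x10 receives the oop result):
  //
  //   call_VM(x10,
  //           CAST_FROM_FN_PTR(address, SomeRuntime::some_entry),
  //           c_rarg1 /* arg_1 */);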

  void call_VM(Register oop_result,
               address entry_point,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               address entry_point,
               Register arg_1,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               address entry_point,
               Register arg_1, Register arg_2,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               address entry_point,
               Register arg_1, Register arg_2, Register arg_3,
               bool check_exceptions = true);

  // Overloadings with last_Java_sp
  void call_VM(Register oop_result,
               Register last_java_sp,
               address entry_point,
               int number_of_arguments = 0,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               Register last_java_sp,
               address entry_point,
               Register arg_1,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               Register last_java_sp,
               address entry_point,
               Register arg_1, Register arg_2,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               Register last_java_sp,
               address entry_point,
               Register arg_1, Register arg_2, Register arg_3,
               bool check_exceptions = true);

  void get_vm_result(Register oop_result, Register java_thread);
  void get_vm_result_2(Register metadata_result, Register java_thread);

  // These always tightly bind to MacroAssembler::call_VM_leaf_base
  // bypassing the virtual implementation
  void call_VM_leaf(address entry_point,
                    int number_of_arguments = 0);
  void call_VM_leaf(address entry_point,
                    Register arg_0);
  void call_VM_leaf(address entry_point,
                    Register arg_0, Register arg_1);
  void call_VM_leaf(address entry_point,
                    Register arg_0, Register arg_1, Register arg_2);

  // These always tightly bind to MacroAssembler::call_VM_base
  // bypassing the virtual implementation
  void super_call_VM_leaf(address entry_point, Register arg_0);
  void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1);
  void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2);
  void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3);

  // last Java Frame (fills frame anchor)
  void set_last_Java_frame(Register last_java_sp, Register last_java_fp, address last_java_pc, Register tmp);
  void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Label &last_java_pc, Register tmp);
  void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Register last_java_pc, Register tmp);

  // thread in the default location (xthread)
  void reset_last_Java_frame(bool clear_fp);
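
  // Pairing sketch (illustrative only; the exact sequence is stub-specific):
  //
  //   Label retaddr;
  //   set_last_Java_frame(sp, fp, retaddr, t0);
  //   // ... call out of Java ...
  //   bind(retaddr);
  //   reset_last_Java_frame(false /* clear_fp */);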

  void call_native(address entry_point,
                   Register arg_0);
  void call_native_base(
    address entry_point,           // the entry point
    Label*  retaddr = NULL
  );

  virtual void call_VM_leaf_base(
    address entry_point,           // the entry point
    int     number_of_arguments,   // the number of arguments to pop after the call
    Label*  retaddr = NULL
  );

  virtual void call_VM_leaf_base(
    address entry_point,           // the entry point
    int     number_of_arguments,   // the number of arguments to pop after the call
    Label&  retaddr) {
    call_VM_leaf_base(entry_point, number_of_arguments, &retaddr);
  }

  virtual void call_VM_base(       // returns the register containing the thread upon return
    Register oop_result,           // where an oop-result ends up if any; use noreg otherwise
    Register java_thread,          // the thread if computed before; use noreg otherwise
    Register last_java_sp,         // to set up last_Java_frame in stubs; use noreg otherwise
    address  entry_point,          // the entry point
    int      number_of_arguments,  // the number of arguments (w/o thread) to pop after the call
    bool     check_exceptions      // whether to check for pending exceptions after return
  );

  void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions);

  virtual void check_and_handle_earlyret(Register java_thread);
  virtual void check_and_handle_popframe(Register java_thread);

  void resolve_weak_handle(Register result, Register tmp);
  void resolve_oop_handle(Register result, Register tmp = x15);
  void resolve_jobject(Register value, Register thread, Register tmp);

  void movoop(Register dst, jobject obj, bool immediate = false);
  void mov_metadata(Register dst, Metadata* obj);
  void bang_stack_size(Register size, Register tmp);
  void set_narrow_oop(Register dst, jobject obj);
  void set_narrow_klass(Register dst, Klass* k);

  void load_mirror(Register dst, Register method, Register tmp = x15);
  void access_load_at(BasicType type, DecoratorSet decorators, Register dst,
                      Address src, Register tmp1, Register thread_tmp);
  void access_store_at(BasicType type, DecoratorSet decorators, Address dst,
                       Register src, Register tmp1, Register thread_tmp);
  void load_klass(Register dst, Register src);
  void store_klass(Register dst, Register src);
  void cmp_klass(Register oop, Register trial_klass, Register tmp, Label &L);

  void encode_klass_not_null(Register r);
  void decode_klass_not_null(Register r);
  void encode_klass_not_null(Register dst, Register src, Register tmp = xheapbase);
  void decode_klass_not_null(Register dst, Register src, Register tmp = xheapbase);
  void decode_heap_oop_not_null(Register r);
  void decode_heap_oop_not_null(Register dst, Register src);
  void decode_heap_oop(Register d, Register s);
  void decode_heap_oop(Register r) { decode_heap_oop(r, r); }
  void encode_heap_oop(Register d, Register s);
  void encode_heap_oop(Register r) { encode_heap_oop(r, r); };
  void load_heap_oop(Register dst, Address src, Register tmp1 = noreg,
                     Register thread_tmp = noreg, DecoratorSet decorators = 0);
  void load_heap_oop_not_null(Register dst, Address src, Register tmp1 = noreg,
                              Register thread_tmp = noreg, DecoratorSet decorators = 0);
  void store_heap_oop(Address dst, Register src, Register tmp1 = noreg,
                      Register thread_tmp = noreg, DecoratorSet decorators = 0);

  void store_klass_gap(Register dst, Register src);

  // currently unimplemented
  // Used for storing NULL. All other oop constants should be
  // stored using routines that take a jobject.
  void store_heap_oop_null(Address dst);

  // This dummy is to prevent a call to store_heap_oop from
  // converting a zero (like NULL) into a Register by giving
  // the compiler two choices it can't resolve

  void store_heap_oop(Address dst, void* dummy);

  // Support for NULL-checks
  //
  // Generates code that causes a NULL OS exception if the content of reg is NULL.
  // If the accessed location is M[reg + offset] and the offset is known, provide the
  // offset. No explicit code generation is needed if the offset is within a certain
  // range (0 <= offset <= page_size).

  virtual void null_check(Register reg, int offset = -1);
  static bool needs_explicit_null_check(intptr_t offset);
  static bool uses_implicit_null_check(void* address);
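
  // Illustrative: an access at a small known offset checks itself,
  //
  //   ld(x11, Address(x10, field_offset));  // faults if x10 is NULL
  //
  // while an offset beyond the guarded range needs the explicit form first
  // (field_offset/large_offset are hypothetical constants):
  //
  //   null_check(x10, large_offset);        // emits a check only when needed
  //   ld(x11, Address(x10, large_offset));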

  // idiv variant which deals with MINLONG as dividend and -1 as divisor
  int corrected_idivl(Register result, Register rs1, Register rs2,
                      bool want_remainder);
  int corrected_idivq(Register result, Register rs1, Register rs2,
                      bool want_remainder);

  // interface method calling
  void lookup_interface_method(Register recv_klass,
                               Register intf_klass,
                               RegisterOrConstant itable_index,
                               Register method_result,
                               Register scan_tmp,
                               Label& no_such_interface,
                               bool return_method = true);

  // virtual method calling
  // n.b. x86 allows RegisterOrConstant for vtable_index
  void lookup_virtual_method(Register recv_klass,
                             RegisterOrConstant vtable_index,
                             Register method_result);

  // Form an address from base + offset in Rd. Rd may or may not
  // actually be used: you must use the Address that is returned. It
  // is up to you to ensure that the shift provided matches the size
  // of your data.
  Address form_address(Register Rd, Register base, long byte_offset);

  // allocation
  void tlab_allocate(
    Register obj,               // result: pointer to object after successful allocation
    Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise
    int con_size_in_bytes,      // object size in bytes if known at compile time
    Register tmp1,              // temp register
    Register tmp2,              // temp register
    Label& slow_case,           // continuation point if fast allocation fails
    bool is_far = false
  );

  void eden_allocate(
    Register obj,               // result: pointer to object after successful allocation
    Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise
    int con_size_in_bytes,      // object size in bytes if known at compile time
    Register tmp,               // temp register
    Label& slow_case,           // continuation point if fast allocation fails
    bool is_far = false
  );

  // Test sub_klass against super_klass, with fast and slow paths.

  // The fast path produces a tri-state answer: yes / no / maybe-slow.
  // One of the three labels can be NULL, meaning take the fall-through.
  // If super_check_offset is -1, the value is loaded up from super_klass.
  // No registers are killed, except tmp_reg.
  void check_klass_subtype_fast_path(Register sub_klass,
                                     Register super_klass,
                                     Register tmp_reg,
                                     Label* L_success,
                                     Label* L_failure,
                                     Label* L_slow_path,
                                     Register super_check_offset = noreg);

  // The rest of the type check; must be wired to a corresponding fast path.
  // It does not repeat the fast path logic, so don't use it standalone.
  // The tmp1_reg and tmp2_reg can be noreg, if no temps are available.
  // Updates the sub's secondary super cache as necessary.
  void check_klass_subtype_slow_path(Register sub_klass,
                                     Register super_klass,
                                     Register tmp1_reg,
                                     Register tmp2_reg,
                                     Label* L_success,
                                     Label* L_failure);
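
  // Wiring sketch (illustrative): passing NULL for L_slow_path makes the fast
  // path fall through to the slow path on "maybe":
  //
  //   Label L_success, L_failure;
  //   check_klass_subtype_fast_path(sub, super, tmp,
  //                                 &L_success, &L_failure, NULL);
  //   check_klass_subtype_slow_path(sub, super, tmp1, tmp2,
  //                                 &L_success, &L_failure);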

  void check_klass_subtype(Register sub_klass,
                           Register super_klass,
                           Register tmp_reg,
                           Label& L_success);

  Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0);

  // only if +VerifyOops
  void verify_oop(Register reg, const char* s = "broken oop");
  void verify_oop_addr(Address addr, const char* s = "broken oop addr");

  void _verify_method_ptr(Register reg, const char* msg, const char* file, int line) {}
  void _verify_klass_ptr(Register reg, const char* msg, const char* file, int line) {}

#define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__)
#define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__)

  // A more convenient access to fence for our purposes.
  // We use four bits to indicate the read and write bits of the predecessor and
  // successor sets, and extend r to ir, w to ow when UseConservativeFence is enabled.
  enum Membar_mask_bits {
    StoreStore = 0b0101,               // (pred = ow   + succ = ow)
    LoadStore  = 0b1001,               // (pred = ir   + succ = ow)
    StoreLoad  = 0b0110,               // (pred = ow   + succ = ir)
    LoadLoad   = 0b1010,               // (pred = ir   + succ = ir)
    AnyAny     = LoadStore | StoreLoad // (pred = iorw + succ = iorw)
  };

  void membar(uint32_t order_constraint);

  static void membar_mask_to_pred_succ(uint32_t order_constraint, uint32_t& predecessor, uint32_t& successor) {
    predecessor = (order_constraint >> 2) & 0x3;
    successor = order_constraint & 0x3;

    // extend rw -> iorw:
    // 01(w) -> 0101(ow)
    // 10(r) -> 1010(ir)
    // 11(rw)-> 1111(iorw)
    if (UseConservativeFence) {
      predecessor |= predecessor << 2;
      successor |= successor << 2;
    }
  }

  static int pred_succ_to_membar_mask(uint32_t predecessor, uint32_t successor) {
    return ((predecessor & 0x3) << 2) | (successor & 0x3);
  }
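
  // Worked example: membar(StoreStore) passes order_constraint = 0b0101, so
  // membar_mask_to_pred_succ() extracts pred = 0b01 (w) and succ = 0b01 (w);
  // with UseConservativeFence each is widened to 0b0101 (ow), i.e. the emitted
  // instruction is "fence ow, ow" rather than "fence w, w".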

  // prints msg, dumps registers and stops execution
  void stop(const char* msg);

  static void debug64(char* msg, int64_t pc, int64_t regs[]);

  void unimplemented(const char* what = "");

  void should_not_reach_here() { stop("should not reach here"); }

  static address target_addr_for_insn(address insn_addr);

  // Required platform-specific helpers for Label::patch_instructions.
  // They _shadow_ the declarations in AbstractAssembler, which are undefined.
  static int pd_patch_instruction_size(address branch, address target);
  static void pd_patch_instruction(address branch, address target, const char* file = NULL, int line = 0) {
    pd_patch_instruction_size(branch, target);
  }
  static address pd_call_destination(address branch) {
    return target_addr_for_insn(branch);
  }

  static int patch_oop(address insn_addr, address o);
  address emit_trampoline_stub(int insts_call_instruction_offset, address target);
  void emit_static_call_stub();

  // The following 4 methods return the offset of the appropriate move instruction

  // Support for fast byte/short loading with zero extension (depending on particular CPU)
  int load_unsigned_byte(Register dst, Address src);
  int load_unsigned_short(Register dst, Address src);

  // Support for fast byte/short loading with sign extension (depending on particular CPU)
  int load_signed_byte(Register dst, Address src);
  int load_signed_short(Register dst, Address src);

  // Load and store values by size and signed-ness
  void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg);
  void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg);

 public:
  // Standard pseudoinstructions
  void nop();
  void mv(Register Rd, Register Rs);
  void notr(Register Rd, Register Rs);
  void neg(Register Rd, Register Rs);
  void negw(Register Rd, Register Rs);
  void sext_w(Register Rd, Register Rs);
  void zext_b(Register Rd, Register Rs);
  void seqz(Register Rd, Register Rs); // set if = zero
  void snez(Register Rd, Register Rs); // set if != zero
  void sltz(Register Rd, Register Rs); // set if < zero
  void sgtz(Register Rd, Register Rs); // set if > zero

  // Float pseudoinstructions
  void fmv_s(FloatRegister Rd, FloatRegister Rs);
  void fabs_s(FloatRegister Rd, FloatRegister Rs); // single-precision absolute value
  void fneg_s(FloatRegister Rd, FloatRegister Rs);

  // Double pseudoinstructions
  void fmv_d(FloatRegister Rd, FloatRegister Rs);
  void fabs_d(FloatRegister Rd, FloatRegister Rs);
  void fneg_d(FloatRegister Rd, FloatRegister Rs);

  // Pseudoinstructions for control and status registers
  void rdinstret(Register Rd);            // read instruction-retired counter
  void rdcycle(Register Rd);              // read cycle counter
  void rdtime(Register Rd);               // read time
  void csrr(Register Rd, unsigned csr);   // read csr
  void csrw(unsigned csr, Register Rs);   // write csr
  void csrs(unsigned csr, Register Rs);   // set bits in csr
  void csrc(unsigned csr, Register Rs);   // clear bits in csr
  void csrwi(unsigned csr, unsigned imm);
  void csrsi(unsigned csr, unsigned imm);
  void csrci(unsigned csr, unsigned imm);
  void frcsr(Register Rd);                // read floating-point csr
  void fscsr(Register Rd, Register Rs);   // swap floating-point csr
  void fscsr(Register Rs);                // write floating-point csr
  void frrm(Register Rd);                 // read floating-point rounding mode
  void fsrm(Register Rd, Register Rs);    // swap floating-point rounding mode
  void fsrm(Register Rs);                 // write floating-point rounding mode
  void fsrmi(Register Rd, unsigned imm);
  void fsrmi(unsigned imm);
  void frflags(Register Rd);              // read floating-point exception flags
  void fsflags(Register Rd, Register Rs); // swap floating-point exception flags
  void fsflags(Register Rs);              // write floating-point exception flags
  void fsflagsi(Register Rd, unsigned imm);
  void fsflagsi(unsigned imm);
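
  // Example: frrm(t0) reads the current rounding mode out of fcsr, and
  // fsrmi(0b001) switches it to round-towards-zero (RTZ in the RISC-V F spec).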

  void beqz(Register Rs, const address &dest);
  void bnez(Register Rs, const address &dest);
  void blez(Register Rs, const address &dest);
  void bgez(Register Rs, const address &dest);
  void bltz(Register Rs, const address &dest);
  void bgtz(Register Rs, const address &dest);
  void la(Register Rd, Label &label);
  void la(Register Rd, const address &dest);
  void la(Register Rd, const Address &adr);

  // label
  void beqz(Register Rs, Label &l, bool is_far = false);
  void bnez(Register Rs, Label &l, bool is_far = false);
  void blez(Register Rs, Label &l, bool is_far = false);
  void bgez(Register Rs, Label &l, bool is_far = false);
  void bltz(Register Rs, Label &l, bool is_far = false);
  void bgtz(Register Rs, Label &l, bool is_far = false);

  void float_beq(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void float_bne(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void float_ble(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void float_bge(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void float_blt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void float_bgt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);

  void double_beq(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void double_bne(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void double_ble(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void double_bge(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void double_blt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void double_bgt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);

  void push_reg(RegSet regs, Register stack) { if (regs.bits()) { push_reg(regs.bits(), stack); } }
  void pop_reg(RegSet regs, Register stack)  { if (regs.bits()) { pop_reg(regs.bits(), stack); } }
  void push_reg(Register Rs);
  void pop_reg(Register Rd);
  int  push_reg(unsigned int bitset, Register stack);
  int  pop_reg(unsigned int bitset, Register stack);
  void push_fp(FloatRegSet regs, Register stack) { if (regs.bits()) push_fp(regs.bits(), stack); }
  void pop_fp(FloatRegSet regs, Register stack)  { if (regs.bits()) pop_fp(regs.bits(), stack); }
#ifdef COMPILER2
  void push_vp(VectorRegSet regs, Register stack) { if (regs.bits()) push_vp(regs.bits(), stack); }
  void pop_vp(VectorRegSet regs, Register stack)  { if (regs.bits()) pop_vp(regs.bits(), stack); }
#endif // COMPILER2

  // Push and pop everything that might be clobbered by a native
  // runtime call except t0 and t1. (They are always
  // temporary registers, so we don't have to protect them.)
  // Additional registers can be excluded in a passed RegSet.
  void push_call_clobbered_registers_except(RegSet exclude);
  void pop_call_clobbered_registers_except(RegSet exclude);
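
  // Usage sketch (illustrative): preserve caller-saved state around a native
  // call while keeping the result register x10 live across the pop:
  //
  //   push_call_clobbered_registers_except(RegSet::of(x10));
  //   // ... native call ...
  //   pop_call_clobbered_registers_except(RegSet::of(x10));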

  void push_call_clobbered_registers() {
    push_call_clobbered_registers_except(RegSet());
  }
  void pop_call_clobbered_registers() {
    pop_call_clobbered_registers_except(RegSet());
  }

  void pusha();
  void popa();
  void push_CPU_state(bool save_vectors = false, int vector_size_in_bytes = 0);
  void pop_CPU_state(bool restore_vectors = false, int vector_size_in_bytes = 0);

  // if heap base register is used - reinit it with the correct value
  void reinit_heapbase();

  void bind(Label& L) {
    Assembler::bind(L);
    // fences across basic blocks should not be merged
    code()->clear_last_insn();
  }

  // mv
  template<typename T, ENABLE_IF(std::is_integral<T>::value)>
  inline void mv(Register Rd, T o) {
    li(Rd, (int64_t)o);
  }

  inline void mvw(Register Rd, int32_t imm32) { mv(Rd, imm32); }

  void mv(Register Rd, Address dest);
  void mv(Register Rd, address addr);
  void mv(Register Rd, RegisterOrConstant src);

  // logic
  void andrw(Register Rd, Register Rs1, Register Rs2);
  void orrw(Register Rd, Register Rs1, Register Rs2);
  void xorrw(Register Rd, Register Rs1, Register Rs2);

  // revb
  void revb_h_h(Register Rd, Register Rs, Register tmp = t0);                           // reverse bytes in halfword in lower 16 bits, sign-extend
  void revb_w_w(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);      // reverse bytes in lower word, sign-extend
  void revb_h_h_u(Register Rd, Register Rs, Register tmp = t0);                         // reverse bytes in halfword in lower 16 bits, zero-extend
  void revb_h_w_u(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);    // reverse bytes in halfwords in lower 32 bits, zero-extend
  void revb_h_helper(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in upper 16 bits (48:63) and move to lower
  void revb_h(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);        // reverse bytes in each halfword
  void revb_w(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);        // reverse bytes in each word
  void revb(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);          // reverse bytes in doubleword

  void ror_imm(Register dst, Register src, uint32_t shift, Register tmp = t0);
  void andi(Register Rd, Register Rn, int64_t imm, Register tmp = t0);
  void orptr(Address adr, RegisterOrConstant src, Register tmp1 = t0, Register tmp2 = t1);

  void cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp, Label &succeed, Label *fail);
  void cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, Label &succeed, Label *fail);
  void cmpxchg(Register addr, Register expected,
               Register new_val,
               enum operand_size size,
               Assembler::Aqrl acquire, Assembler::Aqrl release,
               Register result, bool result_as_bool = false);
  void cmpxchg_weak(Register addr, Register expected,
                    Register new_val,
                    enum operand_size size,
                    Assembler::Aqrl acquire, Assembler::Aqrl release,
                    Register result);
  void cmpxchg_narrow_value_helper(Register addr, Register expected,
                                   Register new_val,
                                   enum operand_size size,
                                   Register tmp1, Register tmp2, Register tmp3);
  void cmpxchg_narrow_value(Register addr, Register expected,
                            Register new_val,
                            enum operand_size size,
                            Assembler::Aqrl acquire, Assembler::Aqrl release,
                            Register result, bool result_as_bool,
                            Register tmp1, Register tmp2, Register tmp3);
  void weak_cmpxchg_narrow_value(Register addr, Register expected,
                                 Register new_val,
                                 enum operand_size size,
                                 Assembler::Aqrl acquire, Assembler::Aqrl release,
                                 Register result,
                                 Register tmp1, Register tmp2, Register tmp3);
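
  // Sketch (assuming the Aqrl and operand_size enums from the RISC-V
  // Assembler): CAS a 64-bit slot at [x10] from x11 to x12, leaving a
  // boolean success flag in t1:
  //
  //   cmpxchg(x10 /* addr */, x11 /* expected */, x12 /* new_val */,
  //           Assembler::int64, Assembler::aq, Assembler::rl,
  //           t1, true /* result_as_bool */);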

  void atomic_add(Register prev, RegisterOrConstant incr, Register addr);
  void atomic_addw(Register prev, RegisterOrConstant incr, Register addr);
  void atomic_addal(Register prev, RegisterOrConstant incr, Register addr);
  void atomic_addalw(Register prev, RegisterOrConstant incr, Register addr);

  void atomic_xchg(Register prev, Register newv, Register addr);
  void atomic_xchgw(Register prev, Register newv, Register addr);
  void atomic_xchgal(Register prev, Register newv, Register addr);
  void atomic_xchgalw(Register prev, Register newv, Register addr);
  void atomic_xchgwu(Register prev, Register newv, Register addr);
  void atomic_xchgalwu(Register prev, Register newv, Register addr);

  static bool far_branches() {
    return ReservedCodeCacheSize > branch_range;
  }

  // Jumps that can reach anywhere in the code cache.
  // Trashes tmp.
  void far_call(Address entry, CodeBuffer *cbuf = NULL, Register tmp = t0);
  void far_jump(Address entry, CodeBuffer *cbuf = NULL, Register tmp = t0);

  static int far_branch_size() {
    if (far_branches()) {
      return 2 * 4;  // auipc + jalr, see far_call() & far_jump()
    } else {
      return 4;
    }
  }

  void load_byte_map_base(Register reg);

  void bang_stack_with_offset(int offset) {
    // stack grows down, caller passes positive offset
    assert(offset > 0, "must bang with negative offset");
    sub(t0, sp, offset);
    sd(zr, Address(t0));
  }

  void la_patchable(Register reg1, const Address &dest, int32_t &offset);

  virtual void _call_Unimplemented(address call_site) {
    mv(t1, call_site);
  }

#define call_Unimplemented() _call_Unimplemented((address)__PRETTY_FUNCTION__)
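
  // Sketch of the two-instruction far-call pattern built on la_patchable
  // (mirroring what far_call() emits when far_branches() is true; "dest"
  // is a stand-in Address):
  //
  //   int32_t offset = 0;
  //   la_patchable(t0, dest, offset);  // auipc t0, hi20
  //   jalr(x1, t0, offset);            // jalr  ra, lo12(t0)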

  // Frame creation and destruction shared between JITs.
  void build_frame(int framesize);
  void remove_frame(int framesize);

  void reserved_stack_check();

  void get_polling_page(Register dest, relocInfo::relocType rtype);
  address read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype);

  address trampoline_call(Address entry, CodeBuffer* cbuf = NULL);
  address ic_call(address entry, jint method_index = 0);

  void add_memory_int64(const Address dst, int64_t imm);
  void add_memory_int32(const Address dst, int32_t imm);

  void cmpptr(Register src1, Address src2, Label& equal);

  void clinit_barrier(Register klass, Register tmp, Label* L_fast_path = NULL, Label* L_slow_path = NULL);
  void load_method_holder_cld(Register result, Register method);
  void load_method_holder(Register holder, Register method);

  void compute_index(Register str1, Register trailing_zeros, Register match_mask,
                     Register result, Register char_tmp, Register tmp,
                     bool haystack_isL);
  void compute_match_mask(Register src, Register pattern, Register match_mask,
                          Register mask1, Register mask2);

#ifdef COMPILER2
  void mul_add(Register out, Register in, Register offset,
               Register len, Register k, Register tmp);
  void cad(Register dst, Register src1, Register src2, Register carry);
  void cadc(Register dst, Register src1, Register src2, Register carry);
  void adc(Register dst, Register src1, Register src2, Register carry);
  void add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo,
                       Register src1, Register src2, Register carry);
  void multiply_32_x_32_loop(Register x, Register xstart, Register x_xstart,
                             Register y, Register y_idx, Register z,
                             Register carry, Register product,
                             Register idx, Register kdx);
  void multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart,
                             Register y, Register y_idx, Register z,
                             Register carry, Register product,
                             Register idx, Register kdx);
  void multiply_128_x_128_loop(Register y, Register z,
                               Register carry, Register carry2,
                               Register idx, Register jdx,
                               Register yz_idx1, Register yz_idx2,
                               Register tmp, Register tmp3, Register tmp4,
                               Register tmp6, Register product_hi);
  void multiply_to_len(Register x, Register xlen, Register y, Register ylen,
                       Register z, Register zlen,
                       Register tmp1, Register tmp2, Register tmp3, Register tmp4,
                       Register tmp5, Register tmp6, Register product_hi);
#endif

  void inflate_lo32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);
  void inflate_hi32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);

  void ctzc_bit(Register Rd, Register Rs, bool isLL = false, Register tmp1 = t0, Register tmp2 = t1);

  void zero_words(Register base, u_int64_t cnt);
  address zero_words(Register ptr, Register cnt);
  void fill_words(Register base, Register cnt, Register value);
  void zero_memory(Register addr, Register len, Register tmp);

  // shift left by shamt and add
  void shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt);

  // The float conversion instructions with a "_safe" suffix deal with special
  // inputs: converting e.g. NaN, +Inf or -Inf to int would take the exceptional
  // path and yield a non-Java result, so these helpers handle such situations
  // explicitly to produce correct results.
  void fcvt_w_s_safe(Register dst, FloatRegister src, Register tmp = t0);
  void fcvt_l_s_safe(Register dst, FloatRegister src, Register tmp = t0);
  void fcvt_w_d_safe(Register dst, FloatRegister src, Register tmp = t0);
  void fcvt_l_d_safe(Register dst, FloatRegister src, Register tmp = t0);
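
  // For example, fcvt_w_s_safe(x10, f10) leaves 0 in x10 when f10 holds NaN
  // (matching Java's (int)NaN == 0), whereas a raw fcvt.w.s would produce
  // 2^31 - 1.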

  // vector load/store unit-stride instructions
  void vlex_v(VectorRegister vd, Register base, Assembler::SEW sew, VectorMask vm = unmasked) {
    switch (sew) {
      case Assembler::e64:
        vle64_v(vd, base, vm);
        break;
      case Assembler::e32:
        vle32_v(vd, base, vm);
        break;
      case Assembler::e16:
        vle16_v(vd, base, vm);
        break;
      case Assembler::e8: // fall through
      default:
        vle8_v(vd, base, vm);
        break;
    }
  }

  void vsex_v(VectorRegister store_data, Register base, Assembler::SEW sew, VectorMask vm = unmasked) {
    switch (sew) {
      case Assembler::e64:
        vse64_v(store_data, base, vm);
        break;
      case Assembler::e32:
        vse32_v(store_data, base, vm);
        break;
      case Assembler::e16:
        vse16_v(store_data, base, vm);
        break;
      case Assembler::e8: // fall through
      default:
        vse8_v(store_data, base, vm);
        break;
    }
  }

  static const int zero_words_block_size;

  void cast_primitive_type(BasicType type, Register Rt) {
    switch (type) {
      case T_BOOLEAN:
        sltu(Rt, zr, Rt);
        break;
      case T_CHAR:
        zero_extend(Rt, Rt, 16);
        break;
      case T_BYTE:
        sign_extend(Rt, Rt, 8);
        break;
      case T_SHORT:
        sign_extend(Rt, Rt, 16);
        break;
      case T_INT:
        addw(Rt, Rt, zr);
        break;
      case T_LONG:   /* nothing to do */ break;
      case T_VOID:   /* nothing to do */ break;
      case T_FLOAT:  /* nothing to do */ break;
      case T_DOUBLE: /* nothing to do */ break;
      default: ShouldNotReachHere();
    }
  }

  // float cmp with unordered_result
  void float_compare(Register result, FloatRegister Rs1, FloatRegister Rs2, int unordered_result);
  void double_compare(Register result, FloatRegister Rs1, FloatRegister Rs2, int unordered_result);

  // Zero/Sign-extend
  void zero_extend(Register dst, Register src, int bits);
  void sign_extend(Register dst, Register src, int bits);

  // compare src1 and src2 and get -1/0/1 in dst.
  // if [src1 > src2], dst = 1;
  // if [src1 == src2], dst = 0;
  // if [src1 < src2], dst = -1;
  void cmp_l2i(Register dst, Register src1, Register src2, Register tmp = t0);

  int push_fp(unsigned int bitset, Register stack);
  int pop_fp(unsigned int bitset, Register stack);

  int push_vp(unsigned int bitset, Register stack);
  int pop_vp(unsigned int bitset, Register stack);

  // vext
  void vmnot_m(VectorRegister vd, VectorRegister vs);
  void vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked);
  void vfneg_v(VectorRegister vd, VectorRegister vs);
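
  // Note: these expand as in the RVV spec's pseudoinstruction list, e.g.
  // vmnot_m(vd, vs) emits vmnand.mm vd, vs, vs and vfneg_v(vd, vs) emits
  // vfsgnjn.vv vd, vs, vs.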

 private:

#ifdef ASSERT
  // Template short-hand support to clean-up after a failed call to trampoline
  // call generation (see trampoline_call() below), when a set of Labels must
  // be reset (before returning).
  template<typename Label, typename... More>
  void reset_labels(Label& lbl, More&... more) {
    lbl.reset(); reset_labels(more...);
  }
  template<typename Label>
  void reset_labels(Label& lbl) {
    lbl.reset();
  }
#endif
  void repne_scan(Register addr, Register value, Register count, Register tmp);

  // Return true if an address is within the 48-bit RISCV64 address space.
  bool is_valid_riscv64_address(address addr) {
    return ((uintptr_t)addr >> 48) == 0;
  }

  void ld_constant(Register dest, const Address &const_addr) {
    if (NearCpool) {
      ld(dest, const_addr);
    } else {
      int32_t offset = 0;
      la_patchable(dest, InternalAddress(const_addr.target()), offset);
      ld(dest, Address(dest, offset));
    }
  }

  int bitset_to_regs(unsigned int bitset, unsigned char* regs);
  Address add_memory_helper(const Address dst);

  void load_reserved(Register addr, enum operand_size size, Assembler::Aqrl acquire);
  void store_conditional(Register addr, Register new_val, enum operand_size size, Assembler::Aqrl release);

  // Check the current thread doesn't need a cross modify fence.
  void verify_cross_modify_fence_not_required() PRODUCT_RETURN;
};

#ifdef ASSERT
inline bool AbstractAssembler::pd_check_instruction_mark() { return false; }
#endif

/**
 * class SkipIfEqual:
 *
 * Instantiating this class will result in assembly code being output that will
 * jump around any code emitted between the creation of the instance and its
 * automatic destruction at the end of a scope block, depending on the value of
 * the flag passed to the constructor, which will be checked at run-time.
 */
class SkipIfEqual {
 private:
  MacroAssembler* _masm;
  Label _label;

 public:
  SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value);
  ~SkipIfEqual();
};

#endif // CPU_RISCV_MACROASSEMBLER_RISCV_HPP