/*
 * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved.
 * Copyright (c) 2020, 2022, Huawei Technologies Co., Ltd. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef CPU_RISCV_MACROASSEMBLER_RISCV_HPP
#define CPU_RISCV_MACROASSEMBLER_RISCV_HPP

#include "asm/assembler.hpp"
#include "metaprogramming/enableIf.hpp"
#include "nativeInst_riscv.hpp"
#include "oops/compressedOops.hpp"
#include "utilities/powerOfTwo.hpp"

// MacroAssembler extends Assembler by frequently used macros.
//
// Instructions for which a 'better' code sequence exists depending
// on arguments should also go in here.

class MacroAssembler: public Assembler {

 public:
  MacroAssembler(CodeBuffer* code) : Assembler(code) {
  }
  virtual ~MacroAssembler() {}

  void safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod);

  // Alignment
  void align(int modulus, int extra_offset = 0);
  static inline void assert_alignment(address pc, int alignment = NativeInstruction::instruction_size) {
    assert(is_aligned(pc, alignment), "bad alignment");
  }

  // Stack frame creation/removal
  // Note that SP must be updated to the right place before saving/restoring RA and FP
  // because signal-based thread suspend/resume could happen asynchronously.
  void enter() {
    addi(sp, sp, - 2 * wordSize);
    sd(ra, Address(sp, wordSize));
    sd(fp, Address(sp));
    addi(fp, sp, 2 * wordSize);
  }

  void leave() {
    addi(sp, fp, - 2 * wordSize);
    ld(fp, Address(sp));
    ld(ra, Address(sp, wordSize));
    addi(sp, sp, 2 * wordSize);
  }


  // Support for getting the JavaThread pointer (i.e., a reference to thread-local information)
  // The pointer will be loaded into the thread register.
  void get_thread(Register thread);

  // Support for VM calls
  //
  // It is imperative that all calls into the VM are handled via the call_VM macros.
  // They make sure that the stack linkage is set up correctly. call_VM's correspond
  // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points.
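  //
  // A typical call site looks roughly like the following (a hypothetical sketch:
  // the entry point SomeRuntime::some_entry and the chosen registers are
  // illustrative only, not taken from this file):
  //
  //   // oop result ends up in x10, one argument passed in c_rarg1
  //   call_VM(x10, CAST_FROM_FN_PTR(address, SomeRuntime::some_entry), c_rarg1);
  //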

  void call_VM(Register oop_result,
               address entry_point,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               address entry_point,
               Register arg_1,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               address entry_point,
               Register arg_1, Register arg_2,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               address entry_point,
               Register arg_1, Register arg_2, Register arg_3,
               bool check_exceptions = true);

  // Overloadings with last_Java_sp
  void call_VM(Register oop_result,
               Register last_java_sp,
               address entry_point,
               int number_of_arguments = 0,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               Register last_java_sp,
               address entry_point,
               Register arg_1,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               Register last_java_sp,
               address entry_point,
               Register arg_1, Register arg_2,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               Register last_java_sp,
               address entry_point,
               Register arg_1, Register arg_2, Register arg_3,
               bool check_exceptions = true);

  void get_vm_result(Register oop_result, Register java_thread);
  void get_vm_result_2(Register metadata_result, Register java_thread);

  // These always tightly bind to MacroAssembler::call_VM_leaf_base
  // bypassing the virtual implementation
  void call_VM_leaf(address entry_point,
                    int number_of_arguments = 0);
  void call_VM_leaf(address entry_point,
                    Register arg_0);
  void call_VM_leaf(address entry_point,
                    Register arg_0, Register arg_1);
  void call_VM_leaf(address entry_point,
                    Register arg_0, Register arg_1, Register arg_2);

  // These always tightly bind to MacroAssembler::call_VM_base
  // bypassing the virtual implementation
  void super_call_VM_leaf(address entry_point, Register arg_0);
  void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1);
  void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2);
  void super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3);

  // last Java Frame (fills frame anchor)
  void set_last_Java_frame(Register last_java_sp, Register last_java_fp, address last_java_pc, Register tmp);
  void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Label &last_java_pc, Register tmp);
  void set_last_Java_frame(Register last_java_sp, Register last_java_fp, Register last_java_pc, Register tmp);

  // thread in the default location (xthread)
  void reset_last_Java_frame(bool clear_fp);

  virtual void call_VM_leaf_base(
    address entry_point,               // the entry point
    int     number_of_arguments,       // the number of arguments to pop after the call
    Label*  retaddr = NULL
  );

  virtual void call_VM_leaf_base(
    address entry_point,               // the entry point
    int     number_of_arguments,       // the number of arguments to pop after the call
    Label&  retaddr) {
    call_VM_leaf_base(entry_point, number_of_arguments, &retaddr);
  }

  virtual void call_VM_base(           // returns the register containing the thread upon return
    Register oop_result,               // where an oop-result ends up if any; use noreg otherwise
    Register java_thread,              // the thread if computed before; use noreg otherwise
    Register last_java_sp,             // to set up last_Java_frame in stubs; use noreg otherwise
    address  entry_point,              // the entry point
    int      number_of_arguments,      // the number of arguments (w/o thread) to pop after the call
    bool     check_exceptions          // whether to check for pending exceptions after return
  );

  void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions);

  virtual void check_and_handle_earlyret(Register java_thread);
  virtual void check_and_handle_popframe(Register java_thread);

  void resolve_weak_handle(Register result, Register tmp);
  void resolve_oop_handle(Register result, Register tmp = x15);
  void resolve_jobject(Register value, Register thread, Register tmp);

  void movoop(Register dst, jobject obj, bool immediate = false);
  void mov_metadata(Register dst, Metadata* obj);
  void bang_stack_size(Register size, Register tmp);
  void set_narrow_oop(Register dst, jobject obj);
  void set_narrow_klass(Register dst, Klass* k);

  void load_mirror(Register dst, Register method, Register tmp = x15);
  void access_load_at(BasicType type, DecoratorSet decorators, Register dst,
                      Address src, Register tmp1, Register thread_tmp);
  void access_store_at(BasicType type, DecoratorSet decorators, Address dst,
                       Register src, Register tmp1, Register thread_tmp);
  void load_klass(Register dst, Register src, Register tmp = t0);
  void store_klass(Register dst, Register src, Register tmp = t0);
  void cmp_klass(Register oop, Register trial_klass, Register tmp1, Register tmp2, Label &L);

  void encode_klass_not_null(Register r, Register tmp = t0);
  void decode_klass_not_null(Register r, Register tmp = t0);
  void encode_klass_not_null(Register dst, Register src, Register tmp);
  void decode_klass_not_null(Register dst, Register src, Register tmp);
  void decode_heap_oop_not_null(Register r);
  void decode_heap_oop_not_null(Register dst, Register src);
  void decode_heap_oop(Register d, Register s);
  void decode_heap_oop(Register r) { decode_heap_oop(r, r); }
  void encode_heap_oop(Register d, Register s);
  void encode_heap_oop(Register r) { encode_heap_oop(r, r); };
  void load_heap_oop(Register dst, Address src, Register tmp1 = noreg,
                     Register thread_tmp = noreg, DecoratorSet decorators = 0);
  void load_heap_oop_not_null(Register dst, Address src, Register tmp1 = noreg,
                              Register thread_tmp = noreg, DecoratorSet decorators = 0);
  void store_heap_oop(Address dst, Register src, Register tmp1 = noreg,
                      Register thread_tmp = noreg, DecoratorSet decorators = 0);

  void store_klass_gap(Register dst, Register src);

  // currently unimplemented
  // Used for storing NULL. All other oop constants should be
  // stored using routines that take a jobject.
  void store_heap_oop_null(Address dst);

  // This dummy is to prevent a call to store_heap_oop from
  // converting a zero (like NULL) into a Register by giving
  // the compiler two choices it can't resolve

  void store_heap_oop(Address dst, void* dummy);

  // Support for NULL-checks
  //
  // Generates code that causes a NULL OS exception if the content of reg is NULL.
  // If the accessed location is M[reg + offset] and the offset is known, provide the
  // offset. No explicit code generation is needed if the offset is within a certain
  // range (0 <= offset <= page_size).
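  //
  // Minimal usage sketch (the register and offset are illustrative only):
  //
  //   null_check(x10, 8);   // offset known and small: nothing needs to be emitted here,
  //                         // the later access of M[x10 + 8] itself faults if x10 is NULL
  //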

  virtual void null_check(Register reg, int offset = -1);
  static bool needs_explicit_null_check(intptr_t offset);
  static bool uses_implicit_null_check(void* address);

  // idiv variant which deals with MINLONG as dividend and -1 as divisor
  int corrected_idivl(Register result, Register rs1, Register rs2,
                      bool want_remainder);
  int corrected_idivq(Register result, Register rs1, Register rs2,
                      bool want_remainder);

  // interface method calling
  void lookup_interface_method(Register recv_klass,
                               Register intf_klass,
                               RegisterOrConstant itable_index,
                               Register method_result,
                               Register scan_tmp,
                               Label& no_such_interface,
                               bool return_method = true);

  // virtual method calling
  // n.b. x86 allows RegisterOrConstant for vtable_index
  void lookup_virtual_method(Register recv_klass,
                             RegisterOrConstant vtable_index,
                             Register method_result);

  // Form an address from base + offset in Rd. Rd may or may not
  // actually be used: you must use the Address that is returned. It
  // is up to you to ensure that the shift provided matches the size
  // of your data.
  Address form_address(Register Rd, Register base, long byte_offset);

  // allocation
  void tlab_allocate(
    Register obj,                   // result: pointer to object after successful allocation
    Register var_size_in_bytes,     // object size in bytes if unknown at compile time; invalid otherwise
    int      con_size_in_bytes,     // object size in bytes if known at compile time
    Register tmp1,                  // temp register
    Register tmp2,                  // temp register
    Label&   slow_case,             // continuation point if fast allocation fails
    bool     is_far = false
  );

  void eden_allocate(
    Register obj,                   // result: pointer to object after successful allocation
    Register var_size_in_bytes,     // object size in bytes if unknown at compile time; invalid otherwise
    int      con_size_in_bytes,     // object size in bytes if known at compile time
    Register tmp,                   // temp register
    Label&   slow_case,             // continuation point if fast allocation fails
    bool     is_far = false
  );

  // Test sub_klass against super_klass, with fast and slow paths.

  // The fast path produces a tri-state answer: yes / no / maybe-slow.
  // One of the three labels can be NULL, meaning take the fall-through.
  // If super_check_offset is -1, the value is loaded up from super_klass.
  // No registers are killed, except tmp_reg.
  void check_klass_subtype_fast_path(Register sub_klass,
                                     Register super_klass,
                                     Register tmp_reg,
                                     Label* L_success,
                                     Label* L_failure,
                                     Label* L_slow_path,
                                     Register super_check_offset = noreg);

  // The rest of the type check; must be wired to a corresponding fast path.
  // It does not repeat the fast path logic, so don't use it standalone.
  // The tmp1_reg and tmp2_reg can be noreg, if no temps are available.
  // Updates the sub's secondary super cache as necessary.
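  //
  // For reference, the combined helper check_klass_subtype() below pairs the two
  // roughly like this (a simplified sketch, not the exact implementation):
  //
  //   Label L_failure;
  //   check_klass_subtype_fast_path(sub_klass, super_klass, tmp_reg, &L_success, &L_failure, NULL);
  //   check_klass_subtype_slow_path(sub_klass, super_klass, tmp_reg, noreg, &L_success, NULL);
  //   bind(L_failure);   // falling through here means "not a subtype"
  //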
  void check_klass_subtype_slow_path(Register sub_klass,
                                     Register super_klass,
                                     Register tmp1_reg,
                                     Register tmp2_reg,
                                     Label* L_success,
                                     Label* L_failure);

  void check_klass_subtype(Register sub_klass,
                           Register super_klass,
                           Register tmp_reg,
                           Label& L_success);

  Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0);

  // only if +VerifyOops
  void verify_oop(Register reg, const char* s = "broken oop");
  void verify_oop_addr(Address addr, const char* s = "broken oop addr");

  void _verify_method_ptr(Register reg, const char* msg, const char* file, int line) {}
  void _verify_klass_ptr(Register reg, const char* msg, const char* file, int line) {}

#define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__)
#define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__)

  // A more convenient access to fence for our purposes.
  // We use four bits to indicate the read and write bits in the predecessors and successors,
  // and extend i for r and o for w if UseConservativeFence is enabled.
  enum Membar_mask_bits {
    StoreStore = 0b0101,               // (pred = ow   + succ =   ow)
    LoadStore  = 0b1001,               // (pred = ir   + succ =   ow)
    StoreLoad  = 0b0110,               // (pred = ow   + succ =   ir)
    LoadLoad   = 0b1010,               // (pred = ir   + succ =   ir)
    AnyAny     = LoadStore | StoreLoad // (pred = iorw + succ = iorw)
  };

  void membar(uint32_t order_constraint);

  static void membar_mask_to_pred_succ(uint32_t order_constraint, uint32_t& predecessor, uint32_t& successor) {
    predecessor = (order_constraint >> 2) & 0x3;
    successor = order_constraint & 0x3;

    // extend rw -> iorw:
    // 01(w) -> 0101(ow)
    // 10(r) -> 1010(ir)
    // 11(rw)-> 1111(iorw)
    if (UseConservativeFence) {
      predecessor |= predecessor << 2;
      successor |= successor << 2;
    }
  }

  static int pred_succ_to_membar_mask(uint32_t predecessor, uint32_t successor) {
    return ((predecessor & 0x3) << 2) | (successor & 0x3);
  }

  // prints msg, dumps registers and stops execution
  void stop(const char* msg);

  static void debug64(char* msg, int64_t pc, int64_t regs[]);

  void unimplemented(const char* what = "");

  void should_not_reach_here() { stop("should not reach here"); }

  static address target_addr_for_insn(address insn_addr);

  // Required platform-specific helpers for Label::patch_instructions.
  // They _shadow_ the declarations in AbstractAssembler, which are undefined.
  static int pd_patch_instruction_size(address branch, address target);
  static void pd_patch_instruction(address branch, address target, const char* file = NULL, int line = 0) {
    pd_patch_instruction_size(branch, target);
  }
  static address pd_call_destination(address branch) {
    return target_addr_for_insn(branch);
  }

  static int patch_oop(address insn_addr, address o);
  address emit_trampoline_stub(int insts_call_instruction_offset, address target);
  void emit_static_call_stub();

  // The following 4 methods return the offset of the appropriate move instruction

  // Support for fast byte/short loading with zero extension (depending on particular CPU)
  int load_unsigned_byte(Register dst, Address src);
  int load_unsigned_short(Register dst, Address src);

  // Support for fast byte/short loading with sign extension (depending on particular CPU)
  int load_signed_byte(Register dst, Address src);
  int load_signed_short(Register dst, Address src);

  // Load and store values by size and signed-ness
  void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg);
  void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg);

 public:
  // Standard pseudoinstructions
  void nop();
  void mv(Register Rd, Register Rs);
  void notr(Register Rd, Register Rs);
  void neg(Register Rd, Register Rs);
  void negw(Register Rd, Register Rs);
  void sext_w(Register Rd, Register Rs);
  void zext_b(Register Rd, Register Rs);
  void seqz(Register Rd, Register Rs);          // set if = zero
  void snez(Register Rd, Register Rs);          // set if != zero
  void sltz(Register Rd, Register Rs);          // set if < zero
  void sgtz(Register Rd, Register Rs);          // set if > zero

  // Float pseudoinstructions
  void fmv_s(FloatRegister Rd, FloatRegister Rs);
  void fabs_s(FloatRegister Rd, FloatRegister Rs);    // single-precision absolute value
  void fneg_s(FloatRegister Rd, FloatRegister Rs);

  // Double pseudoinstructions
  void fmv_d(FloatRegister Rd, FloatRegister Rs);
  void fabs_d(FloatRegister Rd, FloatRegister Rs);
  void fneg_d(FloatRegister Rd, FloatRegister Rs);

  // Pseudoinstructions for control and status registers
  void rdinstret(Register Rd);                  // read instruction-retired counter
  void rdcycle(Register Rd);                    // read cycle counter
  void rdtime(Register Rd);                     // read time
  void csrr(Register Rd, unsigned csr);         // read csr
  void csrw(unsigned csr, Register Rs);         // write csr
  void csrs(unsigned csr, Register Rs);         // set bits in csr
  void csrc(unsigned csr, Register Rs);         // clear bits in csr
  void csrwi(unsigned csr, unsigned imm);
  void csrsi(unsigned csr, unsigned imm);
  void csrci(unsigned csr, unsigned imm);
  void frcsr(Register Rd);                      // read floating-point csr
  void fscsr(Register Rd, Register Rs);         // swap floating-point csr
  void fscsr(Register Rs);                      // write floating-point csr
  void frrm(Register Rd);                       // read floating-point rounding mode
  void fsrm(Register Rd, Register Rs);          // swap floating-point rounding mode
  void fsrm(Register Rs);                       // write floating-point rounding mode
  void fsrmi(Register Rd, unsigned imm);
  void fsrmi(unsigned imm);
  void frflags(Register Rd);                    // read floating-point exception flags
  void fsflags(Register Rd, Register Rs);       // swap floating-point exception flags
  void fsflags(Register Rs);                    // write floating-point exception flags
  void fsflagsi(Register Rd, unsigned imm);
  void fsflagsi(unsigned imm);

  void beqz(Register Rs, const address &dest);
  void bnez(Register Rs, const address &dest);
  void blez(Register Rs, const address &dest);
  void bgez(Register Rs, const address &dest);
  void bltz(Register Rs, const address &dest);
  void bgtz(Register Rs, const address &dest);
  void la(Register Rd, Label &label);
  void la(Register Rd, const address &dest);
  void la(Register Rd, const Address &adr);
  // label
  void beqz(Register Rs, Label &l, bool is_far = false);
  void bnez(Register Rs, Label &l, bool is_far = false);
  void blez(Register Rs, Label &l, bool is_far = false);
  void bgez(Register Rs, Label &l, bool is_far = false);
  void bltz(Register Rs, Label &l, bool is_far = false);
  void bgtz(Register Rs, Label &l, bool is_far = false);
  void float_beq(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void float_bne(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void float_ble(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void float_bge(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void float_blt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void float_bgt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void double_beq(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void double_bne(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void double_ble(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void double_bge(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void double_blt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);
  void double_bgt(FloatRegister Rs1, FloatRegister Rs2, Label &l, bool is_far = false, bool is_unordered = false);

  void push_reg(RegSet regs, Register stack) { if (regs.bits()) { push_reg(regs.bits(), stack); } }
  void pop_reg(RegSet regs, Register stack)  { if (regs.bits()) { pop_reg(regs.bits(), stack); } }
  void push_reg(Register Rs);
  void pop_reg(Register Rd);
  int  push_reg(unsigned int bitset, Register stack);
  int  pop_reg(unsigned int bitset, Register stack);
  void push_fp(FloatRegSet regs, Register stack) { if (regs.bits()) push_fp(regs.bits(), stack); }
  void pop_fp(FloatRegSet regs, Register stack)  { if (regs.bits()) pop_fp(regs.bits(), stack); }
#ifdef COMPILER2
  void push_vp(VectorRegSet regs, Register stack) { if (regs.bits()) push_vp(regs.bits(), stack); }
  void pop_vp(VectorRegSet regs, Register stack)  { if (regs.bits()) pop_vp(regs.bits(), stack); }
#endif // COMPILER2

  // Push and pop everything that might be clobbered by a native
  // runtime call except t0 and t1. (They are always
  // temporary registers, so we don't have to protect them.)
  // Additional registers can be excluded in a passed RegSet.
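  //
  // Usage sketch (the excluded register is illustrative only):
  //
  //   push_call_clobbered_registers_except(RegSet::of(x10));  // keep x10 live across the call
  //   // ... native runtime call ...
  //   pop_call_clobbered_registers_except(RegSet::of(x10));
  //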
  void push_call_clobbered_registers_except(RegSet exclude);
  void pop_call_clobbered_registers_except(RegSet exclude);

  void push_call_clobbered_registers() {
    push_call_clobbered_registers_except(RegSet());
  }
  void pop_call_clobbered_registers() {
    pop_call_clobbered_registers_except(RegSet());
  }

  void push_CPU_state(bool save_vectors = false, int vector_size_in_bytes = 0);
  void pop_CPU_state(bool restore_vectors = false, int vector_size_in_bytes = 0);

  // if heap base register is used - reinit it with the correct value
  void reinit_heapbase();

  void bind(Label& L) {
    Assembler::bind(L);
    // fences across basic blocks should not be merged
    code()->clear_last_insn();
  }

  // mv
  template<typename T, ENABLE_IF(std::is_integral<T>::value)>
  inline void mv(Register Rd, T o) {
    li(Rd, (int64_t)o);
  }

  inline void mvw(Register Rd, int32_t imm32) { mv(Rd, imm32); }

  void mv(Register Rd, Address dest);
  void mv(Register Rd, address addr);
  void mv(Register Rd, RegisterOrConstant src);

  // logic
  void andrw(Register Rd, Register Rs1, Register Rs2);
  void orrw(Register Rd, Register Rs1, Register Rs2);
  void xorrw(Register Rd, Register Rs1, Register Rs2);

  // revb
  void revb_h_h(Register Rd, Register Rs, Register tmp = t0);                           // reverse bytes in halfword in lower 16 bits, sign-extend
  void revb_w_w(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);      // reverse bytes in lower word, sign-extend
  void revb_h_h_u(Register Rd, Register Rs, Register tmp = t0);                         // reverse bytes in halfword in lower 16 bits, zero-extend
  void revb_h_w_u(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);    // reverse bytes in halfwords in lower 32 bits, zero-extend
  void revb_h_helper(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1); // reverse bytes in upper 16 bits (48:63) and move to lower
  void revb_h(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);        // reverse bytes in each halfword
  void revb_w(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);        // reverse bytes in each word
  void revb(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);          // reverse bytes in doubleword

  void ror_imm(Register dst, Register src, uint32_t shift, Register tmp = t0);
  void andi(Register Rd, Register Rn, int64_t imm, Register tmp = t0);
  void orptr(Address adr, RegisterOrConstant src, Register tmp1 = t0, Register tmp2 = t1);

  void cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp, Label &succeed, Label *fail);
  void cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, Label &succeed, Label *fail);
  void cmpxchg(Register addr, Register expected,
               Register new_val,
               enum operand_size size,
               Assembler::Aqrl acquire, Assembler::Aqrl release,
               Register result, bool result_as_bool = false);
  void cmpxchg_weak(Register addr, Register expected,
                    Register new_val,
                    enum operand_size size,
                    Assembler::Aqrl acquire, Assembler::Aqrl release,
                    Register result);
  void cmpxchg_narrow_value_helper(Register addr, Register expected,
                                   Register new_val,
                                   enum operand_size size,
                                   Register tmp1, Register tmp2, Register tmp3);
  void cmpxchg_narrow_value(Register addr, Register expected,
                            Register new_val,
                            enum operand_size size,
                            Assembler::Aqrl acquire, Assembler::Aqrl release,
                            Register result, bool result_as_bool,
                            Register tmp1, Register tmp2, Register tmp3);
  void weak_cmpxchg_narrow_value(Register addr, Register expected,
                                 Register new_val,
                                 enum operand_size size,
                                 Assembler::Aqrl acquire, Assembler::Aqrl release,
                                 Register result,
                                 Register tmp1, Register tmp2, Register tmp3);

  void atomic_add(Register prev, RegisterOrConstant incr, Register addr);
  void atomic_addw(Register prev, RegisterOrConstant incr, Register addr);
  void atomic_addal(Register prev, RegisterOrConstant incr, Register addr);
  void atomic_addalw(Register prev, RegisterOrConstant incr, Register addr);

  void atomic_xchg(Register prev, Register newv, Register addr);
  void atomic_xchgw(Register prev, Register newv, Register addr);
  void atomic_xchgal(Register prev, Register newv, Register addr);
  void atomic_xchgalw(Register prev, Register newv, Register addr);
  void atomic_xchgwu(Register prev, Register newv, Register addr);
  void atomic_xchgalwu(Register prev, Register newv, Register addr);

  void atomic_incw(Register counter_addr, Register tmp);
  void atomic_incw(Address counter_addr, Register tmp1, Register tmp2) {
    la(tmp1, counter_addr);
    atomic_incw(tmp1, tmp2);
  }

  // Biased locking support
  // lock_reg and obj_reg must be loaded up with the appropriate values.
  // swap_reg is killed.
  // tmp_reg must be supplied and must not be t0 or t1.
  // Optional slow case is for implementations (interpreter and C1) which branch to
  // the slow case directly. Leaves condition codes set for C2's Fast_Lock node.
  // Returns offset of first potentially-faulting instruction for null
  // check info (currently consumed only by C1). If
  // swap_reg_contains_mark is true then returns -1 as it is assumed
  // the calling code has already passed any potential faults.
  void biased_locking_enter(Register lock_reg, Register obj_reg,
                            Register swap_reg, Register tmp_reg,
                            bool swap_reg_contains_mark,
                            Label& done, Label* slow_case = NULL,
                            BiasedLockingCounters* counters = NULL,
                            Register flag = noreg);
  void biased_locking_exit(Register obj_reg, Register tmp_reg, Label& done, Register flag = noreg);

  static bool far_branches() {
    return ReservedCodeCacheSize > branch_range;
  }

  // Jumps that can reach anywhere in the code cache.
  // Trashes tmp.
  void far_call(Address entry, CodeBuffer *cbuf = NULL, Register tmp = t0);
  void far_jump(Address entry, CodeBuffer *cbuf = NULL, Register tmp = t0);

  static int far_branch_size() {
    if (far_branches()) {
      return 2 * 4;  // auipc + jalr, see far_call() & far_jump()
    } else {
      return 4;
    }
  }

  void load_byte_map_base(Register reg);

  void bang_stack_with_offset(int offset) {
    // stack grows down, caller passes positive offset
    assert(offset > 0, "must bang with positive offset");
    sub(t0, sp, offset);
    sd(zr, Address(t0));
  }

  void la_patchable(Register reg1, const Address &dest, int32_t &offset);

  virtual void _call_Unimplemented(address call_site) {
    mv(t1, call_site);
  }

#define call_Unimplemented() _call_Unimplemented((address)__PRETTY_FUNCTION__)

  // Frame creation and destruction shared between JITs.
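  //
  // Typical pairing in generated code (a sketch; the symbolic frame size is
  // illustrative only):
  //
  //   build_frame(framesize);    // prologue: set up the frame for the method body
  //   // ... method body ...
  //   remove_frame(framesize);   // epilogue: tear the frame down before returning
  //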
  void build_frame(int framesize);
  void remove_frame(int framesize);

  void reserved_stack_check();

  void get_polling_page(Register dest, relocInfo::relocType rtype);
  void read_polling_page(Register r, int32_t offset, relocInfo::relocType rtype);

  address trampoline_call(Address entry, CodeBuffer* cbuf = NULL);
  address ic_call(address entry, jint method_index = 0);

  void add_memory_int64(const Address dst, int64_t imm);
  void add_memory_int32(const Address dst, int32_t imm);

  void cmpptr(Register src1, Address src2, Label& equal);

  void clinit_barrier(Register klass, Register tmp, Label* L_fast_path = NULL, Label* L_slow_path = NULL);
  void load_method_holder_cld(Register result, Register method);
  void load_method_holder(Register holder, Register method);

  void compute_index(Register str1, Register trailing_zeros, Register match_mask,
                     Register result, Register char_tmp, Register tmp,
                     bool haystack_isL);
  void compute_match_mask(Register src, Register pattern, Register match_mask,
                          Register mask1, Register mask2);

#ifdef COMPILER2
  void mul_add(Register out, Register in, Register offset,
               Register len, Register k, Register tmp);
  void cad(Register dst, Register src1, Register src2, Register carry);
  void cadc(Register dst, Register src1, Register src2, Register carry);
  void adc(Register dst, Register src1, Register src2, Register carry);
  void add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo,
                       Register src1, Register src2, Register carry);
  void multiply_32_x_32_loop(Register x, Register xstart, Register x_xstart,
                             Register y, Register y_idx, Register z,
                             Register carry, Register product,
                             Register idx, Register kdx);
  void multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart,
                             Register y, Register y_idx, Register z,
                             Register carry, Register product,
                             Register idx, Register kdx);
  void multiply_128_x_128_loop(Register y, Register z,
                               Register carry, Register carry2,
                               Register idx, Register jdx,
                               Register yz_idx1, Register yz_idx2,
                               Register tmp, Register tmp3, Register tmp4,
                               Register tmp6, Register product_hi);
  void multiply_to_len(Register x, Register xlen, Register y, Register ylen,
                       Register z, Register zlen,
                       Register tmp1, Register tmp2, Register tmp3, Register tmp4,
                       Register tmp5, Register tmp6, Register product_hi);
#endif

  void inflate_lo32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);
  void inflate_hi32(Register Rd, Register Rs, Register tmp1 = t0, Register tmp2 = t1);

  void ctzc_bit(Register Rd, Register Rs, bool isLL = false, Register tmp1 = t0, Register tmp2 = t1);

  void zero_words(Register base, u_int64_t cnt);
  address zero_words(Register ptr, Register cnt);
  void fill_words(Register base, Register cnt, Register value);
  void zero_memory(Register addr, Register len, Register tmp);

  // shift left by shamt and add
  void shadd(Register Rd, Register Rs1, Register Rs2, Register tmp, int shamt);

  // Here the float conversion instructions with the "_safe" suffix deal with
  // exceptional inputs: e.g. converting NaN, +Inf or -Inf to int, float or double
  // raises an exception, so these situations need special handling to get
  // correct results.
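  //
  // For example (sketch): Java requires (int)NaN == 0, while a plain fcvt.w.s
  // saturates on NaN, so the _safe variants add the necessary fix-up:
  //
  //   fcvt_w_s_safe(x10, f10);   // x10 = (int)f10, with NaN mapped to 0
  //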
  void fcvt_w_s_safe(Register dst, FloatRegister src, Register tmp = t0);
  void fcvt_l_s_safe(Register dst, FloatRegister src, Register tmp = t0);
  void fcvt_w_d_safe(Register dst, FloatRegister src, Register tmp = t0);
  void fcvt_l_d_safe(Register dst, FloatRegister src, Register tmp = t0);

  // vector load/store unit-stride instructions
  void vlex_v(VectorRegister vd, Register base, Assembler::SEW sew, VectorMask vm = unmasked) {
    switch (sew) {
      case Assembler::e64:
        vle64_v(vd, base, vm);
        break;
      case Assembler::e32:
        vle32_v(vd, base, vm);
        break;
      case Assembler::e16:
        vle16_v(vd, base, vm);
        break;
      case Assembler::e8: // fall through
      default:
        vle8_v(vd, base, vm);
        break;
    }
  }

  void vsex_v(VectorRegister store_data, Register base, Assembler::SEW sew, VectorMask vm = unmasked) {
    switch (sew) {
      case Assembler::e64:
        vse64_v(store_data, base, vm);
        break;
      case Assembler::e32:
        vse32_v(store_data, base, vm);
        break;
      case Assembler::e16:
        vse16_v(store_data, base, vm);
        break;
      case Assembler::e8: // fall through
      default:
        vse8_v(store_data, base, vm);
        break;
    }
  }

  static const int zero_words_block_size;

  void cast_primitive_type(BasicType type, Register Rt) {
    switch (type) {
      case T_BOOLEAN:
        sltu(Rt, zr, Rt);
        break;
      case T_CHAR   :
        zero_extend(Rt, Rt, 16);
        break;
      case T_BYTE   :
        sign_extend(Rt, Rt, 8);
        break;
      case T_SHORT  :
        sign_extend(Rt, Rt, 16);
        break;
      case T_INT    :
        addw(Rt, Rt, zr);
        break;
      case T_LONG   : /* nothing to do */ break;
      case T_VOID   : /* nothing to do */ break;
      case T_FLOAT  : /* nothing to do */ break;
      case T_DOUBLE : /* nothing to do */ break;
      default: ShouldNotReachHere();
    }
  }

  // float cmp with unordered_result
  void float_compare(Register result, FloatRegister Rs1, FloatRegister Rs2, int unordered_result);
  void double_compare(Register result, FloatRegister Rs1, FloatRegister Rs2, int unordered_result);

  // Zero/Sign-extend
  void zero_extend(Register dst, Register src, int bits);
  void sign_extend(Register dst, Register src, int bits);

  // compare src1 and src2 and get -1/0/1 in dst.
  // if [src1 > src2], dst = 1;
  // if [src1 == src2], dst = 0;
  // if [src1 < src2], dst = -1;
  void cmp_l2i(Register dst, Register src1, Register src2, Register tmp = t0);

  int push_fp(unsigned int bitset, Register stack);
  int pop_fp(unsigned int bitset, Register stack);

  int push_vp(unsigned int bitset, Register stack);
  int pop_vp(unsigned int bitset, Register stack);

  // vext
  void vmnot_m(VectorRegister vd, VectorRegister vs);
  void vncvt_x_x_w(VectorRegister vd, VectorRegister vs, VectorMask vm = unmasked);
  void vfneg_v(VectorRegister vd, VectorRegister vs);

 private:

#ifdef ASSERT
  // Template short-hand support to clean-up after a failed call to trampoline
  // call generation (see trampoline_call() below), when a set of Labels must
  // be reset (before returning).
  template<typename Label, typename... More>
  void reset_labels(Label& lbl, More&... more) {
    lbl.reset();
    reset_labels(more...);
  }
  template<typename Label>
  void reset_labels(Label& lbl) {
    lbl.reset();
  }
#endif
  void repne_scan(Register addr, Register value, Register count, Register tmp);

  // Return true if an address is within the 48-bit RISCV64 address space.
  bool is_valid_riscv64_address(address addr) {
    // sv48: bits 47..63 must all be zero, i.e. the address lies in the lower
    // half of the sv48 address space
    return ((uintptr_t)addr >> 47) == 0;
  }

  void ld_constant(Register dest, const Address &const_addr) {
    if (NearCpool) {
      ld(dest, const_addr);
    } else {
      int32_t offset = 0;
      la_patchable(dest, InternalAddress(const_addr.target()), offset);
      ld(dest, Address(dest, offset));
    }
  }

  int bitset_to_regs(unsigned int bitset, unsigned char* regs);
  Address add_memory_helper(const Address dst);

  void load_reserved(Register addr, enum operand_size size, Assembler::Aqrl acquire);
  void store_conditional(Register addr, Register new_val, enum operand_size size, Assembler::Aqrl release);

  void load_prototype_header(Register dst, Register src);
};

#ifdef ASSERT
inline bool AbstractAssembler::pd_check_instruction_mark() { return false; }
#endif

/**
 * class SkipIfEqual:
 *
 * Instantiating this class will result in assembly code being output that will
 * jump around any code emitted between the creation of the instance and its
 * automatic destruction at the end of a scope block, depending on the value of
 * the flag passed to the constructor, which will be checked at run-time.
 */
class SkipIfEqual {
 private:
  MacroAssembler* _masm;
  Label _label;

 public:
  SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value);
  ~SkipIfEqual();
};

#endif // CPU_RISCV_MACROASSEMBLER_RISCV_HPP