/*
 * Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2014, 2021, Red Hat Inc. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef CPU_AARCH64_MACROASSEMBLER_AARCH64_HPP
#define CPU_AARCH64_MACROASSEMBLER_AARCH64_HPP

#include "asm/assembler.inline.hpp"
#include "code/vmreg.hpp"
#include "metaprogramming/enableIf.hpp"
#include "oops/compressedOops.hpp"
#include "runtime/vm_version.hpp"
#include "utilities/powerOfTwo.hpp"

class OopMap;

// MacroAssembler extends Assembler by frequently used macros.
//
// Instructions for which a 'better' code sequence exists depending
// on arguments should also go in here.

class MacroAssembler: public Assembler {
  friend class LIR_Assembler;

public:
  using Assembler::mov;
  using Assembler::movi;

protected:

  // Support for VM calls
  //
  // This is the base routine called by the different versions of call_VM_leaf. The interpreter
  // may customize this version by overriding it for its purposes (e.g., to save/restore
  // additional registers when doing a VM call).
  virtual void call_VM_leaf_base(
    address entry_point,          // the entry point
    int     number_of_arguments,  // the number of arguments to pop after the call
    Label*  retaddr = NULL
  );

  virtual void call_VM_leaf_base(
    address entry_point,          // the entry point
    int     number_of_arguments,  // the number of arguments to pop after the call
    Label&  retaddr) {
    call_VM_leaf_base(entry_point, number_of_arguments, &retaddr);
  }

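  // Editor's sketch (illustrative only, not part of the original interface):
  // a leaf call transfers to a C runtime helper without building a Java frame
  // or checking for pending exceptions afterwards. The entry point below is
  // hypothetical; the argument is assumed to already be in c_rarg0.
  //
  //   mov(c_rarg0, robj);
  //   call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::hypothetical_leaf_helper), 1);
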
  // This is the base routine called by the different versions of call_VM. The interpreter
  // may customize this version by overriding it for its purposes (e.g., to save/restore
  // additional registers when doing a VM call).
  //
  // If no java_thread register is specified (noreg) then rthread will be used instead. call_VM_base
  // returns the register which contains the thread upon return. If a thread register has been
  // specified, the return value will correspond to that register. If no last_java_sp is specified
  // (noreg) then rsp will be used instead.
  virtual void call_VM_base(      // returns the register containing the thread upon return
    Register oop_result,          // where an oop-result ends up if any; use noreg otherwise
    Register java_thread,         // the thread if computed before; use noreg otherwise
    Register last_java_sp,        // to set up last_Java_frame in stubs; use noreg otherwise
    address  entry_point,         // the entry point
    int      number_of_arguments, // the number of arguments (w/o thread) to pop after the call
    bool     check_exceptions     // whether to check for pending exceptions after return
  );

  void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions = true);

  enum KlassDecodeMode {
    KlassDecodeNone,
    KlassDecodeZero,
    KlassDecodeXor,
    KlassDecodeMovk
  };

  KlassDecodeMode klass_decode_mode();

private:
  static KlassDecodeMode _klass_decode_mode;

public:
  MacroAssembler(CodeBuffer* code) : Assembler(code) {}

  // These routines should emit JVMTI PopFrame and ForceEarlyReturn handling code.
  // The implementation is only non-empty for the InterpreterMacroAssembler,
  // as only the interpreter handles PopFrame and ForceEarlyReturn requests.
  virtual void check_and_handle_popframe(Register java_thread);
  virtual void check_and_handle_earlyret(Register java_thread);

  void safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod, Register tmp = rscratch1);
  void rt_call(address dest, Register tmp = rscratch1);

  // Helper functions for statistics gathering.
  // Unconditional atomic increment.
  void atomic_incw(Register counter_addr, Register tmp, Register tmp2);
  void atomic_incw(Address counter_addr, Register tmp1, Register tmp2, Register tmp3) {
    lea(tmp1, counter_addr);
    atomic_incw(tmp1, tmp2, tmp3);
  }
  // Load Effective Address
  void lea(Register r, const Address &a) {
    InstructionMark im(this);
    code_section()->relocate(inst_mark(), a.rspec());
    a.lea(this, r);
  }

  /* Sometimes we get misaligned loads and stores, usually from Unsafe
     accesses, and these can exceed the offset range. */
  Address legitimize_address(const Address &a, int size, Register scratch) {
    if (a.getMode() == Address::base_plus_offset) {
      if (! Address::offset_ok_for_immed(a.offset(), exact_log2(size))) {
        block_comment("legitimize_address {");
        lea(scratch, a);
        block_comment("} legitimize_address");
        return Address(scratch);
      }
    }
    return a;
  }

  void addmw(Address a, Register incr, Register scratch) {
    ldrw(scratch, a);
    addw(scratch, scratch, incr);
    strw(scratch, a);
  }

  // Add constant to memory word
  void addmw(Address a, int imm, Register scratch) {
    ldrw(scratch, a);
    if (imm > 0)
      addw(scratch, scratch, (unsigned)imm);
    else
      subw(scratch, scratch, (unsigned)-imm);
    strw(scratch, a);
  }

  void bind(Label& L) {
    Assembler::bind(L);
    code()->clear_last_insn();
  }

  void membar(Membar_mask_bits order_constraint);

  using Assembler::ldr;
  using Assembler::str;
  using Assembler::ldrw;
  using Assembler::strw;

  void ldr(Register Rx, const Address &adr);
  void ldrw(Register Rw, const Address &adr);
  void str(Register Rx, const Address &adr);
  void strw(Register Rx, const Address &adr);

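  // Editor's sketch (illustrative only; the register names and offset are
  // hypothetical): when an Unsafe-style access may carry an offset that does
  // not fit the scaled immediate form, legitimize_address() folds the address
  // into a scratch register first, so the following store is always encodable:
  //
  //   Address raw = Address(robj, large_offset);
  //   Address dst = legitimize_address(raw, /*size*/8, rscratch2);
  //   str(rvalue, dst);
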
  // Frame creation and destruction shared between JITs.
  void build_frame(int framesize);
  void remove_frame(int framesize);

  virtual void _call_Unimplemented(address call_site) {
    mov(rscratch2, call_site);
  }

  // Microsoft's MSVC team thinks that the __FUNCSIG__ is approximately (sympathy for calling conventions) equivalent to __PRETTY_FUNCTION__
  // Also, from Clang patch: "It is very similar to GCC's PRETTY_FUNCTION, except it prints the calling convention."
  // https://reviews.llvm.org/D3311

#ifdef _WIN64
#define call_Unimplemented() _call_Unimplemented((address)__FUNCSIG__)
#else
#define call_Unimplemented() _call_Unimplemented((address)__PRETTY_FUNCTION__)
#endif

  // aliases defined in AARCH64 spec

  template<class T>
  inline void cmpw(Register Rd, T imm)  { subsw(zr, Rd, imm); }

  inline void cmp(Register Rd, unsigned char imm8)  { subs(zr, Rd, imm8); }
  inline void cmp(Register Rd, unsigned imm) = delete;

  inline void cmnw(Register Rd, unsigned imm) { addsw(zr, Rd, imm); }
  inline void cmn(Register Rd, unsigned imm) { adds(zr, Rd, imm); }

  void cset(Register Rd, Assembler::Condition cond) {
    csinc(Rd, zr, zr, ~cond);
  }
  void csetw(Register Rd, Assembler::Condition cond) {
    csincw(Rd, zr, zr, ~cond);
  }

  void cneg(Register Rd, Register Rn, Assembler::Condition cond) {
    csneg(Rd, Rn, Rn, ~cond);
  }
  void cnegw(Register Rd, Register Rn, Assembler::Condition cond) {
    csnegw(Rd, Rn, Rn, ~cond);
  }

  inline void movw(Register Rd, Register Rn) {
    if (Rd == sp || Rn == sp) {
      addw(Rd, Rn, 0U);
    } else {
      orrw(Rd, zr, Rn);
    }
  }
  inline void mov(Register Rd, Register Rn) {
    assert(Rd != r31_sp && Rn != r31_sp, "should be");
    if (Rd == Rn) {
    } else if (Rd == sp || Rn == sp) {
      add(Rd, Rn, 0U);
    } else {
      orr(Rd, zr, Rn);
    }
  }

  inline void moviw(Register Rd, unsigned imm) { orrw(Rd, zr, imm); }
  inline void movi(Register Rd, unsigned imm) { orr(Rd, zr, imm); }

  inline void tstw(Register Rd, Register Rn) { andsw(zr, Rd, Rn); }
  inline void tst(Register Rd, Register Rn) { ands(zr, Rd, Rn); }

  inline void tstw(Register Rd, uint64_t imm) { andsw(zr, Rd, imm); }
  inline void tst(Register Rd, uint64_t imm) { ands(zr, Rd, imm); }

  inline void bfiw(Register Rd, Register Rn, unsigned lsb, unsigned width) {
    bfmw(Rd, Rn, ((32 - lsb) & 31), (width - 1));
  }
  inline void bfi(Register Rd, Register Rn, unsigned lsb, unsigned width) {
    bfm(Rd, Rn, ((64 - lsb) & 63), (width - 1));
  }

  inline void bfxilw(Register Rd, Register Rn, unsigned lsb, unsigned width) {
    bfmw(Rd, Rn, lsb, (lsb + width - 1));
  }
  inline void bfxil(Register Rd, Register Rn, unsigned lsb, unsigned width) {
    bfm(Rd, Rn, lsb, (lsb + width - 1));
  }

  inline void sbfizw(Register Rd, Register Rn, unsigned lsb, unsigned width) {
    sbfmw(Rd, Rn, ((32 - lsb) & 31), (width - 1));
  }
  inline void sbfiz(Register Rd, Register Rn, unsigned lsb, unsigned width) {
    sbfm(Rd, Rn, ((64 - lsb) & 63), (width - 1));
  }

  inline void sbfxw(Register Rd, Register Rn, unsigned lsb, unsigned width) {
    sbfmw(Rd, Rn, lsb, (lsb + width - 1));
  }
  inline void sbfx(Register Rd, Register Rn, unsigned lsb, unsigned width) {
    sbfm(Rd, Rn, lsb, (lsb + width - 1));
  }

  inline void ubfizw(Register Rd, Register Rn, unsigned lsb, unsigned width) {
    ubfmw(Rd, Rn, ((32 - lsb) & 31), (width - 1));
  }
  inline void ubfiz(Register Rd, Register Rn, unsigned lsb, unsigned width) {
    ubfm(Rd, Rn, ((64 - lsb) & 63), (width - 1));
  }

  inline void ubfxw(Register Rd, Register Rn, unsigned lsb, unsigned width) {
    ubfmw(Rd, Rn, lsb, (lsb + width - 1));
  }
  inline void ubfx(Register Rd, Register Rn, unsigned lsb, unsigned width) {
    ubfm(Rd, Rn, lsb, (lsb + width - 1));
  }

  inline void asrw(Register Rd, Register Rn, unsigned imm) {
    sbfmw(Rd, Rn, imm, 31);
  }

  inline void asr(Register Rd, Register Rn, unsigned imm) {
    sbfm(Rd, Rn, imm, 63);
  }

  inline void lslw(Register Rd, Register Rn, unsigned imm) {
    ubfmw(Rd, Rn, ((32 - imm) & 31), (31 - imm));
  }

  inline void lsl(Register Rd, Register Rn, unsigned imm) {
    ubfm(Rd, Rn, ((64 - imm) & 63), (63 - imm));
  }

  inline void lsrw(Register Rd, Register Rn, unsigned imm) {
    ubfmw(Rd, Rn, imm, 31);
  }

  inline void lsr(Register Rd, Register Rn, unsigned imm) {
    ubfm(Rd, Rn, imm, 63);
  }

  inline void rorw(Register Rd, Register Rn, unsigned imm) {
    extrw(Rd, Rn, Rn, imm);
  }

  inline void ror(Register Rd, Register Rn, unsigned imm) {
    extr(Rd, Rn, Rn, imm);
  }

  inline void sxtbw(Register Rd, Register Rn) {
    sbfmw(Rd, Rn, 0, 7);
  }
  inline void sxthw(Register Rd, Register Rn) {
    sbfmw(Rd, Rn, 0, 15);
  }
  inline void sxtb(Register Rd, Register Rn) {
    sbfm(Rd, Rn, 0, 7);
  }
  inline void sxth(Register Rd, Register Rn) {
    sbfm(Rd, Rn, 0, 15);
  }
  inline void sxtw(Register Rd, Register Rn) {
    sbfm(Rd, Rn, 0, 31);
  }

  inline void uxtbw(Register Rd, Register Rn) {
    ubfmw(Rd, Rn, 0, 7);
  }
  inline void uxthw(Register Rd, Register Rn) {
    ubfmw(Rd, Rn, 0, 15);
  }
  inline void uxtb(Register Rd, Register Rn) {
    ubfm(Rd, Rn, 0, 7);
  }
  inline void uxth(Register Rd, Register Rn) {
    ubfm(Rd, Rn, 0, 15);
  }
  inline void uxtw(Register Rd, Register Rn) {
    ubfm(Rd, Rn, 0, 31);
  }

  inline void cmnw(Register Rn, Register Rm) {
    addsw(zr, Rn, Rm);
  }
  inline void cmn(Register Rn, Register Rm) {
    adds(zr, Rn, Rm);
  }

  inline void cmpw(Register Rn, Register Rm) {
    subsw(zr, Rn, Rm);
  }
  inline void cmp(Register Rn, Register Rm) {
    subs(zr, Rn, Rm);
  }

  inline void negw(Register Rd, Register Rn) {
    subw(Rd, zr, Rn);
  }

  inline void neg(Register Rd, Register Rn) {
    sub(Rd, zr, Rn);
  }

  inline void negsw(Register Rd, Register Rn) {
    subsw(Rd, zr, Rn);
  }

  inline void negs(Register Rd, Register Rn) {
    subs(Rd, zr, Rn);
  }

  inline void cmnw(Register Rn, Register Rm, enum shift_kind kind, unsigned shift = 0) {
    addsw(zr, Rn, Rm, kind, shift);
  }
  inline void cmn(Register Rn, Register Rm, enum shift_kind kind, unsigned shift = 0) {
    adds(zr, Rn, Rm, kind, shift);
  }

  inline void cmpw(Register Rn, Register Rm, enum shift_kind kind, unsigned shift = 0) {
    subsw(zr, Rn, Rm, kind, shift);
  }
  inline void cmp(Register Rn, Register Rm, enum shift_kind kind, unsigned shift = 0) {
    subs(zr, Rn, Rm, kind, shift);
  }

  inline void negw(Register Rd, Register Rn, enum shift_kind kind, unsigned shift = 0) {
    subw(Rd, zr, Rn, kind, shift);
  }

  inline void neg(Register Rd, Register Rn, enum shift_kind kind, unsigned shift = 0) {
    sub(Rd, zr, Rn, kind, shift);
  }

  inline void negsw(Register Rd, Register Rn, enum shift_kind kind, unsigned shift = 0) {
    subsw(Rd, zr, Rn, kind, shift);
  }

  inline void negs(Register Rd, Register Rn, enum shift_kind kind, unsigned shift = 0) {
    subs(Rd, zr, Rn, kind, shift);
  }

  inline void mnegw(Register Rd, Register Rn, Register Rm) {
    msubw(Rd, Rn, Rm, zr);
  }
  inline void mneg(Register Rd, Register Rn, Register Rm) {
    msub(Rd, Rn, Rm, zr);
  }

  inline void mulw(Register Rd, Register Rn, Register Rm) {
    maddw(Rd, Rn, Rm, zr);
  }
  inline void mul(Register Rd, Register Rn, Register Rm) {
    madd(Rd, Rn, Rm, zr);
  }

  inline void smnegl(Register Rd, Register Rn, Register Rm) {
    smsubl(Rd, Rn, Rm, zr);
  }
  inline void smull(Register Rd, Register Rn, Register Rm) {
    smaddl(Rd, Rn, Rm, zr);
  }

  inline void umnegl(Register Rd, Register Rn, Register Rm) {
    umsubl(Rd, Rn, Rm, zr);
  }
  inline void umull(Register Rd, Register Rn, Register Rm) {
    umaddl(Rd, Rn, Rm, zr);
  }

#define WRAP(INSN)                                                      \
  void INSN(Register Rd, Register Rn, Register Rm, Register Ra) {      \
    if (VM_Version::supports_a53mac() && Ra != zr)                     \
      nop();                                                           \
    Assembler::INSN(Rd, Rn, Rm, Ra);                                   \
  }

  WRAP(madd) WRAP(msub) WRAP(maddw) WRAP(msubw)
  WRAP(smaddl) WRAP(smsubl) WRAP(umaddl) WRAP(umsubl)
#undef WRAP


  // macro assembly operations needed for aarch64

  // first two private routines for loading 32 bit or 64 bit constants
private:

  void mov_immediate64(Register dst, uint64_t imm64);
  void mov_immediate32(Register dst, uint32_t imm32);

  int push(unsigned int bitset, Register stack);
  int pop(unsigned int bitset, Register stack);

  int push_fp(unsigned int bitset, Register stack);
  int pop_fp(unsigned int bitset, Register stack);

  int push_p(unsigned int bitset, Register stack);
  int pop_p(unsigned int bitset, Register stack);

  void mov(Register dst, Address a);

public:
  void push(RegSet regs, Register stack) { if (regs.bits()) push(regs.bits(), stack); }
  void pop(RegSet regs, Register stack) { if (regs.bits()) pop(regs.bits(), stack); }

  void push_fp(FloatRegSet regs, Register stack) { if (regs.bits()) push_fp(regs.bits(), stack); }
  void pop_fp(FloatRegSet regs, Register stack) { if (regs.bits()) pop_fp(regs.bits(), stack); }

  static RegSet call_clobbered_gp_registers();

  void push_p(PRegSet regs, Register stack) { if (regs.bits()) push_p(regs.bits(), stack); }
  void pop_p(PRegSet regs, Register stack) { if (regs.bits()) pop_p(regs.bits(), stack); }

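  // Editor's sketch (illustrative only): RegSet-based push/pop is typically
  // used to save and restore a small group of general-purpose registers
  // around a code region, e.g.
  //
  //   RegSet saved = RegSet::of(r0, r1, r2);
  //   push(saved, sp);
  //   ... code that clobbers r0..r2 ...
  //   pop(saved, sp);
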
  // Push and pop everything that might be clobbered by a native
  // runtime call except rscratch1 and rscratch2. (They are always
  // scratch, so we don't have to protect them.) Only save the lower
  // 64 bits of each vector register. Additional registers can be excluded
  // in a passed RegSet.
  void push_call_clobbered_registers_except(RegSet exclude);
  void pop_call_clobbered_registers_except(RegSet exclude);

  void push_call_clobbered_registers() {
    push_call_clobbered_registers_except(RegSet());
  }
  void pop_call_clobbered_registers() {
    pop_call_clobbered_registers_except(RegSet());
  }


  // now mov instructions for loading absolute addresses and 32 or
  // 64 bit integers

  inline void mov(Register dst, address addr)    { mov_immediate64(dst, (uint64_t)addr); }

  template<typename T, ENABLE_IF(std::is_integral<T>::value)>
  inline void mov(Register dst, T o)             { mov_immediate64(dst, (uint64_t)o); }

  inline void movw(Register dst, uint32_t imm32) { mov_immediate32(dst, imm32); }

  void mov(Register dst, RegisterOrConstant src) {
    if (src.is_register())
      mov(dst, src.as_register());
    else
      mov(dst, src.as_constant());
  }

  void movptr(Register r, uintptr_t imm64);

  void mov(FloatRegister Vd, SIMD_Arrangement T, uint64_t imm64);

  void mov(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) {
    orr(Vd, T, Vn, Vn);
  }


public:

  // Generalized Test Bit And Branch, including a "far" variety which
  // spans more than 32KiB.
  void tbr(Condition cond, Register Rt, int bitpos, Label &dest, bool isfar = false) {
    assert(cond == EQ || cond == NE, "must be");

    if (isfar)
      cond = ~cond;

    void (Assembler::* branch)(Register Rt, int bitpos, Label &L);
    if (cond == Assembler::EQ)
      branch = &Assembler::tbz;
    else
      branch = &Assembler::tbnz;

    if (isfar) {
      Label L;
      (this->*branch)(Rt, bitpos, L);
      b(dest);
      bind(L);
    } else {
      (this->*branch)(Rt, bitpos, dest);
    }
  }

  // macro instructions for accessing and updating floating point
  // status register
  //
  // FPSR : op1 == 011
  //        CRn == 0100
  //        CRm == 0100
  //        op2 == 001

  inline void get_fpsr(Register reg)
  {
    mrs(0b11, 0b0100, 0b0100, 0b001, reg);
  }

  inline void set_fpsr(Register reg)
  {
    msr(0b011, 0b0100, 0b0100, 0b001, reg);
  }

  inline void clear_fpsr()
  {
    msr(0b011, 0b0100, 0b0100, 0b001, zr);
  }

  // DCZID_EL0: op1 == 011
  //            CRn == 0000
  //            CRm == 0000
  //            op2 == 111
  inline void get_dczid_el0(Register reg)
  {
    mrs(0b011, 0b0000, 0b0000, 0b111, reg);
  }

  // CTR_EL0:   op1 == 011
  //            CRn == 0000
  //            CRm == 0000
  //            op2 == 001
  inline void get_ctr_el0(Register reg)
  {
    mrs(0b011, 0b0000, 0b0000, 0b001, reg);
  }

  // idiv variant which deals with MINLONG as dividend and -1 as divisor
  int corrected_idivl(Register result, Register ra, Register rb,
                      bool want_remainder, Register tmp = rscratch1);
  int corrected_idivq(Register result, Register ra, Register rb,
                      bool want_remainder, Register tmp = rscratch1);

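  // Editor's sketch (illustrative, not from the original header): these
  // helpers implement Java-style integer division, where MIN_VALUE / -1
  // must yield MIN_VALUE rather than raising an error. A hypothetical
  // caller materializes either the quotient or the remainder:
  //
  //   corrected_idivl(r0, r1, r2, /*want_remainder*/false);  // r0 = r1 / r2 (32-bit)
  //   corrected_idivq(r3, r4, r5, /*want_remainder*/true);   // r3 = r4 % r5 (64-bit)
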
  // Support for NULL-checks
  //
  // Generates code that causes a NULL OS exception if the content of reg is NULL.
  // If the accessed location is M[reg + offset] and the offset is known, provide the
  // offset. No explicit code generation is needed if the offset is within a certain
  // range (0 <= offset <= page_size).

  virtual void null_check(Register reg, int offset = -1);
  static bool needs_explicit_null_check(intptr_t offset);
  static bool uses_implicit_null_check(void* address);

  static address target_addr_for_insn(address insn_addr, unsigned insn);
  static address target_addr_for_insn_or_null(address insn_addr, unsigned insn);
  static address target_addr_for_insn(address insn_addr) {
    unsigned insn = *(unsigned*)insn_addr;
    return target_addr_for_insn(insn_addr, insn);
  }
  static address target_addr_for_insn_or_null(address insn_addr) {
    unsigned insn = *(unsigned*)insn_addr;
    return target_addr_for_insn_or_null(insn_addr, insn);
  }

  // Required platform-specific helpers for Label::patch_instructions.
  // They _shadow_ the declarations in AbstractAssembler, which are undefined.
  static int pd_patch_instruction_size(address branch, address target);
  static void pd_patch_instruction(address branch, address target, const char* file = NULL, int line = 0) {
    pd_patch_instruction_size(branch, target);
  }
  static address pd_call_destination(address branch) {
    return target_addr_for_insn(branch);
  }
#ifndef PRODUCT
  static void pd_print_patched_instruction(address branch);
#endif

  static int patch_oop(address insn_addr, address o);
  static int patch_narrow_klass(address insn_addr, narrowKlass n);

  address emit_trampoline_stub(int insts_call_instruction_offset, address target);
  void emit_static_call_stub();

  // The following 4 methods return the offset of the appropriate move instruction

  // Support for fast byte/short loading with zero extension (depending on particular CPU)
  int load_unsigned_byte(Register dst, Address src);
  int load_unsigned_short(Register dst, Address src);

  // Support for fast byte/short loading with sign extension (depending on particular CPU)
  int load_signed_byte(Register dst, Address src);
  int load_signed_short(Register dst, Address src);

  int load_signed_byte32(Register dst, Address src);
  int load_signed_short32(Register dst, Address src);

  // Support for sign-extension (hi:lo = extend_sign(lo))
  void extend_sign(Register hi, Register lo);

  // Load and store values by size and signed-ness
  void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg);
  void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg);

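  // Editor's sketch (illustrative only; the object register and field offset
  // are hypothetical): load_sized_value/store_sized_value pick the right-sized,
  // correctly signed access from the size and signedness of a field, e.g. for
  // a 2-byte signed field:
  //
  //   load_sized_value(r0, Address(robj, field_offset), sizeof(jshort), /*is_signed*/true);
  //   store_sized_value(Address(robj, field_offset), r0, sizeof(jshort));
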
  // Support for inc/dec with optimal instruction selection depending on value

  // x86_64 aliases an unqualified register/address increment and
  // decrement to call incrementq and decrementq but also supports
  // explicitly sized calls to incrementq/decrementq or
  // incrementl/decrementl

  // for aarch64 the proper convention would be to use
  // increment/decrement for 64 bit operations and
  // incrementw/decrementw for 32 bit operations. so when porting
  // x86_64 code we can leave calls to increment/decrement as is,
  // replace incrementq/decrementq with increment/decrement and
  // replace incrementl/decrementl with incrementw/decrementw.

  // n.b. increment/decrement calls with an Address destination will
  // need to use a scratch register to load the value to be
  // incremented. increment/decrement calls which add or subtract a
  // constant value greater than 2^12 will need to use a 2nd scratch
  // register to hold the constant. so, a register increment/decrement
  // may trash rscratch2, and an address increment/decrement may trash
  // rscratch1 and rscratch2.

  void decrementw(Address dst, int value = 1);
  void decrementw(Register reg, int value = 1);

  void decrement(Register reg, int value = 1);
  void decrement(Address dst, int value = 1);

  void incrementw(Address dst, int value = 1);
  void incrementw(Register reg, int value = 1);

  void increment(Register reg, int value = 1);
  void increment(Address dst, int value = 1);


  // Alignment
  void align(int modulus);

  // Stack frame creation/removal
  void enter(bool strip_ret_addr = false);
  void leave();

  // ROP Protection
  void protect_return_address();
  void protect_return_address(Register return_reg, Register temp_reg);
  void authenticate_return_address(Register return_reg = lr);
  void authenticate_return_address(Register return_reg, Register temp_reg);
  void strip_return_address();
  void check_return_address(Register return_reg = lr) PRODUCT_RETURN;

  // Support for getting the JavaThread pointer (i.e., a reference to thread-local information)
  // The pointer will be loaded into the thread register.
  void get_thread(Register thread);

  // support for argument shuffling
  void move32_64(VMRegPair src, VMRegPair dst, Register tmp = rscratch1);
  void float_move(VMRegPair src, VMRegPair dst, Register tmp = rscratch1);
  void long_move(VMRegPair src, VMRegPair dst, Register tmp = rscratch1);
  void double_move(VMRegPair src, VMRegPair dst, Register tmp = rscratch1);
  void object_move(
    OopMap* map,
    int oop_handle_offset,
    int framesize_in_slots,
    VMRegPair src,
    VMRegPair dst,
    bool is_receiver,
    int* receiver_offset);

  // Support for VM calls
  //
  // It is imperative that all calls into the VM are handled via the call_VM macros.
  // They make sure that the stack linkage is setup correctly. call_VM's correspond
  // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points.


  void call_VM(Register oop_result,
               address entry_point,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               address entry_point,
               Register arg_1,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               address entry_point,
               Register arg_1, Register arg_2,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               address entry_point,
               Register arg_1, Register arg_2, Register arg_3,
               bool check_exceptions = true);

  // Overloadings with last_Java_sp
  void call_VM(Register oop_result,
               Register last_java_sp,
               address entry_point,
               int number_of_arguments = 0,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               Register last_java_sp,
               address entry_point,
               Register arg_1,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               Register last_java_sp,
               address entry_point,
               Register arg_1, Register arg_2,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               Register last_java_sp,
               address entry_point,
               Register arg_1, Register arg_2, Register arg_3,
               bool check_exceptions = true);

  void get_vm_result  (Register oop_result, Register thread);
  void get_vm_result_2(Register metadata_result, Register thread);

  // These always tightly bind to MacroAssembler::call_VM_base
  // bypassing the virtual implementation
  void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments = 0, bool check_exceptions = true);
  void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions = true);
  void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions = true);
  void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions = true);
  void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4, bool check_exceptions = true);

  void call_VM_leaf(address entry_point,
                    int number_of_arguments = 0);
  void call_VM_leaf(address entry_point,
                    Register arg_1);
  void call_VM_leaf(address entry_point,
                    Register arg_1, Register arg_2);
  void call_VM_leaf(address entry_point,
                    Register arg_1, Register arg_2, Register arg_3);

  // These always tightly bind to MacroAssembler::call_VM_leaf_base
  // bypassing the virtual implementation
  void super_call_VM_leaf(address entry_point);
  void super_call_VM_leaf(address entry_point, Register arg_1);
  void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2);
  void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3);
  void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4);

  // last Java Frame (fills frame anchor)
  void set_last_Java_frame(Register last_java_sp,
                           Register last_java_fp,
                           address  last_java_pc,
                           Register scratch);

  void set_last_Java_frame(Register last_java_sp,
                           Register last_java_fp,
                           Label&   last_java_pc,
                           Register scratch);

  void set_last_Java_frame(Register last_java_sp,
                           Register last_java_fp,
                           Register last_java_pc,
                           Register scratch);

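  // Editor's sketch (illustrative; the runtime entry name is hypothetical):
  // a typical VM call sets up the stack linkage, transfers to the runtime,
  // and leaves an oop result in the requested register, checking for pending
  // exceptions on return by default:
  //
  //   call_VM(r0,
  //           CAST_FROM_FN_PTR(address, InterpreterRuntime::hypothetical_entry),
  //           c_rarg1 /* arg_1 */);
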
  void reset_last_Java_frame(Register thread);

  // thread in the default location (rthread)
  void reset_last_Java_frame(bool clear_fp);

  // Stores
  void store_check(Register obj);              // store check for obj - register is destroyed afterwards
  void store_check(Register obj, Address dst); // same as above, dst is exact store location (reg. is destroyed)

  void resolve_jobject(Register value, Register thread, Register tmp);

  // C 'boolean' to Java boolean: x == 0 ? 0 : 1
  void c2bool(Register x);

  void load_method_holder_cld(Register rresult, Register rmethod);
  void load_method_holder(Register holder, Register method);

  // oop manipulations
  void load_klass(Register dst, Register src);
  void store_klass(Register dst, Register src);
  void cmp_klass(Register oop, Register trial_klass, Register tmp);

  void resolve_weak_handle(Register result, Register tmp);
  void resolve_oop_handle(Register result, Register tmp = r5);
  void load_mirror(Register dst, Register method, Register tmp = r5);

  void access_load_at(BasicType type, DecoratorSet decorators, Register dst, Address src,
                      Register tmp1, Register tmp_thread);

  void access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register src,
                       Register tmp1, Register tmp_thread);

  void load_heap_oop(Register dst, Address src, Register tmp1 = noreg,
                     Register thread_tmp = noreg, DecoratorSet decorators = 0);

  void load_heap_oop_not_null(Register dst, Address src, Register tmp1 = noreg,
                              Register thread_tmp = noreg, DecoratorSet decorators = 0);
  void store_heap_oop(Address dst, Register src, Register tmp1 = noreg,
                      Register tmp_thread = noreg, DecoratorSet decorators = 0);

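  // Editor's sketch (illustrative only; the field offset is hypothetical):
  // the heap-oop accessors route through the GC barrier set, compressing or
  // decompressing oops as needed, so a caller just names the slot and the
  // temporaries:
  //
  //   load_heap_oop(r0, Address(robj, element_offset), rscratch2, rthread);
  //   store_heap_oop(Address(robj, element_offset), r0, rscratch2, rthread);
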
  // currently unimplemented
  // Used for storing NULL. All other oop constants should be
  // stored using routines that take a jobject.
  void store_heap_oop_null(Address dst);

  void store_klass_gap(Register dst, Register src);

  // This dummy is to prevent a call to store_heap_oop from
  // converting a zero (like NULL) into a Register by giving
  // the compiler two choices it can't resolve

  void store_heap_oop(Address dst, void* dummy);

  void encode_heap_oop(Register d, Register s);
  void encode_heap_oop(Register r) { encode_heap_oop(r, r); }
  void decode_heap_oop(Register d, Register s);
  void decode_heap_oop(Register r) { decode_heap_oop(r, r); }
  void encode_heap_oop_not_null(Register r);
  void decode_heap_oop_not_null(Register r);
  void encode_heap_oop_not_null(Register dst, Register src);
  void decode_heap_oop_not_null(Register dst, Register src);

  void set_narrow_oop(Register dst, jobject obj);

  void encode_klass_not_null(Register r);
  void decode_klass_not_null(Register r);
  void encode_klass_not_null(Register dst, Register src);
  void decode_klass_not_null(Register dst, Register src);

  void set_narrow_klass(Register dst, Klass* k);

  // if heap base register is used - reinit it with the correct value
  void reinit_heapbase();

  DEBUG_ONLY(void verify_heapbase(const char* msg);)

  void push_CPU_state(bool save_vectors = false, bool use_sve = false,
                      int sve_vector_size_in_bytes = 0, int total_predicate_in_bytes = 0);
  void pop_CPU_state(bool restore_vectors = false, bool use_sve = false,
                     int sve_vector_size_in_bytes = 0, int total_predicate_in_bytes = 0);

  // Round up to a power of two
  void round_to(Register reg, int modulus);

  // java.lang.Math::round intrinsics
  void java_round_double(Register dst, FloatRegister src, FloatRegister ftmp);
  void java_round_float(Register dst, FloatRegister src, FloatRegister ftmp);

  // allocation
  void eden_allocate(
    Register obj,               // result: pointer to object after successful allocation
    Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise
    int      con_size_in_bytes, // object size in bytes if known at compile time
    Register t1,                // temp register
    Label&   slow_case          // continuation point if fast allocation fails
  );
  void tlab_allocate(
    Register obj,               // result: pointer to object after successful allocation
    Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise
    int      con_size_in_bytes, // object size in bytes if known at compile time
    Register t1,                // temp register
    Register t2,                // temp register
    Label&   slow_case          // continuation point if fast allocation fails
  );
  void verify_tlab();

  // interface method calling
  void lookup_interface_method(Register recv_klass,
                               Register intf_klass,
                               RegisterOrConstant itable_index,
                               Register method_result,
                               Register scan_temp,
                               Label& no_such_interface,
                               bool return_method = true);

  // virtual method calling
  // n.b. x86 allows RegisterOrConstant for vtable_index
  void lookup_virtual_method(Register recv_klass,
                             RegisterOrConstant vtable_index,
                             Register method_result);

  // Test sub_klass against super_klass, with fast and slow paths.

  // The fast path produces a tri-state answer: yes / no / maybe-slow.
  // One of the three labels can be NULL, meaning take the fall-through.
  // If super_check_offset is -1, the value is loaded up from super_klass.
  // No registers are killed, except temp_reg.
  void check_klass_subtype_fast_path(Register sub_klass,
                                     Register super_klass,
                                     Register temp_reg,
                                     Label* L_success,
                                     Label* L_failure,
                                     Label* L_slow_path,
                                     RegisterOrConstant super_check_offset = RegisterOrConstant(-1));

  // The rest of the type check; must be wired to a corresponding fast path.
  // It does not repeat the fast path logic, so don't use it standalone.
  // The temp_reg and temp2_reg can be noreg, if no temps are available.
  // Updates the sub's secondary super cache as necessary.
  // If set_cond_codes, condition codes will be Z on success, NZ on failure.
  void check_klass_subtype_slow_path(Register sub_klass,
                                     Register super_klass,
                                     Register temp_reg,
                                     Register temp2_reg,
                                     Label* L_success,
                                     Label* L_failure,
                                     bool set_cond_codes = false);

  // Simplified, combined version, good for typical uses.
  // Falls through on failure.
  void check_klass_subtype(Register sub_klass,
                           Register super_klass,
                           Register temp_reg,
                           Label& L_success);

  void clinit_barrier(Register klass,
                      Register thread,
                      Label* L_fast_path = NULL,
                      Label* L_slow_path = NULL);

  Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0);

  void verify_sve_vector_length(Register tmp = rscratch1);
  void reinitialize_ptrue() {
    if (UseSVE > 0) {
      sve_ptrue(ptrue, B);
    }
  }
  void verify_ptrue();

  // Debugging

  // only if +VerifyOops
  void _verify_oop(Register reg, const char* s, const char* file, int line);
  void _verify_oop_addr(Address addr, const char* s, const char* file, int line);

  void _verify_oop_checked(Register reg, const char* s, const char* file, int line) {
    if (VerifyOops) {
      _verify_oop(reg, s, file, line);
    }
  }
  void _verify_oop_addr_checked(Address reg, const char* s, const char* file, int line) {
    if (VerifyOops) {
      _verify_oop_addr(reg, s, file, line);
    }
  }

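  // Editor's sketch (hedged; a rough illustration of how the two subtype-check
  // halves above are usually wired together, not code taken from this file;
  // rsub/rsuper/rtmp are hypothetical register names):
  //
  //   Label L_ok, L_bad;
  //   check_klass_subtype_fast_path(rsub, rsuper, rtmp, &L_ok, &L_bad, NULL);
  //   check_klass_subtype_slow_path(rsub, rsuper, rtmp, noreg, &L_ok, NULL);
  //   bind(L_bad);   // reached on failure; handle it, then branch past L_ok
  //   ...
  //   bind(L_ok);    // reached on success
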
  // TODO: verify method and klass metadata (compare against vptr?)
  void _verify_method_ptr(Register reg, const char* msg, const char* file, int line) {}
  void _verify_klass_ptr(Register reg, const char* msg, const char* file, int line) {}

#define verify_oop(reg) _verify_oop_checked(reg, "broken oop " #reg, __FILE__, __LINE__)
#define verify_oop_msg(reg, msg) _verify_oop_checked(reg, "broken oop " #reg ", " #msg, __FILE__, __LINE__)
#define verify_oop_addr(addr) _verify_oop_addr_checked(addr, "broken oop addr " #addr, __FILE__, __LINE__)
#define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__)
#define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__)

  // only if +VerifyFPU
  void verify_FPU(int stack_depth, const char* s = "illegal FPU state");

  // prints msg, dumps registers and stops execution
  void stop(const char* msg);

  static void debug64(char* msg, int64_t pc, int64_t regs[]);

  void untested() { stop("untested"); }

  void unimplemented(const char* what = "");

  void should_not_reach_here() { stop("should not reach here"); }

  // Stack overflow checking
  void bang_stack_with_offset(int offset) {
    // stack grows down, caller passes positive offset
    assert(offset > 0, "must bang with positive offset");
    sub(rscratch2, sp, offset);
    str(zr, Address(rscratch2));
  }

  // Writes to stack successive pages until offset reached to check for
  // stack overflow + shadow pages. Also, clobbers tmp
  void bang_stack_size(Register size, Register tmp);

  // Check for reserved stack access in method being exited (for JIT)
  void reserved_stack_check();

  // Arithmetic

  void addptr(const Address &dst, int32_t src);
  void cmpptr(Register src1, Address src2);

  void cmpoop(Register obj1, Register obj2);

  // Various forms of CAS

  void cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp,
                          Label &succeed, Label *fail);
  void cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp,
                  Label &succeed, Label *fail);

  void cmpxchgw(Register oldv, Register newv, Register addr, Register tmp,
                Label &succeed, Label *fail);

  void atomic_add(Register prev, RegisterOrConstant incr, Register addr);
  void atomic_addw(Register prev, RegisterOrConstant incr, Register addr);
  void atomic_addal(Register prev, RegisterOrConstant incr, Register addr);
  void atomic_addalw(Register prev, RegisterOrConstant incr, Register addr);

  void atomic_xchg(Register prev, Register newv, Register addr);
  void atomic_xchgw(Register prev, Register newv, Register addr);
  void atomic_xchgl(Register prev, Register newv, Register addr);
  void atomic_xchglw(Register prev, Register newv, Register addr);
  void atomic_xchgal(Register prev, Register newv, Register addr);
  void atomic_xchgalw(Register prev, Register newv, Register addr);

  void orptr(Address adr, RegisterOrConstant src) {
    ldr(rscratch1, adr);
    if (src.is_register())
      orr(rscratch1, rscratch1, src.as_register());
    else
      orr(rscratch1, rscratch1, src.as_constant());
    str(rscratch1, adr);
  }

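  // Editor's sketch (illustrative; the register roles are hypothetical):
  // the atomic_* helpers perform unconditional read-modify-write updates on
  // the word addressed by the last register, leaving the previous memory
  // value in `prev`, e.g.
  //
  //   atomic_addw(r2, 1, r3);      // 32-bit: old value of *r3 into r2, then *r3 += 1
  //   atomic_xchgal(r2, r4, r3);   // 64-bit swap with acquire/release ordering
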
  // A generic CAS; success or failure is in the EQ flag.
  // Clobbers rscratch1
  void cmpxchg(Register addr, Register expected, Register new_val,
               enum operand_size size,
               bool acquire, bool release, bool weak,
               Register result);

private:
  void compare_eq(Register rn, Register rm, enum operand_size size);

#ifdef ASSERT
  // Template short-hand support to clean-up after a failed call to trampoline
  // call generation (see trampoline_call() below), when a set of Labels must
  // be reset (before returning).
  template<typename Label, typename... More>
  void reset_labels(Label &lbl, More&... more) {
    lbl.reset(); reset_labels(more...);
  }
  template<typename Label>
  void reset_labels(Label &lbl) {
    lbl.reset();
  }
#endif

public:
  // Calls

  address trampoline_call(Address entry, CodeBuffer* cbuf = NULL);

  static bool far_branches() {
    return ReservedCodeCacheSize > branch_range;
  }

  // Check if branches to the non-nmethod section require a far jump
  static bool codestub_branch_needs_far_jump() {
    return CodeCache::max_distance_to_non_nmethod() > branch_range;
  }

  // Far_call and far_jump generate a call of/jump to the provided address.
  // The address must be inside the code cache.
  // Supported entry.rspec():
  // - relocInfo::external_word_type
  // - relocInfo::runtime_call_type
  // - relocInfo::none
  // If the distance to the address can exceed the branch range
  // (128M for the release build, 2M for the debug build; see branch_range definition)
  // for direct calls (BL) / jumps (B), a call (BLR) / jump (BR) with the address put in
  // the tmp register is generated. Instructions putting the address in the tmp register
  // are embedded at a call site. The tmp register is invalidated.
  // This differs from trampoline_call which puts additional code (trampoline) including
  // BR into the stub code section and a BL to the trampoline at a call site.
  void far_call(Address entry, CodeBuffer *cbuf = NULL, Register tmp = rscratch1);
  int  far_jump(Address entry, CodeBuffer *cbuf = NULL, Register tmp = rscratch1);

  static int far_codestub_branch_size() {
    if (codestub_branch_needs_far_jump()) {
      return 3 * 4;  // adrp, add, br
    } else {
      return 4;
    }
  }

  // Emit the CompiledIC call idiom
  address ic_call(address entry, jint method_index = 0);

public:

  // Data

  void mov_metadata(Register dst, Metadata* obj);
  Address allocate_metadata_address(Metadata* obj);
  Address constant_oop_address(jobject obj);

  void movoop(Register dst, jobject obj, bool immediate = false);

  // CRC32 code for java.util.zip.CRC32::updateBytes() intrinsic.
  void kernel_crc32(Register crc, Register buf, Register len,
                    Register table0, Register table1, Register table2, Register table3,
                    Register tmp, Register tmp2, Register tmp3);
  // CRC32 code for java.util.zip.CRC32C::updateBytes() intrinsic.
  void kernel_crc32c(Register crc, Register buf, Register len,
                     Register table0, Register table1, Register table2, Register table3,
                     Register tmp, Register tmp2, Register tmp3);

  // Stack push and pop individual 64 bit registers
  void push(Register src);
  void pop(Register dst);

  void repne_scan(Register addr, Register value, Register count,
                  Register scratch);
  void repne_scanw(Register addr, Register value, Register count,
                   Register scratch);

  typedef void (MacroAssembler::* add_sub_imm_insn)(Register Rd, Register Rn, unsigned imm);
  typedef void (MacroAssembler::* add_sub_reg_insn)(Register Rd, Register Rn, Register Rm, enum shift_kind kind, unsigned shift);

  // If a constant does not fit in an immediate field, generate some
  // number of MOV instructions and then perform the operation
  void wrap_add_sub_imm_insn(Register Rd, Register Rn, unsigned imm,
                             add_sub_imm_insn insn1,
                             add_sub_reg_insn insn2);
  // Separate version which sets the flags
  void wrap_adds_subs_imm_insn(Register Rd, Register Rn, unsigned imm,
                               add_sub_imm_insn insn1,
                               add_sub_reg_insn insn2);

#define WRAP(INSN)                                                          \
  void INSN(Register Rd, Register Rn, unsigned imm) {                       \
    wrap_add_sub_imm_insn(Rd, Rn, imm, &Assembler::INSN, &Assembler::INSN); \
  }                                                                         \
                                                                            \
  void INSN(Register Rd, Register Rn, Register Rm,                          \
            enum shift_kind kind, unsigned shift = 0) {                     \
    Assembler::INSN(Rd, Rn, Rm, kind, shift);                               \
  }                                                                         \
                                                                            \
  void INSN(Register Rd, Register Rn, Register Rm) {                        \
    Assembler::INSN(Rd, Rn, Rm);                                            \
  }                                                                         \
                                                                            \
  void INSN(Register Rd, Register Rn, Register Rm,                          \
            ext::operation option, int amount = 0) {                        \
    Assembler::INSN(Rd, Rn, Rm, option, amount);                            \
  }

  WRAP(add) WRAP(addw) WRAP(sub) WRAP(subw)

#undef WRAP
#define WRAP(INSN)                                                            \
  void INSN(Register Rd, Register Rn, unsigned imm) {                         \
    wrap_adds_subs_imm_insn(Rd, Rn, imm, &Assembler::INSN, &Assembler::INSN); \
  }                                                                           \
                                                                              \
  void INSN(Register Rd, Register Rn, Register Rm,                            \
            enum shift_kind kind, unsigned shift = 0) {                       \
    Assembler::INSN(Rd, Rn, Rm, kind, shift);                                 \
  }                                                                           \
                                                                              \
  void INSN(Register Rd, Register Rn, Register Rm) {                          \
    Assembler::INSN(Rd, Rn, Rm);                                              \
  }                                                                           \
                                                                              \
  void INSN(Register Rd, Register Rn, Register Rm,                            \
            ext::operation option, int amount = 0) {                          \
    Assembler::INSN(Rd, Rn, Rm, option, amount);                              \
  }

  WRAP(adds) WRAP(addsw) WRAP(subs) WRAP(subsw)

  void add(Register Rd, Register Rn, RegisterOrConstant increment);
  void addw(Register Rd, Register Rn, RegisterOrConstant increment);
  void sub(Register Rd, Register Rn, RegisterOrConstant decrement);
  void subw(Register Rd, Register Rn, RegisterOrConstant decrement);

  void adrp(Register reg1, const Address &dest, uint64_t &byte_offset);

  void tableswitch(Register index, jint lowbound, jint highbound,
                   Label &jumptable, Label &jumptable_end, int stride = 1) {
    adr(rscratch1, jumptable);
    subsw(rscratch2, index, lowbound);
    subsw(zr, rscratch2, highbound - lowbound);
    br(Assembler::HS, jumptable_end);
    add(rscratch1, rscratch1, rscratch2,
        ext::sxtw, exact_log2(stride * Assembler::instruction_size));
    br(rscratch1);
  }

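  // Editor's sketch (illustrative only; ridx and the case labels are
  // hypothetical): tableswitch() emits the bounds check and the indexed
  // branch; the caller lays out the jump table immediately behind it, one
  // branch per key (the upper bound is exclusive):
  //
  //   Label table, done;
  //   tableswitch(ridx, 0, 3, table, done);
  //   bind(table);
  //   b(case_0); b(case_1); b(case_2);
  //   bind(done);            // reached when ridx is out of range
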
  // Form an address from base + offset in Rd. Rd may or may not
  // actually be used: you must use the Address that is returned. It
  // is up to you to ensure that the shift provided matches the size
  // of your data.
  Address form_address(Register Rd, Register base, int64_t byte_offset, int shift);

  // Return true iff an address is within the 48-bit AArch64 address
  // space.
  bool is_valid_AArch64_address(address a) {
    return ((uint64_t)a >> 48) == 0;
  }

  // Load the base of the cardtable byte map into reg.
  void load_byte_map_base(Register reg);

  // Prolog generator routines to support switch between x86 code and
  // generated ARM code

  // routine to generate an x86 prolog for a stub function which
  // bootstraps into the generated ARM code which directly follows the
  // stub
  //

public:

  void ldr_constant(Register dest, const Address &const_addr) {
    if (NearCpool) {
      ldr(dest, const_addr);
    } else {
      uint64_t offset;
      adrp(dest, InternalAddress(const_addr.target()), offset);
      ldr(dest, Address(dest, offset));
    }
  }

  address read_polling_page(Register r, relocInfo::relocType rtype);
  void get_polling_page(Register dest, relocInfo::relocType rtype);

  // CRC32 code for java.util.zip.CRC32::updateBytes() intrinsic.
  void update_byte_crc32(Register crc, Register val, Register table);
  void update_word_crc32(Register crc, Register v, Register tmp,
                         Register table0, Register table1, Register table2, Register table3,
                         bool upper = false);

  address count_positives(Register ary1, Register len, Register result);

  address arrays_equals(Register a1, Register a2, Register result, Register cnt1,
                        Register tmp1, Register tmp2, Register tmp3, int elem_size);

  void string_equals(Register a1, Register a2, Register result, Register cnt1,
                     int elem_size);

  void fill_words(Register base, Register cnt, Register value);
  void zero_words(Register base, uint64_t cnt);
  address zero_words(Register ptr, Register cnt);
  void zero_dcache_blocks(Register base, Register cnt);

  static const int zero_words_block_size;

  address byte_array_inflate(Register src, Register dst, Register len,
                             FloatRegister vtmp1, FloatRegister vtmp2,
                             FloatRegister vtmp3, Register tmp4);

  void char_array_compress(Register src, Register dst, Register len,
                           Register res,
                           FloatRegister vtmp0, FloatRegister vtmp1,
                           FloatRegister vtmp2, FloatRegister vtmp3);

  void encode_iso_array(Register src, Register dst,
                        Register len, Register res, bool ascii,
                        FloatRegister vtmp0, FloatRegister vtmp1,
                        FloatRegister vtmp2, FloatRegister vtmp3);

  void fast_log(FloatRegister vtmp0, FloatRegister vtmp1, FloatRegister vtmp2,
                FloatRegister vtmp3, FloatRegister vtmp4, FloatRegister vtmp5,
                FloatRegister tmpC1, FloatRegister tmpC2, FloatRegister tmpC3,
                FloatRegister tmpC4, Register tmp1, Register tmp2,
                Register tmp3, Register tmp4, Register tmp5);
  void generate_dsin_dcos(bool isCos, address npio2_hw, address two_over_pi,
                          address pio2, address dsin_coef, address dcos_coef);
private:
  // begin trigonometric functions support block
  void generate__ieee754_rem_pio2(address npio2_hw, address two_over_pi, address pio2);
  void generate__kernel_rem_pio2(address two_over_pi, address pio2);
  void generate_kernel_sin(FloatRegister x, bool iyIsOne, address dsin_coef);
  void generate_kernel_cos(FloatRegister x, address dcos_coef);
  // end trigonometric functions support block
  void add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo,
                       Register src1, Register src2);
  void add2_with_carry(Register dest_hi, Register dest_lo, Register src1, Register src2) {
    add2_with_carry(dest_hi, dest_hi, dest_lo, src1, src2);
  }
  void multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart,
                             Register y, Register y_idx, Register z,
                             Register carry, Register product,
                             Register idx, Register kdx);
  void multiply_128_x_128_loop(Register y, Register z,
                               Register carry, Register carry2,
                               Register idx, Register jdx,
                               Register yz_idx1, Register yz_idx2,
                               Register tmp, Register tmp3, Register tmp4,
                               Register tmp7, Register product_hi);
  void kernel_crc32_using_crc32(Register crc, Register buf,
                                Register len, Register tmp0, Register tmp1, Register tmp2,
                                Register tmp3);
  void kernel_crc32c_using_crc32c(Register crc, Register buf,
                                  Register len, Register tmp0, Register tmp1, Register tmp2,
                                  Register tmp3);

  void ghash_modmul(FloatRegister result,
                    FloatRegister result_lo, FloatRegister result_hi, FloatRegister b,
                    FloatRegister a, FloatRegister vzr, FloatRegister a1_xor_a0, FloatRegister p,
                    FloatRegister t1, FloatRegister t2, FloatRegister t3);
  void ghash_load_wide(int index, Register data, FloatRegister result, FloatRegister state);
public:
  void multiply_to_len(Register x, Register xlen, Register y, Register ylen, Register z,
                       Register zlen, Register tmp1, Register tmp2, Register tmp3,
                       Register tmp4, Register tmp5, Register tmp6, Register tmp7);
  void mul_add(Register out, Register in, Register offs, Register len, Register k);
  void ghash_multiply(FloatRegister result_lo, FloatRegister result_hi,
                      FloatRegister a, FloatRegister b, FloatRegister a1_xor_a0,
                      FloatRegister tmp1, FloatRegister tmp2, FloatRegister tmp3);
  void ghash_multiply_wide(int index,
                           FloatRegister result_lo, FloatRegister result_hi,
                           FloatRegister a, FloatRegister b, FloatRegister a1_xor_a0,
                           FloatRegister tmp1, FloatRegister tmp2, FloatRegister tmp3);
  void ghash_reduce(FloatRegister result, FloatRegister lo, FloatRegister hi,
                    FloatRegister p, FloatRegister z, FloatRegister t1);
  void ghash_reduce_wide(int index, FloatRegister result, FloatRegister lo, FloatRegister hi,
                         FloatRegister p, FloatRegister z, FloatRegister t1);
  void ghash_processBlocks_wide(address p, Register state, Register subkeyH,
                                Register data, Register blocks, int unrolls);


  void aesenc_loadkeys(Register key, Register keylen);
  void aesecb_encrypt(Register from, Register to, Register keylen,
                      FloatRegister data = v0, int unrolls = 1);
  void aesecb_decrypt(Register from, Register to, Register key, Register keylen);
  void aes_round(FloatRegister input, FloatRegister subkey);

  // Place an ISB after code may have been modified due to a safepoint.
  void safepoint_isb();

private:
  // Return the effective address r + (r1 << ext) + offset.
  // Uses rscratch2.
  Address offsetted_address(Register r, Register r1, Address::extend ext,
                            int offset, int size);

private:
  // Returns an address on the stack which is reachable with a ldr/str of size.
  // Uses rscratch2 if the address is not directly reachable.
  Address spill_address(int size, int offset, Register tmp = rscratch2);
  Address sve_spill_address(int sve_reg_size_in_bytes, int offset, Register tmp = rscratch2);

  bool merge_alignment_check(Register base, size_t size, int64_t cur_offset, int64_t prev_offset) const;

  // Check whether two loads/stores can be merged into ldp/stp.
  bool ldst_can_merge(Register rx, const Address &adr, size_t cur_size_in_bytes, bool is_store) const;

  // Merge current load/store with previous load/store into ldp/stp.
  void merge_ldst(Register rx, const Address &adr, size_t cur_size_in_bytes, bool is_store);

  // Try to merge two loads/stores into ldp/stp. If success, returns true else false.
  bool try_merge_ldst(Register rt, const Address &adr, size_t cur_size_in_bytes, bool is_store);

public:
  void spill(Register Rx, bool is64, int offset) {
    if (is64) {
      str(Rx, spill_address(8, offset));
    } else {
      strw(Rx, spill_address(4, offset));
    }
  }
  void spill(FloatRegister Vx, SIMD_RegVariant T, int offset) {
    str(Vx, T, spill_address(1 << (int)T, offset));
  }

  void spill_sve_vector(FloatRegister Zx, int offset, int vector_reg_size_in_bytes) {
    sve_str(Zx, sve_spill_address(vector_reg_size_in_bytes, offset));
  }
  void spill_sve_predicate(PRegister pr, int offset, int predicate_reg_size_in_bytes) {
    sve_str(pr, sve_spill_address(predicate_reg_size_in_bytes, offset));
  }

  void unspill(Register Rx, bool is64, int offset) {
    if (is64) {
      ldr(Rx, spill_address(8, offset));
    } else {
      ldrw(Rx, spill_address(4, offset));
    }
  }
  void unspill(FloatRegister Vx, SIMD_RegVariant T, int offset) {
    ldr(Vx, T, spill_address(1 << (int)T, offset));
  }

  void unspill_sve_vector(FloatRegister Zx, int offset, int vector_reg_size_in_bytes) {
    sve_ldr(Zx, sve_spill_address(vector_reg_size_in_bytes, offset));
  }
  void unspill_sve_predicate(PRegister pr, int offset, int predicate_reg_size_in_bytes) {
    sve_ldr(pr, sve_spill_address(predicate_reg_size_in_bytes, offset));
  }

  void spill_copy128(int src_offset, int dst_offset,
                     Register tmp1 = rscratch1, Register tmp2 = rscratch2) {
    if (src_offset < 512 && (src_offset & 7) == 0 &&
        dst_offset < 512 && (dst_offset & 7) == 0) {
      ldp(tmp1, tmp2, Address(sp, src_offset));
      stp(tmp1, tmp2, Address(sp, dst_offset));
    } else {
      unspill(tmp1, true, src_offset);
      spill(tmp1, true, dst_offset);
      unspill(tmp1, true, src_offset + 8);
      spill(tmp1, true, dst_offset + 8);
    }
  }
  void spill_copy_sve_vector_stack_to_stack(int src_offset, int dst_offset,
                                            int sve_vec_reg_size_in_bytes) {
    assert(sve_vec_reg_size_in_bytes % 16 == 0, "unexpected sve vector reg size");
    for (int i = 0; i < sve_vec_reg_size_in_bytes / 16; i++) {
      spill_copy128(src_offset, dst_offset);
      src_offset += 16;
      dst_offset += 16;
    }
  }

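  // Editor's sketch (illustrative only; the slot offset is hypothetical):
  // spill/unspill address stack slots relative to sp, falling back to
  // rscratch2 when the offset is not directly encodable:
  //
  //   spill(r19, /*is64*/true, slot_offset);     // save r19
  //   ...
  //   unspill(r19, /*is64*/true, slot_offset);   // restore r19
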
  void spill_copy_sve_predicate_stack_to_stack(int src_offset, int dst_offset,
                                               int sve_predicate_reg_size_in_bytes) {
    sve_ldr(ptrue, sve_spill_address(sve_predicate_reg_size_in_bytes, src_offset));
    sve_str(ptrue, sve_spill_address(sve_predicate_reg_size_in_bytes, dst_offset));
    reinitialize_ptrue();
  }
  void cache_wb(Address line);
  void cache_wbsync(bool is_pre);

  // Code for java.lang.Thread::onSpinWait() intrinsic.
  void spin_wait();

private:
  // Check the current thread doesn't need a cross modify fence.
  void verify_cross_modify_fence_not_required() PRODUCT_RETURN;

};

#ifdef ASSERT
inline bool AbstractAssembler::pd_check_instruction_mark() { return false; }
#endif

/**
 * class SkipIfEqual:
 *
 * Instantiating this class will result in assembly code being output that will
 * jump around any code emitted between the creation of the instance and its
 * automatic destruction at the end of a scope block, depending on the value of
 * the flag passed to the constructor, which will be checked at run-time.
 */
class SkipIfEqual {
private:
  MacroAssembler* _masm;
  Label _label;

public:
  SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value);
  ~SkipIfEqual();
};

struct tableswitch {
  Register _reg;
  int _insn_index; jint _first_key; jint _last_key;
  Label _after;
  Label _branches;
};

#endif // CPU_AARCH64_MACROASSEMBLER_AARCH64_HPP