/*
 * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2014, 2021, Red Hat Inc. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef CPU_AARCH64_MACROASSEMBLER_AARCH64_HPP
#define CPU_AARCH64_MACROASSEMBLER_AARCH64_HPP

#include "asm/assembler.inline.hpp"
#include "code/vmreg.hpp"
#include "metaprogramming/enableIf.hpp"
#include "oops/compressedOops.hpp"
#include "oops/compressedKlass.hpp"
#include "runtime/vm_version.hpp"
#include "utilities/powerOfTwo.hpp"

class OopMap;

// MacroAssembler extends Assembler by frequently used macros.
//
// Instructions for which a 'better' code sequence exists depending
// on arguments should also go in here.

class MacroAssembler: public Assembler {
  friend class LIR_Assembler;

 public:
  using Assembler::mov;
  using Assembler::movi;

 protected:

  // Support for VM calls
  //
  // This is the base routine called by the different versions of call_VM_leaf. The interpreter
  // may customize this version by overriding it for its purposes (e.g., to save/restore
  // additional registers when doing a VM call).
  virtual void call_VM_leaf_base(
    address entry_point,          // the entry point
    int     number_of_arguments,  // the number of arguments to pop after the call
    Label*  retaddr = nullptr
  );

  virtual void call_VM_leaf_base(
    address entry_point,          // the entry point
    int     number_of_arguments,  // the number of arguments to pop after the call
    Label&  retaddr) {
    call_VM_leaf_base(entry_point, number_of_arguments, &retaddr);
  }

  // This is the base routine called by the different versions of call_VM. The interpreter
  // may customize this version by overriding it for its purposes (e.g., to save/restore
  // additional registers when doing a VM call).
  //
  // If no java_thread register is specified (noreg), then rthread will be used instead. call_VM_base
  // returns the register which contains the thread upon return. If a thread register has been
  // specified, the return value will correspond to that register. If no last_java_sp is specified
  // (noreg), then rsp will be used instead.
  virtual void call_VM_base(      // returns the register containing the thread upon return
    Register oop_result,          // where an oop-result ends up if any; use noreg otherwise
    Register java_thread,         // the thread if computed before     ; use noreg otherwise
    Register last_java_sp,        // to set up last_Java_frame in stubs; use noreg otherwise
    address  entry_point,         // the entry point
    int      number_of_arguments, // the number of arguments (w/o thread) to pop after the call
    bool     check_exceptions     // whether to check for pending exceptions after return
  );

  void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions = true);

  enum KlassDecodeMode {
    KlassDecodeNone,
    KlassDecodeZero,
    KlassDecodeXor,
    KlassDecodeMovk
  };

  KlassDecodeMode klass_decode_mode();

 private:
  static KlassDecodeMode _klass_decode_mode;

 public:
  MacroAssembler(CodeBuffer* code) : Assembler(code) {}

  // These routines should emit JVMTI PopFrame and ForceEarlyReturn handling code.
  // The implementation is only non-empty for the InterpreterMacroAssembler,
  // as only the interpreter handles PopFrame and ForceEarlyReturn requests.
  virtual void check_and_handle_popframe(Register java_thread);
  virtual void check_and_handle_earlyret(Register java_thread);

  void safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod, Register tmp = rscratch1);
  void rt_call(address dest, Register tmp = rscratch1);

  // Load Effective Address
  void lea(Register r, const Address &a) {
    InstructionMark im(this);
    a.lea(this, r);
  }

  /* Sometimes we get misaligned loads and stores, usually from Unsafe
     accesses, and these can exceed the offset range. */
  Address legitimize_address(const Address &a, int size, Register scratch) {
    if (a.getMode() == Address::base_plus_offset) {
      if (! Address::offset_ok_for_immed(a.offset(), exact_log2(size))) {
        block_comment("legitimize_address {");
        lea(scratch, a);
        block_comment("} legitimize_address");
        return Address(scratch);
      }
    }
    return a;
  }

  void addmw(Address a, Register incr, Register scratch) {
    ldrw(scratch, a);
    addw(scratch, scratch, incr);
    strw(scratch, a);
  }

  // Add constant to memory word
  void addmw(Address a, int imm, Register scratch) {
    ldrw(scratch, a);
    if (imm > 0)
      addw(scratch, scratch, (unsigned)imm);
    else
      subw(scratch, scratch, (unsigned)-imm);
    strw(scratch, a);
  }

  void bind(Label& L) {
    Assembler::bind(L);
    code()->clear_last_insn();
  }

  void membar(Membar_mask_bits order_constraint);

  using Assembler::ldr;
  using Assembler::str;
  using Assembler::ldrw;
  using Assembler::strw;

  void ldr(Register Rx, const Address &adr);
  void ldrw(Register Rw, const Address &adr);
  void str(Register Rx, const Address &adr);
  void strw(Register Rx, const Address &adr);

  // Frame creation and destruction shared between JITs.
  void build_frame(int framesize);
  void remove_frame(int framesize);
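  // Conceptual sketch only -- the real emitted sequence depends on framesize
  // and on ROP protection, but build_frame/remove_frame encapsulate the usual
  // AArch64 frame idiom:
  //
  //   sub  sp, sp, #framesize
  //   stp  rfp, lr, [sp, #framesize - 16]   // save caller's fp/lr
  //   ...
  //   ldp  rfp, lr, [sp, #framesize - 16]
  //   add  sp, sp, #framesize
  //
  // so that all JIT prologs/epilogs agree on where fp/lr live in the frame.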
  virtual void _call_Unimplemented(address call_site) {
    mov(rscratch2, call_site);
  }

  // Microsoft's MSVC team thinks that __FUNCSIG__ is approximately (with some
  // sympathy due for calling conventions) equivalent to __PRETTY_FUNCTION__.
  // From the Clang patch that added it: "It is very similar to GCC's
  // PRETTY_FUNCTION, except it prints the calling convention."
  // https://reviews.llvm.org/D3311

#ifdef _WIN64
#define call_Unimplemented() _call_Unimplemented((address)__FUNCSIG__)
#else
#define call_Unimplemented() _call_Unimplemented((address)__PRETTY_FUNCTION__)
#endif

  // aliases defined in AARCH64 spec

  template<class T>
  inline void cmpw(Register Rd, T imm) { subsw(zr, Rd, imm); }

  inline void cmp(Register Rd, unsigned char imm8) { subs(zr, Rd, imm8); }
  inline void cmp(Register Rd, unsigned imm) = delete;

  template<class T>
  inline void cmnw(Register Rd, T imm) { addsw(zr, Rd, imm); }

  inline void cmn(Register Rd, unsigned char imm8) { adds(zr, Rd, imm8); }
  inline void cmn(Register Rd, unsigned imm) = delete;

  void cset(Register Rd, Assembler::Condition cond) {
    csinc(Rd, zr, zr, ~cond);
  }
  void csetw(Register Rd, Assembler::Condition cond) {
    csincw(Rd, zr, zr, ~cond);
  }

  void cneg(Register Rd, Register Rn, Assembler::Condition cond) {
    csneg(Rd, Rn, Rn, ~cond);
  }
  void cnegw(Register Rd, Register Rn, Assembler::Condition cond) {
    csnegw(Rd, Rn, Rn, ~cond);
  }

  inline void movw(Register Rd, Register Rn) {
    if (Rd == sp || Rn == sp) {
      Assembler::addw(Rd, Rn, 0U);
    } else {
      orrw(Rd, zr, Rn);
    }
  }
  inline void mov(Register Rd, Register Rn) {
    assert(Rd != r31_sp && Rn != r31_sp, "should be");
    if (Rd == Rn) {
      // Nothing to do.
    } else if (Rd == sp || Rn == sp) {
      Assembler::add(Rd, Rn, 0U);
    } else {
      orr(Rd, zr, Rn);
    }
  }

  inline void moviw(Register Rd, unsigned imm) { orrw(Rd, zr, imm); }
  inline void movi(Register Rd, unsigned imm) { orr(Rd, zr, imm); }

  inline void tstw(Register Rd, Register Rn) { andsw(zr, Rd, Rn); }
  inline void tst(Register Rd, Register Rn) { ands(zr, Rd, Rn); }

  inline void tstw(Register Rd, uint64_t imm) { andsw(zr, Rd, imm); }
  inline void tst(Register Rd, uint64_t imm) { ands(zr, Rd, imm); }

  inline void bfiw(Register Rd, Register Rn, unsigned lsb, unsigned width) {
    bfmw(Rd, Rn, ((32 - lsb) & 31), (width - 1));
  }
  inline void bfi(Register Rd, Register Rn, unsigned lsb, unsigned width) {
    bfm(Rd, Rn, ((64 - lsb) & 63), (width - 1));
  }

  inline void bfxilw(Register Rd, Register Rn, unsigned lsb, unsigned width) {
    bfmw(Rd, Rn, lsb, (lsb + width - 1));
  }
  inline void bfxil(Register Rd, Register Rn, unsigned lsb, unsigned width) {
    bfm(Rd, Rn, lsb, (lsb + width - 1));
  }

  inline void sbfizw(Register Rd, Register Rn, unsigned lsb, unsigned width) {
    sbfmw(Rd, Rn, ((32 - lsb) & 31), (width - 1));
  }
  inline void sbfiz(Register Rd, Register Rn, unsigned lsb, unsigned width) {
    sbfm(Rd, Rn, ((64 - lsb) & 63), (width - 1));
  }

  inline void sbfxw(Register Rd, Register Rn, unsigned lsb, unsigned width) {
    sbfmw(Rd, Rn, lsb, (lsb + width - 1));
  }
  inline void sbfx(Register Rd, Register Rn, unsigned lsb, unsigned width) {
    sbfm(Rd, Rn, lsb, (lsb + width - 1));
  }

  inline void ubfizw(Register Rd, Register Rn, unsigned lsb, unsigned width) {
    ubfmw(Rd, Rn, ((32 - lsb) & 31), (width - 1));
  }
  inline void ubfiz(Register Rd, Register Rn, unsigned lsb, unsigned width) {
    ubfm(Rd, Rn, ((64 - lsb) & 63), (width - 1));
  }

  inline void ubfxw(Register Rd, Register Rn, unsigned lsb, unsigned width) {
    ubfmw(Rd, Rn, lsb, (lsb + width - 1));
  }
  inline void ubfx(Register Rd, Register Rn, unsigned lsb, unsigned width) {
    ubfm(Rd, Rn, lsb, (lsb + width - 1));
  }
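  // Worked example of the alias expansions above (illustrative):
  //   ubfx(r0, r1, 8, 16)  ==>  ubfm(r0, r1, 8, 23)
  //     i.e. r0 = (r1 >> 8) & 0xffff           (extract bits 8..23)
  //   bfi(r0, r1, 8, 16)   ==>  bfm(r0, r1, 56, 15)
  //     i.e. r0<23:8> = r1<15:0>, other bits unchanged (insert)
  // The ((64 - lsb) & 63) term is the right-rotate amount (immr) the
  // underlying *bfm instructions use to position the field.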
  inline void asrw(Register Rd, Register Rn, unsigned imm) {
    sbfmw(Rd, Rn, imm, 31);
  }

  inline void asr(Register Rd, Register Rn, unsigned imm) {
    sbfm(Rd, Rn, imm, 63);
  }

  inline void lslw(Register Rd, Register Rn, unsigned imm) {
    ubfmw(Rd, Rn, ((32 - imm) & 31), (31 - imm));
  }

  inline void lsl(Register Rd, Register Rn, unsigned imm) {
    ubfm(Rd, Rn, ((64 - imm) & 63), (63 - imm));
  }

  inline void lsrw(Register Rd, Register Rn, unsigned imm) {
    ubfmw(Rd, Rn, imm, 31);
  }

  inline void lsr(Register Rd, Register Rn, unsigned imm) {
    ubfm(Rd, Rn, imm, 63);
  }

  inline void rorw(Register Rd, Register Rn, unsigned imm) {
    extrw(Rd, Rn, Rn, imm);
  }

  inline void ror(Register Rd, Register Rn, unsigned imm) {
    extr(Rd, Rn, Rn, imm);
  }

  inline void sxtbw(Register Rd, Register Rn) {
    sbfmw(Rd, Rn, 0, 7);
  }
  inline void sxthw(Register Rd, Register Rn) {
    sbfmw(Rd, Rn, 0, 15);
  }
  inline void sxtb(Register Rd, Register Rn) {
    sbfm(Rd, Rn, 0, 7);
  }
  inline void sxth(Register Rd, Register Rn) {
    sbfm(Rd, Rn, 0, 15);
  }
  inline void sxtw(Register Rd, Register Rn) {
    sbfm(Rd, Rn, 0, 31);
  }

  inline void uxtbw(Register Rd, Register Rn) {
    ubfmw(Rd, Rn, 0, 7);
  }
  inline void uxthw(Register Rd, Register Rn) {
    ubfmw(Rd, Rn, 0, 15);
  }
  inline void uxtb(Register Rd, Register Rn) {
    ubfm(Rd, Rn, 0, 7);
  }
  inline void uxth(Register Rd, Register Rn) {
    ubfm(Rd, Rn, 0, 15);
  }
  inline void uxtw(Register Rd, Register Rn) {
    ubfm(Rd, Rn, 0, 31);
  }

  inline void cmnw(Register Rn, Register Rm) {
    addsw(zr, Rn, Rm);
  }
  inline void cmn(Register Rn, Register Rm) {
    adds(zr, Rn, Rm);
  }

  inline void cmpw(Register Rn, Register Rm) {
    subsw(zr, Rn, Rm);
  }
  inline void cmp(Register Rn, Register Rm) {
    subs(zr, Rn, Rm);
  }

  inline void negw(Register Rd, Register Rn) {
    subw(Rd, zr, Rn);
  }

  inline void neg(Register Rd, Register Rn) {
    sub(Rd, zr, Rn);
  }

  inline void negsw(Register Rd, Register Rn) {
    subsw(Rd, zr, Rn);
  }

  inline void negs(Register Rd, Register Rn) {
    subs(Rd, zr, Rn);
  }

  inline void cmnw(Register Rn, Register Rm, enum shift_kind kind, unsigned shift = 0) {
    addsw(zr, Rn, Rm, kind, shift);
  }
  inline void cmn(Register Rn, Register Rm, enum shift_kind kind, unsigned shift = 0) {
    adds(zr, Rn, Rm, kind, shift);
  }

  inline void cmpw(Register Rn, Register Rm, enum shift_kind kind, unsigned shift = 0) {
    subsw(zr, Rn, Rm, kind, shift);
  }
  inline void cmp(Register Rn, Register Rm, enum shift_kind kind, unsigned shift = 0) {
    subs(zr, Rn, Rm, kind, shift);
  }

  inline void negw(Register Rd, Register Rn, enum shift_kind kind, unsigned shift = 0) {
    subw(Rd, zr, Rn, kind, shift);
  }

  inline void neg(Register Rd, Register Rn, enum shift_kind kind, unsigned shift = 0) {
    sub(Rd, zr, Rn, kind, shift);
  }

  inline void negsw(Register Rd, Register Rn, enum shift_kind kind, unsigned shift = 0) {
    subsw(Rd, zr, Rn, kind, shift);
  }

  inline void negs(Register Rd, Register Rn, enum shift_kind kind, unsigned shift = 0) {
    subs(Rd, zr, Rn, kind, shift);
  }

  inline void mnegw(Register Rd, Register Rn, Register Rm) {
    msubw(Rd, Rn, Rm, zr);
  }
  inline void mneg(Register Rd, Register Rn, Register Rm) {
    msub(Rd, Rn, Rm, zr);
  }
  inline void mulw(Register Rd, Register Rn, Register Rm) {
    maddw(Rd, Rn, Rm, zr);
  }
  inline void mul(Register Rd, Register Rn, Register Rm) {
    madd(Rd, Rn, Rm, zr);
  }

  inline void smnegl(Register Rd, Register Rn, Register Rm) {
    smsubl(Rd, Rn, Rm, zr);
  }
  inline void smull(Register Rd, Register Rn, Register Rm) {
    smaddl(Rd, Rn, Rm, zr);
  }

  inline void umnegl(Register Rd, Register Rn, Register Rm) {
    umsubl(Rd, Rn, Rm, zr);
  }
  inline void umull(Register Rd, Register Rn, Register Rm) {
    umaddl(Rd, Rn, Rm, zr);
  }

  // On affected Cortex-A53 cores (see VM_Version::supports_a53mac()) a
  // multiply-accumulate must be separated from a preceding memory access
  // by a nop; this is the workaround for erratum 835769.
#define WRAP(INSN)                                                      \
  void INSN(Register Rd, Register Rn, Register Rm, Register Ra) {      \
    if (VM_Version::supports_a53mac() && Ra != zr)                     \
      nop();                                                           \
    Assembler::INSN(Rd, Rn, Rm, Ra);                                   \
  }

  WRAP(madd) WRAP(msub) WRAP(maddw) WRAP(msubw)
  WRAP(smaddl) WRAP(smsubl) WRAP(umaddl) WRAP(umsubl)
#undef WRAP


  // macro assembly operations needed for aarch64

  // first two private routines for loading 32 bit or 64 bit constants
 private:

  void mov_immediate64(Register dst, uint64_t imm64);
  void mov_immediate32(Register dst, uint32_t imm32);

  int push(unsigned int bitset, Register stack);
  int pop(unsigned int bitset, Register stack);

  int push_fp(unsigned int bitset, Register stack);
  int pop_fp(unsigned int bitset, Register stack);

  int push_p(unsigned int bitset, Register stack);
  int pop_p(unsigned int bitset, Register stack);

  void mov(Register dst, Address a);

 public:
  void push(RegSet regs, Register stack) { if (regs.bits()) push(regs.bits(), stack); }
  void pop(RegSet regs, Register stack) { if (regs.bits()) pop(regs.bits(), stack); }

  void push_fp(FloatRegSet regs, Register stack) { if (regs.bits()) push_fp(regs.bits(), stack); }
  void pop_fp(FloatRegSet regs, Register stack) { if (regs.bits()) pop_fp(regs.bits(), stack); }

  static RegSet call_clobbered_gp_registers();

  void push_p(PRegSet regs, Register stack) { if (regs.bits()) push_p(regs.bits(), stack); }
  void pop_p(PRegSet regs, Register stack) { if (regs.bits()) pop_p(regs.bits(), stack); }
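  // Illustrative use of the RegSet overloads above; RegSet::of and the
  // range/operator+ forms are the usual ways to build the bit set:
  //
  //   __ push(RegSet::of(r0, r1) + RegSet::range(r19, r26), sp);
  //   ...
  //   __ pop(RegSet::of(r0, r1) + RegSet::range(r19, r26), sp);
  //
  // Registers are saved/restored in pairs (stp/ldp) where possible.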
  // Push and pop everything that might be clobbered by a native
  // runtime call except rscratch1 and rscratch2.  (They are always
  // scratch, so we don't have to protect them.)  Only save the lower
  // 64 bits of each vector register. Additional registers can be excluded
  // in a passed RegSet.
  void push_call_clobbered_registers_except(RegSet exclude);
  void pop_call_clobbered_registers_except(RegSet exclude);

  void push_call_clobbered_registers() {
    push_call_clobbered_registers_except(RegSet());
  }
  void pop_call_clobbered_registers() {
    pop_call_clobbered_registers_except(RegSet());
  }


  // now mov instructions for loading absolute addresses and 32 or
  // 64 bit integers

  inline void mov(Register dst, address addr) { mov_immediate64(dst, (uint64_t)addr); }

  template<typename T, ENABLE_IF(std::is_integral<T>::value)>
  inline void mov(Register dst, T o) { mov_immediate64(dst, (uint64_t)o); }

  inline void movw(Register dst, uint32_t imm32) { mov_immediate32(dst, imm32); }

  void mov(Register dst, RegisterOrConstant src) {
    if (src.is_register())
      mov(dst, src.as_register());
    else
      mov(dst, src.as_constant());
  }

  void movptr(Register r, uintptr_t imm64);

  void mov(FloatRegister Vd, SIMD_Arrangement T, uint64_t imm64);

  void mov(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) {
    orr(Vd, T, Vn, Vn);
  }

  void flt_to_flt16(Register dst, FloatRegister src, FloatRegister tmp) {
    fcvtsh(tmp, src);
    smov(dst, tmp, H, 0);
  }

  void flt16_to_flt(FloatRegister dst, Register src, FloatRegister tmp) {
    mov(tmp, H, 0, src);
    fcvths(dst, tmp);
  }

  // Generalized Test Bit And Branch, including a "far" variety which
  // spans more than 32KiB.
  void tbr(Condition cond, Register Rt, int bitpos, Label &dest, bool isfar = false) {
    assert(cond == EQ || cond == NE, "must be");

    if (isfar)
      cond = ~cond;

    void (Assembler::* branch)(Register Rt, int bitpos, Label &L);
    if (cond == Assembler::EQ)
      branch = &Assembler::tbz;
    else
      branch = &Assembler::tbnz;

    if (isfar) {
      Label L;
      (this->*branch)(Rt, bitpos, L);
      b(dest);
      bind(L);
    } else {
      (this->*branch)(Rt, bitpos, dest);
    }
  }

  // macro instructions for accessing and updating floating point
  // status register
  //
  // FPSR : op1 == 011
  //        CRn == 0100
  //        CRm == 0100
  //        op2 == 001

  inline void get_fpsr(Register reg)
  {
    mrs(0b11, 0b0100, 0b0100, 0b001, reg);
  }

  inline void set_fpsr(Register reg)
  {
    msr(0b011, 0b0100, 0b0100, 0b001, reg);
  }

  inline void clear_fpsr()
  {
    msr(0b011, 0b0100, 0b0100, 0b001, zr);
  }

  // FPCR : op1 == 011
  //        CRn == 0100
  //        CRm == 0100
  //        op2 == 000

  inline void get_fpcr(Register reg) {
    mrs(0b11, 0b0100, 0b0100, 0b000, reg);
  }

  inline void set_fpcr(Register reg) {
    msr(0b011, 0b0100, 0b0100, 0b000, reg);
  }

  // DCZID_EL0: op1 == 011
  //            CRn == 0000
  //            CRm == 0000
  //            op2 == 111
  inline void get_dczid_el0(Register reg)
  {
    mrs(0b011, 0b0000, 0b0000, 0b111, reg);
  }

  // CTR_EL0:   op1 == 011
  //            CRn == 0000
  //            CRm == 0000
  //            op2 == 001
  inline void get_ctr_el0(Register reg)
  {
    mrs(0b011, 0b0000, 0b0000, 0b001, reg);
  }

  inline void get_nzcv(Register reg) {
    mrs(0b011, 0b0100, 0b0010, 0b000, reg);
  }

  inline void set_nzcv(Register reg) {
    msr(0b011, 0b0100, 0b0010, 0b000, reg);
  }

  // idiv variant which deals with MINLONG as dividend and -1 as divisor
  int corrected_idivl(Register result, Register ra, Register rb,
                      bool want_remainder, Register tmp = rscratch1);
  int corrected_idivq(Register result, Register ra, Register rb,
                      bool want_remainder, Register tmp = rscratch1);
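  // Why "corrected": Java requires min_jint / -1 == min_jint (with a
  // remainder of 0) and no trap. A typical emitted sequence is the
  // following sketch (illustrative, not necessarily the exact code):
  //
  //   sdiv  tmp, ra, rb            // quotient
  //   msub  result, tmp, rb, ra    // remainder = ra - tmp * rb
  //
  // AArch64's sdiv already yields min_jint for min_jint / -1, so unlike
  // x86 no explicit overflow check is needed.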
  // Support for null-checks
  //
  // Generates code that causes a null OS exception if the content of reg is null.
  // If the accessed location is M[reg + offset] and the offset is known, provide the
  // offset. No explicit code generation is needed if the offset is within a certain
  // range (0 <= offset <= page_size).

  virtual void null_check(Register reg, int offset = -1);
  static bool needs_explicit_null_check(intptr_t offset);
  static bool uses_implicit_null_check(void* address);

  static address target_addr_for_insn(address insn_addr, unsigned insn);
  static address target_addr_for_insn_or_null(address insn_addr, unsigned insn);
  static address target_addr_for_insn(address insn_addr) {
    unsigned insn = *(unsigned*)insn_addr;
    return target_addr_for_insn(insn_addr, insn);
  }
  static address target_addr_for_insn_or_null(address insn_addr) {
    unsigned insn = *(unsigned*)insn_addr;
    return target_addr_for_insn_or_null(insn_addr, insn);
  }

  // Required platform-specific helpers for Label::patch_instructions.
  // They _shadow_ the declarations in AbstractAssembler, which are undefined.
  static int pd_patch_instruction_size(address branch, address target);
  static void pd_patch_instruction(address branch, address target, const char* file = nullptr, int line = 0) {
    pd_patch_instruction_size(branch, target);
  }
  static address pd_call_destination(address branch) {
    return target_addr_for_insn(branch);
  }
#ifndef PRODUCT
  static void pd_print_patched_instruction(address branch);
#endif

  static int patch_oop(address insn_addr, address o);
  static int patch_narrow_klass(address insn_addr, narrowKlass n);
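  // Illustrative background: embedded constants such as oops and narrow
  // klasses are materialized as movz/movk sequences, e.g. (sketch)
  //
  //   movz reg, #bits_0_15
  //   movk reg, #bits_16_31, lsl #16
  //   movk reg, #bits_32_47, lsl #32
  //
  // patch_oop/patch_narrow_klass rewrite the 16-bit immediate fields of
  // such a sequence in place when the embedded constant changes.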
  // Return whether code is emitted to a scratch blob.
  virtual bool in_scratch_emit_size() {
    return false;
  }
  address emit_trampoline_stub(int insts_call_instruction_offset, address target);
  static int max_trampoline_stub_size();
  void emit_static_call_stub();
  static int static_call_stub_size();

  // The following 4 methods return the offset of the appropriate move instruction

  // Support for fast byte/short loading with zero extension (depending on particular CPU)
  int load_unsigned_byte(Register dst, Address src);
  int load_unsigned_short(Register dst, Address src);

  // Support for fast byte/short loading with sign extension (depending on particular CPU)
  int load_signed_byte(Register dst, Address src);
  int load_signed_short(Register dst, Address src);

  int load_signed_byte32(Register dst, Address src);
  int load_signed_short32(Register dst, Address src);

  // Support for sign-extension (hi:lo = extend_sign(lo))
  void extend_sign(Register hi, Register lo);

  // Load and store values by size and signed-ness
  void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed);
  void store_sized_value(Address dst, Register src, size_t size_in_bytes);

  // Support for inc/dec with optimal instruction selection depending on value

  // x86_64 aliases an unqualified register/address increment and
  // decrement to call incrementq and decrementq but also supports
  // explicitly sized calls to incrementq/decrementq or
  // incrementl/decrementl

  // for aarch64 the proper convention would be to use
  // increment/decrement for 64 bit operations and
  // incrementw/decrementw for 32 bit operations. so when porting
  // x86_64 code we can leave calls to increment/decrement as is,
  // replace incrementq/decrementq with increment/decrement and
  // replace incrementl/decrementl with incrementw/decrementw.

  // n.b. increment/decrement calls with an Address destination will
  // need to use a scratch register to load the value to be
  // incremented. increment/decrement calls which add or subtract a
  // constant value greater than 2^12 will need to use a 2nd scratch
  // register to hold the constant. so, a register increment/decrement
  // may trash rscratch2, and an address increment/decrement may trash
  // rscratch1 and rscratch2.

  void decrementw(Address dst, int value = 1);
  void decrementw(Register reg, int value = 1);

  void decrement(Register reg, int value = 1);
  void decrement(Address dst, int value = 1);

  void incrementw(Address dst, int value = 1);
  void incrementw(Register reg, int value = 1);

  void increment(Register reg, int value = 1);
  void increment(Address dst, int value = 1);


  // Alignment
  void align(int modulus);
  void align(int modulus, int target);

  // nop
  void post_call_nop();

  // Stack frame creation/removal
  void enter(bool strip_ret_addr = false);
  void leave();

  // ROP Protection
  void protect_return_address();
  void protect_return_address(Register return_reg);
  void authenticate_return_address();
  void authenticate_return_address(Register return_reg);
  void strip_return_address();
  void check_return_address(Register return_reg = lr) PRODUCT_RETURN;

  // Support for getting the JavaThread pointer (i.e., a reference to thread-local information)
  // The pointer will be loaded into the thread register.
  void get_thread(Register thread);

  // support for argument shuffling
  void move32_64(VMRegPair src, VMRegPair dst, Register tmp = rscratch1);
  void float_move(VMRegPair src, VMRegPair dst, Register tmp = rscratch1);
  void long_move(VMRegPair src, VMRegPair dst, Register tmp = rscratch1);
  void double_move(VMRegPair src, VMRegPair dst, Register tmp = rscratch1);
  void object_move(OopMap* map,
                   int oop_handle_offset,
                   int framesize_in_slots,
                   VMRegPair src,
                   VMRegPair dst,
                   bool is_receiver,
                   int* receiver_offset);


  // Support for VM calls
  //
  // It is imperative that all calls into the VM are handled via the call_VM macros.
  // They make sure that the stack linkage is setup correctly. call_VM's correspond
  // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points.


  void call_VM(Register oop_result,
               address entry_point,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               address entry_point,
               Register arg_1,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               address entry_point,
               Register arg_1, Register arg_2,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               address entry_point,
               Register arg_1, Register arg_2, Register arg_3,
               bool check_exceptions = true);

  // Overloadings with last_Java_sp
  void call_VM(Register oop_result,
               Register last_java_sp,
               address entry_point,
               int number_of_arguments = 0,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               Register last_java_sp,
               address entry_point,
               Register arg_1,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               Register last_java_sp,
               address entry_point,
               Register arg_1, Register arg_2,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               Register last_java_sp,
               address entry_point,
               Register arg_1, Register arg_2, Register arg_3,
               bool check_exceptions = true);

  void get_vm_result(Register oop_result, Register thread);
  void get_vm_result_2(Register metadata_result, Register thread);

  // These always tightly bind to MacroAssembler::call_VM_base
  // bypassing the virtual implementation
  void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments = 0, bool check_exceptions = true);
  void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions = true);
  void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions = true);
  void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions = true);
  void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4, bool check_exceptions = true);

  void call_VM_leaf(address entry_point,
                    int number_of_arguments = 0);
  void call_VM_leaf(address entry_point,
                    Register arg_1);
  void call_VM_leaf(address entry_point,
                    Register arg_1, Register arg_2);
  void call_VM_leaf(address entry_point,
                    Register arg_1, Register arg_2, Register arg_3);
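  // Typical use from interpreter/stub code (illustrative; the runtime entry
  // shown is just an example):
  //
  //   __ call_VM(r0 /* oop_result */,
  //              CAST_FROM_FN_PTR(address, InterpreterRuntime::create_klass_exception),
  //              c_rarg1, c_rarg2);
  //
  // rthread is passed as the implicit first C argument; pending exceptions
  // are checked on return unless check_exceptions is false.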
  // These always tightly bind to MacroAssembler::call_VM_leaf_base
  // bypassing the virtual implementation
  void super_call_VM_leaf(address entry_point);
  void super_call_VM_leaf(address entry_point, Register arg_1);
  void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2);
  void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3);
  void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4);

  // last Java Frame (fills frame anchor)
  void set_last_Java_frame(Register last_java_sp,
                           Register last_java_fp,
                           address  last_java_pc,
                           Register scratch);

  void set_last_Java_frame(Register last_java_sp,
                           Register last_java_fp,
                           Label&   last_java_pc,
                           Register scratch);

  void set_last_Java_frame(Register last_java_sp,
                           Register last_java_fp,
                           Register last_java_pc,
                           Register scratch);

  void reset_last_Java_frame(Register thread);

  // thread in the default location (rthread)
  void reset_last_Java_frame(bool clear_fp);

  // Stores
  void store_check(Register obj);                // store check for obj - register is destroyed afterwards
  void store_check(Register obj, Address dst);   // same as above, dst is exact store location (reg. is destroyed)

  void resolve_jobject(Register value, Register tmp1, Register tmp2);
  void resolve_global_jobject(Register value, Register tmp1, Register tmp2);

  // C 'boolean' to Java boolean: x == 0 ? 0 : 1
  void c2bool(Register x);

  void load_method_holder_cld(Register rresult, Register rmethod);
  void load_method_holder(Register holder, Register method);

  // oop manipulations
  void load_nklass(Register dst, Register src);
  void load_klass(Register dst, Register src);
  void store_klass(Register dst, Register src);
  void cmp_klass(Register oop, Register trial_klass, Register tmp);
  void cmp_klass(Register src, Register dst, Register tmp1, Register tmp2);

  void resolve_weak_handle(Register result, Register tmp1, Register tmp2);
  void resolve_oop_handle(Register result, Register tmp1, Register tmp2);
  void load_mirror(Register dst, Register method, Register tmp1, Register tmp2);

  void access_load_at(BasicType type, DecoratorSet decorators, Register dst, Address src,
                      Register tmp1, Register tmp2);

  void access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register val,
                       Register tmp1, Register tmp2, Register tmp3);

  void load_heap_oop(Register dst, Address src, Register tmp1,
                     Register tmp2, DecoratorSet decorators = 0);

  void load_heap_oop_not_null(Register dst, Address src, Register tmp1,
                              Register tmp2, DecoratorSet decorators = 0);
  void store_heap_oop(Address dst, Register val, Register tmp1,
                      Register tmp2, Register tmp3, DecoratorSet decorators = 0);

  // currently unimplemented
  // Used for storing null. All other oop constants should be
  // stored using routines that take a jobject.
  void store_heap_oop_null(Address dst);
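  // Illustrative: the heap-oop accessors dispatch through the GC's barrier
  // set, so callers only pick decorators, e.g.
  //
  //   __ load_heap_oop(r0, Address(robj, field_offset), rscratch1, rscratch2,
  //                    IS_NOT_NULL /* when the oop is provably non-null */);
  //
  // where field_offset is a stand-in for a real field offset. IN_HEAP is
  // implied for these helpers; compressed oops are decoded as needed.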
  void store_klass_gap(Register dst, Register src);

  // This dummy is to prevent a call to store_heap_oop from
  // converting a zero (like null) into a Register by giving
  // the compiler two choices it can't resolve

  void store_heap_oop(Address dst, void* dummy);

  void encode_heap_oop(Register d, Register s);
  void encode_heap_oop(Register r) { encode_heap_oop(r, r); }
  void decode_heap_oop(Register d, Register s);
  void decode_heap_oop(Register r) { decode_heap_oop(r, r); }
  void encode_heap_oop_not_null(Register r);
  void decode_heap_oop_not_null(Register r);
  void encode_heap_oop_not_null(Register dst, Register src);
  void decode_heap_oop_not_null(Register dst, Register src);

  void set_narrow_oop(Register dst, jobject obj);

  void encode_klass_not_null(Register r);
  void decode_klass_not_null(Register r);
  void encode_klass_not_null(Register dst, Register src);
  void decode_klass_not_null(Register dst, Register src);

  void set_narrow_klass(Register dst, Klass* k);

  // if heap base register is used - reinit it with the correct value
  void reinit_heapbase();

  DEBUG_ONLY(void verify_heapbase(const char* msg);)

  void push_CPU_state(bool save_vectors = false, bool use_sve = false,
                      int sve_vector_size_in_bytes = 0, int total_predicate_in_bytes = 0);
  void pop_CPU_state(bool restore_vectors = false, bool use_sve = false,
                     int sve_vector_size_in_bytes = 0, int total_predicate_in_bytes = 0);

  void push_cont_fastpath(Register java_thread);
  void pop_cont_fastpath(Register java_thread);

  // Round reg up to a multiple of modulus (which must be a power of two)
  void round_to(Register reg, int modulus);

  // java.lang.Math::round intrinsics
  void java_round_double(Register dst, FloatRegister src, FloatRegister ftmp);
  void java_round_float(Register dst, FloatRegister src, FloatRegister ftmp);

  // allocation
  void tlab_allocate(
    Register obj,               // result: pointer to object after successful allocation
    Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise
    int      con_size_in_bytes, // object size in bytes if   known at compile time
    Register t1,                // temp register
    Register t2,                // temp register
    Label&   slow_case          // continuation point if fast allocation fails
  );
  void verify_tlab();

  // interface method calling
  void lookup_interface_method(Register recv_klass,
                               Register intf_klass,
                               RegisterOrConstant itable_index,
                               Register method_result,
                               Register scan_temp,
                               Label& no_such_interface,
                               bool return_method = true);

  void lookup_interface_method_stub(Register recv_klass,
                                    Register holder_klass,
                                    Register resolved_klass,
                                    Register method_result,
                                    Register temp_reg,
                                    Register temp_reg2,
                                    int itable_index,
                                    Label& L_no_such_interface);

  // virtual method calling
  // n.b. x86 allows RegisterOrConstant for vtable_index
  void lookup_virtual_method(Register recv_klass,
                             RegisterOrConstant vtable_index,
                             Register method_result);

  // Test sub_klass against super_klass, with fast and slow paths.

  // The fast path produces a tri-state answer: yes / no / maybe-slow.
  // One of the three labels can be null, meaning take the fall-through.
  // If super_check_offset is -1, the value is loaded up from super_klass.
  // No registers are killed, except temp_reg.
  void check_klass_subtype_fast_path(Register sub_klass,
                                     Register super_klass,
                                     Register temp_reg,
                                     Label* L_success,
                                     Label* L_failure,
                                     Label* L_slow_path,
                                     RegisterOrConstant super_check_offset = RegisterOrConstant(-1));

  // The rest of the type check; must be wired to a corresponding fast path.
  // It does not repeat the fast path logic, so don't use it standalone.
  // The temp_reg and temp2_reg can be noreg, if no temps are available.
  // Updates the sub's secondary super cache as necessary.
  // If set_cond_codes, condition codes will be Z on success, NZ on failure.
  void check_klass_subtype_slow_path(Register sub_klass,
                                     Register super_klass,
                                     Register temp_reg,
                                     Register temp2_reg,
                                     Label* L_success,
                                     Label* L_failure,
                                     bool set_cond_codes = false);

  // Simplified, combined version, good for typical uses.
  // Falls through on failure.
  void check_klass_subtype(Register sub_klass,
                           Register super_klass,
                           Register temp_reg,
                           Label& L_success);

  void clinit_barrier(Register klass,
                      Register thread,
                      Label* L_fast_path = nullptr,
                      Label* L_slow_path = nullptr);

  Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0);

  void verify_sve_vector_length(Register tmp = rscratch1);
  void reinitialize_ptrue() {
    if (UseSVE > 0) {
      sve_ptrue(ptrue, B);
    }
  }
  void verify_ptrue();

  // Debugging

  // only if +VerifyOops
  void _verify_oop(Register reg, const char* s, const char* file, int line);
  void _verify_oop_addr(Address addr, const char* s, const char* file, int line);

  void _verify_oop_checked(Register reg, const char* s, const char* file, int line) {
    if (VerifyOops) {
      _verify_oop(reg, s, file, line);
    }
  }
  void _verify_oop_addr_checked(Address reg, const char* s, const char* file, int line) {
    if (VerifyOops) {
      _verify_oop_addr(reg, s, file, line);
    }
  }

  // TODO: verify method and klass metadata (compare against vptr?)
  void _verify_method_ptr(Register reg, const char* msg, const char* file, int line) {}
  void _verify_klass_ptr(Register reg, const char* msg, const char* file, int line) {}

#define verify_oop(reg) _verify_oop_checked(reg, "broken oop " #reg, __FILE__, __LINE__)
#define verify_oop_msg(reg, msg) _verify_oop_checked(reg, "broken oop " #reg ", " #msg, __FILE__, __LINE__)
#define verify_oop_addr(addr) _verify_oop_addr_checked(addr, "broken oop addr " #addr, __FILE__, __LINE__)
#define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__)
#define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__)

  // Restore cpu control state after JNI call
  void restore_cpu_control_state_after_jni(Register tmp1, Register tmp2);

  // prints msg, dumps registers and stops execution
  void stop(const char* msg);

  static void debug64(char* msg, int64_t pc, int64_t regs[]);

  void untested() { stop("untested"); }

  void unimplemented(const char* what = "");

  void should_not_reach_here() { stop("should not reach here"); }

  void _assert_asm(Condition cc, const char* msg);
#define assert_asm0(cc, msg) _assert_asm(cc, FILE_AND_LINE ": " msg)
#define assert_asm(masm, command, cc, msg) DEBUG_ONLY(command; (masm)->_assert_asm(cc, FILE_AND_LINE ": " #command " " #cc ": " msg))

  // Stack overflow checking
  void bang_stack_with_offset(int offset) {
    // stack grows down, caller passes positive offset
    assert(offset > 0, "must bang with positive offset");
    sub(rscratch2, sp, offset);
    str(zr, Address(rscratch2));
  }
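  // Illustrative: prologue stack-overflow checks bang each shadow page in
  // turn, e.g. (sketch; n_shadow_pages stands in for the real bound):
  //
  //   for (int i = 1; i <= n_shadow_pages; i++) {
  //     bang_stack_with_offset(i * (int)os::vm_page_size());
  //   }
  //
  // Each str of zr touches one page below sp, so an overflow is detected
  // as a fault at a known location rather than as silent corruption.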
  // Writes to stack successive pages until offset reached to check for
  // stack overflow + shadow pages. Also, clobbers tmp.
  void bang_stack_size(Register size, Register tmp);

  // Check for reserved stack access in method being exited (for JIT)
  void reserved_stack_check();

  // Arithmetics

  void addptr(const Address &dst, int32_t src);
  void cmpptr(Register src1, Address src2);

  void cmpoop(Register obj1, Register obj2);

  // Various forms of CAS

  void cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp,
                          Label &succeed, Label *fail);
  void cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp,
                  Label &succeed, Label *fail);

  void cmpxchgw(Register oldv, Register newv, Register addr, Register tmp,
                Label &succeed, Label *fail);

  void atomic_add(Register prev, RegisterOrConstant incr, Register addr);
  void atomic_addw(Register prev, RegisterOrConstant incr, Register addr);
  void atomic_addal(Register prev, RegisterOrConstant incr, Register addr);
  void atomic_addalw(Register prev, RegisterOrConstant incr, Register addr);

  void atomic_xchg(Register prev, Register newv, Register addr);
  void atomic_xchgw(Register prev, Register newv, Register addr);
  void atomic_xchgl(Register prev, Register newv, Register addr);
  void atomic_xchglw(Register prev, Register newv, Register addr);
  void atomic_xchgal(Register prev, Register newv, Register addr);
  void atomic_xchgalw(Register prev, Register newv, Register addr);

  void orptr(Address adr, RegisterOrConstant src) {
    ldr(rscratch1, adr);
    if (src.is_register())
      orr(rscratch1, rscratch1, src.as_register());
    else
      orr(rscratch1, rscratch1, src.as_constant());
    str(rscratch1, adr);
  }

  // A generic CAS; success or failure is in the EQ flag.
  // Clobbers rscratch1
  void cmpxchg(Register addr, Register expected, Register new_val,
               enum operand_size size,
               bool acquire, bool release, bool weak,
               Register result);

#ifdef ASSERT
  // Template short-hand support to clean-up after a failed call to trampoline
  // call generation (see trampoline_call() below), when a set of Labels must
  // be reset (before returning).
  template<typename Label, typename... More>
  void reset_labels(Label& lbl, More&... more) {
    lbl.reset(); reset_labels(more...);
  }
  template<typename Label>
  void reset_labels(Label& lbl) {
    lbl.reset();
  }
#endif

 private:
  void compare_eq(Register rn, Register rm, enum operand_size size);

 public:
  // AArch64 OpenJDK uses four different types of calls:
  //   - direct call: bl pc_relative_offset
  //     This is the shortest and the fastest, but the offset has the range:
  //     +/-128MB for the release build, +/-2MB for the debug build.
  //
  //   - far call: adrp reg, pc_relative_offset; add; blr reg
  //     This is longer than a direct call. The offset has
  //     the range +/-4GB. As the code cache size is limited to 4GB,
  //     far calls can reach anywhere in the code cache. If a jump is
  //     needed rather than a call, a far jump 'br reg' can be used instead.
  //     All instructions are embedded at a call site.
  //
  //   - trampoline call:
  //     This is only available in C1/C2-generated code (nmethod). It is a combination
  //     of a direct call, which is used if the destination of a call is in range,
  //     and a register-indirect call.
  //     It has the advantages of reaching anywhere in
  //     the AArch64 address space and being patchable at runtime when the generated
  //     code is being executed by other threads.
  //
  //       [Main code section]
  //         bl trampoline
  //       [Stub code section]
  //       trampoline:
  //         ldr reg, pc + 8
  //         br reg
  //         <64-bit destination address>
  //
  //     If the destination is in range when the generated code is moved to the code
  //     cache, 'bl trampoline' is replaced with 'bl destination' and the trampoline
  //     is not used.
  //     The optimization does not remove the trampoline from the stub section.
  //     This is necessary because the trampoline may well be redirected later when
  //     code is patched, and the new destination may not be reachable by a simple BR
  //     instruction.
  //
  //   - indirect call: move reg, address; blr reg
  //     This too can reach anywhere in the address space, but it cannot be
  //     patched while code is running, so it must only be modified at a safepoint.
  //     This form of call is most suitable for targets at fixed addresses, which
  //     will never be patched.
  //
  // The patching we do conforms to the "Concurrent modification and
  // execution of instructions" section of the Arm Architectural
  // Reference Manual, which only allows B, BL, BRK, HVC, ISB, NOP, SMC,
  // or SVC instructions to be modified while another thread is
  // executing them.
  //
  // To patch a trampoline call when the BL can't reach, we first modify
  // the 64-bit destination address in the trampoline, then modify the
  // BL to point to the trampoline, then flush the instruction cache to
  // broadcast the change to all executing threads. See
  // NativeCall::set_destination_mt_safe for the details.
  //
  // There is a benign race in that the other thread might observe the
  // modified BL before it observes the modified 64-bit destination
  // address. That does not matter because the destination method has been
  // invalidated, so there will be a trap at its start.
  // For this to work, the destination address in the trampoline is
  // always updated, even if we're not using the trampoline.

  // Emit a direct call if the entry address will always be in range,
  // otherwise a trampoline call.
  // Supported entry.rspec():
  // - relocInfo::runtime_call_type
  // - relocInfo::opt_virtual_call_type
  // - relocInfo::static_call_type
  // - relocInfo::virtual_call_type
  //
  // Return: the call PC or null if CodeCache is full.
  address trampoline_call(Address entry);

  static bool far_branches() {
    return ReservedCodeCacheSize > branch_range;
  }

  // Check if branches to the non nmethod section require a far jump
  static bool codestub_branch_needs_far_jump() {
    return CodeCache::max_distance_to_non_nmethod() > branch_range;
  }

  // Emit a direct call/jump if the entry address will always be in range,
  // otherwise a far call/jump.
  // The address must be inside the code cache.
  // Supported entry.rspec():
  // - relocInfo::external_word_type
  // - relocInfo::runtime_call_type
  // - relocInfo::none
  // In the case of a far call/jump, the entry address is put in the tmp register.
  // The tmp register is invalidated.
  //
  // far_jump returns the size of the emitted code.
  void far_call(Address entry, Register tmp = rscratch1);
  int far_jump(Address entry, Register tmp = rscratch1);
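  // Illustrative sketch of the far form (the exact sequence may differ):
  //
  //   adrp tmp, entry          // page of the target, +/-4GB reach
  //   add  tmp, tmp, #page_off
  //   blr  tmp                 // (or 'br tmp' for far_jump)
  //
  // versus a single 'bl entry' / 'b entry' when far_branches() is false.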
  static int far_codestub_branch_size() {
    if (codestub_branch_needs_far_jump()) {
      return 3 * 4;  // adrp, add, br
    } else {
      return 4;
    }
  }

  // Emit the CompiledIC call idiom
  address ic_call(address entry, jint method_index = 0);
  static int ic_check_size();
  int ic_check(int end_alignment);

 public:

  // Data

  void mov_metadata(Register dst, Metadata* obj);
  Address allocate_metadata_address(Metadata* obj);
  Address constant_oop_address(jobject obj);

  void movoop(Register dst, jobject obj);

  // CRC32 code for java.util.zip.CRC32::updateBytes() intrinsic.
  void kernel_crc32(Register crc, Register buf, Register len,
                    Register table0, Register table1, Register table2, Register table3,
                    Register tmp, Register tmp2, Register tmp3);
  // CRC32 code for java.util.zip.CRC32C::updateBytes() intrinsic.
  void kernel_crc32c(Register crc, Register buf, Register len,
                     Register table0, Register table1, Register table2, Register table3,
                     Register tmp, Register tmp2, Register tmp3);

  // Stack push and pop individual 64 bit registers
  void push(Register src);
  void pop(Register dst);

  void repne_scan(Register addr, Register value, Register count,
                  Register scratch);
  void repne_scanw(Register addr, Register value, Register count,
                   Register scratch);

  typedef void (MacroAssembler::* add_sub_imm_insn)(Register Rd, Register Rn, unsigned imm);
  typedef void (MacroAssembler::* add_sub_reg_insn)(Register Rd, Register Rn, Register Rm, enum shift_kind kind, unsigned shift);

  // If a constant does not fit in an immediate field, generate some
  // number of MOV instructions and then perform the operation.
  void wrap_add_sub_imm_insn(Register Rd, Register Rn, uint64_t imm,
                             add_sub_imm_insn insn1,
                             add_sub_reg_insn insn2, bool is32);
  // Separate version which sets the flags.
  void wrap_adds_subs_imm_insn(Register Rd, Register Rn, uint64_t imm,
                               add_sub_imm_insn insn1,
                               add_sub_reg_insn insn2, bool is32);

#define WRAP(INSN, is32)                                                \
  void INSN(Register Rd, Register Rn, uint64_t imm) {                  \
    wrap_add_sub_imm_insn(Rd, Rn, imm, &Assembler::INSN, &Assembler::INSN, is32); \
  }                                                                     \
                                                                        \
  void INSN(Register Rd, Register Rn, Register Rm,                     \
            enum shift_kind kind, unsigned shift = 0) {                \
    Assembler::INSN(Rd, Rn, Rm, kind, shift);                          \
  }                                                                     \
                                                                        \
  void INSN(Register Rd, Register Rn, Register Rm) {                   \
    Assembler::INSN(Rd, Rn, Rm);                                       \
  }                                                                     \
                                                                        \
  void INSN(Register Rd, Register Rn, Register Rm,                     \
            ext::operation option, int amount = 0) {                   \
    Assembler::INSN(Rd, Rn, Rm, option, amount);                       \
  }

  WRAP(add, false) WRAP(addw, true) WRAP(sub, false) WRAP(subw, true)

#undef WRAP
#define WRAP(INSN, is32)                                                \
  void INSN(Register Rd, Register Rn, uint64_t imm) {                  \
    wrap_adds_subs_imm_insn(Rd, Rn, imm, &Assembler::INSN, &Assembler::INSN, is32); \
  }                                                                     \
                                                                        \
  void INSN(Register Rd, Register Rn, Register Rm,                     \
            enum shift_kind kind, unsigned shift = 0) {                \
    Assembler::INSN(Rd, Rn, Rm, kind, shift);                          \
  }                                                                     \
                                                                        \
  void INSN(Register Rd, Register Rn, Register Rm) {                   \
    Assembler::INSN(Rd, Rn, Rm);                                       \
  }                                                                     \
                                                                        \
  void INSN(Register Rd, Register Rn, Register Rm,                     \
            ext::operation option, int amount = 0) {                   \
    Assembler::INSN(Rd, Rn, Rm, option, amount);                       \
  }
  WRAP(adds, false) WRAP(addsw, true) WRAP(subs, false) WRAP(subsw, true)
#undef WRAP

  void add(Register Rd, Register Rn, RegisterOrConstant increment);
  void addw(Register Rd, Register Rn, RegisterOrConstant increment);
  void sub(Register Rd, Register Rn, RegisterOrConstant decrement);
  void subw(Register Rd, Register Rn, RegisterOrConstant decrement);

  void adrp(Register reg1, const Address &dest, uint64_t &byte_offset);

  void tableswitch(Register index, jint lowbound, jint highbound,
                   Label &jumptable, Label &jumptable_end, int stride = 1) {
    adr(rscratch1, jumptable);
    subsw(rscratch2, index, lowbound);
    subsw(zr, rscratch2, highbound - lowbound);
    br(Assembler::HS, jumptable_end);
    add(rscratch1, rscratch1, rscratch2,
        ext::sxtw, exact_log2(stride * Assembler::instruction_size));
    br(rscratch1);
  }

  // Form an address from base + offset in Rd. Rd may or may not
  // actually be used: you must use the Address that is returned. It
  // is up to you to ensure that the shift provided matches the size
  // of your data.
  Address form_address(Register Rd, Register base, int64_t byte_offset, int shift);

  // Return true iff an address is within the 48-bit AArch64 address
  // space.
  bool is_valid_AArch64_address(address a) {
    return ((uint64_t)a >> 48) == 0;
  }

  // Load the base of the cardtable byte map into reg.
  void load_byte_map_base(Register reg);

  // Prolog generator routines to support switch between x86 code and
  // generated ARM code

  // routine to generate an x86 prolog for a stub function which
  // bootstraps into the generated ARM code which directly follows the
  // stub
  //

 public:

  void ldr_constant(Register dest, const Address &const_addr) {
    if (NearCpool) {
      ldr(dest, const_addr);
    } else {
      uint64_t offset;
      adrp(dest, InternalAddress(const_addr.target()), offset);
      ldr(dest, Address(dest, offset));
    }
  }

  address read_polling_page(Register r, relocInfo::relocType rtype);
  void get_polling_page(Register dest, relocInfo::relocType rtype);

  // CRC32 code for java.util.zip.CRC32::updateBytes() intrinsic.
  void update_byte_crc32(Register crc, Register val, Register table);
  void update_word_crc32(Register crc, Register v, Register tmp,
                         Register table0, Register table1, Register table2, Register table3,
                         bool upper = false);

  address count_positives(Register ary1, Register len, Register result);

  address arrays_equals(Register a1, Register a2, Register result, Register cnt1,
                        Register tmp1, Register tmp2, Register tmp3, int elem_size);

  void string_equals(Register a1, Register a2, Register result, Register cnt1);

  void fill_words(Register base, Register cnt, Register value);
  address zero_words(Register base, uint64_t cnt);
  address zero_words(Register ptr, Register cnt);
  void zero_dcache_blocks(Register base, Register cnt);

  static const int zero_words_block_size;

  address byte_array_inflate(Register src, Register dst, Register len,
                             FloatRegister vtmp1, FloatRegister vtmp2,
                             FloatRegister vtmp3, Register tmp4);

  void char_array_compress(Register src, Register dst, Register len,
                           Register res,
                           FloatRegister vtmp0, FloatRegister vtmp1,
                           FloatRegister vtmp2, FloatRegister vtmp3,
                           FloatRegister vtmp4, FloatRegister vtmp5);

  void encode_iso_array(Register src, Register dst,
                        Register len, Register res, bool ascii,
                        FloatRegister vtmp0, FloatRegister vtmp1,
                        FloatRegister vtmp2, FloatRegister vtmp3,
                        FloatRegister vtmp4, FloatRegister vtmp5);

  void generate_dsin_dcos(bool isCos, address npio2_hw, address two_over_pi,
                          address pio2, address dsin_coef, address dcos_coef);
 private:
  // begin trigonometric functions support block
  void generate__ieee754_rem_pio2(address npio2_hw, address two_over_pi, address pio2);
  void generate__kernel_rem_pio2(address two_over_pi, address pio2);
  void generate_kernel_sin(FloatRegister x, bool iyIsOne, address dsin_coef);
  void generate_kernel_cos(FloatRegister x, address dcos_coef);
  // end trigonometric functions support block
  void add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo,
                       Register src1, Register src2);
  void add2_with_carry(Register dest_hi, Register dest_lo, Register src1, Register src2) {
    add2_with_carry(dest_hi, dest_hi, dest_lo, src1, src2);
  }
  void multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart,
                             Register y, Register y_idx, Register z,
                             Register carry, Register product,
                             Register idx, Register kdx);
  void multiply_128_x_128_loop(Register y, Register z,
                               Register carry, Register carry2,
                               Register idx, Register jdx,
                               Register yz_idx1, Register yz_idx2,
                               Register tmp, Register tmp3, Register tmp4,
                               Register tmp7, Register product_hi);
  void kernel_crc32_using_crypto_pmull(Register crc, Register buf,
                                       Register len, Register tmp0, Register tmp1, Register tmp2,
                                       Register tmp3);
  void kernel_crc32_using_crc32(Register crc, Register buf,
                                Register len, Register tmp0, Register tmp1, Register tmp2,
                                Register tmp3);
  void kernel_crc32c_using_crypto_pmull(Register crc, Register buf,
                                        Register len, Register tmp0, Register tmp1, Register tmp2,
                                        Register tmp3);
  void kernel_crc32c_using_crc32c(Register crc, Register buf,
                                  Register len, Register tmp0, Register tmp1, Register tmp2,
                                  Register tmp3);
  void kernel_crc32_common_fold_using_crypto_pmull(Register crc, Register buf,
                                                   Register len, Register tmp0, Register tmp1, Register tmp2,
                                                   size_t table_offset);
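  // Background for the GHASH helpers below (illustrative): GHASH is a
  // multiply in GF(2^128). On AArch64 the 64x64 -> 128 bit carry-less
  // partial products are formed with the pmull/pmull2 instructions, e.g.
  //
  //   pmull  v0.1q, v1.1d, v2.1d   // low 64-bit halves
  //   pmull2 v3.1q, v1.2d, v2.2d   // high 64-bit halves
  //
  // followed by a reduction modulo the GHASH polynomial (ghash_reduce).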
  void ghash_modmul(FloatRegister result,
                    FloatRegister result_lo, FloatRegister result_hi, FloatRegister b,
                    FloatRegister a, FloatRegister vzr, FloatRegister a1_xor_a0, FloatRegister p,
                    FloatRegister t1, FloatRegister t2, FloatRegister t3);
  void ghash_load_wide(int index, Register data, FloatRegister result, FloatRegister state);
 public:
  void multiply_to_len(Register x, Register xlen, Register y, Register ylen, Register z,
                       Register zlen, Register tmp1, Register tmp2, Register tmp3,
                       Register tmp4, Register tmp5, Register tmp6, Register tmp7);
  void mul_add(Register out, Register in, Register offs, Register len, Register k);
  void ghash_multiply(FloatRegister result_lo, FloatRegister result_hi,
                      FloatRegister a, FloatRegister b, FloatRegister a1_xor_a0,
                      FloatRegister tmp1, FloatRegister tmp2, FloatRegister tmp3);
  void ghash_multiply_wide(int index,
                           FloatRegister result_lo, FloatRegister result_hi,
                           FloatRegister a, FloatRegister b, FloatRegister a1_xor_a0,
                           FloatRegister tmp1, FloatRegister tmp2, FloatRegister tmp3);
  void ghash_reduce(FloatRegister result, FloatRegister lo, FloatRegister hi,
                    FloatRegister p, FloatRegister z, FloatRegister t1);
  void ghash_reduce_wide(int index, FloatRegister result, FloatRegister lo, FloatRegister hi,
                         FloatRegister p, FloatRegister z, FloatRegister t1);
  void ghash_processBlocks_wide(address p, Register state, Register subkeyH,
                                Register data, Register blocks, int unrolls);


  void aesenc_loadkeys(Register key, Register keylen);
  void aesecb_encrypt(Register from, Register to, Register keylen,
                      FloatRegister data = v0, int unrolls = 1);
  void aesecb_decrypt(Register from, Register to, Register key, Register keylen);
  void aes_round(FloatRegister input, FloatRegister subkey);

  // ChaCha20 functions support block
  void cc20_quarter_round(FloatRegister aVec, FloatRegister bVec,
                          FloatRegister cVec, FloatRegister dVec, FloatRegister scratch,
                          FloatRegister tbl);
  void cc20_shift_lane_org(FloatRegister bVec, FloatRegister cVec,
                           FloatRegister dVec, bool colToDiag);

  // Place an ISB after code may have been modified due to a safepoint.
  void safepoint_isb();

 private:
  // Return the effective address r + (r1 << ext) + offset.
  // Uses rscratch2.
  Address offsetted_address(Register r, Register r1, Address::extend ext,
                            int offset, int size);

 private:
  // Returns an address on the stack which is reachable with a ldr/str of size.
  // Uses rscratch2 if the address is not directly reachable.
  Address spill_address(int size, int offset, Register tmp = rscratch2);
  Address sve_spill_address(int sve_reg_size_in_bytes, int offset, Register tmp = rscratch2);

  bool merge_alignment_check(Register base, size_t size, int64_t cur_offset, int64_t prev_offset) const;

  // Check whether two loads/stores can be merged into ldp/stp.
  bool ldst_can_merge(Register rx, const Address &adr, size_t cur_size_in_bytes, bool is_store) const;

  // Merge current load/store with previous load/store into ldp/stp.
  void merge_ldst(Register rx, const Address &adr, size_t cur_size_in_bytes, bool is_store);

  // Try to merge two loads/stores into ldp/stp. Returns true if successful,
  // false otherwise.
  bool try_merge_ldst(Register rt, const Address &adr, size_t cur_size_in_bytes, bool is_store);
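  // Worked example of the peephole above (illustrative): two adjacent
  // same-size spills such as
  //
  //   str x1, [sp, #16]
  //   str x2, [sp, #24]
  //
  // are rewritten as a single
  //
  //   stp x1, x2, [sp, #16]
  //
  // provided the offsets are consecutive and suitably aligned
  // (merge_alignment_check) and the merge is otherwise legal (ldst_can_merge).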
 public:
  void spill(Register Rx, bool is64, int offset) {
    if (is64) {
      str(Rx, spill_address(8, offset));
    } else {
      strw(Rx, spill_address(4, offset));
    }
  }
  void spill(FloatRegister Vx, SIMD_RegVariant T, int offset) {
    str(Vx, T, spill_address(1 << (int)T, offset));
  }

  void spill_sve_vector(FloatRegister Zx, int offset, int vector_reg_size_in_bytes) {
    sve_str(Zx, sve_spill_address(vector_reg_size_in_bytes, offset));
  }
  void spill_sve_predicate(PRegister pr, int offset, int predicate_reg_size_in_bytes) {
    sve_str(pr, sve_spill_address(predicate_reg_size_in_bytes, offset));
  }

  void unspill(Register Rx, bool is64, int offset) {
    if (is64) {
      ldr(Rx, spill_address(8, offset));
    } else {
      ldrw(Rx, spill_address(4, offset));
    }
  }
  void unspill(FloatRegister Vx, SIMD_RegVariant T, int offset) {
    ldr(Vx, T, spill_address(1 << (int)T, offset));
  }

  void unspill_sve_vector(FloatRegister Zx, int offset, int vector_reg_size_in_bytes) {
    sve_ldr(Zx, sve_spill_address(vector_reg_size_in_bytes, offset));
  }
  void unspill_sve_predicate(PRegister pr, int offset, int predicate_reg_size_in_bytes) {
    sve_ldr(pr, sve_spill_address(predicate_reg_size_in_bytes, offset));
  }

  void spill_copy128(int src_offset, int dst_offset,
                     Register tmp1 = rscratch1, Register tmp2 = rscratch2) {
    if (src_offset < 512 && (src_offset & 7) == 0 &&
        dst_offset < 512 && (dst_offset & 7) == 0) {
      ldp(tmp1, tmp2, Address(sp, src_offset));
      stp(tmp1, tmp2, Address(sp, dst_offset));
    } else {
      unspill(tmp1, true, src_offset);
      spill(tmp1, true, dst_offset);
      unspill(tmp1, true, src_offset + 8);
      spill(tmp1, true, dst_offset + 8);
    }
  }
  void spill_copy_sve_vector_stack_to_stack(int src_offset, int dst_offset,
                                            int sve_vec_reg_size_in_bytes) {
    assert(sve_vec_reg_size_in_bytes % 16 == 0, "unexpected sve vector reg size");
    for (int i = 0; i < sve_vec_reg_size_in_bytes / 16; i++) {
      spill_copy128(src_offset, dst_offset);
      src_offset += 16;
      dst_offset += 16;
    }
  }
  void spill_copy_sve_predicate_stack_to_stack(int src_offset, int dst_offset,
                                               int sve_predicate_reg_size_in_bytes) {
    sve_ldr(ptrue, sve_spill_address(sve_predicate_reg_size_in_bytes, src_offset));
    sve_str(ptrue, sve_spill_address(sve_predicate_reg_size_in_bytes, dst_offset));
    reinitialize_ptrue();
  }
  void cache_wb(Address line);
  void cache_wbsync(bool is_pre);

  // Code for java.lang.Thread::onSpinWait() intrinsic.
  void spin_wait();

  void lightweight_lock(Register obj, Register t1, Register t2, Register t3, Label& slow);
  void lightweight_unlock(Register obj, Register t1, Register t2, Register t3, Label& slow);

 private:
  // Check that the current thread doesn't need a cross modify fence.
  void verify_cross_modify_fence_not_required() PRODUCT_RETURN;

};

#ifdef ASSERT
inline bool AbstractAssembler::pd_check_instruction_mark() { return false; }
#endif

/**
 * class SkipIfEqual:
 *
 * Instantiating this class will result in assembly code being output that will
 * jump around any code emitted between the creation of the instance and its
 * automatic destruction at the end of a scope block, depending on the value of
 * the flag passed to the constructor, which will be checked at run-time.
 */
class SkipIfEqual {
 private:
  MacroAssembler* _masm;
  Label _label;

 public:
  SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value);
  ~SkipIfEqual();
};

struct tableswitch {
  Register _reg;
  int _insn_index; jint _first_key; jint _last_key;
  Label _after;
  Label _branches;
};

#endif // CPU_AARCH64_MACROASSEMBLER_AARCH64_HPP