/*
 * Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2014, 2021, Red Hat Inc. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef CPU_AARCH64_MACROASSEMBLER_AARCH64_HPP
#define CPU_AARCH64_MACROASSEMBLER_AARCH64_HPP

#include "asm/assembler.inline.hpp"
#include "code/vmreg.hpp"
#include "metaprogramming/enableIf.hpp"
#include "oops/compressedOops.hpp"
#include "runtime/vm_version.hpp"
#include "utilities/macros.hpp"
#include "utilities/powerOfTwo.hpp"
#include "runtime/signature.hpp"


class ciInlineKlass;

class OopMap;

// MacroAssembler extends Assembler by frequently used macros.
//
// Instructions for which a 'better' code sequence exists depending
// on arguments should also go in here.

class MacroAssembler: public Assembler {
  friend class LIR_Assembler;

 public:
  using Assembler::mov;
  using Assembler::movi;

 protected:

  // Support for VM calls
  //
  // This is the base routine called by the different versions of call_VM_leaf. The interpreter
  // may customize this version by overriding it for its purposes (e.g., to save/restore
  // additional registers when doing a VM call).
  virtual void call_VM_leaf_base(
    address entry_point,          // the entry point
    int     number_of_arguments,  // the number of arguments to pop after the call
    Label*  retaddr = NULL
  );

  virtual void call_VM_leaf_base(
    address entry_point,          // the entry point
    int     number_of_arguments,  // the number of arguments to pop after the call
    Label&  retaddr) {
    call_VM_leaf_base(entry_point, number_of_arguments, &retaddr);
  }

  // This is the base routine called by the different versions of call_VM. The interpreter
  // may customize this version by overriding it for its purposes (e.g., to save/restore
  // additional registers when doing a VM call).
  //
  // If no java_thread register is specified (noreg), then rthread will be used instead. call_VM_base
  // returns the register which contains the thread upon return. If a thread register has been
  // specified, the return value will correspond to that register. If no last_java_sp is specified
  // (noreg), then rsp will be used instead.
  virtual void call_VM_base(           // returns the register containing the thread upon return
    Register oop_result,               // where an oop-result ends up if any; use noreg otherwise
    Register java_thread,              // the thread if computed before     ; use noreg otherwise
    Register last_java_sp,             // to set up last_Java_frame in stubs; use noreg otherwise
    address  entry_point,              // the entry point
    int      number_of_arguments,      // the number of arguments (w/o thread) to pop after the call
    bool     check_exceptions          // whether to check for pending exceptions after return
  );

  void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions = true);

  enum KlassDecodeMode {
    KlassDecodeNone,
    KlassDecodeZero,
    KlassDecodeXor,
    KlassDecodeMovk
  };

  KlassDecodeMode klass_decode_mode();

 private:
  static KlassDecodeMode _klass_decode_mode;

 public:
  MacroAssembler(CodeBuffer* code) : Assembler(code) {}

  // These routines should emit JVMTI PopFrame and ForceEarlyReturn handling code.
  // The implementation is only non-empty for the InterpreterMacroAssembler,
  // as only the interpreter handles PopFrame and ForceEarlyReturn requests.
  virtual void check_and_handle_popframe(Register java_thread);
  virtual void check_and_handle_earlyret(Register java_thread);

  void safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod, Register tmp = rscratch1);
  void rt_call(address dest, Register tmp = rscratch1);

  // Load Effective Address
  void lea(Register r, const Address &a) {
    InstructionMark im(this);
    a.lea(this, r);
  }

  /* Sometimes we get misaligned loads and stores, usually from Unsafe
     accesses, and these can exceed the offset range. */
  Address legitimize_address(const Address &a, int size, Register scratch) {
    if (a.getMode() == Address::base_plus_offset) {
      if (! Address::offset_ok_for_immed(a.offset(), exact_log2(size))) {
        block_comment("legitimize_address {");
        lea(scratch, a);
        block_comment("} legitimize_address");
        return Address(scratch);
      }
    }
    return a;
  }

  void addmw(Address a, Register incr, Register scratch) {
    ldrw(scratch, a);
    addw(scratch, scratch, incr);
    strw(scratch, a);
  }

  // Add constant to memory word
  void addmw(Address a, int imm, Register scratch) {
    ldrw(scratch, a);
    if (imm > 0)
      addw(scratch, scratch, (unsigned)imm);
    else
      subw(scratch, scratch, (unsigned)-imm);
    strw(scratch, a);
  }

  void bind(Label& L) {
    Assembler::bind(L);
    code()->clear_last_insn();
  }

  void membar(Membar_mask_bits order_constraint);

  using Assembler::ldr;
  using Assembler::str;
  using Assembler::ldrw;
  using Assembler::strw;

  void ldr(Register Rx, const Address &adr);
  void ldrw(Register Rw, const Address &adr);
  void str(Register Rx, const Address &adr);
  void strw(Register Rx, const Address &adr);

  // Frame creation and destruction shared between JITs.
  void build_frame(int framesize);
  void remove_frame(int framesize);

  virtual void _call_Unimplemented(address call_site) {
    mov(rscratch2, call_site);
  }

  // Microsoft's MSVC team thinks that the __FUNCSIG__ is approximately (sympathy
  // for calling conventions) equivalent to __PRETTY_FUNCTION__.
  // Also, from Clang patch: "It is very similar to GCC's PRETTY_FUNCTION, except
  // it prints the calling convention."
  // https://reviews.llvm.org/D3311

#ifdef _WIN64
#define call_Unimplemented() _call_Unimplemented((address)__FUNCSIG__)
#else
#define call_Unimplemented() _call_Unimplemented((address)__PRETTY_FUNCTION__)
#endif

  // aliases defined in AARCH64 spec

  template<class T>
  inline void cmpw(Register Rd, T imm) { subsw(zr, Rd, imm); }

  inline void cmp(Register Rd, unsigned char imm8) { subs(zr, Rd, imm8); }
  inline void cmp(Register Rd, unsigned imm) = delete;

  template<class T>
  inline void cmnw(Register Rd, T imm) { addsw(zr, Rd, imm); }

  inline void cmn(Register Rd, unsigned char imm8) { adds(zr, Rd, imm8); }
  inline void cmn(Register Rd, unsigned imm) = delete;

  void cset(Register Rd, Assembler::Condition cond) {
    csinc(Rd, zr, zr, ~cond);
  }
  void csetw(Register Rd, Assembler::Condition cond) {
    csincw(Rd, zr, zr, ~cond);
  }

  void cneg(Register Rd, Register Rn, Assembler::Condition cond) {
    csneg(Rd, Rn, Rn, ~cond);
  }
  void cnegw(Register Rd, Register Rn, Assembler::Condition cond) {
    csnegw(Rd, Rn, Rn, ~cond);
  }

  inline void movw(Register Rd, Register Rn) {
    if (Rd == sp || Rn == sp) {
      Assembler::addw(Rd, Rn, 0U);
    } else {
      orrw(Rd, zr, Rn);
    }
  }
  inline void mov(Register Rd, Register Rn) {
    assert(Rd != r31_sp && Rn != r31_sp, "should be");
    if (Rd == Rn) {
    } else if (Rd == sp || Rn == sp) {
      Assembler::add(Rd, Rn, 0U);
    } else {
      orr(Rd, zr, Rn);
    }
  }

  inline void moviw(Register Rd, unsigned imm) { orrw(Rd, zr, imm); }
  inline void movi(Register Rd, unsigned imm) { orr(Rd, zr, imm); }

  inline void tstw(Register Rd, Register Rn) { andsw(zr, Rd, Rn); }
  inline void tst(Register Rd, Register Rn) { ands(zr, Rd, Rn); }

  inline void tstw(Register Rd, uint64_t imm) { andsw(zr, Rd, imm); }
  inline void tst(Register Rd, uint64_t imm) { ands(zr, Rd, imm); }

  inline void bfiw(Register Rd, Register Rn, unsigned lsb, unsigned width) {
    bfmw(Rd, Rn, ((32 - lsb) & 31), (width - 1));
  }
  inline void bfi(Register Rd, Register Rn, unsigned lsb, unsigned width) {
    bfm(Rd, Rn, ((64 - lsb) & 63), (width - 1));
  }

  inline void bfxilw(Register Rd, Register Rn, unsigned lsb, unsigned width) {
    bfmw(Rd, Rn, lsb, (lsb + width - 1));
  }
  inline void bfxil(Register Rd, Register Rn, unsigned lsb, unsigned width) {
    bfm(Rd, Rn, lsb, (lsb + width - 1));
  }

  inline void sbfizw(Register Rd, Register Rn, unsigned lsb, unsigned width) {
    sbfmw(Rd, Rn, ((32 - lsb) & 31), (width - 1));
  }
  inline void sbfiz(Register Rd, Register Rn, unsigned lsb, unsigned width) {
    sbfm(Rd, Rn, ((64 - lsb) & 63), (width - 1));
  }

  inline void sbfxw(Register Rd, Register Rn, unsigned lsb, unsigned width) {
    sbfmw(Rd, Rn, lsb, (lsb + width - 1));
  }
  inline void sbfx(Register Rd, Register Rn, unsigned lsb, unsigned width) {
    sbfm(Rd, Rn, lsb, (lsb + width - 1));
  }

  inline void ubfizw(Register Rd, Register Rn, unsigned lsb, unsigned width) {
    ubfmw(Rd, Rn, ((32 - lsb) & 31), (width - 1));
  }
  inline void ubfiz(Register Rd, Register Rn, unsigned lsb, unsigned width) {
    ubfm(Rd, Rn, ((64 - lsb) & 63), (width - 1));
  }

  inline void ubfxw(Register Rd, Register Rn, unsigned lsb, unsigned width) {
    ubfmw(Rd, Rn, lsb, (lsb + width - 1));
  }
  inline void ubfx(Register Rd, Register Rn, unsigned lsb, unsigned width) {
    ubfm(Rd, Rn, lsb, (lsb + width - 1));
  }

  inline void asrw(Register Rd, Register Rn, unsigned imm) {
    sbfmw(Rd, Rn, imm, 31);
  }

  inline void asr(Register Rd, Register Rn, unsigned imm) {
    sbfm(Rd, Rn, imm, 63);
  }

  inline void lslw(Register Rd, Register Rn, unsigned imm) {
    ubfmw(Rd, Rn, ((32 - imm) & 31), (31 - imm));
  }

  inline void lsl(Register Rd, Register Rn, unsigned imm) {
    ubfm(Rd, Rn, ((64 - imm) & 63), (63 - imm));
  }

  inline void lsrw(Register Rd, Register Rn, unsigned imm) {
    ubfmw(Rd, Rn, imm, 31);
  }

  inline void lsr(Register Rd, Register Rn, unsigned imm) {
    ubfm(Rd, Rn, imm, 63);
  }

  inline void rorw(Register Rd, Register Rn, unsigned imm) {
    extrw(Rd, Rn, Rn, imm);
  }

  inline void ror(Register Rd, Register Rn, unsigned imm) {
    extr(Rd, Rn, Rn, imm);
  }

  inline void sxtbw(Register Rd, Register Rn) {
    sbfmw(Rd, Rn, 0, 7);
  }
  inline void sxthw(Register Rd, Register Rn) {
    sbfmw(Rd, Rn, 0, 15);
  }
  inline void sxtb(Register Rd, Register Rn) {
    sbfm(Rd, Rn, 0, 7);
  }
  inline void sxth(Register Rd, Register Rn) {
    sbfm(Rd, Rn, 0, 15);
  }
  inline void sxtw(Register Rd, Register Rn) {
    sbfm(Rd, Rn, 0, 31);
  }

  inline void uxtbw(Register Rd, Register Rn) {
    ubfmw(Rd, Rn, 0, 7);
  }
  inline void uxthw(Register Rd, Register Rn) {
    ubfmw(Rd, Rn, 0, 15);
  }
  inline void uxtb(Register Rd, Register Rn) {
    ubfm(Rd, Rn, 0, 7);
  }
  inline void uxth(Register Rd, Register Rn) {
    ubfm(Rd, Rn, 0, 15);
  }
  inline void uxtw(Register Rd, Register Rn) {
    ubfm(Rd, Rn, 0, 31);
  }

  inline void cmnw(Register Rn, Register Rm) {
    addsw(zr, Rn, Rm);
  }
  inline void cmn(Register Rn, Register Rm) {
    adds(zr, Rn, Rm);
  }

  inline void cmpw(Register Rn, Register Rm) {
    subsw(zr, Rn, Rm);
  }
  inline void cmp(Register Rn, Register Rm) {
    subs(zr, Rn, Rm);
  }

  inline void negw(Register Rd, Register Rn) {
    subw(Rd, zr, Rn);
  }

  inline void neg(Register Rd, Register Rn) {
    sub(Rd, zr, Rn);
  }

  inline void negsw(Register Rd, Register Rn) {
    subsw(Rd, zr, Rn);
  }

  inline void negs(Register Rd, Register Rn) {
    subs(Rd, zr, Rn);
  }

  inline void cmnw(Register Rn, Register Rm, enum shift_kind kind, unsigned shift = 0) {
    addsw(zr, Rn, Rm, kind, shift);
  }
  inline void cmn(Register Rn, Register Rm, enum shift_kind kind, unsigned shift = 0) {
    adds(zr, Rn, Rm, kind, shift);
  }

  inline void cmpw(Register Rn, Register Rm, enum shift_kind kind, unsigned shift = 0) {
    subsw(zr, Rn, Rm, kind, shift);
  }
  inline void cmp(Register Rn, Register Rm, enum shift_kind kind, unsigned shift = 0) {
    subs(zr, Rn, Rm, kind, shift);
  }

  inline void negw(Register Rd, Register Rn, enum shift_kind kind, unsigned shift = 0) {
    subw(Rd, zr, Rn, kind, shift);
  }

  inline void neg(Register Rd, Register Rn, enum shift_kind kind, unsigned shift = 0) {
    sub(Rd, zr, Rn, kind, shift);
  }

  inline void negsw(Register Rd, Register Rn, enum shift_kind kind, unsigned shift = 0) {
    subsw(Rd, zr, Rn, kind, shift);
  }

  inline void negs(Register Rd, Register Rn, enum shift_kind kind, unsigned shift = 0) {
    subs(Rd, zr, Rn, kind, shift);
  }

  inline void mnegw(Register Rd, Register Rn, Register Rm) {
    msubw(Rd, Rn, Rm, zr);
  }
  inline void mneg(Register Rd,
                   Register Rn, Register Rm) {
    msub(Rd, Rn, Rm, zr);
  }

  inline void mulw(Register Rd, Register Rn, Register Rm) {
    maddw(Rd, Rn, Rm, zr);
  }
  inline void mul(Register Rd, Register Rn, Register Rm) {
    madd(Rd, Rn, Rm, zr);
  }

  inline void smnegl(Register Rd, Register Rn, Register Rm) {
    smsubl(Rd, Rn, Rm, zr);
  }
  inline void smull(Register Rd, Register Rn, Register Rm) {
    smaddl(Rd, Rn, Rm, zr);
  }

  inline void umnegl(Register Rd, Register Rn, Register Rm) {
    umsubl(Rd, Rn, Rm, zr);
  }
  inline void umull(Register Rd, Register Rn, Register Rm) {
    umaddl(Rd, Rn, Rm, zr);
  }

#define WRAP(INSN)                                                      \
  void INSN(Register Rd, Register Rn, Register Rm, Register Ra) {      \
    if (VM_Version::supports_a53mac() && Ra != zr)                     \
      nop();                                                           \
    Assembler::INSN(Rd, Rn, Rm, Ra);                                   \
  }

  WRAP(madd) WRAP(msub) WRAP(maddw) WRAP(msubw)
  WRAP(smaddl) WRAP(smsubl) WRAP(umaddl) WRAP(umsubl)
#undef WRAP


  // macro assembly operations needed for aarch64

  // first two private routines for loading 32 bit or 64 bit constants
 private:

  void mov_immediate64(Register dst, uint64_t imm64);
  void mov_immediate32(Register dst, uint32_t imm32);

  int push(unsigned int bitset, Register stack);
  int pop(unsigned int bitset, Register stack);

  int push_fp(unsigned int bitset, Register stack);
  int pop_fp(unsigned int bitset, Register stack);

  int push_p(unsigned int bitset, Register stack);
  int pop_p(unsigned int bitset, Register stack);

  void mov(Register dst, Address a);

 public:
  void push(RegSet regs, Register stack) { if (regs.bits()) push(regs.bits(), stack); }
  void pop(RegSet regs, Register stack) { if (regs.bits()) pop(regs.bits(), stack); }

  void push_fp(FloatRegSet regs, Register stack) { if (regs.bits()) push_fp(regs.bits(), stack); }
  void pop_fp(FloatRegSet regs, Register stack) { if (regs.bits()) pop_fp(regs.bits(), stack); }

  static RegSet call_clobbered_gp_registers();

  void push_p(PRegSet regs, Register stack) { if (regs.bits()) push_p(regs.bits(), stack); }
  void pop_p(PRegSet regs, Register stack) { if (regs.bits()) pop_p(regs.bits(), stack); }

  // Push and pop everything that might be clobbered by a native
  // runtime call except rscratch1 and rscratch2. (They are always
  // scratch, so we don't have to protect them.) Only save the lower
  // 64 bits of each vector register. Additional registers can be excluded
  // in a passed RegSet.
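  //
  // Illustrative usage (a sketch, not code from this file): preserving the
  // call-clobbered state around a leaf runtime call while keeping the call's
  // result in r0 out of the restore set. some_runtime_helper is a
  // hypothetical C entry point used only for illustration.
  //
  //   push_call_clobbered_registers_except(RegSet::of(r0));
  //   rt_call(CAST_FROM_FN_PTR(address, some_runtime_helper));
  //   pop_call_clobbered_registers_except(RegSet::of(r0));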
  void push_call_clobbered_registers_except(RegSet exclude);
  void pop_call_clobbered_registers_except(RegSet exclude);

  void push_call_clobbered_registers() {
    push_call_clobbered_registers_except(RegSet());
  }
  void pop_call_clobbered_registers() {
    pop_call_clobbered_registers_except(RegSet());
  }


  // now mov instructions for loading absolute addresses and 32 or
  // 64 bit integers

  inline void mov(Register dst, address addr)     { mov_immediate64(dst, (uint64_t)addr); }

  template<typename T, ENABLE_IF(std::is_integral<T>::value)>
  inline void mov(Register dst, T o)              { mov_immediate64(dst, (uint64_t)o); }

  inline void movw(Register dst, uint32_t imm32)  { mov_immediate32(dst, imm32); }

  void mov(Register dst, RegisterOrConstant src) {
    if (src.is_register())
      mov(dst, src.as_register());
    else
      mov(dst, src.as_constant());
  }

  void movptr(Register r, uintptr_t imm64);

  void mov(FloatRegister Vd, SIMD_Arrangement T, uint64_t imm64);

  void mov(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) {
    orr(Vd, T, Vn, Vn);
  }


 public:

  // Generalized Test Bit And Branch, including a "far" variety which
  // spans more than 32KiB.
  void tbr(Condition cond, Register Rt, int bitpos, Label &dest, bool isfar = false) {
    assert(cond == EQ || cond == NE, "must be");

    if (isfar)
      cond = ~cond;

    void (Assembler::* branch)(Register Rt, int bitpos, Label &L);
    if (cond == Assembler::EQ)
      branch = &Assembler::tbz;
    else
      branch = &Assembler::tbnz;

    if (isfar) {
      Label L;
      (this->*branch)(Rt, bitpos, L);
      b(dest);
      bind(L);
    } else {
      (this->*branch)(Rt, bitpos, dest);
    }
  }

  // macro instructions for accessing and updating the floating point
  // status register
  //
  // FPSR : op1 == 011
  //        CRn == 0100
  //        CRm == 0100
  //        op2 == 001

  inline void get_fpsr(Register reg)
  {
    mrs(0b11, 0b0100, 0b0100, 0b001, reg);
  }

  inline void set_fpsr(Register reg)
  {
    msr(0b011, 0b0100, 0b0100, 0b001, reg);
  }

  inline void clear_fpsr()
  {
    msr(0b011, 0b0100, 0b0100, 0b001, zr);
  }

  // DCZID_EL0: op1 == 011
  //            CRn == 0000
  //            CRm == 0000
  //            op2 == 111
  inline void get_dczid_el0(Register reg)
  {
    mrs(0b011, 0b0000, 0b0000, 0b111, reg);
  }

  // CTR_EL0:   op1 == 011
  //            CRn == 0000
  //            CRm == 0000
  //            op2 == 001
  inline void get_ctr_el0(Register reg)
  {
    mrs(0b011, 0b0000, 0b0000, 0b001, reg);
  }

  // idiv variants which deal with MINLONG as dividend and -1 as divisor
  int corrected_idivl(Register result, Register ra, Register rb,
                      bool want_remainder, Register tmp = rscratch1);
  int corrected_idivq(Register result, Register ra, Register rb,
                      bool want_remainder, Register tmp = rscratch1);

  // Support for NULL-checks
  //
  // Generates code that causes a NULL OS exception if the content of reg is NULL.
  // If the accessed location is M[reg + offset] and the offset is known, provide the
  // offset. No explicit code generation is needed if the offset is within a certain
  // range (0 <= offset <= page_size).
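  //
  // Illustrative usage (a sketch): robj and byte_offset are placeholders for
  // the receiver register and a statically known field offset. For small
  // offsets this typically emits nothing and relies on the implicit check;
  // for large offsets an explicit check is generated.
  //
  //   null_check(robj, byte_offset);
  //   ldr(r0, Address(robj, byte_offset));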
  virtual void null_check(Register reg, int offset = -1);
  static bool needs_explicit_null_check(intptr_t offset);
  static bool uses_implicit_null_check(void* address);

  // markWord tests, kills markWord reg
  void test_markword_is_inline_type(Register markword, Label& is_inline_type);

  // inlineKlass queries, kills temp_reg
  void test_klass_is_inline_type(Register klass, Register temp_reg, Label& is_inline_type);
  void test_klass_is_empty_inline_type(Register klass, Register temp_reg, Label& is_empty_inline_type);
  void test_oop_is_not_inline_type(Register object, Register tmp, Label& not_inline_type);

  // Get the default value oop for the given InlineKlass
  void get_default_value_oop(Register inline_klass, Register temp_reg, Register obj);
  // The empty value oop, for the given InlineKlass ("empty" as in no instance fields)
  // get_default_value_oop with extra assertion for empty inline klass
  void get_empty_inline_type_oop(Register inline_klass, Register temp_reg, Register obj);

  void test_field_is_null_free_inline_type(Register flags, Register temp_reg, Label& is_null_free);
  void test_field_is_not_null_free_inline_type(Register flags, Register temp_reg, Label& not_null_free);
  void test_field_is_inlined(Register flags, Register temp_reg, Label& is_flattened);

  // Check oops for special arrays, i.e. flattened and/or null-free
  void test_oop_prototype_bit(Register oop, Register temp_reg, int32_t test_bit, bool jmp_set, Label& jmp_label);
  void test_flattened_array_oop(Register klass, Register temp_reg, Label& is_flattened_array);
  void test_non_flattened_array_oop(Register oop, Register temp_reg, Label& is_non_flattened_array);
  void test_null_free_array_oop(Register oop, Register temp_reg, Label& is_null_free_array);
  void test_non_null_free_array_oop(Register oop, Register temp_reg, Label& is_non_null_free_array);

  // Check array klass layout helper for flattened or null-free arrays...
  void test_flattened_array_layout(Register lh, Label& is_flattened_array);
  void test_non_flattened_array_layout(Register lh, Label& is_non_flattened_array);
  void test_null_free_array_layout(Register lh, Label& is_null_free_array);
  void test_non_null_free_array_layout(Register lh, Label& is_non_null_free_array);

  static address target_addr_for_insn(address insn_addr, unsigned insn);
  static address target_addr_for_insn_or_null(address insn_addr, unsigned insn);
  static address target_addr_for_insn(address insn_addr) {
    unsigned insn = *(unsigned*)insn_addr;
    return target_addr_for_insn(insn_addr, insn);
  }
  static address target_addr_for_insn_or_null(address insn_addr) {
    unsigned insn = *(unsigned*)insn_addr;
    return target_addr_for_insn_or_null(insn_addr, insn);
  }

  // Required platform-specific helpers for Label::patch_instructions.
  // They _shadow_ the declarations in AbstractAssembler, which are undefined.
  static int pd_patch_instruction_size(address branch, address target);
  static void pd_patch_instruction(address branch, address target, const char* file = NULL, int line = 0) {
    pd_patch_instruction_size(branch, target);
  }
  static address pd_call_destination(address branch) {
    return target_addr_for_insn(branch);
  }
#ifndef PRODUCT
  static void pd_print_patched_instruction(address branch);
#endif

  static int patch_oop(address insn_addr, address o);
  static int patch_narrow_klass(address insn_addr, narrowKlass n);

  // Return whether code is emitted to a scratch blob.
  virtual bool in_scratch_emit_size() {
    return false;
  }
  address emit_trampoline_stub(int insts_call_instruction_offset, address target);
  void emit_static_call_stub();

  // The following 4 methods return the offset of the appropriate move instruction

  // Support for fast byte/short loading with zero extension (depending on particular CPU)
  int load_unsigned_byte(Register dst, Address src);
  int load_unsigned_short(Register dst, Address src);

  // Support for fast byte/short loading with sign extension (depending on particular CPU)
  int load_signed_byte(Register dst, Address src);
  int load_signed_short(Register dst, Address src);

  int load_signed_byte32(Register dst, Address src);
  int load_signed_short32(Register dst, Address src);

  // Support for sign-extension (hi:lo = extend_sign(lo))
  void extend_sign(Register hi, Register lo);

  // Load and store values by size and signed-ness
  void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed);
  void store_sized_value(Address dst, Register src, size_t size_in_bytes);

  // Support for inc/dec with optimal instruction selection depending on value

  // x86_64 aliases an unqualified register/address increment and
  // decrement to call incrementq and decrementq but also supports
  // explicitly sized calls to incrementq/decrementq or
  // incrementl/decrementl

  // for aarch64 the proper convention would be to use
  // increment/decrement for 64 bit operations and
  // incrementw/decrementw for 32 bit operations. so when porting
  // x86_64 code we can leave calls to increment/decrement as is,
  // replace incrementq/decrementq with increment/decrement and
  // replace incrementl/decrementl with incrementw/decrementw.

  // n.b. increment/decrement calls with an Address destination will
  // need to use a scratch register to load the value to be
  // incremented. increment/decrement calls which add or subtract a
  // constant value greater than 2^12 will need to use a 2nd scratch
  // register to hold the constant.
  // So a register increment/decrement may trash rscratch2, and an address
  // increment/decrement may trash rscratch1 and rscratch2.

  void decrementw(Address dst, int value = 1);
  void decrementw(Register reg, int value = 1);

  void decrement(Register reg, int value = 1);
  void decrement(Address dst, int value = 1);

  void incrementw(Address dst, int value = 1);
  void incrementw(Register reg, int value = 1);

  void increment(Register reg, int value = 1);
  void increment(Address dst, int value = 1);


  // Alignment
  void align(int modulus);

  // nop
  void post_call_nop();

  // Stack frame creation/removal
  void enter(bool strip_ret_addr = false);
  void leave();

  // ROP Protection
  void protect_return_address();
  void protect_return_address(Register return_reg, Register temp_reg);
  void authenticate_return_address(Register return_reg = lr);
  void authenticate_return_address(Register return_reg, Register temp_reg);
  void strip_return_address();
  void check_return_address(Register return_reg = lr) PRODUCT_RETURN;

  // Support for getting the JavaThread pointer (i.e.; a reference to thread-local information)
  // The pointer will be loaded into the thread register.
  void get_thread(Register thread);

  // support for argument shuffling
  void move32_64(VMRegPair src, VMRegPair dst, Register tmp = rscratch1);
  void float_move(VMRegPair src, VMRegPair dst, Register tmp = rscratch1);
  void long_move(VMRegPair src, VMRegPair dst, Register tmp = rscratch1);
  void double_move(VMRegPair src, VMRegPair dst, Register tmp = rscratch1);
  void object_move(OopMap* map,
                   int oop_handle_offset,
                   int framesize_in_slots,
                   VMRegPair src,
                   VMRegPair dst,
                   bool is_receiver,
                   int* receiver_offset);


  // Support for VM calls
  //
  // It is imperative that all calls into the VM are handled via the call_VM macros.
  // They make sure that the stack linkage is set up correctly. call_VM's correspond
  // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points.
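  //
  // Illustrative usage (a sketch; some_vm_entry is a hypothetical VM entry
  // point and the argument registers are placeholders):
  //
  //   call_VM(r0 /* oop result */,
  //           CAST_FROM_FN_PTR(address, some_vm_entry),
  //           c_rarg1, c_rarg2);
  //
  // This sets up the last Java frame, performs the call and, unless
  // check_exceptions is false, checks for a pending exception on return.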
  void call_VM(Register oop_result,
               address entry_point,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               address entry_point,
               Register arg_1,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               address entry_point,
               Register arg_1, Register arg_2,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               address entry_point,
               Register arg_1, Register arg_2, Register arg_3,
               bool check_exceptions = true);

  // Overloadings with last_Java_sp
  void call_VM(Register oop_result,
               Register last_java_sp,
               address entry_point,
               int number_of_arguments = 0,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               Register last_java_sp,
               address entry_point,
               Register arg_1,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               Register last_java_sp,
               address entry_point,
               Register arg_1, Register arg_2,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               Register last_java_sp,
               address entry_point,
               Register arg_1, Register arg_2, Register arg_3,
               bool check_exceptions = true);

  void get_vm_result  (Register oop_result, Register thread);
  void get_vm_result_2(Register metadata_result, Register thread);

  // These always tightly bind to MacroAssembler::call_VM_base
  // bypassing the virtual implementation
  void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments = 0, bool check_exceptions = true);
  void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions = true);
  void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions = true);
  void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions = true);
  void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4, bool check_exceptions = true);

  void call_VM_leaf(address entry_point,
                    int number_of_arguments = 0);
  void call_VM_leaf(address entry_point,
                    Register arg_1);
  void call_VM_leaf(address entry_point,
                    Register arg_1, Register arg_2);
  void call_VM_leaf(address entry_point,
                    Register arg_1, Register arg_2, Register arg_3);

  // These always tightly bind to MacroAssembler::call_VM_leaf_base
  // bypassing the virtual implementation
  void super_call_VM_leaf(address entry_point);
  void super_call_VM_leaf(address entry_point, Register arg_1);
  void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2);
  void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3);
  void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4);

  // last Java Frame (fills frame anchor)
  void set_last_Java_frame(Register last_java_sp,
                           Register last_java_fp,
                           address last_java_pc,
                           Register scratch);

  void set_last_Java_frame(Register last_java_sp,
                           Register last_java_fp,
                           Label &last_java_pc,
                           Register scratch);

  void set_last_Java_frame(Register last_java_sp,
                           Register last_java_fp,
                           Register last_java_pc,
                           Register scratch);

  void
  reset_last_Java_frame(Register thread);

  // thread in the default location (rthread)
  void reset_last_Java_frame(bool clear_fp);

  // Stores
  void store_check(Register obj);               // store check for obj - register is destroyed afterwards
  void store_check(Register obj, Address dst);  // same as above, dst is exact store location (reg. is destroyed)

  void resolve_jobject(Register value, Register tmp1, Register tmp2);

  // C 'boolean' to Java boolean: x == 0 ? 0 : 1
  void c2bool(Register x);

  void load_method_holder_cld(Register rresult, Register rmethod);
  void load_method_holder(Register holder, Register method);

  // oop manipulations
  void load_metadata(Register dst, Register src);

  void load_klass(Register dst, Register src);
  void store_klass(Register dst, Register src);
  void cmp_klass(Register oop, Register trial_klass, Register tmp);

  void resolve_weak_handle(Register result, Register tmp1, Register tmp2);
  void resolve_oop_handle(Register result, Register tmp1, Register tmp2);
  void load_mirror(Register dst, Register method, Register tmp1, Register tmp2);

  void access_load_at(BasicType type, DecoratorSet decorators, Register dst, Address src,
                      Register tmp1, Register tmp2);

  void access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register val,
                       Register tmp1, Register tmp2, Register tmp3);

  void access_value_copy(DecoratorSet decorators, Register src, Register dst, Register inline_klass);

  // inline type data payload offsets...
  void first_field_offset(Register inline_klass, Register offset);
  void data_for_oop(Register oop, Register data, Register inline_klass);
  // get the data payload ptr of a flat value array at index, kills rcx and index
  void data_for_value_array_index(Register array, Register array_klass,
                                  Register index, Register data);

  void load_heap_oop(Register dst, Address src, Register tmp1,
                     Register tmp2, DecoratorSet decorators = 0);

  void load_heap_oop_not_null(Register dst, Address src, Register tmp1,
                              Register tmp2, DecoratorSet decorators = 0);
  void store_heap_oop(Address dst, Register val, Register tmp1,
                      Register tmp2, Register tmp3, DecoratorSet decorators = 0);

  // currently unimplemented
  // Used for storing NULL. All other oop constants should be
  // stored using routines that take a jobject.
  void store_heap_oop_null(Address dst);

  void load_prototype_header(Register dst, Register src);

  void store_klass_gap(Register dst, Register src);

  // This dummy is to prevent a call to store_heap_oop from
  // converting a zero (like NULL) into a Register by giving
  // the compiler two choices it can't resolve

  void store_heap_oop(Address dst, void* dummy);

  void encode_heap_oop(Register d, Register s);
  void encode_heap_oop(Register r) { encode_heap_oop(r, r); }
  void decode_heap_oop(Register d, Register s);
  void decode_heap_oop(Register r) { decode_heap_oop(r, r); }
  void encode_heap_oop_not_null(Register r);
  void decode_heap_oop_not_null(Register r);
  void encode_heap_oop_not_null(Register dst, Register src);
  void decode_heap_oop_not_null(Register dst, Register src);

  void set_narrow_oop(Register dst, jobject obj);

  void encode_klass_not_null(Register r);
  void decode_klass_not_null(Register r);
  void encode_klass_not_null(Register dst, Register src);
  void decode_klass_not_null(Register dst, Register src);

  void set_narrow_klass(Register dst, Klass* k);

  // if heap base register is used - reinit it with the correct value
  void reinit_heapbase();

  DEBUG_ONLY(void verify_heapbase(const char* msg);)

  void push_CPU_state(bool save_vectors = false, bool use_sve = false,
                      int sve_vector_size_in_bytes = 0, int total_predicate_in_bytes = 0);
  void pop_CPU_state(bool restore_vectors = false, bool use_sve = false,
                     int sve_vector_size_in_bytes = 0, int total_predicate_in_bytes = 0);

  void push_cont_fastpath(Register java_thread);
  void pop_cont_fastpath(Register java_thread);

  // Round up to a power of two
  void round_to(Register reg, int modulus);

  // java.lang.Math::round intrinsics
  void java_round_double(Register dst, FloatRegister src, FloatRegister ftmp);
  void java_round_float(Register dst, FloatRegister src, FloatRegister ftmp);

  // allocation

  // Object / value buffer allocation...
  // Allocate instance of klass, assumes klass initialized by caller
  // new_obj prefers to be rax
  // Kills t1 and t2, preserves klass, return allocation in new_obj (rsi on LP64)
  void allocate_instance(Register klass, Register new_obj,
                         Register t1, Register t2,
                         bool clear_fields, Label& alloc_failed);

  void tlab_allocate(
    Register obj,                // result: pointer to object after successful allocation
    Register var_size_in_bytes,  // object size in bytes if unknown at compile time; invalid otherwise
    int      con_size_in_bytes,  // object size in bytes if known at compile time
    Register t1,                 // temp register
    Register t2,                 // temp register
    Label&   slow_case           // continuation point if fast allocation fails
  );
  void verify_tlab();

  // For field "index" within "klass", return inline_klass ...
  void get_inline_type_field_klass(Register klass, Register index, Register inline_klass);

  // interface method calling
  void lookup_interface_method(Register recv_klass,
                               Register intf_klass,
                               RegisterOrConstant itable_index,
                               Register method_result,
                               Register scan_temp,
                               Label& no_such_interface,
                               bool return_method = true);

  // virtual method calling
  // n.b.
  // x86 allows RegisterOrConstant for vtable_index
  void lookup_virtual_method(Register recv_klass,
                             RegisterOrConstant vtable_index,
                             Register method_result);

  // Test sub_klass against super_klass, with fast and slow paths.

  // The fast path produces a tri-state answer: yes / no / maybe-slow.
  // One of the three labels can be NULL, meaning take the fall-through.
  // If super_check_offset is -1, the value is loaded up from super_klass.
  // No registers are killed, except temp_reg.
  void check_klass_subtype_fast_path(Register sub_klass,
                                     Register super_klass,
                                     Register temp_reg,
                                     Label* L_success,
                                     Label* L_failure,
                                     Label* L_slow_path,
                                     RegisterOrConstant super_check_offset = RegisterOrConstant(-1));

  // The rest of the type check; must be wired to a corresponding fast path.
  // It does not repeat the fast path logic, so don't use it standalone.
  // The temp_reg and temp2_reg can be noreg, if no temps are available.
  // Updates the sub's secondary super cache as necessary.
  // If set_cond_codes, condition codes will be Z on success, NZ on failure.
  void check_klass_subtype_slow_path(Register sub_klass,
                                     Register super_klass,
                                     Register temp_reg,
                                     Register temp2_reg,
                                     Label* L_success,
                                     Label* L_failure,
                                     bool set_cond_codes = false);

  // Simplified, combined version, good for typical uses.
  // Falls through on failure.
  void check_klass_subtype(Register sub_klass,
                           Register super_klass,
                           Register temp_reg,
                           Label& L_success);

  void clinit_barrier(Register klass,
                      Register thread,
                      Label* L_fast_path = NULL,
                      Label* L_slow_path = NULL);

  Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0);

  void verify_sve_vector_length(Register tmp = rscratch1);
  void reinitialize_ptrue() {
    if (UseSVE > 0) {
      sve_ptrue(ptrue, B);
    }
  }
  void verify_ptrue();

  // Debugging

  // only if +VerifyOops
  void _verify_oop(Register reg, const char* s, const char* file, int line);
  void _verify_oop_addr(Address addr, const char* s, const char* file, int line);

  void _verify_oop_checked(Register reg, const char* s, const char* file, int line) {
    if (VerifyOops) {
      _verify_oop(reg, s, file, line);
    }
  }
  void _verify_oop_addr_checked(Address reg, const char* s, const char* file, int line) {
    if (VerifyOops) {
      _verify_oop_addr(reg, s, file, line);
    }
  }

  // TODO: verify method and klass metadata (compare against vptr?)
  void _verify_method_ptr(Register reg, const char* msg, const char* file, int line) {}
  void _verify_klass_ptr(Register reg, const char* msg, const char* file, int line) {}

#define verify_oop(reg) _verify_oop_checked(reg, "broken oop " #reg, __FILE__, __LINE__)
#define verify_oop_msg(reg, msg) _verify_oop_checked(reg, "broken oop " #reg ", " #msg, __FILE__, __LINE__)
#define verify_oop_addr(addr) _verify_oop_addr_checked(addr, "broken oop addr " #addr, __FILE__, __LINE__)
#define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__)
#define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__)

  // only if +VerifyFPU
  void verify_FPU(int stack_depth, const char* s = "illegal FPU state");

  // prints msg, dumps registers and stops execution
  void stop(const char* msg);

  static void debug64(char* msg, int64_t pc, int64_t regs[]);

  void untested()                { stop("untested"); }

  void unimplemented(const char* what = "");

  void should_not_reach_here()   { stop("should not reach here"); }

  void _assert_asm(Condition cc, const char* msg);
#define assert_asm0(cc, msg) _assert_asm(cc, FILE_AND_LINE ": " msg)
#define assert_asm(masm, command, cc, msg) DEBUG_ONLY(command; (masm)->_assert_asm(cc, FILE_AND_LINE ": " #command " " #cc ": " msg))

  // Stack overflow checking
  void bang_stack_with_offset(int offset) {
    // stack grows down, caller passes positive offset
    assert(offset > 0, "must bang with negative offset");
    sub(rscratch2, sp, offset);
    str(zr, Address(rscratch2));
  }

  // Writes to stack successive pages until offset reached to check for
  // stack overflow + shadow pages.
  // Also clobbers tmp.
  void bang_stack_size(Register size, Register tmp);

  // Check for reserved stack access in method being exited (for JIT)
  void reserved_stack_check();

  // Arithmetics

  void addptr(const Address &dst, int32_t src);
  void cmpptr(Register src1, Address src2);

  void cmpoop(Register obj1, Register obj2);

  // Various forms of CAS

  void cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp,
                          Label &succeed, Label *fail);
  void cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp,
                  Label &succeed, Label *fail);

  void cmpxchgw(Register oldv, Register newv, Register addr, Register tmp,
                Label &succeed, Label *fail);

  void atomic_add(Register prev, RegisterOrConstant incr, Register addr);
  void atomic_addw(Register prev, RegisterOrConstant incr, Register addr);
  void atomic_addal(Register prev, RegisterOrConstant incr, Register addr);
  void atomic_addalw(Register prev, RegisterOrConstant incr, Register addr);

  void atomic_xchg(Register prev, Register newv, Register addr);
  void atomic_xchgw(Register prev, Register newv, Register addr);
  void atomic_xchgl(Register prev, Register newv, Register addr);
  void atomic_xchglw(Register prev, Register newv, Register addr);
  void atomic_xchgal(Register prev, Register newv, Register addr);
  void atomic_xchgalw(Register prev, Register newv, Register addr);

  void orptr(Address adr, RegisterOrConstant src) {
    ldr(rscratch1, adr);
    if (src.is_register())
      orr(rscratch1, rscratch1, src.as_register());
    else
      orr(rscratch1, rscratch1, src.as_constant());
    str(rscratch1, adr);
  }

  // A generic CAS; success or failure is in the EQ flag.
  // Clobbers rscratch1
  void cmpxchg(Register addr, Register expected, Register new_val,
               enum operand_size size,
               bool acquire, bool release, bool weak,
               Register result);

#ifdef ASSERT
  // Template short-hand support to clean-up after a failed call to trampoline
  // call generation (see trampoline_call() below), when a set of Labels must
  // be reset (before returning).
  template<typename Label, typename... More>
  void reset_labels(Label &lbl, More&... more) {
    lbl.reset(); reset_labels(more...);
  }
  template<typename Label>
  void reset_labels(Label &lbl) {
    lbl.reset();
  }
#endif

 private:
  void compare_eq(Register rn, Register rm, enum operand_size size);

 public:
  // AArch64 OpenJDK uses four different types of calls:
  //   - direct call: bl pc_relative_offset
  //     This is the shortest and the fastest, but the offset has the range:
  //     +/-128MB for the release build, +/-2MB for the debug build.
  //
  //   - far call: adrp reg, pc_relative_offset; add; bl reg
  //     This is longer than a direct call. The offset has
  //     the range +/-4GB. As the code cache size is limited to 4GB,
  //     far calls can reach anywhere in the code cache. If a jump is
  //     needed rather than a call, a far jump 'b reg' can be used instead.
  //     All instructions are embedded at a call site.
  //
  //   - trampoline call:
  //     This is only available in C1/C2-generated code (nmethod). It is a combination
  //     of a direct call, which is used if the destination of a call is in range,
  //     and a register-indirect call.
  //     It has the advantages of reaching anywhere in
  //     the AArch64 address space and being patchable at runtime when the generated
  //     code is being executed by other threads.
  //
  //     [Main code section]
  //       bl trampoline
  //     [Stub code section]
  //     trampoline:
  //       ldr reg, pc + 8
  //       br reg
  //       <64-bit destination address>
  //
  //     If the destination is in range when the generated code is moved to the code
  //     cache, 'bl trampoline' is replaced with 'bl destination' and the trampoline
  //     is not used.
  //     The optimization does not remove the trampoline from the stub section.
  //     This is necessary because the trampoline may well be redirected later when
  //     code is patched, and the new destination may not be reachable by a simple BR
  //     instruction.
  //
  //   - indirect call: move reg, address; blr reg
  //     This too can reach anywhere in the address space, but it cannot be
  //     patched while code is running, so it must only be modified at a safepoint.
  //     This form of call is most suitable for targets at fixed addresses, which
  //     will never be patched.
  //
  // The patching we do conforms to the "Concurrent modification and
  // execution of instructions" section of the Arm Architectural
  // Reference Manual, which only allows B, BL, BRK, HVC, ISB, NOP, SMC,
  // or SVC instructions to be modified while another thread is
  // executing them.
  //
  // To patch a trampoline call when the BL can't reach, we first modify
  // the 64-bit destination address in the trampoline, then modify the
  // BL to point to the trampoline, then flush the instruction cache to
  // broadcast the change to all executing threads. See
  // NativeCall::set_destination_mt_safe for the details.
  //
  // There is a benign race in that the other thread might observe the
  // modified BL before it observes the modified 64-bit destination
  // address. That does not matter because the destination method has been
  // invalidated, so there will be a trap at its start.
  // For this to work, the destination address in the trampoline is
  // always updated, even if we're not using the trampoline.

  // Emit a direct call if the entry address will always be in range,
  // otherwise a trampoline call.
  // Supported entry.rspec():
  //   - relocInfo::runtime_call_type
  //   - relocInfo::opt_virtual_call_type
  //   - relocInfo::static_call_type
  //   - relocInfo::virtual_call_type
  //
  // Return: the call PC or NULL if CodeCache is full.
  address trampoline_call(Address entry);

  static bool far_branches() {
    return ReservedCodeCacheSize > branch_range;
  }

  // Check if branches to the non nmethod section require a far jump
  static bool codestub_branch_needs_far_jump() {
    return CodeCache::max_distance_to_non_nmethod() > branch_range;
  }

  // Emit a direct call/jump if the entry address will always be in range,
  // otherwise a far call/jump.
  // The address must be inside the code cache.
  // Supported entry.rspec():
  //   - relocInfo::external_word_type
  //   - relocInfo::runtime_call_type
  //   - relocInfo::none
  // In the case of a far call/jump, the entry address is put in the tmp register.
  // The tmp register is invalidated.
  //
  // Far_jump returns the amount of the emitted code.
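  //
  // Illustrative usage (a sketch): calling a stub that is known to live in
  // the code cache, with rscratch1 as the default temporary:
  //
  //   far_call(RuntimeAddress(StubRoutines::forward_exception_entry()));
  //
  // This emits a single 'bl' when the target is within branch range and the
  // longer far-call sequence described above otherwise.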
  void far_call(Address entry, Register tmp = rscratch1);
  int  far_jump(Address entry, Register tmp = rscratch1);

  static int far_codestub_branch_size() {
    if (codestub_branch_needs_far_jump()) {
      return 3 * 4;  // adrp, add, br
    } else {
      return 4;
    }
  }

  // Emit the CompiledIC call idiom
  address ic_call(address entry, jint method_index = 0);

 public:

  // Data

  void mov_metadata(Register dst, Metadata* obj);
  Address allocate_metadata_address(Metadata* obj);
  Address constant_oop_address(jobject obj);

  void movoop(Register dst, jobject obj);

  // CRC32 code for java.util.zip.CRC32::updateBytes() intrinsic.
  void kernel_crc32(Register crc, Register buf, Register len,
                    Register table0, Register table1, Register table2, Register table3,
                    Register tmp, Register tmp2, Register tmp3);
  // CRC32 code for java.util.zip.CRC32C::updateBytes() intrinsic.
  void kernel_crc32c(Register crc, Register buf, Register len,
                     Register table0, Register table1, Register table2, Register table3,
                     Register tmp, Register tmp2, Register tmp3);

  // Stack push and pop individual 64 bit registers
  void push(Register src);
  void pop(Register dst);

  void repne_scan(Register addr, Register value, Register count,
                  Register scratch);
  void repne_scanw(Register addr, Register value, Register count,
                   Register scratch);

  typedef void (MacroAssembler::* add_sub_imm_insn)(Register Rd, Register Rn, unsigned imm);
  typedef void (MacroAssembler::* add_sub_reg_insn)(Register Rd, Register Rn, Register Rm, enum shift_kind kind, unsigned shift);

  // If a constant does not fit in an immediate field, generate some
  // number of MOV instructions and then perform the operation
  void wrap_add_sub_imm_insn(Register Rd, Register Rn, uint64_t imm,
                             add_sub_imm_insn insn1,
                             add_sub_reg_insn insn2, bool is32);
  // Separate version which sets the flags
  void wrap_adds_subs_imm_insn(Register Rd, Register Rn, uint64_t imm,
                               add_sub_imm_insn insn1,
                               add_sub_reg_insn insn2, bool is32);

#define WRAP(INSN, is32)                                                \
  void INSN(Register Rd, Register Rn, uint64_t imm) {                  \
    wrap_add_sub_imm_insn(Rd, Rn, imm, &Assembler::INSN, &Assembler::INSN, is32); \
  }                                                                     \
                                                                        \
  void INSN(Register Rd, Register Rn, Register Rm,                     \
            enum shift_kind kind, unsigned shift = 0) {                \
    Assembler::INSN(Rd, Rn, Rm, kind, shift);                          \
  }                                                                     \
                                                                        \
  void INSN(Register Rd, Register Rn, Register Rm) {                   \
    Assembler::INSN(Rd, Rn, Rm);                                       \
  }                                                                     \
                                                                        \
  void INSN(Register Rd, Register Rn, Register Rm,                     \
            ext::operation option, int amount = 0) {                   \
    Assembler::INSN(Rd, Rn, Rm, option, amount);                       \
  }

  WRAP(add, false) WRAP(addw, true) WRAP(sub, false) WRAP(subw, true)

#undef WRAP
#define WRAP(INSN, is32)                                                \
  void INSN(Register Rd, Register Rn, uint64_t imm) {                  \
    wrap_adds_subs_imm_insn(Rd, Rn, imm, &Assembler::INSN, &Assembler::INSN, is32); \
  }                                                                     \
                                                                        \
  void INSN(Register Rd, Register Rn, Register Rm,                     \
            enum shift_kind kind, unsigned shift = 0) {                \
    Assembler::INSN(Rd, Rn, Rm, kind, shift);                          \
  }                                                                     \
                                                                        \
  void INSN(Register Rd, Register Rn, Register Rm) {                   \
    Assembler::INSN(Rd, Rn, Rm);                                       \
  }                                                                     \
                                                                        \
  void INSN(Register Rd, Register Rn, Register Rm,                     \
            ext::operation option, int amount = 0) {                   \
    Assembler::INSN(Rd, Rn, Rm, option, amount);                       \
  }

  WRAP(adds, false) WRAP(addsw, true) WRAP(subs, false) WRAP(subsw, true)

  void add(Register Rd, Register Rn, RegisterOrConstant increment);
  void addw(Register Rd, Register Rn, RegisterOrConstant increment);
  void sub(Register Rd, Register Rn, RegisterOrConstant decrement);
  void subw(Register Rd, Register Rn, RegisterOrConstant decrement);

  void adrp(Register reg1, const Address &dest, uint64_t &byte_offset);

  void verified_entry(Compile* C, int sp_inc);

  // Inline type specific methods
  #include "asm/macroAssembler_common.hpp"

  int store_inline_type_fields_to_buf(ciInlineKlass* vk, bool from_interpreter = true);
  bool move_helper(VMReg from, VMReg to, BasicType bt, RegState reg_state[]);
  bool unpack_inline_helper(const GrowableArray<SigEntry>* sig, int& sig_index,
                            VMReg from, int& from_index, VMRegPair* to, int to_count, int& to_index,
                            RegState reg_state[]);
  bool pack_inline_helper(const GrowableArray<SigEntry>* sig, int& sig_index, int vtarg_index,
                          VMRegPair* from, int from_count, int& from_index, VMReg to,
                          RegState reg_state[], Register val_array);
  int extend_stack_for_inline_args(int args_on_stack);
  void remove_frame(int initial_framesize, bool needs_stack_repair);
  VMReg spill_reg_for(VMReg reg);
  void save_stack_increment(int sp_inc, int frame_size);

  void tableswitch(Register index, jint lowbound, jint highbound,
                   Label &jumptable, Label &jumptable_end, int stride = 1) {
    adr(rscratch1, jumptable);
    subsw(rscratch2, index, lowbound);
    subsw(zr, rscratch2, highbound - lowbound);
    br(Assembler::HS, jumptable_end);
    add(rscratch1, rscratch1, rscratch2,
        ext::sxtw, exact_log2(stride * Assembler::instruction_size));
    br(rscratch1);
  }

  // Form an address from base + offset in Rd. Rd may or may not
  // actually be used: you must use the Address that is returned. It
  // is up to you to ensure that the shift provided matches the size
  // of your data.
  Address form_address(Register Rd, Register base, int64_t byte_offset, int shift);

  // Return true iff an address is within the 48-bit AArch64 address
  // space.
  bool is_valid_AArch64_address(address a) {
    return ((uint64_t)a >> 48) == 0;
  }

  // Load the base of the cardtable byte map into reg.
  void load_byte_map_base(Register reg);

  // Prolog generator routines to support switch between x86 code and
  // generated ARM code

  // routine to generate an x86 prolog for a stub function which
  // bootstraps into the generated ARM code which directly follows the
  // stub
  //

 public:

  void ldr_constant(Register dest, const Address &const_addr) {
    if (NearCpool) {
      ldr(dest, const_addr);
    } else {
      uint64_t offset;
      adrp(dest, InternalAddress(const_addr.target()), offset);
      ldr(dest, Address(dest, offset));
    }
  }

  address read_polling_page(Register r, relocInfo::relocType rtype);
  void get_polling_page(Register dest, relocInfo::relocType rtype);

  // CRC32 code for java.util.zip.CRC32::updateBytes() intrinsic.
  void update_byte_crc32(Register crc, Register val, Register table);
  void update_word_crc32(Register crc, Register v, Register tmp,
                         Register table0, Register table1, Register table2, Register table3,
                         bool upper = false);

  address count_positives(Register ary1, Register len, Register result);

  address arrays_equals(Register a1, Register a2, Register result, Register cnt1,
                        Register tmp1, Register tmp2, Register tmp3, int elem_size);

  void string_equals(Register a1, Register a2, Register result, Register cnt1,
                     int elem_size);

  void fill_words(Register base, Register cnt, Register value);
  void fill_words(Register base, uint64_t cnt, Register value);

  address zero_words(Register base, uint64_t cnt);
  address zero_words(Register ptr, Register cnt);
  void zero_dcache_blocks(Register base, Register cnt);

  static const int zero_words_block_size;

  address byte_array_inflate(Register src, Register dst, Register len,
                             FloatRegister vtmp1, FloatRegister vtmp2,
                             FloatRegister vtmp3, Register tmp4);

  void char_array_compress(Register src, Register dst, Register len,
                           Register res,
                           FloatRegister vtmp0, FloatRegister vtmp1,
                           FloatRegister vtmp2, FloatRegister vtmp3);

  void encode_iso_array(Register src, Register dst,
                        Register len, Register res, bool ascii,
                        FloatRegister vtmp0, FloatRegister vtmp1,
                        FloatRegister vtmp2, FloatRegister vtmp3);

  void fast_log(FloatRegister vtmp0, FloatRegister vtmp1, FloatRegister vtmp2,
                FloatRegister vtmp3, FloatRegister vtmp4, FloatRegister vtmp5,
                FloatRegister tmpC1, FloatRegister tmpC2, FloatRegister tmpC3,
                FloatRegister tmpC4, Register tmp1, Register tmp2,
                Register tmp3, Register tmp4, Register tmp5);
  void generate_dsin_dcos(bool isCos, address npio2_hw, address two_over_pi,
                          address pio2, address dsin_coef, address dcos_coef);
 private:
  // begin trigonometric functions support block
  void generate__ieee754_rem_pio2(address npio2_hw, address two_over_pi, address pio2);
  void generate__kernel_rem_pio2(address two_over_pi, address pio2);
  void generate_kernel_sin(FloatRegister x, bool iyIsOne, address dsin_coef);
  void generate_kernel_cos(FloatRegister x, address dcos_coef);
  // end trigonometric functions support block
  void add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo,
                       Register src1, Register src2);
  void add2_with_carry(Register dest_hi, Register dest_lo, Register src1, Register src2) {
    add2_with_carry(dest_hi, dest_hi, dest_lo, src1, src2);
  }
  void multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart,
                             Register y, Register y_idx, Register z,
                             Register carry, Register product,
                             Register idx, Register kdx);
  void multiply_128_x_128_loop(Register y, Register z,
                               Register carry, Register carry2,
                               Register idx, Register jdx,
                               Register yz_idx1, Register yz_idx2,
                               Register tmp, Register tmp3, Register tmp4,
                               Register tmp7, Register product_hi);
  void kernel_crc32_using_crc32(Register crc, Register buf,
                                Register len, Register tmp0, Register tmp1, Register tmp2,
                                Register tmp3);
  void kernel_crc32c_using_crc32c(Register crc, Register buf,
                                  Register len, Register tmp0, Register tmp1, Register tmp2,
                                  Register tmp3);

  void ghash_modmul(FloatRegister result,
                    FloatRegister result_lo, FloatRegister result_hi,
                    FloatRegister b, FloatRegister a, FloatRegister vzr, FloatRegister a1_xor_a0,
                    FloatRegister p, FloatRegister t1, FloatRegister t2, FloatRegister t3);
  void ghash_load_wide(int index, Register data, FloatRegister result, FloatRegister state);

public:
  void multiply_to_len(Register x, Register xlen, Register y, Register ylen, Register z,
                       Register zlen, Register tmp1, Register tmp2, Register tmp3,
                       Register tmp4, Register tmp5, Register tmp6, Register tmp7);
  void mul_add(Register out, Register in, Register offs, Register len, Register k);
  void ghash_multiply(FloatRegister result_lo, FloatRegister result_hi,
                      FloatRegister a, FloatRegister b, FloatRegister a1_xor_a0,
                      FloatRegister tmp1, FloatRegister tmp2, FloatRegister tmp3);
  void ghash_multiply_wide(int index,
                           FloatRegister result_lo, FloatRegister result_hi,
                           FloatRegister a, FloatRegister b, FloatRegister a1_xor_a0,
                           FloatRegister tmp1, FloatRegister tmp2, FloatRegister tmp3);
  void ghash_reduce(FloatRegister result, FloatRegister lo, FloatRegister hi,
                    FloatRegister p, FloatRegister z, FloatRegister t1);
  void ghash_reduce_wide(int index, FloatRegister result, FloatRegister lo, FloatRegister hi,
                         FloatRegister p, FloatRegister z, FloatRegister t1);
  void ghash_processBlocks_wide(address p, Register state, Register subkeyH,
                                Register data, Register blocks, int unrolls);

  void aesenc_loadkeys(Register key, Register keylen);
  void aesecb_encrypt(Register from, Register to, Register keylen,
                      FloatRegister data = v0, int unrolls = 1);
  void aesecb_decrypt(Register from, Register to, Register key, Register keylen);
  void aes_round(FloatRegister input, FloatRegister subkey);

  // ChaCha20 functions support block
  void cc20_quarter_round(FloatRegister aVec, FloatRegister bVec,
                          FloatRegister cVec, FloatRegister dVec, FloatRegister scratch,
                          FloatRegister tbl);
  void cc20_shift_lane_org(FloatRegister bVec, FloatRegister cVec,
                           FloatRegister dVec, bool colToDiag);

  // Emit an ISB because code may have been modified due to a safepoint.
  void safepoint_isb();

private:
  // Return the effective address r + (r1 << ext) + offset.
  // Uses rscratch2.
  Address offsetted_address(Register r, Register r1, Address::extend ext,
                            int offset, int size);

private:
  // Returns an address on the stack which is reachable with a ldr/str of the given size.
  // Uses rscratch2 if the address is not directly reachable.
  Address spill_address(int size, int offset, Register tmp=rscratch2);
  Address sve_spill_address(int sve_reg_size_in_bytes, int offset, Register tmp=rscratch2);

  bool merge_alignment_check(Register base, size_t size, int64_t cur_offset, int64_t prev_offset) const;

  // Check whether two loads/stores can be merged into ldp/stp.
  bool ldst_can_merge(Register rx, const Address &adr, size_t cur_size_in_bytes, bool is_store) const;

  // Merge current load/store with previous load/store into ldp/stp.
  void merge_ldst(Register rx, const Address &adr, size_t cur_size_in_bytes, bool is_store);

  // Try to merge two loads/stores into ldp/stp. Returns true on success, false otherwise.
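  //
  // An illustrative example (this is the effect of the peephole, not a literal
  // code path in this header): two adjacent 8-byte stores such as
  //
  //   str x1, [sp, #16]
  //   str x2, [sp, #24]
  //
  // can be folded into the single pair instruction
  //
  //   stp x1, x2, [sp, #16]
  //
  // when the offsets are consecutive and suitably aligned for a pair access.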
  bool try_merge_ldst(Register rt, const Address &adr, size_t cur_size_in_bytes, bool is_store);

public:
  void spill(Register Rx, bool is64, int offset) {
    if (is64) {
      str(Rx, spill_address(8, offset));
    } else {
      strw(Rx, spill_address(4, offset));
    }
  }
  void spill(FloatRegister Vx, SIMD_RegVariant T, int offset) {
    str(Vx, T, spill_address(1 << (int)T, offset));
  }

  void spill_sve_vector(FloatRegister Zx, int offset, int vector_reg_size_in_bytes) {
    sve_str(Zx, sve_spill_address(vector_reg_size_in_bytes, offset));
  }
  void spill_sve_predicate(PRegister pr, int offset, int predicate_reg_size_in_bytes) {
    sve_str(pr, sve_spill_address(predicate_reg_size_in_bytes, offset));
  }

  void unspill(Register Rx, bool is64, int offset) {
    if (is64) {
      ldr(Rx, spill_address(8, offset));
    } else {
      ldrw(Rx, spill_address(4, offset));
    }
  }
  void unspill(FloatRegister Vx, SIMD_RegVariant T, int offset) {
    ldr(Vx, T, spill_address(1 << (int)T, offset));
  }

  void unspill_sve_vector(FloatRegister Zx, int offset, int vector_reg_size_in_bytes) {
    sve_ldr(Zx, sve_spill_address(vector_reg_size_in_bytes, offset));
  }
  void unspill_sve_predicate(PRegister pr, int offset, int predicate_reg_size_in_bytes) {
    sve_ldr(pr, sve_spill_address(predicate_reg_size_in_bytes, offset));
  }

  // Copy 16 bytes between two stack slots. Uses ldp/stp directly when both
  // offsets are 8-byte aligned and small enough for the pair immediate form;
  // otherwise falls back to four single transfers through tmp1.
  void spill_copy128(int src_offset, int dst_offset,
                     Register tmp1=rscratch1, Register tmp2=rscratch2) {
    if (src_offset < 512 && (src_offset & 7) == 0 &&
        dst_offset < 512 && (dst_offset & 7) == 0) {
      ldp(tmp1, tmp2, Address(sp, src_offset));
      stp(tmp1, tmp2, Address(sp, dst_offset));
    } else {
      unspill(tmp1, true, src_offset);
      spill(tmp1, true, dst_offset);
      unspill(tmp1, true, src_offset+8);
      spill(tmp1, true, dst_offset+8);
    }
  }
  void spill_copy_sve_vector_stack_to_stack(int src_offset, int dst_offset,
                                            int sve_vec_reg_size_in_bytes) {
    assert(sve_vec_reg_size_in_bytes % 16 == 0, "unexpected sve vector reg size");
    for (int i = 0; i < sve_vec_reg_size_in_bytes / 16; i++) {
      spill_copy128(src_offset, dst_offset);
      src_offset += 16;
      dst_offset += 16;
    }
  }
  void spill_copy_sve_predicate_stack_to_stack(int src_offset, int dst_offset,
                                               int sve_predicate_reg_size_in_bytes) {
    sve_ldr(ptrue, sve_spill_address(sve_predicate_reg_size_in_bytes, src_offset));
    sve_str(ptrue, sve_spill_address(sve_predicate_reg_size_in_bytes, dst_offset));
    reinitialize_ptrue();
  }
  void cache_wb(Address line);
  void cache_wbsync(bool is_pre);

  // Code for java.lang.Thread::onSpinWait() intrinsic.
  void spin_wait();

private:
  // Check that the current thread doesn't need a cross modify fence.
  void verify_cross_modify_fence_not_required() PRODUCT_RETURN;

};

#ifdef ASSERT
inline bool AbstractAssembler::pd_check_instruction_mark() { return false; }
#endif

/**
 * class SkipIfEqual:
 *
 * Instantiating this class emits assembly code that jumps around any code
 * emitted between the creation of the instance and its automatic destruction
 * at the end of the enclosing scope, depending on the run-time value of the
 * flag passed to the constructor.
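 *
 * A rough usage sketch (illustrative only; the flag name is hypothetical and
 * masm is a MacroAssembler*):
 *
 *   {
 *     SkipIfEqual skip(masm, &SomeDiagnosticFlag, false);
 *     // ... code emitted here is branched over when SomeDiagnosticFlag == false ...
 *   }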
 */
class SkipIfEqual {
 private:
  MacroAssembler* _masm;
  Label _label;

 public:
  SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value);
  ~SkipIfEqual();
};

struct tableswitch {
  Register _reg;
  int _insn_index; jint _first_key; jint _last_key;
  Label _after;
  Label _branches;
};

#endif // CPU_AARCH64_MACROASSEMBLER_AARCH64_HPP