/*
 * Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2014, 2021, Red Hat Inc. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef CPU_AARCH64_MACROASSEMBLER_AARCH64_HPP
#define CPU_AARCH64_MACROASSEMBLER_AARCH64_HPP

#include "asm/assembler.inline.hpp"
#include "oops/compressedOops.hpp"
#include "runtime/signature.hpp"
#include "runtime/vm_version.hpp"
#include "utilities/macros.hpp"
#include "utilities/powerOfTwo.hpp"


class ciInlineKlass;

// MacroAssembler extends Assembler by frequently used macros.
//
// Instructions for which a 'better' code sequence exists depending
// on arguments should also go in here.

class MacroAssembler: public Assembler {
  friend class LIR_Assembler;

 public:
  using Assembler::mov;
  using Assembler::movi;

 protected:

  // Support for VM calls
  //
  // This is the base routine called by the different versions of call_VM_leaf. The interpreter
  // may customize this version by overriding it for its purposes (e.g., to save/restore
  // additional registers when doing a VM call).
  virtual void call_VM_leaf_base(
    address entry_point,               // the entry point
    int     number_of_arguments,       // the number of arguments to pop after the call
    Label *retaddr = NULL
  );

  virtual void call_VM_leaf_base(
    address entry_point,               // the entry point
    int     number_of_arguments,       // the number of arguments to pop after the call
    Label &retaddr) {
    call_VM_leaf_base(entry_point, number_of_arguments, &retaddr);
  }

  // This is the base routine called by the different versions of call_VM. The interpreter
  // may customize this version by overriding it for its purposes (e.g., to save/restore
  // additional registers when doing a VM call).
  //
  // If no java_thread register is specified (noreg) then rthread will be used instead. call_VM_base
  // returns the register which contains the thread upon return. If a thread register has been
  // specified, the return value will correspond to that register. If no last_java_sp is specified
  // (noreg) then rsp will be used instead.
  virtual void call_VM_base(           // returns the register containing the thread upon return
    Register oop_result,               // where an oop-result ends up if any; use noreg otherwise
    Register java_thread,              // the thread if computed before     ; use noreg otherwise
    Register last_java_sp,             // to set up last_Java_frame in stubs; use noreg otherwise
    address  entry_point,              // the entry point
    int      number_of_arguments,      // the number of arguments (w/o thread) to pop after the call
    bool     check_exceptions          // whether to check for pending exceptions after return
  );

  void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions = true);

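  // Strategies for decoding a narrow klass back to a Klass*, selected once
  // per run from the compressed-klass encoding base: with a zero base only a
  // shift is needed, while a suitably aligned non-zero base can be folded in
  // with a single eor or movk. (A descriptive summary; the selection logic
  // itself lives in klass_decode_mode() in the .cpp file.)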
  enum KlassDecodeMode {
    KlassDecodeNone,
    KlassDecodeZero,
    KlassDecodeXor,
    KlassDecodeMovk
  };

  KlassDecodeMode klass_decode_mode();

 private:
  static KlassDecodeMode _klass_decode_mode;

 public:
  MacroAssembler(CodeBuffer* code) : Assembler(code) {}

  // These routines should emit JVMTI PopFrame and ForceEarlyReturn handling code.
  // The implementation is only non-empty for the InterpreterMacroAssembler,
  // as only the interpreter handles PopFrame and ForceEarlyReturn requests.
  virtual void check_and_handle_popframe(Register java_thread);
  virtual void check_and_handle_earlyret(Register java_thread);

  void safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod);

  // Helper functions for statistics gathering.
  // Unconditional atomic increment.
  void atomic_incw(Register counter_addr, Register tmp, Register tmp2);
  void atomic_incw(Address counter_addr, Register tmp1, Register tmp2, Register tmp3) {
    lea(tmp1, counter_addr);
    atomic_incw(tmp1, tmp2, tmp3);
  }
  // Load Effective Address
  void lea(Register r, const Address &a) {
    InstructionMark im(this);
    code_section()->relocate(inst_mark(), a.rspec());
    a.lea(this, r);
  }

  /* Sometimes we get misaligned loads and stores, usually from Unsafe
     accesses, and these can exceed the offset range. */
  Address legitimize_address(const Address &a, int size, Register scratch) {
    if (a.getMode() == Address::base_plus_offset) {
      if (! Address::offset_ok_for_immed(a.offset(), exact_log2(size))) {
        block_comment("legitimize_address {");
        lea(scratch, a);
        block_comment("} legitimize_address");
        return Address(scratch);
      }
    }
    return a;
  }
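  // Illustrative use (a sketch; registers are hypothetical): a halfword
  // store whose immediate offset cannot be encoded is re-based through the
  // scratch register, assuming the scratch is free at the call site:
  //
  //   Address dst(r0, 0x123456);                        // too big for strh imm
  //   strh(r1, legitimize_address(dst, 2, rscratch2));  // lea, then [rscratch2]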

  void addmw(Address a, Register incr, Register scratch) {
    ldrw(scratch, a);
    addw(scratch, scratch, incr);
    strw(scratch, a);
  }

  // Add constant to memory word
  void addmw(Address a, int imm, Register scratch) {
    ldrw(scratch, a);
    if (imm > 0)
      addw(scratch, scratch, (unsigned)imm);
    else
      subw(scratch, scratch, (unsigned)-imm);
    strw(scratch, a);
  }

  void bind(Label& L) {
    Assembler::bind(L);
    code()->clear_last_insn();
  }

  void membar(Membar_mask_bits order_constraint);

  using Assembler::ldr;
  using Assembler::str;
  using Assembler::ldrw;
  using Assembler::strw;

  void ldr(Register Rx, const Address &adr);
  void ldrw(Register Rw, const Address &adr);
  void str(Register Rx, const Address &adr);
  void strw(Register Rx, const Address &adr);

  // Frame creation and destruction shared between JITs.
  void build_frame(int framesize);
  void remove_frame(int framesize);

  virtual void _call_Unimplemented(address call_site) {
    mov(rscratch2, call_site);
  }

// Microsoft's MSVC team thinks that the __FUNCSIG__ is approximately (sympathy for calling conventions) equivalent to __PRETTY_FUNCTION__
// Also, from Clang patch: "It is very similar to GCC's PRETTY_FUNCTION, except it prints the calling convention."
// https://reviews.llvm.org/D3311

#ifdef _WIN64
#define call_Unimplemented() _call_Unimplemented((address)__FUNCSIG__)
#else
#define call_Unimplemented() _call_Unimplemented((address)__PRETTY_FUNCTION__)
#endif

  // aliases defined in AARCH64 spec

  template<class T>
  inline void cmpw(Register Rd, T imm)  { subsw(zr, Rd, imm); }

  inline void cmp(Register Rd, unsigned char imm8)  { subs(zr, Rd, imm8); }
  inline void cmp(Register Rd, unsigned imm) = delete;

  inline void cmnw(Register Rd, unsigned imm) { addsw(zr, Rd, imm); }
  inline void cmn(Register Rd, unsigned imm) { adds(zr, Rd, imm); }

  void cset(Register Rd, Assembler::Condition cond) {
    csinc(Rd, zr, zr, ~cond);
  }
  void csetw(Register Rd, Assembler::Condition cond) {
    csincw(Rd, zr, zr, ~cond);
  }
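  // n.b. cset materializes cond as 0/1 via csinc with the inverted
  // condition: when ~cond holds, Rd = zr (0); otherwise Rd = zr + 1 (1),
  // which is exactly "1 iff cond". cneg/csneg below invert the condition
  // for the same reason.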

  void cneg(Register Rd, Register Rn, Assembler::Condition cond) {
    csneg(Rd, Rn, Rn, ~cond);
  }
  void cnegw(Register Rd, Register Rn, Assembler::Condition cond) {
    csnegw(Rd, Rn, Rn, ~cond);
  }

  inline void movw(Register Rd, Register Rn) {
    if (Rd == sp || Rn == sp) {
      addw(Rd, Rn, 0U);
    } else {
      orrw(Rd, zr, Rn);
    }
  }
  inline void mov(Register Rd, Register Rn) {
    assert(Rd != r31_sp && Rn != r31_sp, "should be");
    if (Rd == Rn) {
    } else if (Rd == sp || Rn == sp) {
      add(Rd, Rn, 0U);
    } else {
      orr(Rd, zr, Rn);
    }
  }

  inline void moviw(Register Rd, unsigned imm) { orrw(Rd, zr, imm); }
  inline void movi(Register Rd, unsigned imm) { orr(Rd, zr, imm); }

  inline void tstw(Register Rd, Register Rn) { andsw(zr, Rd, Rn); }
  inline void tst(Register Rd, Register Rn) { ands(zr, Rd, Rn); }

  inline void tstw(Register Rd, uint64_t imm) { andsw(zr, Rd, imm); }
  inline void tst(Register Rd, uint64_t imm) { ands(zr, Rd, imm); }

  inline void bfiw(Register Rd, Register Rn, unsigned lsb, unsigned width) {
    bfmw(Rd, Rn, ((32 - lsb) & 31), (width - 1));
  }
  inline void bfi(Register Rd, Register Rn, unsigned lsb, unsigned width) {
    bfm(Rd, Rn, ((64 - lsb) & 63), (width - 1));
  }

  inline void bfxilw(Register Rd, Register Rn, unsigned lsb, unsigned width) {
    bfmw(Rd, Rn, lsb, (lsb + width - 1));
  }
  inline void bfxil(Register Rd, Register Rn, unsigned lsb, unsigned width) {
    bfm(Rd, Rn, lsb, (lsb + width - 1));
  }

  inline void sbfizw(Register Rd, Register Rn, unsigned lsb, unsigned width) {
    sbfmw(Rd, Rn, ((32 - lsb) & 31), (width - 1));
  }
  inline void sbfiz(Register Rd, Register Rn, unsigned lsb, unsigned width) {
    sbfm(Rd, Rn, ((64 - lsb) & 63), (width - 1));
  }

  inline void sbfxw(Register Rd, Register Rn, unsigned lsb, unsigned width) {
    sbfmw(Rd, Rn, lsb, (lsb + width - 1));
  }
  inline void sbfx(Register Rd, Register Rn, unsigned lsb, unsigned width) {
    sbfm(Rd, Rn, lsb, (lsb + width - 1));
  }

  inline void ubfizw(Register Rd, Register Rn, unsigned lsb, unsigned width) {
    ubfmw(Rd, Rn, ((32 - lsb) & 31), (width - 1));
  }
  inline void ubfiz(Register Rd, Register Rn, unsigned lsb, unsigned width) {
    ubfm(Rd, Rn, ((64 - lsb) & 63), (width - 1));
  }

  inline void ubfxw(Register Rd, Register Rn, unsigned lsb, unsigned width) {
    ubfmw(Rd, Rn, lsb, (lsb + width - 1));
  }
  inline void ubfx(Register Rd, Register Rn, unsigned lsb, unsigned width) {
    ubfm(Rd, Rn, lsb, (lsb + width - 1));
  }

  inline void asrw(Register Rd, Register Rn, unsigned imm) {
    sbfmw(Rd, Rn, imm, 31);
  }

  inline void asr(Register Rd, Register Rn, unsigned imm) {
    sbfm(Rd, Rn, imm, 63);
  }

  inline void lslw(Register Rd, Register Rn, unsigned imm) {
    ubfmw(Rd, Rn, ((32 - imm) & 31), (31 - imm));
  }

  inline void lsl(Register Rd, Register Rn, unsigned imm) {
    ubfm(Rd, Rn, ((64 - imm) & 63), (63 - imm));
  }

  inline void lsrw(Register Rd, Register Rn, unsigned imm) {
    ubfmw(Rd, Rn, imm, 31);
  }

  inline void lsr(Register Rd, Register Rn, unsigned imm) {
    ubfm(Rd, Rn, imm, 63);
  }

  inline void rorw(Register Rd, Register Rn, unsigned imm) {
    extrw(Rd, Rn, Rn, imm);
  }

  inline void ror(Register Rd, Register Rn, unsigned imm) {
    extr(Rd, Rn, Rn, imm);
  }

  inline void sxtbw(Register Rd, Register Rn) {
    sbfmw(Rd, Rn, 0, 7);
  }
  inline void sxthw(Register Rd, Register Rn) {
    sbfmw(Rd, Rn, 0, 15);
  }
  inline void sxtb(Register Rd, Register Rn) {
    sbfm(Rd, Rn, 0, 7);
  }
  inline void sxth(Register Rd, Register Rn) {
    sbfm(Rd, Rn, 0, 15);
  }
  inline void sxtw(Register Rd, Register Rn) {
    sbfm(Rd, Rn, 0, 31);
  }

  inline void uxtbw(Register Rd, Register Rn) {
    ubfmw(Rd, Rn, 0, 7);
  }
  inline void uxthw(Register Rd, Register Rn) {
    ubfmw(Rd, Rn, 0, 15);
  }
  inline void uxtb(Register Rd, Register Rn) {
    ubfm(Rd, Rn, 0, 7);
  }
  inline void uxth(Register Rd, Register Rn) {
    ubfm(Rd, Rn, 0, 15);
  }
  inline void uxtw(Register Rd, Register Rn) {
    ubfm(Rd, Rn, 0, 31);
  }

  inline void cmnw(Register Rn, Register Rm) {
    addsw(zr, Rn, Rm);
  }
  inline void cmn(Register Rn, Register Rm) {
    adds(zr, Rn, Rm);
  }

  inline void cmpw(Register Rn, Register Rm) {
    subsw(zr, Rn, Rm);
  }
  inline void cmp(Register Rn, Register Rm) {
    subs(zr, Rn, Rm);
  }

  inline void negw(Register Rd, Register Rn) {
    subw(Rd, zr, Rn);
  }

  inline void neg(Register Rd, Register Rn) {
    sub(Rd, zr, Rn);
  }

  inline void negsw(Register Rd, Register Rn) {
    subsw(Rd, zr, Rn);
  }

  inline void negs(Register Rd, Register Rn) {
    subs(Rd, zr, Rn);
  }

  inline void cmnw(Register Rn, Register Rm, enum shift_kind kind, unsigned shift = 0) {
    addsw(zr, Rn, Rm, kind, shift);
  }
  inline void cmn(Register Rn, Register Rm, enum shift_kind kind, unsigned shift = 0) {
    adds(zr, Rn, Rm, kind, shift);
  }

  inline void cmpw(Register Rn, Register Rm, enum shift_kind kind, unsigned shift = 0) {
    subsw(zr, Rn, Rm, kind, shift);
  }
  inline void cmp(Register Rn, Register Rm, enum shift_kind kind, unsigned shift = 0) {
    subs(zr, Rn, Rm, kind, shift);
  }

  inline void negw(Register Rd, Register Rn, enum shift_kind kind, unsigned shift = 0) {
    subw(Rd, zr, Rn, kind, shift);
  }

  inline void neg(Register Rd, Register Rn, enum shift_kind kind, unsigned shift = 0) {
    sub(Rd, zr, Rn, kind, shift);
  }

  inline void negsw(Register Rd, Register Rn, enum shift_kind kind, unsigned shift = 0) {
    subsw(Rd, zr, Rn, kind, shift);
  }

  inline void negs(Register Rd, Register Rn, enum shift_kind kind, unsigned shift = 0) {
    subs(Rd, zr, Rn, kind, shift);
  }

  inline void mnegw(Register Rd, Register Rn, Register Rm) {
    msubw(Rd, Rn, Rm, zr);
  }
  inline void mneg(Register Rd, Register Rn, Register Rm) {
    msub(Rd, Rn, Rm, zr);
  }

  inline void mulw(Register Rd, Register Rn, Register Rm) {
    maddw(Rd, Rn, Rm, zr);
  }
  inline void mul(Register Rd, Register Rn, Register Rm) {
    madd(Rd, Rn, Rm, zr);
  }

  inline void smnegl(Register Rd, Register Rn, Register Rm) {
    smsubl(Rd, Rn, Rm, zr);
  }
  inline void smull(Register Rd, Register Rn, Register Rm) {
    smaddl(Rd, Rn, Rm, zr);
  }

  inline void umnegl(Register Rd, Register Rn, Register Rm) {
    umsubl(Rd, Rn, Rm, zr);
  }
  inline void umull(Register Rd, Register Rn, Register Rm) {
    umaddl(Rd, Rn, Rm, zr);
  }

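  // The wrappers below conservatively insert a nop before an accumulating
  // multiply when the CPU_A53MAC feature is set; this is the usual software
  // workaround for the Cortex-A53 multiply-accumulate erratum (835769),
  // which affects MADD/MSUB-family instructions with a live accumulator.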
#define WRAP(INSN)                                                            \
  void INSN(Register Rd, Register Rn, Register Rm, Register Ra) {             \
    if ((VM_Version::features() & VM_Version::CPU_A53MAC) && Ra != zr)        \
      nop();                                                                  \
    Assembler::INSN(Rd, Rn, Rm, Ra);                                          \
  }

  WRAP(madd) WRAP(msub) WRAP(maddw) WRAP(msubw)
  WRAP(smaddl) WRAP(smsubl) WRAP(umaddl) WRAP(umsubl)
#undef WRAP


  // macro assembly operations needed for aarch64

  // first two private routines for loading 32 bit or 64 bit constants
private:

  void mov_immediate64(Register dst, uint64_t imm64);
  void mov_immediate32(Register dst, uint32_t imm32);

  int push(unsigned int bitset, Register stack);
  int pop(unsigned int bitset, Register stack);

  int push_fp(unsigned int bitset, Register stack);
  int pop_fp(unsigned int bitset, Register stack);

  void mov(Register dst, Address a);

public:
  void push(RegSet regs, Register stack) { if (regs.bits()) push(regs.bits(), stack); }
  void pop(RegSet regs, Register stack) { if (regs.bits()) pop(regs.bits(), stack); }

  void push_fp(FloatRegSet regs, Register stack) { if (regs.bits()) push_fp(regs.bits(), stack); }
  void pop_fp(FloatRegSet regs, Register stack) { if (regs.bits()) pop_fp(regs.bits(), stack); }

  static RegSet call_clobbered_registers();

  // Push and pop everything that might be clobbered by a native
  // runtime call except rscratch1 and rscratch2.  (They are always
  // scratch, so we don't have to protect them.)  Only save the lower
  // 64 bits of each vector register. Additional registers can be excluded
  // in a passed RegSet.
  void push_call_clobbered_registers_except(RegSet exclude);
  void pop_call_clobbered_registers_except(RegSet exclude);

  void push_call_clobbered_registers() {
    push_call_clobbered_registers_except(RegSet());
  }
  void pop_call_clobbered_registers() {
    pop_call_clobbered_registers_except(RegSet());
  }


  // now mov instructions for loading absolute addresses and 32 or
  // 64 bit integers

  inline void mov(Register dst, address addr)             { mov_immediate64(dst, (uint64_t)addr); }

  inline void mov(Register dst, int imm64)                { mov_immediate64(dst, (uint64_t)imm64); }
  inline void mov(Register dst, long imm64)               { mov_immediate64(dst, (uint64_t)imm64); }
  inline void mov(Register dst, long long imm64)          { mov_immediate64(dst, (uint64_t)imm64); }
  inline void mov(Register dst, unsigned int imm64)       { mov_immediate64(dst, (uint64_t)imm64); }
  inline void mov(Register dst, unsigned long imm64)      { mov_immediate64(dst, (uint64_t)imm64); }
  inline void mov(Register dst, unsigned long long imm64) { mov_immediate64(dst, (uint64_t)imm64); }

  inline void movw(Register dst, uint32_t imm32)
  {
    mov_immediate32(dst, imm32);
  }

  void mov(Register dst, RegisterOrConstant src) {
    if (src.is_register())
      mov(dst, src.as_register());
    else
      mov(dst, src.as_constant());
  }

  void movptr(Register r, uintptr_t imm64);

  void mov(FloatRegister Vd, SIMD_Arrangement T, uint32_t imm32);

  void mov(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) {
    orr(Vd, T, Vn, Vn);
  }


public:

  // Generalized Test Bit And Branch, including a "far" variety which
  // spans more than 32KiB.
  void tbr(Condition cond, Register Rt, int bitpos, Label &dest, bool isfar = false) {
    assert(cond == EQ || cond == NE, "must be");

    if (isfar)
      cond = ~cond;

    void (Assembler::* branch)(Register Rt, int bitpos, Label &L);
    if (cond == Assembler::EQ)
      branch = &Assembler::tbz;
    else
      branch = &Assembler::tbnz;

    if (isfar) {
      Label L;
      (this->*branch)(Rt, bitpos, L);
      b(dest);
      bind(L);
    } else {
      (this->*branch)(Rt, bitpos, dest);
    }
  }
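  // Illustrative use (a sketch; 'done' is a caller-defined label):
  //
  //   tbr(EQ, r0, 3, done);        // tbz: branch to done if bit 3 of r0 is 0
  //   tbr(NE, r0, 3, done, true);  // far variant, for targets beyond +/-32KiB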

  // macro instructions for accessing and updating floating point
  // status register
  //
  // FPSR : op1 == 011
  //        CRn == 0100
  //        CRm == 0100
  //        op2 == 001

  inline void get_fpsr(Register reg)
  {
    mrs(0b011, 0b0100, 0b0100, 0b001, reg);
  }

  inline void set_fpsr(Register reg)
  {
    msr(0b011, 0b0100, 0b0100, 0b001, reg);
  }

  inline void clear_fpsr()
  {
    msr(0b011, 0b0100, 0b0100, 0b001, zr);
  }

  // DCZID_EL0: op1 == 011
  //            CRn == 0000
  //            CRm == 0000
  //            op2 == 111
  inline void get_dczid_el0(Register reg)
  {
    mrs(0b011, 0b0000, 0b0000, 0b111, reg);
  }

  // CTR_EL0:   op1 == 011
  //            CRn == 0000
  //            CRm == 0000
  //            op2 == 001
  inline void get_ctr_el0(Register reg)
  {
    mrs(0b011, 0b0000, 0b0000, 0b001, reg);
  }

  // idiv variant which deals with MINLONG as dividend and -1 as divisor
  int corrected_idivl(Register result, Register ra, Register rb,
                      bool want_remainder, Register tmp = rscratch1);
  int corrected_idivq(Register result, Register ra, Register rb,
                      bool want_remainder, Register tmp = rscratch1);

  // Support for NULL-checks
  //
  // Generates code that causes a NULL OS exception if the content of reg is NULL.
  // If the accessed location is M[reg + offset] and the offset is known, provide the
  // offset. No explicit code generation is needed if the offset is within a certain
  // range (0 <= offset <= page_size).

  virtual void null_check(Register reg, int offset = -1);
  static bool needs_explicit_null_check(intptr_t offset);
  static bool uses_implicit_null_check(void* address);

  // markWord tests, kills markWord reg
  void test_markword_is_inline_type(Register markword, Label& is_inline_type);

  // inlineKlass queries, kills temp_reg
  void test_klass_is_inline_type(Register klass, Register temp_reg, Label& is_inline_type);
  void test_klass_is_empty_inline_type(Register klass, Register temp_reg, Label& is_empty_inline_type);
  void test_oop_is_not_inline_type(Register object, Register tmp, Label& not_inline_type);

  // Get the default value oop for the given InlineKlass
  void get_default_value_oop(Register inline_klass, Register temp_reg, Register obj);
  // The empty value oop, for the given InlineKlass ("empty" as in no instance fields)
  // get_default_value_oop with extra assertion for empty inline klass
  void get_empty_inline_type_oop(Register inline_klass, Register temp_reg, Register obj);

  void test_field_is_null_free_inline_type(Register flags, Register temp_reg, Label& is_null_free);
  void test_field_is_not_null_free_inline_type(Register flags, Register temp_reg, Label& not_null_free);
  void test_field_is_inlined(Register flags, Register temp_reg, Label& is_flattened);

  // Check oops for special arrays, i.e. flattened and/or null-free
  void test_oop_prototype_bit(Register oop, Register temp_reg, int32_t test_bit, bool jmp_set, Label& jmp_label);
  void test_flattened_array_oop(Register klass, Register temp_reg, Label& is_flattened_array);
  void test_non_flattened_array_oop(Register oop, Register temp_reg, Label& is_non_flattened_array);
  void test_null_free_array_oop(Register oop, Register temp_reg, Label& is_null_free_array);
  void test_non_null_free_array_oop(Register oop, Register temp_reg, Label& is_non_null_free_array);

  // Check array klass layout helper for flattened or null-free arrays...
  void test_flattened_array_layout(Register lh, Label& is_flattened_array);
  void test_non_flattened_array_layout(Register lh, Label& is_non_flattened_array);
  void test_null_free_array_layout(Register lh, Label& is_null_free_array);
  void test_non_null_free_array_layout(Register lh, Label& is_non_null_free_array);

  static address target_addr_for_insn(address insn_addr, unsigned insn);
  static address target_addr_for_insn(address insn_addr) {
    unsigned insn = *(unsigned*)insn_addr;
    return target_addr_for_insn(insn_addr, insn);
  }

  // Required platform-specific helpers for Label::patch_instructions.
  // They _shadow_ the declarations in AbstractAssembler, which are undefined.
  static int pd_patch_instruction_size(address branch, address target);
  static void pd_patch_instruction(address branch, address target, const char* file = NULL, int line = 0) {
    pd_patch_instruction_size(branch, target);
  }
  static address pd_call_destination(address branch) {
    return target_addr_for_insn(branch);
  }
#ifndef PRODUCT
  static void pd_print_patched_instruction(address branch);
#endif

  static int patch_oop(address insn_addr, address o);
  static int patch_narrow_klass(address insn_addr, narrowKlass n);

  address emit_trampoline_stub(int insts_call_instruction_offset, address target);
  void emit_static_call_stub();

  // The following 4 methods return the offset of the appropriate move instruction

  // Support for fast byte/short loading with zero extension (depending on particular CPU)
  int load_unsigned_byte(Register dst, Address src);
  int load_unsigned_short(Register dst, Address src);

  // Support for fast byte/short loading with sign extension (depending on particular CPU)
  int load_signed_byte(Register dst, Address src);
  int load_signed_short(Register dst, Address src);

  int load_signed_byte32(Register dst, Address src);
  int load_signed_short32(Register dst, Address src);

  // Support for sign-extension (hi:lo = extend_sign(lo))
  void extend_sign(Register hi, Register lo);

  // Load and store values by size and signed-ness
  void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg);
  void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg);

  // Support for inc/dec with optimal instruction selection depending on value

  // x86_64 aliases an unqualified register/address increment and
  // decrement to call incrementq and decrementq but also supports
  // explicitly sized calls to incrementq/decrementq or
  // incrementl/decrementl

  // for aarch64 the proper convention would be to use
  // increment/decrement for 64 bit operations and
  // incrementw/decrementw for 32 bit operations. so when porting
  // x86_64 code we can leave calls to increment/decrement as is,
  // replace incrementq/decrementq with increment/decrement and
  // replace incrementl/decrementl with incrementw/decrementw.

  // n.b. increment/decrement calls with an Address destination will
  // need to use a scratch register to load the value to be
  // incremented. increment/decrement calls which add or subtract a
  // constant value greater than 2^12 will need to use a 2nd scratch
  // register to hold the constant. so, a register increment/decrement
  // may trash rscratch2 and an address increment/decrement may trash
  // rscratch1 and rscratch2.

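  // For example (a porting sketch; register names are illustrative):
  //
  //   x86_64: incrementq(rax, 16);   ->  aarch64: increment(r0, 16);
  //   x86_64: incrementl(rax, 16);   ->  aarch64: incrementw(r0, 16);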
  void decrementw(Address dst, int value = 1);
  void decrementw(Register reg, int value = 1);

  void decrement(Register reg, int value = 1);
  void decrement(Address dst, int value = 1);

  void incrementw(Address dst, int value = 1);
  void incrementw(Register reg, int value = 1);

  void increment(Register reg, int value = 1);
  void increment(Address dst, int value = 1);


  // Alignment
  void align(int modulus);

  // Stack frame creation/removal
  void enter()
  {
    stp(rfp, lr, Address(pre(sp, -2 * wordSize)));
    mov(rfp, sp);
  }
  void leave()
  {
    mov(sp, rfp);
    ldp(rfp, lr, Address(post(sp, 2 * wordSize)));
  }
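  // After enter() the standard frame link is in place, i.e.:
  //
  //   rfp + 8 -> saved lr  (return address)
  //   rfp + 0 -> saved rfp (caller's frame pointer), with rfp == sp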

  // Support for getting the JavaThread pointer (i.e., a reference to thread-local information)
  // The pointer will be loaded into the thread register.
  void get_thread(Register thread);


  // Support for VM calls
  //
  // It is imperative that all calls into the VM are handled via the call_VM macros.
  // They make sure that the stack linkage is setup correctly. call_VM's correspond
  // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points.


  void call_VM(Register oop_result,
               address entry_point,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               address entry_point,
               Register arg_1,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               address entry_point,
               Register arg_1, Register arg_2,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               address entry_point,
               Register arg_1, Register arg_2, Register arg_3,
               bool check_exceptions = true);

  // Overloadings with last_Java_sp
  void call_VM(Register oop_result,
               Register last_java_sp,
               address entry_point,
               int number_of_arguments = 0,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               Register last_java_sp,
               address entry_point,
               Register arg_1,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               Register last_java_sp,
               address entry_point,
               Register arg_1, Register arg_2,
               bool check_exceptions = true);
  void call_VM(Register oop_result,
               Register last_java_sp,
               address entry_point,
               Register arg_1, Register arg_2, Register arg_3,
               bool check_exceptions = true);

  void get_vm_result  (Register oop_result, Register thread);
  void get_vm_result_2(Register metadata_result, Register thread);

  // These always tightly bind to MacroAssembler::call_VM_base
  // bypassing the virtual implementation
  void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments = 0, bool check_exceptions = true);
  void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions = true);
  void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions = true);
  void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions = true);
  void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4, bool check_exceptions = true);

  void call_VM_leaf(address entry_point,
                    int number_of_arguments = 0);
  void call_VM_leaf(address entry_point,
                    Register arg_1);
  void call_VM_leaf(address entry_point,
                    Register arg_1, Register arg_2);
  void call_VM_leaf(address entry_point,
                    Register arg_1, Register arg_2, Register arg_3);

  // These always tightly bind to MacroAssembler::call_VM_leaf_base
  // bypassing the virtual implementation
  void super_call_VM_leaf(address entry_point);
  void super_call_VM_leaf(address entry_point, Register arg_1);
  void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2);
  void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3);
  void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4);

  // last Java Frame (fills frame anchor)
  void set_last_Java_frame(Register last_java_sp,
                           Register last_java_fp,
                           address last_java_pc,
                           Register scratch);

  void set_last_Java_frame(Register last_java_sp,
                           Register last_java_fp,
                           Label &last_java_pc,
                           Register scratch);

  void set_last_Java_frame(Register last_java_sp,
                           Register last_java_fp,
                           Register last_java_pc,
                           Register scratch);

  void reset_last_Java_frame(Register thread);

  // thread in the default location (rthread)
  void reset_last_Java_frame(bool clear_fp);

  // Stores
  void store_check(Register obj);                // store check for obj - register is destroyed afterwards
  void store_check(Register obj, Address dst);   // same as above, dst is exact store location (reg. is destroyed)

  void resolve_jobject(Register value, Register thread, Register tmp);

  // C 'boolean' to Java boolean: x == 0 ? 0 : 1
  void c2bool(Register x);

  void load_method_holder_cld(Register rresult, Register rmethod);
  void load_method_holder(Register holder, Register method);

  // oop manipulations
  void load_metadata(Register dst, Register src);

  void load_klass(Register dst, Register src);
  void store_klass(Register dst, Register src);
  void cmp_klass(Register oop, Register trial_klass, Register tmp);

  void resolve_weak_handle(Register result, Register tmp);
  void resolve_oop_handle(Register result, Register tmp = r5);
  void load_mirror(Register dst, Register method, Register tmp = r5);

  void access_load_at(BasicType type, DecoratorSet decorators, Register dst, Address src,
                      Register tmp1, Register tmp_thread);

  void access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register src,
                       Register tmp1, Register tmp_thread, Register tmp3 = noreg);

  void access_value_copy(DecoratorSet decorators, Register src, Register dst, Register inline_klass);

  // inline type data payload offsets...
  void first_field_offset(Register inline_klass, Register offset);
  void data_for_oop(Register oop, Register data, Register inline_klass);
  // get data payload pointer of a flat value array at index; kills index
  void data_for_value_array_index(Register array, Register array_klass,
                                  Register index, Register data);

  void load_heap_oop(Register dst, Address src, Register tmp1 = noreg,
                     Register thread_tmp = noreg, DecoratorSet decorators = 0);

  void load_heap_oop_not_null(Register dst, Address src, Register tmp1 = noreg,
                              Register thread_tmp = noreg, DecoratorSet decorators = 0);
  void store_heap_oop(Address dst, Register src, Register tmp1 = noreg,
                      Register tmp_thread = noreg, Register tmp3 = noreg, DecoratorSet decorators = 0);

  // currently unimplemented
  // Used for storing NULL. All other oop constants should be
  // stored using routines that take a jobject.
  void store_heap_oop_null(Address dst);

  void load_prototype_header(Register dst, Register src);

  void store_klass_gap(Register dst, Register src);

  // This dummy is to prevent a call to store_heap_oop from
  // converting a zero (like NULL) into a Register by giving
  // the compiler two choices it can't resolve

  void store_heap_oop(Address dst, void* dummy);

  void encode_heap_oop(Register d, Register s);
  void encode_heap_oop(Register r) { encode_heap_oop(r, r); }
  void decode_heap_oop(Register d, Register s);
  void decode_heap_oop(Register r) { decode_heap_oop(r, r); }
  void encode_heap_oop_not_null(Register r);
  void decode_heap_oop_not_null(Register r);
  void encode_heap_oop_not_null(Register dst, Register src);
  void decode_heap_oop_not_null(Register dst, Register src);

  void set_narrow_oop(Register dst, jobject obj);

  void encode_klass_not_null(Register r);
  void decode_klass_not_null(Register r);
  void encode_klass_not_null(Register dst, Register src);
  void decode_klass_not_null(Register dst, Register src);

  void set_narrow_klass(Register dst, Klass* k);

  // if heap base register is used - reinit it with the correct value
  void reinit_heapbase();

  DEBUG_ONLY(void verify_heapbase(const char* msg);)

  void push_CPU_state(bool save_vectors = false, bool use_sve = false,
                      int sve_vector_size_in_bytes = 0);
  void pop_CPU_state(bool restore_vectors = false, bool use_sve = false,
                     int sve_vector_size_in_bytes = 0);

  // Round reg up to a multiple of modulus (which must be a power of two)
  void round_to(Register reg, int modulus);

  // allocation

  // Object / value buffer allocation...
  // Allocate instance of klass, assumes klass initialized by caller.
  // Kills t1 and t2, preserves klass, returns allocation in new_obj.
  void allocate_instance(Register klass, Register new_obj,
                         Register t1, Register t2,
                         bool clear_fields, Label& alloc_failed);

  void eden_allocate(
    Register obj,                      // result: pointer to object after successful allocation
    Register var_size_in_bytes,        // object size in bytes if unknown at compile time; invalid otherwise
    int      con_size_in_bytes,        // object size in bytes if   known at compile time
    Register t1,                       // temp register
    Label&   slow_case                 // continuation point if fast allocation fails
  );
  void tlab_allocate(
    Register obj,                      // result: pointer to object after successful allocation
    Register var_size_in_bytes,        // object size in bytes if unknown at compile time; invalid otherwise
    int      con_size_in_bytes,        // object size in bytes if   known at compile time
    Register t1,                       // temp register
    Register t2,                       // temp register
    Label&   slow_case                 // continuation point if fast allocation fails
  );
  void verify_tlab();

  // For field "index" within "klass", return inline_klass ...
  void get_inline_type_field_klass(Register klass, Register index, Register inline_klass);

  // interface method calling
  void lookup_interface_method(Register recv_klass,
                               Register intf_klass,
                               RegisterOrConstant itable_index,
                               Register method_result,
                               Register scan_temp,
                               Label& no_such_interface,
                               bool return_method = true);

  // virtual method calling
  // n.b. x86 allows RegisterOrConstant for vtable_index
  void lookup_virtual_method(Register recv_klass,
                             RegisterOrConstant vtable_index,
                             Register method_result);

  // Test sub_klass against super_klass, with fast and slow paths.

  // The fast path produces a tri-state answer: yes / no / maybe-slow.
  // One of the three labels can be NULL, meaning take the fall-through.
  // If super_check_offset is -1, the value is loaded up from super_klass.
  // No registers are killed, except temp_reg.
  void check_klass_subtype_fast_path(Register sub_klass,
                                     Register super_klass,
                                     Register temp_reg,
                                     Label* L_success,
                                     Label* L_failure,
                                     Label* L_slow_path,
                                     RegisterOrConstant super_check_offset = RegisterOrConstant(-1));

  // The rest of the type check; must be wired to a corresponding fast path.
  // It does not repeat the fast path logic, so don't use it standalone.
  // The temp_reg and temp2_reg can be noreg, if no temps are available.
  // Updates the sub's secondary super cache as necessary.
  // If set_cond_codes, condition codes will be Z on success, NZ on failure.
  void check_klass_subtype_slow_path(Register sub_klass,
                                     Register super_klass,
                                     Register temp_reg,
                                     Register temp2_reg,
                                     Label* L_success,
                                     Label* L_failure,
                                     bool set_cond_codes = false);

  // Simplified, combined version, good for typical uses.
  // Falls through on failure.
  void check_klass_subtype(Register sub_klass,
                           Register super_klass,
                           Register temp_reg,
                           Label& L_success);

  void clinit_barrier(Register klass,
                      Register thread,
                      Label* L_fast_path = NULL,
                      Label* L_slow_path = NULL);

  Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0);

  void verify_sve_vector_length();
  void reinitialize_ptrue() {
    if (UseSVE > 0) {
      sve_ptrue(ptrue, B);
    }
  }
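  // n.b. generated SVE code assumes the ptrue predicate register holds an
  // all-true predicate, so it must be re-initialized after any call that may
  // have clobbered it (a descriptive note; see the callers in the .cpp files).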
  void verify_ptrue();

  // Debugging

  // only if +VerifyOops
  void verify_oop(Register reg, const char* s = "broken oop");
  void verify_oop_addr(Address addr, const char* s = "broken oop addr");

  // TODO: verify method and klass metadata (compare against vptr?)
  void _verify_method_ptr(Register reg, const char* msg, const char* file, int line) {}
  void _verify_klass_ptr(Register reg, const char* msg, const char* file, int line) {}

#define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__)
#define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__)

  // only if +VerifyFPU
  void verify_FPU(int stack_depth, const char* s = "illegal FPU state");

  // prints msg, dumps registers and stops execution
  void stop(const char* msg);

  static void debug64(char* msg, int64_t pc, int64_t regs[]);

  void untested()                                { stop("untested"); }

  void unimplemented(const char* what = "");

  void should_not_reach_here()                   { stop("should not reach here"); }

  // Stack overflow checking
  void bang_stack_with_offset(int offset) {
    // stack grows down, caller passes positive offset
    assert(offset > 0, "must bang with negative offset");
    sub(rscratch2, sp, offset);
    str(zr, Address(rscratch2));
  }

  // Writes to stack successive pages until offset reached to check for
  // stack overflow + shadow pages.  Also, clobbers tmp
  void bang_stack_size(Register size, Register tmp);

  // Check for reserved stack access in method being exited (for JIT)
  void reserved_stack_check();

  // Arithmetics

  void addptr(const Address &dst, int32_t src);
  void cmpptr(Register src1, Address src2);

  void cmpoop(Register obj1, Register obj2);

  // Various forms of CAS

  void cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp,
                          Label &succeed, Label *fail);
  void cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp,
                  Label &succeed, Label *fail);

  void cmpxchgw(Register oldv, Register newv, Register addr, Register tmp,
                Label &succeed, Label *fail);

  void atomic_add(Register prev, RegisterOrConstant incr, Register addr);
  void atomic_addw(Register prev, RegisterOrConstant incr, Register addr);
  void atomic_addal(Register prev, RegisterOrConstant incr, Register addr);
  void atomic_addalw(Register prev, RegisterOrConstant incr, Register addr);

  void atomic_xchg(Register prev, Register newv, Register addr);
  void atomic_xchgw(Register prev, Register newv, Register addr);
  void atomic_xchgl(Register prev, Register newv, Register addr);
  void atomic_xchglw(Register prev, Register newv, Register addr);
  void atomic_xchgal(Register prev, Register newv, Register addr);
  void atomic_xchgalw(Register prev, Register newv, Register addr);

  void orptr(Address adr, RegisterOrConstant src) {
    ldr(rscratch1, adr);
    if (src.is_register())
      orr(rscratch1, rscratch1, src.as_register());
    else
      orr(rscratch1, rscratch1, src.as_constant());
    str(rscratch1, adr);
  }

  // A generic CAS; success or failure is in the EQ flag.
  // Clobbers rscratch1
  void cmpxchg(Register addr, Register expected, Register new_val,
               enum operand_size size,
               bool acquire, bool release, bool weak,
               Register result);

private:
  void compare_eq(Register rn, Register rm, enum operand_size size);

#ifdef ASSERT
  // Template short-hand support to clean up after a failed call to trampoline
  // call generation (see trampoline_call() below), when a set of Labels must
  // be reset (before returning).
  template<typename Label, typename... More>
  void reset_labels(Label &lbl, More&... more) {
    lbl.reset(); reset_labels(more...);
  }
  template<typename Label>
  void reset_labels(Label &lbl) {
    lbl.reset();
  }
#endif

public:
  // Calls

  address trampoline_call(Address entry, CodeBuffer* cbuf = NULL);

  static bool far_branches() {
    return ReservedCodeCacheSize > branch_range;
  }

  // Jumps that can reach anywhere in the code cache.
  // Trashes tmp.
  void far_call(Address entry, CodeBuffer *cbuf = NULL, Register tmp = rscratch1);
  void far_jump(Address entry, CodeBuffer *cbuf = NULL, Register tmp = rscratch1);

  static int far_branch_size() {
    if (far_branches()) {
      return 3 * 4;  // adrp, add, br
    } else {
      return 4;
    }
  }

  // Emit the CompiledIC call idiom
  address ic_call(address entry, jint method_index = 0);

public:

  // Data

  void mov_metadata(Register dst, Metadata* obj);
  Address allocate_metadata_address(Metadata* obj);
  Address constant_oop_address(jobject obj);

  void movoop(Register dst, jobject obj, bool immediate = false);

  // CRC32 code for java.util.zip.CRC32::updateBytes() intrinsic.
  void kernel_crc32(Register crc, Register buf, Register len,
        Register table0, Register table1, Register table2, Register table3,
        Register tmp, Register tmp2, Register tmp3);
  // CRC32 code for java.util.zip.CRC32C::updateBytes() intrinsic.
  void kernel_crc32c(Register crc, Register buf, Register len,
        Register table0, Register table1, Register table2, Register table3,
        Register tmp, Register tmp2, Register tmp3);

  // Stack push and pop individual 64 bit registers
  void push(Register src);
  void pop(Register dst);

  // push all registers onto the stack
  void pusha();
  void popa();

  void repne_scan(Register addr, Register value, Register count,
                  Register scratch);
  void repne_scanw(Register addr, Register value, Register count,
                   Register scratch);

  typedef void (MacroAssembler::* add_sub_imm_insn)(Register Rd, Register Rn, unsigned imm);
  typedef void (MacroAssembler::* add_sub_reg_insn)(Register Rd, Register Rn, Register Rm, enum shift_kind kind, unsigned shift);

  // If a constant does not fit in an immediate field, generate some
  // number of MOV instructions and then perform the operation.
  void wrap_add_sub_imm_insn(Register Rd, Register Rn, unsigned imm,
                             add_sub_imm_insn insn1,
                             add_sub_reg_insn insn2);
  // Separate version which sets the flags
  void wrap_adds_subs_imm_insn(Register Rd, Register Rn, unsigned imm,
                               add_sub_imm_insn insn1,
                               add_sub_reg_insn insn2);
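  // For example, add(r0, r1, 0x123456) cannot be encoded as a single
  // add-immediate (the constant fits neither imm12 nor imm12 << 12), so the
  // wrapper materializes the constant into a scratch register with mov/movk
  // and emits the register-register form instead (an illustrative note).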

#define WRAP(INSN)                                                      \
  void INSN(Register Rd, Register Rn, unsigned imm) {                   \
    wrap_add_sub_imm_insn(Rd, Rn, imm, &Assembler::INSN, &Assembler::INSN); \
  }                                                                     \
                                                                        \
  void INSN(Register Rd, Register Rn, Register Rm,                      \
             enum shift_kind kind, unsigned shift = 0) {                \
    Assembler::INSN(Rd, Rn, Rm, kind, shift);                           \
  }                                                                     \
                                                                        \
  void INSN(Register Rd, Register Rn, Register Rm) {                    \
    Assembler::INSN(Rd, Rn, Rm);                                        \
  }                                                                     \
                                                                        \
  void INSN(Register Rd, Register Rn, Register Rm,                      \
           ext::operation option, int amount = 0) {                     \
    Assembler::INSN(Rd, Rn, Rm, option, amount);                        \
  }

  WRAP(add) WRAP(addw) WRAP(sub) WRAP(subw)

#undef WRAP
#define WRAP(INSN)                                                      \
  void INSN(Register Rd, Register Rn, unsigned imm) {                   \
    wrap_adds_subs_imm_insn(Rd, Rn, imm, &Assembler::INSN, &Assembler::INSN); \
  }                                                                     \
                                                                        \
  void INSN(Register Rd, Register Rn, Register Rm,                      \
             enum shift_kind kind, unsigned shift = 0) {                \
    Assembler::INSN(Rd, Rn, Rm, kind, shift);                           \
  }                                                                     \
                                                                        \
  void INSN(Register Rd, Register Rn, Register Rm) {                    \
    Assembler::INSN(Rd, Rn, Rm);                                        \
  }                                                                     \
                                                                        \
  void INSN(Register Rd, Register Rn, Register Rm,                      \
           ext::operation option, int amount = 0) {                     \
    Assembler::INSN(Rd, Rn, Rm, option, amount);                        \
  }

  WRAP(adds) WRAP(addsw) WRAP(subs) WRAP(subsw)

  void add(Register Rd, Register Rn, RegisterOrConstant increment);
  void addw(Register Rd, Register Rn, RegisterOrConstant increment);
  void sub(Register Rd, Register Rn, RegisterOrConstant decrement);
  void subw(Register Rd, Register Rn, RegisterOrConstant decrement);

  void adrp(Register reg1, const Address &dest, uint64_t &byte_offset);

  void verified_entry(Compile* C, int sp_inc);

  // Inline type specific methods
  #include "asm/macroAssembler_common.hpp"

  int store_inline_type_fields_to_buf(ciInlineKlass* vk, bool from_interpreter = true);
  bool move_helper(VMReg from, VMReg to, BasicType bt, RegState reg_state[]);
  bool unpack_inline_helper(const GrowableArray<SigEntry>* sig, int& sig_index,
                            VMReg from, int& from_index, VMRegPair* to, int to_count, int& to_index,
                            RegState reg_state[]);
  bool pack_inline_helper(const GrowableArray<SigEntry>* sig, int& sig_index, int vtarg_index,
                          VMRegPair* from, int from_count, int& from_index, VMReg to,
                          RegState reg_state[], Register val_array);
  int extend_stack_for_inline_args(int args_on_stack);
  void remove_frame(int initial_framesize, bool needs_stack_repair);
  VMReg spill_reg_for(VMReg reg);
  void save_stack_increment(int sp_inc, int frame_size);

  void tableswitch(Register index, jint lowbound, jint highbound,
                   Label &jumptable, Label &jumptable_end, int stride = 1) {
    adr(rscratch1, jumptable);
    subsw(rscratch2, index, lowbound);
    subsw(zr, rscratch2, highbound - lowbound);
    br(Assembler::HS, jumptable_end);
    add(rscratch1, rscratch1, rscratch2,
        ext::sxtw, exact_log2(stride * Assembler::instruction_size));
    br(rscratch1);
  }
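  // Illustrative use (a sketch; labels and registers are hypothetical):
  //
  //   Label table, table_end;
  //   tableswitch(r2, 0, 4, table, table_end);  // dispatch on r2 in [0, 4)
  //   bind(table);
  //   b(case0); b(case1); b(case2); b(case3);   // one branch per stride slot
  //   bind(table_end);                          // out-of-range falls out here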
1270 
1271   // Form an address from base + offset in Rd.  Rd may or may not
1272   // actually be used: you must use the Address that is returned.  It
1273   // is up to you to ensure that the shift provided matches the size
1274   // of your data.
1275   Address form_address(Register Rd, Register base, int64_t byte_offset, int shift);
1276 
1277   // Return true iff an address is within the 48-bit AArch64 address
1278   // space.
1279   bool is_valid_AArch64_address(address a) {
1280     return ((uint64_t)a >> 48) == 0;
1281   }
1282 
1283   // Load the base of the cardtable byte map into reg.
1284   void load_byte_map_base(Register reg);
1285 
1286   // Prolog generator routines to support switch between x86 code and
1287   // generated ARM code
1288 
1289   // routine to generate an x86 prolog for a stub function which
1290   // bootstraps into the generated ARM code which directly follows the
1291   // stub
1292   //
1293 
 public:

  void ldr_constant(Register dest, const Address &const_addr) {
    if (NearCpool) {
      ldr(dest, const_addr);
    } else {
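      // Far constant pool: adrp leaves a page base in dest and the
      // remaining byte offset in `offset`, which the ldr below applies
      // as its immediate, so dest + offset addresses the constant.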
      uint64_t offset;
      adrp(dest, InternalAddress(const_addr.target()), offset);
      ldr(dest, Address(dest, offset));
    }
  }

  address read_polling_page(Register r, relocInfo::relocType rtype);
  void get_polling_page(Register dest, relocInfo::relocType rtype);

  // CRC32 code for java.util.zip.CRC32::updateBytes() intrinsic.
  void update_byte_crc32(Register crc, Register val, Register table);
  void update_word_crc32(Register crc, Register v, Register tmp,
        Register table0, Register table1, Register table2, Register table3,
        bool upper = false);

  address has_negatives(Register ary1, Register len, Register result);

  address arrays_equals(Register a1, Register a2, Register result, Register cnt1,
                        Register tmp1, Register tmp2, Register tmp3, int elem_size);

  void string_equals(Register a1, Register a2, Register result, Register cnt1,
                     int elem_size);

  void fill_words(Register base, Register cnt, Register value);
  void fill_words(Register base, uint64_t cnt, Register value);

  void zero_words(Register base, uint64_t cnt);
  address zero_words(Register ptr, Register cnt);
  void zero_dcache_blocks(Register base, Register cnt);

  static const int zero_words_block_size;

  address byte_array_inflate(Register src, Register dst, Register len,
                             FloatRegister vtmp1, FloatRegister vtmp2,
                             FloatRegister vtmp3, Register tmp4);

  void char_array_compress(Register src, Register dst, Register len,
                           FloatRegister tmp1Reg, FloatRegister tmp2Reg,
                           FloatRegister tmp3Reg, FloatRegister tmp4Reg,
                           Register result);

  void encode_iso_array(Register src, Register dst,
                        Register len, Register result,
                        FloatRegister Vtmp1, FloatRegister Vtmp2,
                        FloatRegister Vtmp3, FloatRegister Vtmp4);
  void fast_log(FloatRegister vtmp0, FloatRegister vtmp1, FloatRegister vtmp2,
                FloatRegister vtmp3, FloatRegister vtmp4, FloatRegister vtmp5,
                FloatRegister tmpC1, FloatRegister tmpC2, FloatRegister tmpC3,
                FloatRegister tmpC4, Register tmp1, Register tmp2,
                Register tmp3, Register tmp4, Register tmp5);
  void generate_dsin_dcos(bool isCos, address npio2_hw, address two_over_pi,
      address pio2, address dsin_coef, address dcos_coef);
 private:
  // begin trigonometric functions support block
  void generate__ieee754_rem_pio2(address npio2_hw, address two_over_pi, address pio2);
  void generate__kernel_rem_pio2(address two_over_pi, address pio2);
  void generate_kernel_sin(FloatRegister x, bool iyIsOne, address dsin_coef);
  void generate_kernel_cos(FloatRegister x, address dcos_coef);
  // end trigonometric functions support block
  void add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo,
                       Register src1, Register src2);
  void add2_with_carry(Register dest_hi, Register dest_lo, Register src1, Register src2) {
    add2_with_carry(dest_hi, dest_hi, dest_lo, src1, src2);
  }
  void multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart,
                             Register y, Register y_idx, Register z,
                             Register carry, Register product,
                             Register idx, Register kdx);
  void multiply_128_x_128_loop(Register y, Register z,
                               Register carry, Register carry2,
                               Register idx, Register jdx,
                               Register yz_idx1, Register yz_idx2,
                               Register tmp, Register tmp3, Register tmp4,
                               Register tmp7, Register product_hi);
  void kernel_crc32_using_crc32(Register crc, Register buf,
        Register len, Register tmp0, Register tmp1, Register tmp2,
        Register tmp3);
  void kernel_crc32c_using_crc32c(Register crc, Register buf,
        Register len, Register tmp0, Register tmp1, Register tmp2,
        Register tmp3);

  void ghash_modmul(FloatRegister result,
                    FloatRegister result_lo, FloatRegister result_hi, FloatRegister b,
                    FloatRegister a, FloatRegister vzr, FloatRegister a1_xor_a0, FloatRegister p,
                    FloatRegister t1, FloatRegister t2, FloatRegister t3);
  void ghash_load_wide(int index, Register data, FloatRegister result, FloatRegister state);
 public:
  void multiply_to_len(Register x, Register xlen, Register y, Register ylen, Register z,
                       Register zlen, Register tmp1, Register tmp2, Register tmp3,
                       Register tmp4, Register tmp5, Register tmp6, Register tmp7);
  void mul_add(Register out, Register in, Register offs, Register len, Register k);
  void ghash_multiply(FloatRegister result_lo, FloatRegister result_hi,
                      FloatRegister a, FloatRegister b, FloatRegister a1_xor_a0,
                      FloatRegister tmp1, FloatRegister tmp2, FloatRegister tmp3);
  void ghash_multiply_wide(int index,
                           FloatRegister result_lo, FloatRegister result_hi,
                           FloatRegister a, FloatRegister b, FloatRegister a1_xor_a0,
                           FloatRegister tmp1, FloatRegister tmp2, FloatRegister tmp3);
  void ghash_reduce(FloatRegister result, FloatRegister lo, FloatRegister hi,
                    FloatRegister p, FloatRegister z, FloatRegister t1);
  void ghash_reduce_wide(int index, FloatRegister result, FloatRegister lo, FloatRegister hi,
                         FloatRegister p, FloatRegister z, FloatRegister t1);
  void ghash_processBlocks_wide(address p, Register state, Register subkeyH,
                                Register data, Register blocks, int unrolls);

  void aesenc_loadkeys(Register key, Register keylen);
  void aesecb_encrypt(Register from, Register to, Register keylen,
                      FloatRegister data = v0, int unrolls = 1);
  void aesecb_decrypt(Register from, Register to, Register key, Register keylen);
  void aes_round(FloatRegister input, FloatRegister subkey);

  // Place an ISB after code that may have been modified due to a safepoint.
  void safepoint_isb();

 private:
  // Return the effective address r + (r1 << ext) + offset.
  // Uses rscratch2.
  Address offsetted_address(Register r, Register r1, Address::extend ext,
                            int offset, int size);

  // Returns an address on the stack which is reachable with a ldr/str of the given size.
  // Uses rscratch2 if the address is not directly reachable.
  Address spill_address(int size, int offset, Register tmp=rscratch2);
  Address sve_spill_address(int sve_reg_size_in_bytes, int offset, Register tmp=rscratch2);

  bool merge_alignment_check(Register base, size_t size, int64_t cur_offset, int64_t prev_offset) const;

  // Check whether two loads/stores can be merged into an ldp/stp.
  bool ldst_can_merge(Register rx, const Address &adr, size_t cur_size_in_bytes, bool is_store) const;

  // Merge the current load/store with the previous load/store into an ldp/stp.
  void merge_ldst(Register rx, const Address &adr, size_t cur_size_in_bytes, bool is_store);

  // Try to merge two loads/stores into an ldp/stp. Returns true on success, false otherwise.
  bool try_merge_ldst(Register rt, const Address &adr, size_t cur_size_in_bytes, bool is_store);
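
  // For example (illustrative): str(r0, Address(sp, 8)) immediately followed
  // by str(r1, Address(sp, 16)) touches adjacent, suitably aligned slots and
  // can be merged into the single instruction stp(r0, r1, Address(sp, 8)).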

 public:
  void spill(Register Rx, bool is64, int offset) {
    if (is64) {
      str(Rx, spill_address(8, offset));
    } else {
      strw(Rx, spill_address(4, offset));
    }
  }
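  // SIMD_RegVariant encodes log2 of the register-variant size in bytes
  // (B = 0 up to Q = 4), so 1 << T below is the spill size in bytes.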
  void spill(FloatRegister Vx, SIMD_RegVariant T, int offset) {
    str(Vx, T, spill_address(1 << (int)T, offset));
  }
  void spill_sve_vector(FloatRegister Zx, int offset, int vector_reg_size_in_bytes) {
    sve_str(Zx, sve_spill_address(vector_reg_size_in_bytes, offset));
  }
  void unspill(Register Rx, bool is64, int offset) {
    if (is64) {
      ldr(Rx, spill_address(8, offset));
    } else {
      ldrw(Rx, spill_address(4, offset));
    }
  }
  void unspill(FloatRegister Vx, SIMD_RegVariant T, int offset) {
    ldr(Vx, T, spill_address(1 << (int)T, offset));
  }
  void unspill_sve_vector(FloatRegister Zx, int offset, int vector_reg_size_in_bytes) {
    sve_ldr(Zx, sve_spill_address(vector_reg_size_in_bytes, offset));
  }
  void spill_copy128(int src_offset, int dst_offset,
                     Register tmp1=rscratch1, Register tmp2=rscratch2) {
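    // ldp/stp of X registers take a signed 7-bit immediate scaled by 8
    // (multiples of 8 up to 504), so only small, 8-byte-aligned offsets can
    // use the paired form directly; all other offsets go through
    // spill/unspill, which can materialize larger addresses via rscratch2.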
    if (src_offset < 512 && (src_offset & 7) == 0 &&
        dst_offset < 512 && (dst_offset & 7) == 0) {
      ldp(tmp1, tmp2, Address(sp, src_offset));
      stp(tmp1, tmp2, Address(sp, dst_offset));
    } else {
      unspill(tmp1, true, src_offset);
      spill(tmp1, true, dst_offset);
      unspill(tmp1, true, src_offset + 8);
      spill(tmp1, true, dst_offset + 8);
    }
  }
  void spill_copy_sve_vector_stack_to_stack(int src_offset, int dst_offset,
                                            int sve_vec_reg_size_in_bytes) {
    assert(sve_vec_reg_size_in_bytes % 16 == 0, "unexpected sve vector reg size");
    for (int i = 0; i < sve_vec_reg_size_in_bytes / 16; i++) {
      spill_copy128(src_offset, dst_offset);
      src_offset += 16;
      dst_offset += 16;
    }
  }
  void cache_wb(Address line);
  void cache_wbsync(bool is_pre);

 private:
  // Check that the current thread doesn't need a cross modify fence.
  void verify_cross_modify_fence_not_required() PRODUCT_RETURN;

};

#ifdef ASSERT
inline bool AbstractAssembler::pd_check_instruction_mark() { return false; }
#endif

/**
 * class SkipIfEqual:
 *
 * Instantiating this class emits assembly code that, depending on a run-time
 * check of the flag passed to the constructor, jumps around any code emitted
 * between the creation of the instance and its automatic destruction at the
 * end of the scope block.
 */
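//
// Illustrative use (a sketch; `masm` and `SomeDiagnosticFlag` are
// hypothetical names):
//
//   {
//     SkipIfEqual skip(masm, &SomeDiagnosticFlag, false);
//     // ... code emitted here is branched over at run time
//     //     whenever SomeDiagnosticFlag == false ...
//   }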
class SkipIfEqual {
 private:
  MacroAssembler* _masm;
  Label _label;

 public:
  SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value);
  ~SkipIfEqual();
};

struct tableswitch {
  Register _reg;
  int _insn_index; jint _first_key; jint _last_key;
  Label _after;
  Label _branches;
};

#endif // CPU_AARCH64_MACROASSEMBLER_AARCH64_HPP