1 /*
   2  * Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved.
   3  * Copyright (c) 2014, 2021, Red Hat Inc. All rights reserved.
   4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5  *
   6  * This code is free software; you can redistribute it and/or modify it
   7  * under the terms of the GNU General Public License version 2 only, as
   8  * published by the Free Software Foundation.
   9  *
  10  * This code is distributed in the hope that it will be useful, but WITHOUT
  11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13  * version 2 for more details (a copy is included in the LICENSE file that
  14  * accompanied this code).
  15  *
  16  * You should have received a copy of the GNU General Public License version
  17  * 2 along with this work; if not, write to the Free Software Foundation,
  18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19  *
  20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21  * or visit www.oracle.com if you need additional information or have any
  22  * questions.
  23  *
  24  */
  25 
  26 #ifndef CPU_AARCH64_ASSEMBLER_AARCH64_HPP
  27 #define CPU_AARCH64_ASSEMBLER_AARCH64_HPP
  28 
  29 #include "asm/register.hpp"
  30 #include "metaprogramming/enableIf.hpp"
  31 
#ifdef __GNUC__

// GCC-compatible compilers: emit a "nop" through inline assembly.
// The asm statement needs volatile so that the compiler doesn't optimize it away.
// Note that the expansion already ends with ';'.
#define NOP() asm volatile ("nop");

#elif defined(_MSC_VER)

// Use MSVC intrinsic: https://docs.microsoft.com/en-us/cpp/intrinsics/arm64-intrinsics?view=vs-2019#I
// Note that the expansion already ends with ';'.
#define NOP() __nop();

#endif
  43 
  44 
  45 // definitions of various symbolic names for machine registers
  46 
// First, calls between C and Java, both of which use 8 general registers
// and 8 floating registers
  49 
  50 // we also have to copy between x86 and ARM registers but that's a
  51 // secondary complication -- not all code employing C call convention
  52 // executes as x86 code though -- we generate some of it
  53 
  54 class Argument {
  55  public:
  56   enum {
  57     n_int_register_parameters_c   = 8,  // r0, r1, ... r7 (c_rarg0, c_rarg1, ...)
  58     n_float_register_parameters_c = 8,  // v0, v1, ... v7 (c_farg0, c_farg1, ... )
  59 
  60     n_int_register_parameters_j   = 8, // r1, ... r7, r0 (rj_rarg0, j_rarg1, ...
  61     n_float_register_parameters_j = 8  // v0, v1, ... v7 (j_farg0, j_farg1, ...
  62   };
  63 };
  64 
// Integer argument registers of the C calling convention:
// c_rarg0 .. c_rarg7 are aliases for r0 .. r7.
REGISTER_DECLARATION(Register, c_rarg0, r0);
REGISTER_DECLARATION(Register, c_rarg1, r1);
REGISTER_DECLARATION(Register, c_rarg2, r2);
REGISTER_DECLARATION(Register, c_rarg3, r3);
REGISTER_DECLARATION(Register, c_rarg4, r4);
REGISTER_DECLARATION(Register, c_rarg5, r5);
REGISTER_DECLARATION(Register, c_rarg6, r6);
REGISTER_DECLARATION(Register, c_rarg7, r7);

// Floating-point argument registers of the C calling convention:
// c_farg0 .. c_farg7 are aliases for v0 .. v7.
REGISTER_DECLARATION(FloatRegister, c_farg0, v0);
REGISTER_DECLARATION(FloatRegister, c_farg1, v1);
REGISTER_DECLARATION(FloatRegister, c_farg2, v2);
REGISTER_DECLARATION(FloatRegister, c_farg3, v3);
REGISTER_DECLARATION(FloatRegister, c_farg4, v4);
REGISTER_DECLARATION(FloatRegister, c_farg5, v5);
REGISTER_DECLARATION(FloatRegister, c_farg6, v6);
REGISTER_DECLARATION(FloatRegister, c_farg7, v7);
  82 
  83 // Symbolically name the register arguments used by the Java calling convention.
  84 // We have control over the convention for java so we can do what we please.
  85 // What pleases us is to offset the java calling convention so that when
  86 // we call a suitable jni method the arguments are lined up and we don't
// have to do much shuffling. A suitable jni method is non-static and has a
// small number of arguments
  89 //
  90 //  |--------------------------------------------------------------------|
  91 //  | c_rarg0  c_rarg1  c_rarg2 c_rarg3 c_rarg4 c_rarg5 c_rarg6 c_rarg7  |
  92 //  |--------------------------------------------------------------------|
  93 //  | r0       r1       r2      r3      r4      r5      r6      r7       |
  94 //  |--------------------------------------------------------------------|
  95 //  | j_rarg7  j_rarg0  j_rarg1 j_rarg2 j_rarg3 j_rarg4 j_rarg5 j_rarg6  |
  96 //  |--------------------------------------------------------------------|
  97 
  98 
// Java integer argument registers: j_rargN is an alias for c_rarg(N+1),
// and j_rarg7 wraps around to c_rarg0.
REGISTER_DECLARATION(Register, j_rarg0, c_rarg1);
REGISTER_DECLARATION(Register, j_rarg1, c_rarg2);
REGISTER_DECLARATION(Register, j_rarg2, c_rarg3);
REGISTER_DECLARATION(Register, j_rarg3, c_rarg4);
REGISTER_DECLARATION(Register, j_rarg4, c_rarg5);
REGISTER_DECLARATION(Register, j_rarg5, c_rarg6);
REGISTER_DECLARATION(Register, j_rarg6, c_rarg7);
REGISTER_DECLARATION(Register, j_rarg7, c_rarg0);

// Java floating args are passed as per C:
// j_farg0 .. j_farg7 are aliases for v0 .. v7.

REGISTER_DECLARATION(FloatRegister, j_farg0, v0);
REGISTER_DECLARATION(FloatRegister, j_farg1, v1);
REGISTER_DECLARATION(FloatRegister, j_farg2, v2);
REGISTER_DECLARATION(FloatRegister, j_farg3, v3);
REGISTER_DECLARATION(FloatRegister, j_farg4, v4);
REGISTER_DECLARATION(FloatRegister, j_farg5, v5);
REGISTER_DECLARATION(FloatRegister, j_farg6, v6);
REGISTER_DECLARATION(FloatRegister, j_farg7, v7);
 118 
// registers used to hold VM data either temporarily within a method
// or across method calls

// volatile (caller-save) registers

// r8 is used for indirect result location return
// we use it and r9 as scratch registers
REGISTER_DECLARATION(Register, rscratch1, r8);
REGISTER_DECLARATION(Register, rscratch2, r9);

// current method -- must be in a call-clobbered register
REGISTER_DECLARATION(Register, rmethod,   r12);

// non-volatile (callee-save) registers are r16-29
// of which the following are dedicated global state
// NOTE(review): the stated range, and the presence of lr (r30) in the list
// below, look inconsistent with the AArch64 procedure call standard --
// confirm the intended range.

// link register
REGISTER_DECLARATION(Register, lr,        r30);
// frame pointer
REGISTER_DECLARATION(Register, rfp,       r29);
// current thread
REGISTER_DECLARATION(Register, rthread,   r28);
// base of heap
REGISTER_DECLARATION(Register, rheapbase, r27);
// constant pool cache
REGISTER_DECLARATION(Register, rcpool,    r26);
// monitors allocated on stack
REGISTER_DECLARATION(Register, rmonitors, r25);
// locals on stack
REGISTER_DECLARATION(Register, rlocals,   r24);
// bytecode pointer
REGISTER_DECLARATION(Register, rbcp,      r22);
// Dispatch table base
REGISTER_DECLARATION(Register, rdispatch, r21);
// Java stack pointer
REGISTER_DECLARATION(Register, esp,      r20);

// Preserved predicate register with all elements set TRUE.
REGISTER_DECLARATION(PRegister, ptrue, p7);
 158 
// Asserts ARG1, using the stringified expression itself as the message.
#define assert_cond(ARG1) assert(ARG1, #ARG1)

// Helpers for encoding and validating immediates. Only declared here; the
// definitions are not part of this section of the file.
namespace asm_util {
  uint32_t encode_logical_immediate(bool is32, uint64_t imm);
  uint32_t encode_sve_logical_immediate(unsigned elembits, uint64_t imm);
  bool operand_valid_for_immediate_bits(int64_t imm, unsigned nbits);
};

// Make the helpers above usable without the asm_util:: qualifier.
using namespace asm_util;


// Forward declaration: Instruction_aarch64 keeps a pointer to its Assembler.
class Assembler;
 171 
 172 class Instruction_aarch64 {
 173   unsigned insn;
 174 #ifdef ASSERT
 175   unsigned bits;
 176 #endif
 177   Assembler *assem;
 178 
 179 public:
 180 
 181   Instruction_aarch64(class Assembler *as) {
 182 #ifdef ASSERT
 183     bits = 0;
 184 #endif
 185     insn = 0;
 186     assem = as;
 187   }
 188 
 189   inline ~Instruction_aarch64();
 190 
 191   unsigned &get_insn() { return insn; }
 192 #ifdef ASSERT
 193   unsigned &get_bits() { return bits; }
 194 #endif
 195 
 196   static inline int32_t extend(unsigned val, int hi = 31, int lo = 0) {
 197     union {
 198       unsigned u;
 199       int n;
 200     };
 201 
 202     u = val << (31 - hi);
 203     n = n >> (31 - hi + lo);
 204     return n;
 205   }
 206 
 207   static inline uint32_t extract(uint32_t val, int msb, int lsb) {
 208     int nbits = msb - lsb + 1;
 209     assert_cond(msb >= lsb);
 210     uint32_t mask = checked_cast<uint32_t>(right_n_bits(nbits));
 211     uint32_t result = val >> lsb;
 212     result &= mask;
 213     return result;
 214   }
 215 
 216   static inline int32_t sextract(uint32_t val, int msb, int lsb) {
 217     uint32_t uval = extract(val, msb, lsb);
 218     return extend(uval, msb - lsb);
 219   }
 220 
 221   static void patch(address a, int msb, int lsb, uint64_t val) {
 222     int nbits = msb - lsb + 1;
 223     guarantee(val < (1ULL << nbits), "Field too big for insn");
 224     assert_cond(msb >= lsb);
 225     unsigned mask = checked_cast<unsigned>(right_n_bits(nbits));
 226     val <<= lsb;
 227     mask <<= lsb;
 228     unsigned target = *(unsigned *)a;
 229     target &= ~mask;
 230     target |= val;
 231     *(unsigned *)a = target;
 232   }
 233 
 234   static void spatch(address a, int msb, int lsb, int64_t val) {
 235     int nbits = msb - lsb + 1;
 236     int64_t chk = val >> (nbits - 1);
 237     guarantee (chk == -1 || chk == 0, "Field too big for insn");
 238     unsigned uval = val;
 239     unsigned mask = checked_cast<unsigned>(right_n_bits(nbits));
 240     uval &= mask;
 241     uval <<= lsb;
 242     mask <<= lsb;
 243     unsigned target = *(unsigned *)a;
 244     target &= ~mask;
 245     target |= uval;
 246     *(unsigned *)a = target;
 247   }
 248 
 249   void f(unsigned val, int msb, int lsb) {
 250     int nbits = msb - lsb + 1;
 251     guarantee(val < (1ULL << nbits), "Field too big for insn");
 252     assert_cond(msb >= lsb);
 253     val <<= lsb;
 254     insn |= val;
 255 #ifdef ASSERT
 256     unsigned mask = checked_cast<unsigned>(right_n_bits(nbits));
 257     mask <<= lsb;
 258     assert_cond((bits & mask) == 0);
 259     bits |= mask;
 260 #endif
 261   }
 262 
 263   void f(unsigned val, int bit) {
 264     f(val, bit, bit);
 265   }
 266 
 267   void sf(int64_t val, int msb, int lsb) {
 268     int nbits = msb - lsb + 1;
 269     int64_t chk = val >> (nbits - 1);
 270     guarantee (chk == -1 || chk == 0, "Field too big for insn");
 271     unsigned uval = val;
 272     unsigned mask = checked_cast<unsigned>(right_n_bits(nbits));
 273     uval &= mask;
 274     f(uval, lsb + nbits - 1, lsb);
 275   }
 276 
 277   void rf(Register r, int lsb) {
 278     f(r->encoding_nocheck(), lsb + 4, lsb);
 279   }
 280 
 281   // reg|ZR
 282   void zrf(Register r, int lsb) {
 283     f(r->encoding_nocheck() - (r == zr), lsb + 4, lsb);
 284   }
 285 
 286   // reg|SP
 287   void srf(Register r, int lsb) {
 288     f(r == sp ? 31 : r->encoding_nocheck(), lsb + 4, lsb);
 289   }
 290 
 291   void rf(FloatRegister r, int lsb) {
 292     f(r->encoding_nocheck(), lsb + 4, lsb);
 293   }
 294 
 295   void prf(PRegister r, int lsb) {
 296     f(r->encoding_nocheck(), lsb + 3, lsb);
 297   }
 298 
 299   void pgrf(PRegister r, int lsb) {
 300     f(r->encoding_nocheck(), lsb + 2, lsb);
 301   }
 302 
 303   unsigned get(int msb = 31, int lsb = 0) {
 304     int nbits = msb - lsb + 1;
 305     unsigned mask = checked_cast<unsigned>(right_n_bits(nbits)) << lsb;
 306     assert_cond((bits & mask) == mask);
 307     return (insn & mask) >> lsb;
 308   }
 309 };
 310 
// Declares a local Instruction_aarch64 named current_insn, constructed from
// `this`. Note that the expansion already ends with ';'.
#define starti Instruction_aarch64 current_insn(this);
 312 
 313 class PrePost {
 314   int _offset;
 315   Register _r;
 316 public:
 317   PrePost(Register reg, int o) : _offset(o), _r(reg) { }
 318   int offset() { return _offset; }
 319   Register reg() { return _r; }
 320 };
 321 
 322 class Pre : public PrePost {
 323 public:
 324   Pre(Register reg, int o) : PrePost(reg, o) { }
 325 };
 326 class Post : public PrePost {
 327   Register _idx;
 328   bool _is_postreg;
 329 public:
 330   Post(Register reg, int o) : PrePost(reg, o) { _idx = NULL; _is_postreg = false; }
 331   Post(Register reg, Register idx) : PrePost(reg, 0) { _idx = idx; _is_postreg = true; }
 332   Register idx_reg() { return _idx; }
 333   bool is_postreg() {return _is_postreg; }
 334 };
 335 
 336 namespace ext
 337 {
 338   enum operation { uxtb, uxth, uxtw, uxtx, sxtb, sxth, sxtw, sxtx };
 339 };
 340 
 341 // Addressing modes
 342 class Address {
 343  public:
 344 
 345   enum mode { no_mode, base_plus_offset, pre, post, post_reg, pcrel,
 346               base_plus_offset_reg, literal };
 347 
 348   // Shift and extend for base reg + reg offset addressing
 349   class extend {
 350     int _option, _shift;
 351     ext::operation _op;
 352   public:
 353     extend() { }
 354     extend(int s, int o, ext::operation op) : _option(o), _shift(s), _op(op) { }
 355     int option() const{ return _option; }
 356     int shift() const { return _shift; }
 357     ext::operation op() const { return _op; }
 358   };
 359   class uxtw : public extend {
 360   public:
 361     uxtw(int shift = -1): extend(shift, 0b010, ext::uxtw) { }
 362   };
 363   class lsl : public extend {
 364   public:
 365     lsl(int shift = -1): extend(shift, 0b011, ext::uxtx) { }
 366   };
 367   class sxtw : public extend {
 368   public:
 369     sxtw(int shift = -1): extend(shift, 0b110, ext::sxtw) { }
 370   };
 371   class sxtx : public extend {
 372   public:
 373     sxtx(int shift = -1): extend(shift, 0b111, ext::sxtx) { }
 374   };
 375 
 376  private:
 377   Register _base;
 378   Register _index;
 379   int64_t _offset;
 380   enum mode _mode;
 381   extend _ext;
 382 
 383   RelocationHolder _rspec;
 384 
 385   // Typically we use AddressLiterals we want to use their rval
 386   // However in some situations we want the lval (effect address) of
 387   // the item.  We provide a special factory for making those lvals.
 388   bool _is_lval;
 389 
 390   // If the target is far we'll need to load the ea of this to a
 391   // register to reach it. Otherwise if near we can do PC-relative
 392   // addressing.
 393   address          _target;
 394 
 395  public:
 396   Address()
 397     : _mode(no_mode) { }
 398   Address(Register r)
 399     : _base(r), _index(noreg), _offset(0), _mode(base_plus_offset), _target(0) { }
 400 
 401   template<typename T, ENABLE_IF(std::is_integral<T>::value)>
 402   Address(Register r, T o)
 403     : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(0) {}
 404 
 405   Address(Register r, ByteSize disp)
 406     : Address(r, in_bytes(disp)) { }
 407   Address(Register r, Register r1, extend ext = lsl())
 408     : _base(r), _index(r1), _offset(0), _mode(base_plus_offset_reg),
 409       _ext(ext), _target(0) { }
 410   Address(Pre p)
 411     : _base(p.reg()), _offset(p.offset()), _mode(pre) { }
 412   Address(Post p)
 413     : _base(p.reg()),  _index(p.idx_reg()), _offset(p.offset()),
 414       _mode(p.is_postreg() ? post_reg : post), _target(0) { }
 415   Address(address target, RelocationHolder const& rspec)
 416     : _mode(literal),
 417       _rspec(rspec),
 418       _is_lval(false),
 419       _target(target)  { }
 420   Address(address target, relocInfo::relocType rtype = relocInfo::external_word_type);
 421   Address(Register base, RegisterOrConstant index, extend ext = lsl())
 422     : _base (base),
 423       _offset(0), _ext(ext), _target(0) {
 424     if (index.is_register()) {
 425       _mode = base_plus_offset_reg;
 426       _index = index.as_register();
 427     } else {
 428       guarantee(ext.option() == ext::uxtx, "should be");
 429       assert(index.is_constant(), "should be");
 430       _mode = base_plus_offset;
 431       _offset = index.as_constant() << ext.shift();
 432     }
 433   }
 434 
 435   Register base() const {
 436     guarantee((_mode == base_plus_offset || _mode == base_plus_offset_reg
 437                || _mode == post || _mode == post_reg),
 438               "wrong mode");
 439     return _base;
 440   }
 441   int64_t offset() const {
 442     return _offset;
 443   }
 444   Register index() const {
 445     return _index;
 446   }
 447   mode getMode() const {
 448     return _mode;
 449   }
 450   bool uses(Register reg) const { return _base == reg || _index == reg; }
 451   address target() const { return _target; }
 452   const RelocationHolder& rspec() const { return _rspec; }
 453 
 454   void encode(Instruction_aarch64 *i) const {
 455     i->f(0b111, 29, 27);
 456     i->srf(_base, 5);
 457 
 458     switch(_mode) {
 459     case base_plus_offset:
 460       {
 461         unsigned size = i->get(31, 30);
 462         if (i->get(26, 26) && i->get(23, 23)) {
 463           // SIMD Q Type - Size = 128 bits
 464           assert(size == 0, "bad size");
 465           size = 0b100;
 466         }
 467         assert(offset_ok_for_immed(_offset, size),
 468                "must be, was: " INT64_FORMAT ", %d", _offset, size);
 469         unsigned mask = (1 << size) - 1;
 470         if (_offset < 0 || _offset & mask) {
 471           i->f(0b00, 25, 24);
 472           i->f(0, 21), i->f(0b00, 11, 10);
 473           i->sf(_offset, 20, 12);
 474         } else {
 475           i->f(0b01, 25, 24);
 476           i->f(_offset >> size, 21, 10);
 477         }
 478       }
 479       break;
 480 
 481     case base_plus_offset_reg:
 482       {
 483         i->f(0b00, 25, 24);
 484         i->f(1, 21);
 485         i->rf(_index, 16);
 486         i->f(_ext.option(), 15, 13);
 487         unsigned size = i->get(31, 30);
 488         if (i->get(26, 26) && i->get(23, 23)) {
 489           // SIMD Q Type - Size = 128 bits
 490           assert(size == 0, "bad size");
 491           size = 0b100;
 492         }
 493         if (size == 0) // It's a byte
 494           i->f(_ext.shift() >= 0, 12);
 495         else {
 496           assert(_ext.shift() <= 0 || _ext.shift() == (int)size, "bad shift");
 497           i->f(_ext.shift() > 0, 12);
 498         }
 499         i->f(0b10, 11, 10);
 500       }
 501       break;
 502 
 503     case pre:
 504       i->f(0b00, 25, 24);
 505       i->f(0, 21), i->f(0b11, 11, 10);
 506       i->sf(_offset, 20, 12);
 507       break;
 508 
 509     case post:
 510       i->f(0b00, 25, 24);
 511       i->f(0, 21), i->f(0b01, 11, 10);
 512       i->sf(_offset, 20, 12);
 513       break;
 514 
 515     default:
 516       ShouldNotReachHere();
 517     }
 518   }
 519 
 520   void encode_pair(Instruction_aarch64 *i) const {
 521     switch(_mode) {
 522     case base_plus_offset:
 523       i->f(0b010, 25, 23);
 524       break;
 525     case pre:
 526       i->f(0b011, 25, 23);
 527       break;
 528     case post:
 529       i->f(0b001, 25, 23);
 530       break;
 531     default:
 532       ShouldNotReachHere();
 533     }
 534 
 535     unsigned size; // Operand shift in 32-bit words
 536 
 537     if (i->get(26, 26)) { // float
 538       switch(i->get(31, 30)) {
 539       case 0b10:
 540         size = 2; break;
 541       case 0b01:
 542         size = 1; break;
 543       case 0b00:
 544         size = 0; break;
 545       default:
 546         ShouldNotReachHere();
 547         size = 0;  // unreachable
 548       }
 549     } else {
 550       size = i->get(31, 31);
 551     }
 552 
 553     size = 4 << size;
 554     guarantee(_offset % size == 0, "bad offset");
 555     i->sf(_offset / size, 21, 15);
 556     i->srf(_base, 5);
 557   }
 558 
 559   void encode_nontemporal_pair(Instruction_aarch64 *i) const {
 560     // Only base + offset is allowed
 561     i->f(0b000, 25, 23);
 562     unsigned size = i->get(31, 31);
 563     size = 4 << size;
 564     guarantee(_offset % size == 0, "bad offset");
 565     i->sf(_offset / size, 21, 15);
 566     i->srf(_base, 5);
 567     guarantee(_mode == Address::base_plus_offset,
 568               "Bad addressing mode for non-temporal op");
 569   }
 570 
 571   void lea(MacroAssembler *, Register) const;
 572 
 573   static bool offset_ok_for_immed(int64_t offset, uint shift);
 574 
 575   static bool offset_ok_for_sve_immed(int64_t offset, int shift, int vl /* sve vector length */) {
 576     if (offset % vl == 0) {
 577       // Convert address offset into sve imm offset (MUL VL).
 578       int sve_offset = offset / vl;
 579       if (((-(1 << (shift - 1))) <= sve_offset) && (sve_offset < (1 << (shift - 1)))) {
 580         // sve_offset can be encoded
 581         return true;
 582       }
 583     }
 584     return false;
 585   }
 586 };
 587 
 588 // Convenience classes
 589 class RuntimeAddress: public Address {
 590 
 591   public:
 592 
 593   RuntimeAddress(address target) : Address(target, relocInfo::runtime_call_type) {}
 594 
 595 };
 596 
 597 class OopAddress: public Address {
 598 
 599   public:
 600 
 601   OopAddress(address target) : Address(target, relocInfo::oop_type){}
 602 
 603 };
 604 
 605 class ExternalAddress: public Address {
 606  private:
 607   static relocInfo::relocType reloc_for_target(address target) {
 608     // Sometimes ExternalAddress is used for values which aren't
 609     // exactly addresses, like the card table base.
 610     // external_word_type can't be used for values in the first page
 611     // so just skip the reloc in that case.
 612     return external_word_Relocation::can_be_relocated(target) ? relocInfo::external_word_type : relocInfo::none;
 613   }
 614 
 615  public:
 616 
 617   ExternalAddress(address target) : Address(target, reloc_for_target(target)) {}
 618 
 619 };
 620 
 621 class InternalAddress: public Address {
 622 
 623   public:
 624 
 625   InternalAddress(address target) : Address(target, relocInfo::internal_word_type) {}
 626 };
 627 
// Number of float registers multiplied by the save slots needed per register.
const int FPUStateSizeInWords = FloatRegisterImpl::number_of_registers *
                                FloatRegisterImpl::save_slots_per_register;
 630 
 631 typedef enum {
 632   PLDL1KEEP = 0b00000, PLDL1STRM, PLDL2KEEP, PLDL2STRM, PLDL3KEEP, PLDL3STRM,
 633   PSTL1KEEP = 0b10000, PSTL1STRM, PSTL2KEEP, PSTL2STRM, PSTL3KEEP, PSTL3STRM,
 634   PLIL1KEEP = 0b01000, PLIL1STRM, PLIL2KEEP, PLIL2STRM, PLIL3KEEP, PLIL3STRM
 635 } prfop;
 636 
 637 class Assembler : public AbstractAssembler {
 638 
 639 public:
 640 
 641 #ifndef PRODUCT
 642   static const uintptr_t asm_bp;
 643 
 644   void emit_int32(jint x) {
 645     if ((uintptr_t)pc() == asm_bp)
 646       NOP();
 647     AbstractAssembler::emit_int32(x);
 648   }
 649 #else
 650   void emit_int32(jint x) {
 651     AbstractAssembler::emit_int32(x);
 652   }
 653 #endif
 654 
  // Size of one instruction (presumably in bytes -- confirm).
  enum { instruction_size = 4 };
 656 
 657   //---<  calculate length of instruction  >---
 658   // We just use the values set above.
 659   // instruction must start at passed address
 660   static unsigned int instr_len(unsigned char *instr) { return instruction_size; }
 661 
 662   //---<  longest instructions  >---
 663   static unsigned int instr_maxlen() { return instruction_size; }
 664 
 665   Address adjust(Register base, int offset, bool preIncrement) {
 666     if (preIncrement)
 667       return Address(Pre(base, offset));
 668     else
 669       return Address(Post(base, offset));
 670   }
 671 
 672   Address pre(Register base, int offset) {
 673     return adjust(base, offset, true);
 674   }
 675 
 676   Address post(Register base, int offset) {
 677     return adjust(base, offset, false);
 678   }
 679 
 680   Address post(Register base, Register idx) {
 681     return Address(Post(base, idx));
 682   }
 683 
  // Declared here only; the definition is not in this section of the file.
  static address locate_next_instruction(address inst);
 685 
// Shorthands for the field helpers of the local `current_insn` object that
// the `starti` macro declares.
#define f current_insn.f
#define sf current_insn.sf
#define rf current_insn.rf
#define srf current_insn.srf
#define zrf current_insn.zrf
#define prf current_insn.prf
#define pgrf current_insn.pgrf
 693 
 694   typedef void (Assembler::* uncond_branch_insn)(address dest);
 695   typedef void (Assembler::* compare_and_branch_insn)(Register Rt, address dest);
 696   typedef void (Assembler::* test_and_branch_insn)(Register Rt, int bitpos, address dest);
 697   typedef void (Assembler::* prefetch_insn)(address target, prfop);
 698 
 699   void wrap_label(Label &L, uncond_branch_insn insn);
 700   void wrap_label(Register r, Label &L, compare_and_branch_insn insn);
 701   void wrap_label(Register r, int bitpos, Label &L, test_and_branch_insn insn);
 702   void wrap_label(Label &L, prfop, prefetch_insn insn);
 703 
  // PC-rel. addressing
  // Declared here only; each is available for a raw address and for an
  // Address operand.

  void adr(Register Rd, address dest);
  void _adrp(Register Rd, address dest);

  void adr(Register Rd, const Address &dest);
  void _adrp(Register Rd, const Address &dest);
 711 
 712   void adr(Register Rd, Label &L) {
 713     wrap_label(Rd, L, &Assembler::Assembler::adr);
 714   }
 715   void _adrp(Register Rd, Label &L) {
 716     wrap_label(Rd, L, &Assembler::_adrp);
 717   }
 718 
  // Declared here only. `offset` is passed by non-const reference
  // (presumably an output -- confirm against the definition).
  void adrp(Register Rd, const Address &dest, uint64_t &offset);

#undef INSN

  // Shared helper behind the three-argument add/sub immediate emitters
  // generated below. Declared here only.
  void add_sub_immediate(Instruction_aarch64 &current_insn, Register Rd, Register Rn,
                         unsigned uimm, int op, int negated_op);
 725 
 726   // Add/subtract (immediate)
 727 #define INSN(NAME, decode, negated)                                     \
 728   void NAME(Register Rd, Register Rn, unsigned imm, unsigned shift) {   \
 729     starti;                                                             \
 730     f(decode, 31, 29), f(0b10001, 28, 24), f(shift, 23, 22), f(imm, 21, 10); \
 731     zrf(Rd, 0), srf(Rn, 5);                                             \
 732   }                                                                     \
 733                                                                         \
 734   void NAME(Register Rd, Register Rn, unsigned imm) {                   \
 735     starti;                                                             \
 736     add_sub_immediate(current_insn, Rd, Rn, imm, decode, negated);      \
 737   }
 738 
 739   INSN(addsw, 0b001, 0b011);
 740   INSN(subsw, 0b011, 0b001);
 741   INSN(adds,  0b101, 0b111);
 742   INSN(subs,  0b111, 0b101);
 743 
 744 #undef INSN
 745 
 746 #define INSN(NAME, decode, negated)                     \
 747   void NAME(Register Rd, Register Rn, unsigned imm) {   \
 748     starti;                                             \
 749     add_sub_immediate(current_insn, Rd, Rn, imm, decode, negated);     \
 750   }
 751 
 752   INSN(addw, 0b000, 0b010);
 753   INSN(subw, 0b010, 0b000);
 754   INSN(add,  0b100, 0b110);
 755   INSN(sub,  0b110, 0b100);
 756 
 757 #undef INSN
 758 
 759  // Logical (immediate)
 760 #define INSN(NAME, decode, is32)                                \
 761   void NAME(Register Rd, Register Rn, uint64_t imm) {           \
 762     starti;                                                     \
 763     uint32_t val = encode_logical_immediate(is32, imm);         \
 764     f(decode, 31, 29), f(0b100100, 28, 23), f(val, 22, 10);     \
 765     srf(Rd, 0), zrf(Rn, 5);                                     \
 766   }
 767 
 768   INSN(andw, 0b000, true);
 769   INSN(orrw, 0b001, true);
 770   INSN(eorw, 0b010, true);
 771   INSN(andr,  0b100, false);
 772   INSN(orr,  0b101, false);
 773   INSN(eor,  0b110, false);
 774 
 775 #undef INSN
 776 
 777 #define INSN(NAME, decode, is32)                                \
 778   void NAME(Register Rd, Register Rn, uint64_t imm) {           \
 779     starti;                                                     \
 780     uint32_t val = encode_logical_immediate(is32, imm);         \
 781     f(decode, 31, 29), f(0b100100, 28, 23), f(val, 22, 10);     \
 782     zrf(Rd, 0), zrf(Rn, 5);                                     \
 783   }
 784 
 785   INSN(ands, 0b111, false);
 786   INSN(andsw, 0b011, true);
 787 
 788 #undef INSN
 789 
 790   // Move wide (immediate)
 791 #define INSN(NAME, opcode)                                              \
 792   void NAME(Register Rd, unsigned imm, unsigned shift = 0) {            \
 793     assert_cond((shift/16)*16 == shift);                                \
 794     starti;                                                             \
 795     f(opcode, 31, 29), f(0b100101, 28, 23), f(shift/16, 22, 21),        \
 796       f(imm, 20, 5);                                                    \
 797     rf(Rd, 0);                                                          \
 798   }
 799 
 800   INSN(movnw, 0b000);
 801   INSN(movzw, 0b010);
 802   INSN(movkw, 0b011);
 803   INSN(movn, 0b100);
 804   INSN(movz, 0b110);
 805   INSN(movk, 0b111);
 806 
 807 #undef INSN
 808 
 809   // Bitfield
 810 #define INSN(NAME, opcode, size)                                        \
 811   void NAME(Register Rd, Register Rn, unsigned immr, unsigned imms) {   \
 812     starti;                                                             \
 813     guarantee(size == 1 || (immr < 32 && imms < 32), "incorrect immr/imms");\
 814     f(opcode, 31, 22), f(immr, 21, 16), f(imms, 15, 10);                \
 815     zrf(Rn, 5), rf(Rd, 0);                                              \
 816   }
 817 
 818   INSN(sbfmw, 0b0001001100, 0);
 819   INSN(bfmw,  0b0011001100, 0);
 820   INSN(ubfmw, 0b0101001100, 0);
 821   INSN(sbfm,  0b1001001101, 1);
 822   INSN(bfm,   0b1011001101, 1);
 823   INSN(ubfm,  0b1101001101, 1);
 824 
 825 #undef INSN
 826 
 827   // Extract
 828 #define INSN(NAME, opcode, size)                                        \
 829   void NAME(Register Rd, Register Rn, Register Rm, unsigned imms) {     \
 830     starti;                                                             \
 831     guarantee(size == 1 || imms < 32, "incorrect imms");                \
 832     f(opcode, 31, 21), f(imms, 15, 10);                                 \
 833     zrf(Rm, 16), zrf(Rn, 5), zrf(Rd, 0);                                \
 834   }
 835 
 836   INSN(extrw, 0b00010011100, 0);
 837   INSN(extr,  0b10010011110, 1);
 838 
 839 #undef INSN
 840 
  // The maximum range of a branch is fixed for the AArch64
  // architecture.  In debug mode we shrink it in order to test
  // trampolines, but not so small that branches in the interpreter
  // are out of range.
  // The value is 128 * M in non-debug builds and 2 * M in debug builds.
  static const uint64_t branch_range = NOT_DEBUG(128 * M) DEBUG_ONLY(2 * M);
 846 
 847   static bool reachable_from_branch_at(address branch, address target) {
 848     return uabs(target - branch) < branch_range;
 849   }
 850 
 851   // Unconditional branch (immediate)
 852 #define INSN(NAME, opcode)                                              \
 853   void NAME(address dest) {                                             \
 854     starti;                                                             \
 855     int64_t offset = (dest - pc()) >> 2;                                \
 856     DEBUG_ONLY(assert(reachable_from_branch_at(pc(), dest), "debug only")); \
 857     f(opcode, 31), f(0b00101, 30, 26), sf(offset, 25, 0);               \
 858   }                                                                     \
 859   void NAME(Label &L) {                                                 \
 860     wrap_label(L, &Assembler::NAME);                                    \
 861   }                                                                     \
 862   void NAME(const Address &dest);
 863 
 864   INSN(b, 0);
 865   INSN(bl, 1);
 866 
 867 #undef INSN
 868 
 869   // Compare & branch (immediate)
 870 #define INSN(NAME, opcode)                              \
 871   void NAME(Register Rt, address dest) {                \
 872     int64_t offset = (dest - pc()) >> 2;                \
 873     starti;                                             \
 874     f(opcode, 31, 24), sf(offset, 23, 5), rf(Rt, 0);    \
 875   }                                                     \
 876   void NAME(Register Rt, Label &L) {                    \
 877     wrap_label(Rt, L, &Assembler::NAME);                \
 878   }
 879 
 880   INSN(cbzw,  0b00110100);
 881   INSN(cbnzw, 0b00110101);
 882   INSN(cbz,   0b10110100);
 883   INSN(cbnz,  0b10110101);
 884 
 885 #undef INSN
 886 
 887   // Test & branch (immediate)
 888 #define INSN(NAME, opcode)                                              \
 889   void NAME(Register Rt, int bitpos, address dest) {                    \
 890     int64_t offset = (dest - pc()) >> 2;                                \
 891     int b5 = bitpos >> 5;                                               \
 892     bitpos &= 0x1f;                                                     \
 893     starti;                                                             \
 894     f(b5, 31), f(opcode, 30, 24), f(bitpos, 23, 19), sf(offset, 18, 5); \
 895     rf(Rt, 0);                                                          \
 896   }                                                                     \
 897   void NAME(Register Rt, int bitpos, Label &L) {                        \
 898     wrap_label(Rt, bitpos, L, &Assembler::NAME);                        \
 899   }
 900 
 901   INSN(tbz,  0b0110110);
 902   INSN(tbnz, 0b0110111);
 903 
 904 #undef INSN
 905 
 906   // Conditional branch (immediate)
 907   enum Condition
 908     {EQ, NE, HS, CS=HS, LO, CC=LO, MI, PL, VS, VC, HI, LS, GE, LT, GT, LE, AL, NV};
 909 
 910   void br(Condition  cond, address dest) {
 911     int64_t offset = (dest - pc()) >> 2;
 912     starti;
 913     f(0b0101010, 31, 25), f(0, 24), sf(offset, 23, 5), f(0, 4), f(cond, 3, 0);
 914   }
 915 
 916 #define INSN(NAME, cond)                        \
 917   void NAME(address dest) {                     \
 918     br(cond, dest);                             \
 919   }
 920 
 921   INSN(beq, EQ);
 922   INSN(bne, NE);
 923   INSN(bhs, HS);
 924   INSN(bcs, CS);
 925   INSN(blo, LO);
 926   INSN(bcc, CC);
 927   INSN(bmi, MI);
 928   INSN(bpl, PL);
 929   INSN(bvs, VS);
 930   INSN(bvc, VC);
 931   INSN(bhi, HI);
 932   INSN(bls, LS);
 933   INSN(bge, GE);
 934   INSN(blt, LT);
 935   INSN(bgt, GT);
 936   INSN(ble, LE);
 937   INSN(bal, AL);
 938   INSN(bnv, NV);
 939 
 940   void br(Condition cc, Label &L);
 941 
 942 #undef INSN
 943 
 944   // Exception generation
 945   void generate_exception(int opc, int op2, int LL, unsigned imm) {
 946     starti;
 947     f(0b11010100, 31, 24);
 948     f(opc, 23, 21), f(imm, 20, 5), f(op2, 4, 2), f(LL, 1, 0);
 949   }
 950 
 951 #define INSN(NAME, opc, op2, LL)                \
 952   void NAME(unsigned imm) {                     \
 953     generate_exception(opc, op2, LL, imm);      \
 954   }
 955 
 956   INSN(svc, 0b000, 0, 0b01);
 957   INSN(hvc, 0b000, 0, 0b10);
 958   INSN(smc, 0b000, 0, 0b11);
 959   INSN(brk, 0b001, 0, 0b00);
 960   INSN(hlt, 0b010, 0, 0b00);
 961   INSN(dcps1, 0b101, 0, 0b01);
 962   INSN(dcps2, 0b101, 0, 0b10);
 963   INSN(dcps3, 0b101, 0, 0b11);
 964 
 965 #undef INSN
 966 
 967   // System
 968   void system(int op0, int op1, int CRn, int CRm, int op2,
 969               Register rt = dummy_reg)
 970   {
 971     starti;
 972     f(0b11010101000, 31, 21);
 973     f(op0, 20, 19);
 974     f(op1, 18, 16);
 975     f(CRn, 15, 12);
 976     f(CRm, 11, 8);
 977     f(op2, 7, 5);
 978     rf(rt, 0);
 979   }
 980 
 981   // Hint instructions
 982 
 983 #define INSN(NAME, crm, op2)               \
 984   void NAME() {                            \
 985     system(0b00, 0b011, 0b0010, crm, op2); \
 986   }
 987 
 988   INSN(nop,   0b000, 0b0000);
 989   INSN(yield, 0b000, 0b0001);
 990   INSN(wfe,   0b000, 0b0010);
 991   INSN(wfi,   0b000, 0b0011);
 992   INSN(sev,   0b000, 0b0100);
 993   INSN(sevl,  0b000, 0b0101);
 994 
 995   INSN(autia1716, 0b0001, 0b100);
 996   INSN(autiasp,   0b0011, 0b101);
 997   INSN(autiaz,    0b0011, 0b100);
 998   INSN(autib1716, 0b0001, 0b110);
 999   INSN(autibsp,   0b0011, 0b111);
1000   INSN(autibz,    0b0011, 0b110);
1001   INSN(pacia1716, 0b0001, 0b000);
1002   INSN(paciasp,   0b0011, 0b001);
1003   INSN(paciaz,    0b0011, 0b000);
1004   INSN(pacib1716, 0b0001, 0b010);
1005   INSN(pacibsp,   0b0011, 0b011);
1006   INSN(pacibz,    0b0011, 0b010);
1007   INSN(xpaclri,   0b0000, 0b111);
1008 
1009 #undef INSN
1010 
  // we only provide mrs and msr for the special purpose system
  // registers where op0 (instr[20:19]) == 11 and, (currently) only
  // use it for FPSR. n.b. msr has L (instr[21]) == 0, mrs has L == 1
1014 
1015   void msr(int op1, int CRn, int CRm, int op2, Register rt) {
1016     starti;
1017     f(0b1101010100011, 31, 19);
1018     f(op1, 18, 16);
1019     f(CRn, 15, 12);
1020     f(CRm, 11, 8);
1021     f(op2, 7, 5);
1022     // writing zr is ok
1023     zrf(rt, 0);
1024   }
1025 
1026   void mrs(int op1, int CRn, int CRm, int op2, Register rt) {
1027     starti;
1028     f(0b1101010100111, 31, 19);
1029     f(op1, 18, 16);
1030     f(CRn, 15, 12);
1031     f(CRm, 11, 8);
1032     f(op2, 7, 5);
1033     // reading to zr is a mistake
1034     rf(rt, 0);
1035   }
1036 
1037   enum barrier {OSHLD = 0b0001, OSHST, OSH, NSHLD=0b0101, NSHST, NSH,
1038                 ISHLD = 0b1001, ISHST, ISH, LD=0b1101, ST, SY};
1039 
1040   void dsb(barrier imm) {
1041     system(0b00, 0b011, 0b00011, imm, 0b100);
1042   }
1043 
1044   void dmb(barrier imm) {
1045     system(0b00, 0b011, 0b00011, imm, 0b101);
1046   }
1047 
1048   void isb() {
1049     system(0b00, 0b011, 0b00011, SY, 0b110);
1050   }
1051 
1052   void sys(int op1, int CRn, int CRm, int op2,
1053            Register rt = as_Register(0b11111)) {
1054     system(0b01, op1, CRn, CRm, op2, rt);
1055   }
1056 
1057   // Only implement operations accessible from EL0 or higher, i.e.,
1058   //            op1    CRn    CRm    op2
1059   // IC IVAU     3      7      5      1
1060   // DC CVAC     3      7      10     1
1061   // DC CVAP     3      7      12     1
1062   // DC CVAU     3      7      11     1
1063   // DC CIVAC    3      7      14     1
1064   // DC ZVA      3      7      4      1
1065   // So only deal with the CRm field.
1066   enum icache_maintenance {IVAU = 0b0101};
1067   enum dcache_maintenance {CVAC = 0b1010, CVAP = 0b1100, CVAU = 0b1011, CIVAC = 0b1110, ZVA = 0b100};
1068 
1069   void dc(dcache_maintenance cm, Register Rt) {
1070     sys(0b011, 0b0111, cm, 0b001, Rt);
1071   }
1072 
1073   void ic(icache_maintenance cm, Register Rt) {
1074     sys(0b011, 0b0111, cm, 0b001, Rt);
1075   }
1076 
1077   // A more convenient access to dmb for our purposes
1078   enum Membar_mask_bits {
1079     // We can use ISH for a barrier because the Arm ARM says "This
1080     // architecture assumes that all Processing Elements that use the
1081     // same operating system or hypervisor are in the same Inner
1082     // Shareable shareability domain."
1083     StoreStore = ISHST,
1084     LoadStore  = ISHLD,
1085     LoadLoad   = ISHLD,
1086     StoreLoad  = ISH,
1087     AnyAny     = ISH
1088   };
1089 
1090   void membar(Membar_mask_bits order_constraint) {
1091     dmb(Assembler::barrier(order_constraint));
1092   }
1093 
1094   // Unconditional branch (register)
1095 
1096   void branch_reg(int OP, int A, int M, Register RN, Register RM) {
1097     starti;
1098     f(0b1101011, 31, 25);
1099     f(OP, 24, 21);
1100     f(0b111110000, 20, 12);
1101     f(A, 11, 11);
1102     f(M, 10, 10);
1103     rf(RN, 5);
1104     rf(RM, 0);
1105   }
1106 
1107 #define INSN(NAME, opc)              \
1108   void NAME(Register RN) {           \
1109     branch_reg(opc, 0, 0, RN, r0);    \
1110   }
1111 
1112   INSN(br, 0b0000);
1113   INSN(blr, 0b0001);
1114   INSN(ret, 0b0010);
1115 
1116   void ret(void *p); // This forces a compile-time error for ret(0)
1117 
1118 #undef INSN
1119 
1120 #define INSN(NAME, opc)                     \
1121   void NAME() {                             \
1122     branch_reg(opc, 0, 0, dummy_reg, r0);    \
1123   }
1124 
1125   INSN(eret, 0b0100);
1126   INSN(drps, 0b0101);
1127 
1128 #undef INSN
1129 
1130 #define INSN(NAME, M)                                  \
1131   void NAME() {                                        \
1132     branch_reg(0b0010, 1, M, dummy_reg, dummy_reg);    \
1133   }
1134 
1135   INSN(retaa, 0);
1136   INSN(retab, 1);
1137 
1138 #undef INSN
1139 
1140 #define INSN(NAME, OP, M)                   \
1141   void NAME(Register rn) {                  \
1142     branch_reg(OP, 1, M, rn, dummy_reg);    \
1143   }
1144 
1145   INSN(braaz,  0b0000, 0);
1146   INSN(brabz,  0b0000, 1);
1147   INSN(blraaz, 0b0001, 0);
1148   INSN(blrabz, 0b0001, 1);
1149 
1150 #undef INSN
1151 
1152 #define INSN(NAME, OP, M)                  \
1153   void NAME(Register rn, Register rm) {    \
1154     branch_reg(OP, 1, M, rn, rm);          \
1155   }
1156 
1157   INSN(braa,  0b1000, 0);
1158   INSN(brab,  0b1000, 1);
1159   INSN(blraa, 0b1001, 0);
1160   INSN(blrab, 0b1001, 1);
1161 
1162 #undef INSN
1163 
1164   // Load/store exclusive
1165   enum operand_size { byte, halfword, word, xword };
1166 
1167   void load_store_exclusive(Register Rs, Register Rt1, Register Rt2,
1168     Register Rn, enum operand_size sz, int op, bool ordered) {
1169     starti;
1170     f(sz, 31, 30), f(0b001000, 29, 24), f(op, 23, 21);
1171     rf(Rs, 16), f(ordered, 15), zrf(Rt2, 10), srf(Rn, 5), zrf(Rt1, 0);
1172   }
1173 
1174   void load_exclusive(Register dst, Register addr,
1175                       enum operand_size sz, bool ordered) {
1176     load_store_exclusive(dummy_reg, dst, dummy_reg, addr,
1177                          sz, 0b010, ordered);
1178   }
1179 
1180   void store_exclusive(Register status, Register new_val, Register addr,
1181                        enum operand_size sz, bool ordered) {
1182     load_store_exclusive(status, new_val, dummy_reg, addr,
1183                          sz, 0b000, ordered);
1184   }
1185 
1186 #define INSN4(NAME, sz, op, o0) /* Four registers */                    \
1187   void NAME(Register Rs, Register Rt1, Register Rt2, Register Rn) {     \
1188     guarantee(Rs != Rn && Rs != Rt1 && Rs != Rt2, "unpredictable instruction"); \
1189     load_store_exclusive(Rs, Rt1, Rt2, Rn, sz, op, o0);                 \
1190   }
1191 
1192 #define INSN3(NAME, sz, op, o0) /* Three registers */                   \
1193   void NAME(Register Rs, Register Rt, Register Rn) {                    \
1194     guarantee(Rs != Rn && Rs != Rt, "unpredictable instruction");       \
1195     load_store_exclusive(Rs, Rt, dummy_reg, Rn, sz, op, o0); \
1196   }
1197 
1198 #define INSN2(NAME, sz, op, o0) /* Two registers */                     \
1199   void NAME(Register Rt, Register Rn) {                                 \
1200     load_store_exclusive(dummy_reg, Rt, dummy_reg, \
1201                          Rn, sz, op, o0);                               \
1202   }
1203 
1204 #define INSN_FOO(NAME, sz, op, o0) /* Three registers, encoded differently */ \
1205   void NAME(Register Rt1, Register Rt2, Register Rn) {                  \
1206     guarantee(Rt1 != Rt2, "unpredictable instruction");                 \
1207     load_store_exclusive(dummy_reg, Rt1, Rt2, Rn, sz, op, o0);          \
1208   }
1209 
1210   // bytes
1211   INSN3(stxrb, byte, 0b000, 0);
1212   INSN3(stlxrb, byte, 0b000, 1);
1213   INSN2(ldxrb, byte, 0b010, 0);
1214   INSN2(ldaxrb, byte, 0b010, 1);
1215   INSN2(stlrb, byte, 0b100, 1);
1216   INSN2(ldarb, byte, 0b110, 1);
1217 
1218   // halfwords
1219   INSN3(stxrh, halfword, 0b000, 0);
1220   INSN3(stlxrh, halfword, 0b000, 1);
1221   INSN2(ldxrh, halfword, 0b010, 0);
1222   INSN2(ldaxrh, halfword, 0b010, 1);
1223   INSN2(stlrh, halfword, 0b100, 1);
1224   INSN2(ldarh, halfword, 0b110, 1);
1225 
1226   // words
1227   INSN3(stxrw, word, 0b000, 0);
1228   INSN3(stlxrw, word, 0b000, 1);
1229   INSN4(stxpw, word, 0b001, 0);
1230   INSN4(stlxpw, word, 0b001, 1);
1231   INSN2(ldxrw, word, 0b010, 0);
1232   INSN2(ldaxrw, word, 0b010, 1);
1233   INSN_FOO(ldxpw, word, 0b011, 0);
1234   INSN_FOO(ldaxpw, word, 0b011, 1);
1235   INSN2(stlrw, word, 0b100, 1);
1236   INSN2(ldarw, word, 0b110, 1);
1237 
1238   // xwords
1239   INSN3(stxr, xword, 0b000, 0);
1240   INSN3(stlxr, xword, 0b000, 1);
1241   INSN4(stxp, xword, 0b001, 0);
1242   INSN4(stlxp, xword, 0b001, 1);
1243   INSN2(ldxr, xword, 0b010, 0);
1244   INSN2(ldaxr, xword, 0b010, 1);
1245   INSN_FOO(ldxp, xword, 0b011, 0);
1246   INSN_FOO(ldaxp, xword, 0b011, 1);
1247   INSN2(stlr, xword, 0b100, 1);
1248   INSN2(ldar, xword, 0b110, 1);
1249 
1250 #undef INSN2
1251 #undef INSN3
1252 #undef INSN4
1253 #undef INSN_FOO
1254 
1255   // 8.1 Compare and swap extensions
1256   void lse_cas(Register Rs, Register Rt, Register Rn,
1257                         enum operand_size sz, bool a, bool r, bool not_pair) {
1258     starti;
1259     if (! not_pair) { // Pair
1260       assert(sz == word || sz == xword, "invalid size");
1261       /* The size bit is in bit 30, not 31 */
1262       sz = (operand_size)(sz == word ? 0b00:0b01);
1263     }
1264     f(sz, 31, 30), f(0b001000, 29, 24), f(not_pair ? 1 : 0, 23), f(a, 22), f(1, 21);
1265     zrf(Rs, 16), f(r, 15), f(0b11111, 14, 10), srf(Rn, 5), zrf(Rt, 0);
1266   }
1267 
1268   // CAS
1269 #define INSN(NAME, a, r)                                                \
1270   void NAME(operand_size sz, Register Rs, Register Rt, Register Rn) {   \
1271     assert(Rs != Rn && Rs != Rt, "unpredictable instruction");          \
1272     lse_cas(Rs, Rt, Rn, sz, a, r, true);                                \
1273   }
1274   INSN(cas,    false, false)
1275   INSN(casa,   true,  false)
1276   INSN(casl,   false, true)
1277   INSN(casal,  true,  true)
1278 #undef INSN
1279 
1280   // CASP
1281 #define INSN(NAME, a, r)                                                \
1282   void NAME(operand_size sz, Register Rs, Register Rs1,                 \
1283             Register Rt, Register Rt1, Register Rn) {                   \
1284     assert((Rs->encoding() & 1) == 0 && (Rt->encoding() & 1) == 0 &&    \
1285            Rs->successor() == Rs1 && Rt->successor() == Rt1 &&          \
1286            Rs != Rn && Rs1 != Rn && Rs != Rt, "invalid registers");     \
1287     lse_cas(Rs, Rt, Rn, sz, a, r, false);                               \
1288   }
1289   INSN(casp,    false, false)
1290   INSN(caspa,   true,  false)
1291   INSN(caspl,   false, true)
1292   INSN(caspal,  true,  true)
1293 #undef INSN
1294 
1295   // 8.1 Atomic operations
1296   void lse_atomic(Register Rs, Register Rt, Register Rn,
1297                   enum operand_size sz, int op1, int op2, bool a, bool r) {
1298     starti;
1299     f(sz, 31, 30), f(0b111000, 29, 24), f(a, 23), f(r, 22), f(1, 21);
1300     zrf(Rs, 16), f(op1, 15), f(op2, 14, 12), f(0, 11, 10), srf(Rn, 5), zrf(Rt, 0);
1301   }
1302 
1303 #define INSN(NAME, NAME_A, NAME_L, NAME_AL, op1, op2)                   \
1304   void NAME(operand_size sz, Register Rs, Register Rt, Register Rn) {   \
1305     lse_atomic(Rs, Rt, Rn, sz, op1, op2, false, false);                 \
1306   }                                                                     \
1307   void NAME_A(operand_size sz, Register Rs, Register Rt, Register Rn) { \
1308     lse_atomic(Rs, Rt, Rn, sz, op1, op2, true, false);                  \
1309   }                                                                     \
1310   void NAME_L(operand_size sz, Register Rs, Register Rt, Register Rn) { \
1311     lse_atomic(Rs, Rt, Rn, sz, op1, op2, false, true);                  \
1312   }                                                                     \
1313   void NAME_AL(operand_size sz, Register Rs, Register Rt, Register Rn) {\
1314     lse_atomic(Rs, Rt, Rn, sz, op1, op2, true, true);                   \
1315   }
1316   INSN(ldadd,  ldadda,  ldaddl,  ldaddal,  0, 0b000);
1317   INSN(ldbic,  ldbica,  ldbicl,  ldbical,  0, 0b001);
1318   INSN(ldeor,  ldeora,  ldeorl,  ldeoral,  0, 0b010);
1319   INSN(ldorr,  ldorra,  ldorrl,  ldorral,  0, 0b011);
1320   INSN(ldsmax, ldsmaxa, ldsmaxl, ldsmaxal, 0, 0b100);
1321   INSN(ldsmin, ldsmina, ldsminl, ldsminal, 0, 0b101);
1322   INSN(ldumax, ldumaxa, ldumaxl, ldumaxal, 0, 0b110);
1323   INSN(ldumin, ldumina, lduminl, lduminal, 0, 0b111);
1324   INSN(swp,    swpa,    swpl,    swpal,    1, 0b000);
1325 #undef INSN
1326 
1327   // Load register (literal)
1328 #define INSN(NAME, opc, V)                                              \
1329   void NAME(Register Rt, address dest) {                                \
1330     int64_t offset = (dest - pc()) >> 2;                                \
1331     starti;                                                             \
1332     f(opc, 31, 30), f(0b011, 29, 27), f(V, 26), f(0b00, 25, 24),        \
1333       sf(offset, 23, 5);                                                \
1334     rf(Rt, 0);                                                          \
1335   }                                                                     \
1336   void NAME(Register Rt, address dest, relocInfo::relocType rtype) {    \
1337     InstructionMark im(this);                                           \
1338     guarantee(rtype == relocInfo::internal_word_type,                   \
1339               "only internal_word_type relocs make sense here");        \
1340     code_section()->relocate(inst_mark(), InternalAddress(dest).rspec()); \
1341     NAME(Rt, dest);                                                     \
1342   }                                                                     \
1343   void NAME(Register Rt, Label &L) {                                    \
1344     wrap_label(Rt, L, &Assembler::NAME);                                \
1345   }
1346 
1347   INSN(ldrw, 0b00, 0);
1348   INSN(ldr, 0b01, 0);
1349   INSN(ldrsw, 0b10, 0);
1350 
1351 #undef INSN
1352 
1353 #define INSN(NAME, opc, V)                                              \
1354   void NAME(FloatRegister Rt, address dest) {                           \
1355     int64_t offset = (dest - pc()) >> 2;                                \
1356     starti;                                                             \
1357     f(opc, 31, 30), f(0b011, 29, 27), f(V, 26), f(0b00, 25, 24),        \
1358       sf(offset, 23, 5);                                                \
1359     rf(as_Register(Rt), 0);                                             \
1360   }
1361 
1362   INSN(ldrs, 0b00, 1);
1363   INSN(ldrd, 0b01, 1);
1364   INSN(ldrq, 0b10, 1);
1365 
1366 #undef INSN
1367 
1368 #define INSN(NAME, size, opc)                                           \
1369   void NAME(FloatRegister Rt, Register Rn) {                            \
1370     starti;                                                             \
1371     f(size, 31, 30), f(0b111100, 29, 24), f(opc, 23, 22), f(0, 21);     \
1372     f(0, 20, 12), f(0b01, 11, 10);                                      \
1373     rf(Rn, 5), rf(as_Register(Rt), 0);                                  \
1374   }
1375 
1376   INSN(ldrs, 0b10, 0b01);
1377   INSN(ldrd, 0b11, 0b01);
1378   INSN(ldrq, 0b00, 0b11);
1379 
1380 #undef INSN
1381 
1382 
1383 #define INSN(NAME, opc, V)                                              \
1384   void NAME(address dest, prfop op = PLDL1KEEP) {                       \
1385     int64_t offset = (dest - pc()) >> 2;                                \
1386     starti;                                                             \
1387     f(opc, 31, 30), f(0b011, 29, 27), f(V, 26), f(0b00, 25, 24),        \
1388       sf(offset, 23, 5);                                                \
1389     f(op, 4, 0);                                                        \
1390   }                                                                     \
1391   void NAME(Label &L, prfop op = PLDL1KEEP) {                           \
1392     wrap_label(L, op, &Assembler::NAME);                                \
1393   }
1394 
1395   INSN(prfm, 0b11, 0);
1396 
1397 #undef INSN
1398 
1399   // Load/store
1400   void ld_st1(int opc, int p1, int V, int L,
1401               Register Rt1, Register Rt2, Address adr, bool no_allocate) {
1402     starti;
1403     f(opc, 31, 30), f(p1, 29, 27), f(V, 26), f(L, 22);
1404     zrf(Rt2, 10), zrf(Rt1, 0);
1405     if (no_allocate) {
1406       adr.encode_nontemporal_pair(&current_insn);
1407     } else {
1408       adr.encode_pair(&current_insn);
1409     }
1410   }
1411 
1412   // Load/store register pair (offset)
1413 #define INSN(NAME, size, p1, V, L, no_allocate)         \
1414   void NAME(Register Rt1, Register Rt2, Address adr) {  \
1415     ld_st1(size, p1, V, L, Rt1, Rt2, adr, no_allocate); \
1416    }
1417 
1418   INSN(stpw, 0b00, 0b101, 0, 0, false);
1419   INSN(ldpw, 0b00, 0b101, 0, 1, false);
1420   INSN(ldpsw, 0b01, 0b101, 0, 1, false);
1421   INSN(stp, 0b10, 0b101, 0, 0, false);
1422   INSN(ldp, 0b10, 0b101, 0, 1, false);
1423 
1424   // Load/store no-allocate pair (offset)
1425   INSN(stnpw, 0b00, 0b101, 0, 0, true);
1426   INSN(ldnpw, 0b00, 0b101, 0, 1, true);
1427   INSN(stnp, 0b10, 0b101, 0, 0, true);
1428   INSN(ldnp, 0b10, 0b101, 0, 1, true);
1429 
1430 #undef INSN
1431 
1432 #define INSN(NAME, size, p1, V, L, no_allocate)                         \
1433   void NAME(FloatRegister Rt1, FloatRegister Rt2, Address adr) {        \
1434     ld_st1(size, p1, V, L,                                              \
1435            as_Register(Rt1), as_Register(Rt2), adr, no_allocate);       \
1436    }
1437 
1438   INSN(stps, 0b00, 0b101, 1, 0, false);
1439   INSN(ldps, 0b00, 0b101, 1, 1, false);
1440   INSN(stpd, 0b01, 0b101, 1, 0, false);
1441   INSN(ldpd, 0b01, 0b101, 1, 1, false);
1442   INSN(stpq, 0b10, 0b101, 1, 0, false);
1443   INSN(ldpq, 0b10, 0b101, 1, 1, false);
1444 
1445 #undef INSN
1446 
1447   // Load/store register (all modes)
1448   void ld_st2(Register Rt, const Address &adr, int size, int op, int V = 0) {
1449     starti;
1450 
1451     f(V, 26); // general reg?
1452     zrf(Rt, 0);
1453 
1454     // Encoding for literal loads is done here (rather than pushed
1455     // down into Address::encode) because the encoding of this
1456     // instruction is too different from all of the other forms to
1457     // make it worth sharing.
1458     if (adr.getMode() == Address::literal) {
1459       assert(size == 0b10 || size == 0b11, "bad operand size in ldr");
1460       assert(op == 0b01, "literal form can only be used with loads");
1461       f(size & 0b01, 31, 30), f(0b011, 29, 27), f(0b00, 25, 24);
1462       int64_t offset = (adr.target() - pc()) >> 2;
1463       sf(offset, 23, 5);
1464       code_section()->relocate(pc(), adr.rspec());
1465       return;
1466     }
1467 
1468     f(size, 31, 30);
1469     f(op, 23, 22); // str
1470     adr.encode(&current_insn);
1471   }
1472 
1473 #define INSN(NAME, size, op)                            \
1474   void NAME(Register Rt, const Address &adr) {          \
1475     ld_st2(Rt, adr, size, op);                          \
1476   }                                                     \
1477 
1478   INSN(str, 0b11, 0b00);
1479   INSN(strw, 0b10, 0b00);
1480   INSN(strb, 0b00, 0b00);
1481   INSN(strh, 0b01, 0b00);
1482 
1483   INSN(ldr, 0b11, 0b01);
1484   INSN(ldrw, 0b10, 0b01);
1485   INSN(ldrb, 0b00, 0b01);
1486   INSN(ldrh, 0b01, 0b01);
1487 
1488   INSN(ldrsb, 0b00, 0b10);
1489   INSN(ldrsbw, 0b00, 0b11);
1490   INSN(ldrsh, 0b01, 0b10);
1491   INSN(ldrshw, 0b01, 0b11);
1492   INSN(ldrsw, 0b10, 0b10);
1493 
1494 #undef INSN
1495 
1496 #define INSN(NAME, size, op)                                    \
1497   void NAME(const Address &adr, prfop pfop = PLDL1KEEP) {       \
1498     ld_st2(as_Register(pfop), adr, size, op);                   \
1499   }
1500 
1501   INSN(prfm, 0b11, 0b10); // FIXME: PRFM should not be used with
1502                           // writeback modes, but the assembler
1503                           // doesn't enfore that.
1504 
1505 #undef INSN
1506 
1507 #define INSN(NAME, size, op)                            \
1508   void NAME(FloatRegister Rt, const Address &adr) {     \
1509     ld_st2(as_Register(Rt), adr, size, op, 1);          \
1510   }
1511 
1512   INSN(strd, 0b11, 0b00);
1513   INSN(strs, 0b10, 0b00);
1514   INSN(ldrd, 0b11, 0b01);
1515   INSN(ldrs, 0b10, 0b01);
1516   INSN(strq, 0b00, 0b10);
1517   INSN(ldrq, 0x00, 0b11);
1518 
1519 #undef INSN
1520 
1521 /* SIMD extensions
1522  *
1523  * We just use FloatRegister in the following. They are exactly the same
1524  * as SIMD registers.
1525  */
1526 public:
1527 
1528   enum SIMD_Arrangement {
1529     T8B, T16B, T4H, T8H, T2S, T4S, T1D, T2D, T1Q, INVALID_ARRANGEMENT
1530   };
1531 
1532   enum SIMD_RegVariant {
1533       B, H, S, D, Q, INVALID
1534   };
1535 
1536 private:
1537 
  // Static lookup tables; only declared here, the initializers are not
  // part of this section of the header.
  // NOTE(review): presumably indexed by element size in bytes (0..8), and
  // for the arrangement table additionally by the isQ flag -- confirm
  // against the definitions and esize2arrangement().
  static SIMD_Arrangement _esize2arrangement_table[9][2];
  static SIMD_RegVariant _esize2regvariant[9];
1540 
1541 public:
1542 
  // Conversion helpers between element sizes/types and the SIMD enums.
  // Only the declarations are visible here.
  // NOTE(review): presumably maps an element size plus the Q flag to a
  // SIMD_Arrangement -- confirm against the definition.
  static SIMD_Arrangement esize2arrangement(unsigned esize, bool isQ);
  // NOTE(review): presumably maps a BasicType element type to its register
  // variant -- confirm against the definition.
  static SIMD_RegVariant elemType_to_regVariant(BasicType bt);
  // NOTE(review): presumably maps an element size in bytes to its register
  // variant -- confirm against the definition.
  static SIMD_RegVariant elemBytes_to_regVariant(unsigned esize);
  // Return the corresponding bits for different SIMD_RegVariant value.
  static unsigned regVariant_to_elemBits(SIMD_RegVariant T);
1548 
1549   enum shift_kind { LSL, LSR, ASR, ROR };
1550 
1551   void op_shifted_reg(Instruction_aarch64 &current_insn, unsigned decode,
1552                       enum shift_kind kind, unsigned shift,
1553                       unsigned size, unsigned op) {
1554     f(size, 31);
1555     f(op, 30, 29);
1556     f(decode, 28, 24);
1557     f(shift, 15, 10);
1558     f(kind, 23, 22);
1559   }
1560 
1561   // Logical (shifted register)
1562 #define INSN(NAME, size, op, N)                                         \
1563   void NAME(Register Rd, Register Rn, Register Rm,                      \
1564             enum shift_kind kind = LSL, unsigned shift = 0) {           \
1565     starti;                                                             \
1566     guarantee(size == 1 || shift < 32, "incorrect shift");              \
1567     f(N, 21);                                                           \
1568     zrf(Rm, 16), zrf(Rn, 5), zrf(Rd, 0);                                \
1569     op_shifted_reg(current_insn, 0b01010, kind, shift, size, op);       \
1570   }
1571 
1572   INSN(andr, 1, 0b00, 0);
1573   INSN(orr, 1, 0b01, 0);
1574   INSN(eor, 1, 0b10, 0);
1575   INSN(ands, 1, 0b11, 0);
1576   INSN(andw, 0, 0b00, 0);
1577   INSN(orrw, 0, 0b01, 0);
1578   INSN(eorw, 0, 0b10, 0);
1579   INSN(andsw, 0, 0b11, 0);
1580 
1581 #undef INSN
1582 
1583 #define INSN(NAME, size, op, N)                                         \
1584   void NAME(Register Rd, Register Rn, Register Rm,                      \
1585             enum shift_kind kind = LSL, unsigned shift = 0) {           \
1586     starti;                                                             \
1587     f(N, 21);                                                           \
1588     zrf(Rm, 16), zrf(Rn, 5), zrf(Rd, 0);                                \
1589     op_shifted_reg(current_insn, 0b01010, kind, shift, size, op);       \
1590   }                                                                     \
1591                                                                         \
1592   /* These instructions have no immediate form. Provide an overload so  \
1593      that if anyone does try to use an immediate operand -- this has    \
1594      happened! -- we'll get a compile-time error. */                    \
1595   void NAME(Register Rd, Register Rn, unsigned imm,                     \
1596             enum shift_kind kind = LSL, unsigned shift = 0) {           \
1597     assert(false, " can't be used with immediate operand");             \
1598   }
1599 
1600   INSN(bic, 1, 0b00, 1);
1601   INSN(orn, 1, 0b01, 1);
1602   INSN(eon, 1, 0b10, 1);
1603   INSN(bics, 1, 0b11, 1);
1604   INSN(bicw, 0, 0b00, 1);
1605   INSN(ornw, 0, 0b01, 1);
1606   INSN(eonw, 0, 0b10, 1);
1607   INSN(bicsw, 0, 0b11, 1);
1608 
1609 #undef INSN
1610 
1611 #ifdef _WIN64
1612 // In MSVC, `mvn` is defined as a macro and it affects compilation
1613 #undef mvn
1614 #endif
1615 
1616   // Aliases for short forms of orn
1617 void mvn(Register Rd, Register Rm,
1618             enum shift_kind kind = LSL, unsigned shift = 0) {
1619   orn(Rd, zr, Rm, kind, shift);
1620 }
1621 
1622 void mvnw(Register Rd, Register Rm,
1623             enum shift_kind kind = LSL, unsigned shift = 0) {
1624   ornw(Rd, zr, Rm, kind, shift);
1625 }
1626 
1627   // Add/subtract (shifted register)
1628 #define INSN(NAME, size, op)                            \
1629   void NAME(Register Rd, Register Rn, Register Rm,      \
1630             enum shift_kind kind, unsigned shift = 0) { \
1631     starti;                                             \
1632     f(0, 21);                                           \
1633     assert_cond(kind != ROR);                           \
1634     guarantee(size == 1 || shift < 32, "incorrect shift");\
1635     zrf(Rd, 0), zrf(Rn, 5), zrf(Rm, 16);                \
1636     op_shifted_reg(current_insn, 0b01011, kind, shift, size, op);      \
1637   }
1638 
1639   INSN(add, 1, 0b000);
1640   INSN(sub, 1, 0b10);
1641   INSN(addw, 0, 0b000);
1642   INSN(subw, 0, 0b10);
1643 
1644   INSN(adds, 1, 0b001);
1645   INSN(subs, 1, 0b11);
1646   INSN(addsw, 0, 0b001);
1647   INSN(subsw, 0, 0b11);
1648 
1649 #undef INSN
1650 
1651   // Add/subtract (extended register)
1652 #define INSN(NAME, op)                                                  \
1653   void NAME(Register Rd, Register Rn, Register Rm,                      \
1654            ext::operation option, int amount = 0) {                     \
1655     starti;                                                             \
1656     zrf(Rm, 16), srf(Rn, 5), srf(Rd, 0);                                \
1657     add_sub_extended_reg(current_insn, op, 0b01011, Rd, Rn, Rm, 0b00, option, amount); \
1658   }
1659 
1660   void add_sub_extended_reg(Instruction_aarch64 &current_insn, unsigned op, unsigned decode,
1661     Register Rd, Register Rn, Register Rm,
1662     unsigned opt, ext::operation option, unsigned imm) {
1663     guarantee(imm <= 4, "shift amount must be <= 4");
1664     f(op, 31, 29), f(decode, 28, 24), f(opt, 23, 22), f(1, 21);
1665     f(option, 15, 13), f(imm, 12, 10);
1666   }
1667 
1668   INSN(addw, 0b000);
1669   INSN(subw, 0b010);
1670   INSN(add, 0b100);
1671   INSN(sub, 0b110);
1672 
1673 #undef INSN
1674 
1675 #define INSN(NAME, op)                                                  \
1676   void NAME(Register Rd, Register Rn, Register Rm,                      \
1677            ext::operation option, int amount = 0) {                     \
1678     starti;                                                             \
1679     zrf(Rm, 16), srf(Rn, 5), zrf(Rd, 0);                                \
1680     add_sub_extended_reg(current_insn, op, 0b01011, Rd, Rn, Rm, 0b00, option, amount); \
1681   }
1682 
1683   INSN(addsw, 0b001);
1684   INSN(subsw, 0b011);
1685   INSN(adds, 0b101);
1686   INSN(subs, 0b111);
1687 
1688 #undef INSN
1689 
1690   // Aliases for short forms of add and sub
1691 #define INSN(NAME)                                      \
1692   void NAME(Register Rd, Register Rn, Register Rm) {    \
1693     if (Rd == sp || Rn == sp)                           \
1694       NAME(Rd, Rn, Rm, ext::uxtx);                      \
1695     else                                                \
1696       NAME(Rd, Rn, Rm, LSL);                            \
1697   }
1698 
1699   INSN(addw);
1700   INSN(subw);
1701   INSN(add);
1702   INSN(sub);
1703 
1704   INSN(addsw);
1705   INSN(subsw);
1706   INSN(adds);
1707   INSN(subs);
1708 
1709 #undef INSN
1710 
1711   // Add/subtract (with carry)
1712   void add_sub_carry(unsigned op, Register Rd, Register Rn, Register Rm) {
1713     starti;
1714     f(op, 31, 29);
1715     f(0b11010000, 28, 21);
1716     f(0b000000, 15, 10);
1717     zrf(Rm, 16), zrf(Rn, 5), zrf(Rd, 0);
1718   }
1719 
1720   #define INSN(NAME, op)                                \
1721     void NAME(Register Rd, Register Rn, Register Rm) {  \
1722       add_sub_carry(op, Rd, Rn, Rm);                    \
1723     }
1724 
1725   INSN(adcw, 0b000);
1726   INSN(adcsw, 0b001);
1727   INSN(sbcw, 0b010);
1728   INSN(sbcsw, 0b011);
1729   INSN(adc, 0b100);
1730   INSN(adcs, 0b101);
1731   INSN(sbc,0b110);
1732   INSN(sbcs, 0b111);
1733 
1734 #undef INSN
1735 
1736   // Conditional compare (both kinds)
1737   void conditional_compare(unsigned op, int o1, int o2, int o3,
1738                            Register Rn, unsigned imm5, unsigned nzcv,
1739                            unsigned cond) {
1740     starti;
1741     f(op, 31, 29);
1742     f(0b11010010, 28, 21);
1743     f(cond, 15, 12);
1744     f(o1, 11);
1745     f(o2, 10);
1746     f(o3, 4);
1747     f(nzcv, 3, 0);
1748     f(imm5, 20, 16), zrf(Rn, 5);
1749   }
1750 
1751 #define INSN(NAME, op)                                                  \
1752   void NAME(Register Rn, Register Rm, int imm, Condition cond) {        \
1753     int regNumber = (Rm == zr ? 31 : Rm->encoding());                   \
1754     conditional_compare(op, 0, 0, 0, Rn, regNumber, imm, cond);         \
1755   }                                                                     \
1756                                                                         \
1757   void NAME(Register Rn, int imm5, int imm, Condition cond) {           \
1758     conditional_compare(op, 1, 0, 0, Rn, imm5, imm, cond);              \
1759   }
1760 
1761   INSN(ccmnw, 0b001);
1762   INSN(ccmpw, 0b011);
1763   INSN(ccmn, 0b101);
1764   INSN(ccmp, 0b111);
1765 
1766 #undef INSN
1767 
1768   // Conditional select
1769   void conditional_select(unsigned op, unsigned op2,
1770                           Register Rd, Register Rn, Register Rm,
1771                           unsigned cond) {
1772     starti;
1773     f(op, 31, 29);
1774     f(0b11010100, 28, 21);
1775     f(cond, 15, 12);
1776     f(op2, 11, 10);
1777     zrf(Rm, 16), zrf(Rn, 5), rf(Rd, 0);
1778   }
1779 
1780 #define INSN(NAME, op, op2)                                             \
1781   void NAME(Register Rd, Register Rn, Register Rm, Condition cond) {    \
1782     conditional_select(op, op2, Rd, Rn, Rm, cond);                      \
1783   }
1784 
1785   INSN(cselw, 0b000, 0b00);
1786   INSN(csincw, 0b000, 0b01);
1787   INSN(csinvw, 0b010, 0b00);
1788   INSN(csnegw, 0b010, 0b01);
1789   INSN(csel, 0b100, 0b00);
1790   INSN(csinc, 0b100, 0b01);
1791   INSN(csinv, 0b110, 0b00);
1792   INSN(csneg, 0b110, 0b01);
1793 
1794 #undef INSN
1795 
1796   // Data processing
1797   void data_processing(Instruction_aarch64 &current_insn, unsigned op29, unsigned opcode,
1798                        Register Rd, Register Rn) {
1799     f(op29, 31, 29), f(0b11010110, 28, 21);
1800     f(opcode, 15, 10);
1801     rf(Rn, 5), rf(Rd, 0);
1802   }
1803 
1804   // (1 source)
1805 #define INSN(NAME, op29, opcode2, opcode)                       \
1806   void NAME(Register Rd, Register Rn) {                         \
1807     starti;                                                     \
1808     f(opcode2, 20, 16);                                         \
1809     data_processing(current_insn, op29, opcode, Rd, Rn);        \
1810   }
1811 
1812   INSN(rbitw,  0b010, 0b00000, 0b00000);
1813   INSN(rev16w, 0b010, 0b00000, 0b00001);
1814   INSN(revw,   0b010, 0b00000, 0b00010);
1815   INSN(clzw,   0b010, 0b00000, 0b00100);
1816   INSN(clsw,   0b010, 0b00000, 0b00101);
1817 
1818   INSN(rbit,   0b110, 0b00000, 0b00000);
1819   INSN(rev16,  0b110, 0b00000, 0b00001);
1820   INSN(rev32,  0b110, 0b00000, 0b00010);
1821   INSN(rev,    0b110, 0b00000, 0b00011);
1822   INSN(clz,    0b110, 0b00000, 0b00100);
1823   INSN(cls,    0b110, 0b00000, 0b00101);
1824 
1825   // PAC instructions
1826   INSN(pacia,  0b110, 0b00001, 0b00000);
1827   INSN(pacib,  0b110, 0b00001, 0b00001);
1828   INSN(pacda,  0b110, 0b00001, 0b00010);
1829   INSN(pacdb,  0b110, 0b00001, 0b00011);
1830   INSN(autia,  0b110, 0b00001, 0b00100);
1831   INSN(autib,  0b110, 0b00001, 0b00101);
1832   INSN(autda,  0b110, 0b00001, 0b00110);
1833   INSN(autdb,  0b110, 0b00001, 0b00111);
1834 
1835 #undef INSN
1836 
1837 #define INSN(NAME, op29, opcode2, opcode)                       \
1838   void NAME(Register Rd) {                                      \
1839     starti;                                                     \
1840     f(opcode2, 20, 16);                                         \
1841     data_processing(current_insn, op29, opcode, Rd, dummy_reg); \
1842   }
1843 
1844   // PAC instructions (with zero modifier)
1845   INSN(paciza,  0b110, 0b00001, 0b01000);
1846   INSN(pacizb,  0b110, 0b00001, 0b01001);
1847   INSN(pacdza,  0b110, 0b00001, 0b01010);
1848   INSN(pacdzb,  0b110, 0b00001, 0b01011);
1849   INSN(autiza,  0b110, 0b00001, 0b01100);
1850   INSN(autizb,  0b110, 0b00001, 0b01101);
1851   INSN(autdza,  0b110, 0b00001, 0b01110);
1852   INSN(autdzb,  0b110, 0b00001, 0b01111);
1853   INSN(xpaci,   0b110, 0b00001, 0b10000);
1854   INSN(xpacd,   0b110, 0b00001, 0b10001);
1855 
1856 #undef INSN
1857 
1858   // (2 sources)
1859 #define INSN(NAME, op29, opcode)                                \
1860   void NAME(Register Rd, Register Rn, Register Rm) {            \
1861     starti;                                                     \
1862     rf(Rm, 16);                                                 \
1863     data_processing(current_insn, op29, opcode, Rd, Rn);        \
1864   }
1865 
1866   INSN(udivw, 0b000, 0b000010);
1867   INSN(sdivw, 0b000, 0b000011);
1868   INSN(lslvw, 0b000, 0b001000);
1869   INSN(lsrvw, 0b000, 0b001001);
1870   INSN(asrvw, 0b000, 0b001010);
1871   INSN(rorvw, 0b000, 0b001011);
1872 
1873   INSN(udiv, 0b100, 0b000010);
1874   INSN(sdiv, 0b100, 0b000011);
1875   INSN(lslv, 0b100, 0b001000);
1876   INSN(lsrv, 0b100, 0b001001);
1877   INSN(asrv, 0b100, 0b001010);
1878   INSN(rorv, 0b100, 0b001011);
1879 
1880 #undef INSN
1881 
1882   // (3 sources)
1883   void data_processing(unsigned op54, unsigned op31, unsigned o0,
1884                        Register Rd, Register Rn, Register Rm,
1885                        Register Ra) {
1886     starti;
1887     f(op54, 31, 29), f(0b11011, 28, 24);
1888     f(op31, 23, 21), f(o0, 15);
1889     zrf(Rm, 16), zrf(Ra, 10), zrf(Rn, 5), zrf(Rd, 0);
1890   }
1891 
1892 #define INSN(NAME, op54, op31, o0)                                      \
1893   void NAME(Register Rd, Register Rn, Register Rm, Register Ra) {       \
1894     data_processing(op54, op31, o0, Rd, Rn, Rm, Ra);                    \
1895   }
1896 
1897   INSN(maddw, 0b000, 0b000, 0);
1898   INSN(msubw, 0b000, 0b000, 1);
1899   INSN(madd, 0b100, 0b000, 0);
1900   INSN(msub, 0b100, 0b000, 1);
1901   INSN(smaddl, 0b100, 0b001, 0);
1902   INSN(smsubl, 0b100, 0b001, 1);
1903   INSN(umaddl, 0b100, 0b101, 0);
1904   INSN(umsubl, 0b100, 0b101, 1);
1905 
1906 #undef INSN
1907 
1908 #define INSN(NAME, op54, op31, o0)                                      \
1909   void NAME(Register Rd, Register Rn, Register Rm) {                    \
1910     data_processing(op54, op31, o0, Rd, Rn, Rm, as_Register(31));       \
1911   }
1912 
1913   INSN(smulh, 0b100, 0b010, 0);
1914   INSN(umulh, 0b100, 0b110, 0);
1915 
1916 #undef INSN
1917 
1918   // Floating-point data-processing (1 source)
1919   void data_processing(unsigned op31, unsigned type, unsigned opcode,
1920                        FloatRegister Vd, FloatRegister Vn) {
1921     starti;
1922     f(op31, 31, 29);
1923     f(0b11110, 28, 24);
1924     f(type, 23, 22), f(1, 21), f(opcode, 20, 15), f(0b10000, 14, 10);
1925     rf(Vn, 5), rf(Vd, 0);
1926   }
1927 
1928 #define INSN(NAME, op31, type, opcode)                  \
1929   void NAME(FloatRegister Vd, FloatRegister Vn) {       \
1930     data_processing(op31, type, opcode, Vd, Vn);        \
1931   }
1932 
1933 private:
1934   INSN(i_fmovs, 0b000, 0b00, 0b000000);
1935 public:
1936   INSN(fabss, 0b000, 0b00, 0b000001);
1937   INSN(fnegs, 0b000, 0b00, 0b000010);
1938   INSN(fsqrts, 0b000, 0b00, 0b000011);
1939   INSN(fcvts, 0b000, 0b00, 0b000101);   // Single-precision to double-precision
1940 
1941 private:
1942   INSN(i_fmovd, 0b000, 0b01, 0b000000);
1943 public:
1944   INSN(fabsd, 0b000, 0b01, 0b000001);
1945   INSN(fnegd, 0b000, 0b01, 0b000010);
1946   INSN(fsqrtd, 0b000, 0b01, 0b000011);
1947   INSN(fcvtd, 0b000, 0b01, 0b000100);   // Double-precision to single-precision
1948 
  // Register-to-register move through the private i_fmovd encoder.
  // Asserts that source and destination differ before emitting.
  void fmovd(FloatRegister Vd, FloatRegister Vn) {
    assert(Vd != Vn, "should be");
    i_fmovd(Vd, Vn);
  }

  // Register-to-register move through the private i_fmovs encoder.
  // Asserts that source and destination differ before emitting.
  void fmovs(FloatRegister Vd, FloatRegister Vn) {
    assert(Vd != Vn, "should be");
    i_fmovs(Vd, Vn);
  }
1958 
1959 private:
1960   void _fcvt_narrow_extend(FloatRegister Vd, SIMD_Arrangement Ta,
1961                            FloatRegister Vn, SIMD_Arrangement Tb, bool do_extend) {
1962     assert((do_extend && (Tb >> 1) + 1 == (Ta >> 1))
1963            || (!do_extend && (Ta >> 1) + 1 == (Tb >> 1)), "Incompatible arrangement");
1964     starti;
1965     int op30 = (do_extend ? Tb : Ta) & 1;
1966     int op22 = ((do_extend ? Ta : Tb) >> 1) & 1;
1967     f(0, 31), f(op30, 30), f(0b0011100, 29, 23), f(op22, 22);
1968     f(0b100001011, 21, 13), f(do_extend ? 1 : 0, 12), f(0b10, 11, 10);
1969     rf(Vn, 5), rf(Vd, 0);
1970   }
1971 
public:
  // Extending conversion: forwards to _fcvt_narrow_extend with
  // do_extend == true. The source arrangement Tb must be one of
  // T4H, T8H, T2S or T4S.
  void fcvtl(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn,  SIMD_Arrangement Tb) {
    assert(Tb == T4H || Tb == T8H|| Tb == T2S || Tb == T4S, "invalid arrangement");
    _fcvt_narrow_extend(Vd, Ta, Vn, Tb, true);
  }

  // Narrowing conversion: forwards to _fcvt_narrow_extend with
  // do_extend == false. The destination arrangement Ta must be one of
  // T4H, T8H, T2S or T4S.
  void fcvtn(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn,  SIMD_Arrangement Tb) {
    assert(Ta == T4H || Ta == T8H|| Ta == T2S || Ta == T4S, "invalid arrangement");
    _fcvt_narrow_extend(Vd, Ta, Vn, Tb, false);
  }
1982 
1983 #undef INSN
1984 
1985   // Floating-point data-processing (2 source)
1986   void data_processing(unsigned op31, unsigned type, unsigned opcode,
1987                        FloatRegister Vd, FloatRegister Vn, FloatRegister Vm) {
1988     starti;
1989     f(op31, 31, 29);
1990     f(0b11110, 28, 24);
1991     f(type, 23, 22), f(1, 21), f(opcode, 15, 10);
1992     rf(Vm, 16), rf(Vn, 5), rf(Vd, 0);
1993   }
1994 
1995 #define INSN(NAME, op31, type, opcode)                  \
1996   void NAME(FloatRegister Vd, FloatRegister Vn, FloatRegister Vm) {     \
1997     data_processing(op31, type, opcode, Vd, Vn, Vm);    \
1998   }
1999 
2000   INSN(fabds,  0b011, 0b10, 0b110101);
2001   INSN(fmuls,  0b000, 0b00, 0b000010);
2002   INSN(fdivs,  0b000, 0b00, 0b000110);
2003   INSN(fadds,  0b000, 0b00, 0b001010);
2004   INSN(fsubs,  0b000, 0b00, 0b001110);
2005   INSN(fmaxs,  0b000, 0b00, 0b010010);
2006   INSN(fmins,  0b000, 0b00, 0b010110);
2007   INSN(fnmuls, 0b000, 0b00, 0b100010);
2008 
2009   INSN(fabdd,  0b011, 0b11, 0b110101);
2010   INSN(fmuld,  0b000, 0b01, 0b000010);
2011   INSN(fdivd,  0b000, 0b01, 0b000110);
2012   INSN(faddd,  0b000, 0b01, 0b001010);
2013   INSN(fsubd,  0b000, 0b01, 0b001110);
2014   INSN(fmaxd,  0b000, 0b01, 0b010010);
2015   INSN(fmind,  0b000, 0b01, 0b010110);
2016   INSN(fnmuld, 0b000, 0b01, 0b100010);
2017 
2018 #undef INSN
2019 
2020    // Floating-point data-processing (3 source)
2021   void data_processing(unsigned op31, unsigned type, unsigned o1, unsigned o0,
2022                        FloatRegister Vd, FloatRegister Vn, FloatRegister Vm,
2023                        FloatRegister Va) {
2024     starti;
2025     f(op31, 31, 29);
2026     f(0b11111, 28, 24);
2027     f(type, 23, 22), f(o1, 21), f(o0, 15);
2028     rf(Vm, 16), rf(Va, 10), rf(Vn, 5), rf(Vd, 0);
2029   }
2030 
2031 #define INSN(NAME, op31, type, o1, o0)                                  \
2032   void NAME(FloatRegister Vd, FloatRegister Vn, FloatRegister Vm,       \
2033             FloatRegister Va) {                                         \
2034     data_processing(op31, type, o1, o0, Vd, Vn, Vm, Va);                \
2035   }
2036 
2037   INSN(fmadds, 0b000, 0b00, 0, 0);
2038   INSN(fmsubs, 0b000, 0b00, 0, 1);
2039   INSN(fnmadds, 0b000, 0b00, 1, 0);
2040   INSN(fnmsubs, 0b000, 0b00, 1, 1);
2041 
2042   INSN(fmaddd, 0b000, 0b01, 0, 0);
2043   INSN(fmsubd, 0b000, 0b01, 0, 1);
2044   INSN(fnmaddd, 0b000, 0b01, 1, 0);
2045   INSN(fnmsub, 0b000, 0b01, 1, 1);
2046 
2047 #undef INSN
2048 
2049    // Floating-point conditional select
2050   void fp_conditional_select(unsigned op31, unsigned type,
2051                              unsigned op1, unsigned op2,
2052                              Condition cond, FloatRegister Vd,
2053                              FloatRegister Vn, FloatRegister Vm) {
2054     starti;
2055     f(op31, 31, 29);
2056     f(0b11110, 28, 24);
2057     f(type, 23, 22);
2058     f(op1, 21, 21);
2059     f(op2, 11, 10);
2060     f(cond, 15, 12);
2061     rf(Vm, 16), rf(Vn, 5), rf(Vd, 0);
2062   }
2063 
2064 #define INSN(NAME, op31, type, op1, op2)                                \
2065   void NAME(FloatRegister Vd, FloatRegister Vn,                         \
2066             FloatRegister Vm, Condition cond) {                         \
2067     fp_conditional_select(op31, type, op1, op2, cond, Vd, Vn, Vm);      \
2068   }
2069 
2070   INSN(fcsels, 0b000, 0b00, 0b1, 0b11);
2071   INSN(fcseld, 0b000, 0b01, 0b1, 0b11);
2072 
2073 #undef INSN
2074 
2075    // Floating-point<->integer conversions
2076   void float_int_convert(unsigned sflag, unsigned ftype,
2077                          unsigned rmode, unsigned opcode,
2078                          Register Rd, Register Rn) {
2079     starti;
2080     f(sflag, 31);
2081     f(0b00, 30, 29);
2082     f(0b11110, 28, 24);
2083     f(ftype, 23, 22), f(1, 21), f(rmode, 20, 19);
2084     f(opcode, 18, 16), f(0b000000, 15, 10);
2085     zrf(Rn, 5), zrf(Rd, 0);
2086   }
2087 
2088 #define INSN(NAME, sflag, ftype, rmode, opcode)                          \
2089   void NAME(Register Rd, FloatRegister Vn) {                             \
2090     float_int_convert(sflag, ftype, rmode, opcode, Rd, as_Register(Vn)); \
2091   }
2092 
2093   INSN(fcvtzsw, 0b0, 0b00, 0b11, 0b000);
2094   INSN(fcvtzs,  0b1, 0b00, 0b11, 0b000);
2095   INSN(fcvtzdw, 0b0, 0b01, 0b11, 0b000);
2096   INSN(fcvtzd,  0b1, 0b01, 0b11, 0b000);
2097 
2098   // RoundToNearestTiesAway
2099   INSN(fcvtassw, 0b0, 0b00, 0b00, 0b100);  // float -> signed word
2100   INSN(fcvtasd,  0b1, 0b01, 0b00, 0b100);  // double -> signed xword
2101 
2102   // RoundTowardsNegative
2103   INSN(fcvtmssw, 0b0, 0b00, 0b10, 0b000);  // float -> signed word
2104   INSN(fcvtmsd,  0b1, 0b01, 0b10, 0b000);  // double -> signed xword
2105 
2106   INSN(fmovs, 0b0, 0b00, 0b00, 0b110);
2107   INSN(fmovd, 0b1, 0b01, 0b00, 0b110);
2108 
2109   INSN(fmovhid, 0b1, 0b10, 0b01, 0b110);
2110 
2111 #undef INSN
2112 
2113 #define INSN(NAME, sflag, type, rmode, opcode)                           \
2114   void NAME(FloatRegister Vd, Register Rn) {                            \
2115     float_int_convert(sflag, type, rmode, opcode, as_Register(Vd), Rn);  \
2116   }
2117 
2118   INSN(fmovs, 0b0, 0b00, 0b00, 0b111);
2119   INSN(fmovd, 0b1, 0b01, 0b00, 0b111);
2120 
2121   INSN(scvtfws, 0b0, 0b00, 0b00, 0b010);
2122   INSN(scvtfs,  0b1, 0b00, 0b00, 0b010);
2123   INSN(scvtfwd, 0b0, 0b01, 0b00, 0b010);
2124   INSN(scvtfd,  0b1, 0b01, 0b00, 0b010);
2125 
2126   // INSN(fmovhid, 0b100, 0b10, 0b01, 0b111);
2127 
2128 #undef INSN
2129 
2130   enum sign_kind { SIGNED, UNSIGNED };
2131 
2132 private:
2133   void _xcvtf_scalar_integer(sign_kind sign, unsigned sz,
2134                              FloatRegister Rd, FloatRegister Rn) {
2135     starti;
2136     f(0b01, 31, 30), f(sign == SIGNED ? 0 : 1, 29);
2137     f(0b111100, 27, 23), f((sz >> 1) & 1, 22), f(0b100001110110, 21, 10);
2138     rf(Rn, 5), rf(Rd, 0);
2139   }
2140 
2141 public:
2142 #define INSN(NAME, sign, sz)                        \
2143   void NAME(FloatRegister Rd, FloatRegister Rn) {   \
2144     _xcvtf_scalar_integer(sign, sz, Rd, Rn);        \
2145   }
2146 
2147   INSN(scvtfs, SIGNED, 0);
2148   INSN(scvtfd, SIGNED, 1);
2149 
2150 #undef INSN
2151 
2152 private:
2153   void _xcvtf_vector_integer(sign_kind sign, SIMD_Arrangement T,
2154                              FloatRegister Rd, FloatRegister Rn) {
2155     assert(T == T2S || T == T4S || T == T2D, "invalid arrangement");
2156     starti;
2157     f(0, 31), f(T & 1, 30), f(sign == SIGNED ? 0 : 1, 29);
2158     f(0b011100, 28, 23), f((T >> 1) & 1, 22), f(0b100001110110, 21, 10);
2159     rf(Rn, 5), rf(Rd, 0);
2160   }
2161 
2162 public:
2163   void scvtfv(SIMD_Arrangement T, FloatRegister Rd, FloatRegister Rn) {
2164     _xcvtf_vector_integer(SIGNED, T, Rd, Rn);
2165   }
2166 
2167   // Floating-point compare
2168   void float_compare(unsigned op31, unsigned type,
2169                      unsigned op, unsigned op2,
2170                      FloatRegister Vn, FloatRegister Vm = as_FloatRegister(0)) {
2171     starti;
2172     f(op31, 31, 29);
2173     f(0b11110, 28, 24);
2174     f(type, 23, 22), f(1, 21);
2175     f(op, 15, 14), f(0b1000, 13, 10), f(op2, 4, 0);
2176     rf(Vn, 5), rf(Vm, 16);
2177   }
2178 
2179 
2180 #define INSN(NAME, op31, type, op, op2)                 \
2181   void NAME(FloatRegister Vn, FloatRegister Vm) {       \
2182     float_compare(op31, type, op, op2, Vn, Vm);         \
2183   }
2184 
2185 #define INSN1(NAME, op31, type, op, op2)        \
2186   void NAME(FloatRegister Vn, double d) {       \
2187     assert_cond(d == 0.0);                      \
2188     float_compare(op31, type, op, op2, Vn);     \
2189   }
2190 
2191   INSN(fcmps, 0b000, 0b00, 0b00, 0b00000);
2192   INSN1(fcmps, 0b000, 0b00, 0b00, 0b01000);
2193   // INSN(fcmpes, 0b000, 0b00, 0b00, 0b10000);
2194   // INSN1(fcmpes, 0b000, 0b00, 0b00, 0b11000);
2195 
2196   INSN(fcmpd, 0b000,   0b01, 0b00, 0b00000);
2197   INSN1(fcmpd, 0b000,  0b01, 0b00, 0b01000);
2198   // INSN(fcmped, 0b000,  0b01, 0b00, 0b10000);
2199   // INSN1(fcmped, 0b000, 0b01, 0b00, 0b11000);
2200 
2201 #undef INSN
2202 #undef INSN1
2203 
2204 // Floating-point compare. 3-registers versions (scalar).
2205 #define INSN(NAME, sz, e)                                             \
2206   void NAME(FloatRegister Vd, FloatRegister Vn, FloatRegister Vm) {   \
2207     starti;                                                           \
2208     f(0b01111110, 31, 24), f(e, 23), f(sz, 22), f(1, 21), rf(Vm, 16); \
2209     f(0b111011, 15, 10), rf(Vn, 5), rf(Vd, 0);                        \
2210   }                                                                   \
2211 
2212   INSN(facged, 1, 0); // facge-double
2213   INSN(facges, 0, 0); // facge-single
2214   INSN(facgtd, 1, 1); // facgt-double
2215   INSN(facgts, 0, 1); // facgt-single
2216 
2217 #undef INSN
2218 
2219   // Floating-point Move (immediate)
2220 private:
2221   unsigned pack(double value);
2222 
2223   void fmov_imm(FloatRegister Vn, double value, unsigned size) {
2224     starti;
2225     f(0b00011110, 31, 24), f(size, 23, 22), f(1, 21);
2226     f(pack(value), 20, 13), f(0b10000000, 12, 5);
2227     rf(Vn, 0);
2228   }
2229 
2230 public:
2231 
2232   void fmovs(FloatRegister Vn, double value) {
2233     if (value)
2234       fmov_imm(Vn, value, 0b00);
2235     else
2236       movi(Vn, T2S, 0);
2237   }
2238   void fmovd(FloatRegister Vn, double value) {
2239     if (value)
2240       fmov_imm(Vn, value, 0b01);
2241     else
2242       movi(Vn, T1D, 0);
2243   }
2244 
2245    // Floating-point rounding
2246    // type: half-precision = 11
2247    //       single         = 00
2248    //       double         = 01
2249    // rmode: A = Away     = 100
2250    //        I = current  = 111
2251    //        M = MinusInf = 010
2252    //        N = eveN     = 000
2253    //        P = PlusInf  = 001
2254    //        X = eXact    = 110
2255    //        Z = Zero     = 011
2256   void float_round(unsigned type, unsigned rmode, FloatRegister Rd, FloatRegister Rn) {
2257     starti;
2258     f(0b00011110, 31, 24);
2259     f(type, 23, 22);
2260     f(0b1001, 21, 18);
2261     f(rmode, 17, 15);
2262     f(0b10000, 14, 10);
2263     rf(Rn, 5), rf(Rd, 0);
2264   }
2265 #define INSN(NAME, type, rmode)                   \
2266   void NAME(FloatRegister Vd, FloatRegister Vn) { \
2267     float_round(type, rmode, Vd, Vn);             \
2268   }
2269 
2270 public:
2271   INSN(frintah, 0b11, 0b100);
2272   INSN(frintih, 0b11, 0b111);
2273   INSN(frintmh, 0b11, 0b010);
2274   INSN(frintnh, 0b11, 0b000);
2275   INSN(frintph, 0b11, 0b001);
2276   INSN(frintxh, 0b11, 0b110);
2277   INSN(frintzh, 0b11, 0b011);
2278 
2279   INSN(frintas, 0b00, 0b100);
2280   INSN(frintis, 0b00, 0b111);
2281   INSN(frintms, 0b00, 0b010);
2282   INSN(frintns, 0b00, 0b000);
2283   INSN(frintps, 0b00, 0b001);
2284   INSN(frintxs, 0b00, 0b110);
2285   INSN(frintzs, 0b00, 0b011);
2286 
2287   INSN(frintad, 0b01, 0b100);
2288   INSN(frintid, 0b01, 0b111);
2289   INSN(frintmd, 0b01, 0b010);
2290   INSN(frintnd, 0b01, 0b000);
2291   INSN(frintpd, 0b01, 0b001);
2292   INSN(frintxd, 0b01, 0b110);
2293   INSN(frintzd, 0b01, 0b011);
2294 #undef INSN
2295 
2296 private:
2297   static short SIMD_Size_in_bytes[];
2298 
2299 public:
2300 #define INSN(NAME, op)                                                  \
2301   void NAME(FloatRegister Rt, SIMD_RegVariant T, const Address &adr) {  \
2302     ld_st2(as_Register(Rt), adr, (int)T & 3, op + ((T==Q) ? 0b10:0b00), 1); \
2303   }
2304 
2305   INSN(ldr, 1);
2306   INSN(str, 0);
2307 
2308 #undef INSN
2309 
2310  private:
2311 
2312   void ld_st(FloatRegister Vt, SIMD_Arrangement T, Register Xn, int op1, int op2) {
2313     starti;
2314     f(0,31), f((int)T & 1, 30);
2315     f(op1, 29, 21), f(0, 20, 16), f(op2, 15, 12);
2316     f((int)T >> 1, 11, 10), srf(Xn, 5), rf(Vt, 0);
2317   }
2318   void ld_st(FloatRegister Vt, SIMD_Arrangement T, Register Xn,
2319              int imm, int op1, int op2, int regs) {
2320 
2321     bool replicate = op2 >> 2 == 3;
2322     // post-index value (imm) is formed differently for replicate/non-replicate ld* instructions
2323     int expectedImmediate = replicate ? regs * (1 << (T >> 1)) : SIMD_Size_in_bytes[T] * regs;
2324     guarantee(T < T1Q , "incorrect arrangement");
2325     guarantee(imm == expectedImmediate, "bad offset");
2326     starti;
2327     f(0,31), f((int)T & 1, 30);
2328     f(op1 | 0b100, 29, 21), f(0b11111, 20, 16), f(op2, 15, 12);
2329     f((int)T >> 1, 11, 10), srf(Xn, 5), rf(Vt, 0);
2330   }
2331   void ld_st(FloatRegister Vt, SIMD_Arrangement T, Register Xn,
2332              Register Xm, int op1, int op2) {
2333     starti;
2334     f(0,31), f((int)T & 1, 30);
2335     f(op1 | 0b100, 29, 21), rf(Xm, 16), f(op2, 15, 12);
2336     f((int)T >> 1, 11, 10), srf(Xn, 5), rf(Vt, 0);
2337   }
2338 
2339   void ld_st(FloatRegister Vt, SIMD_Arrangement T, Address a, int op1, int op2, int regs) {
2340     switch (a.getMode()) {
2341     case Address::base_plus_offset:
2342       guarantee(a.offset() == 0, "no offset allowed here");
2343       ld_st(Vt, T, a.base(), op1, op2);
2344       break;
2345     case Address::post:
2346       ld_st(Vt, T, a.base(), a.offset(), op1, op2, regs);
2347       break;
2348     case Address::post_reg:
2349       ld_st(Vt, T, a.base(), a.index(), op1, op2);
2350       break;
2351     default:
2352       ShouldNotReachHere();
2353     }
2354   }
2355 
2356  public:
2357 
2358 #define INSN1(NAME, op1, op2)                                           \
2359   void NAME(FloatRegister Vt, SIMD_Arrangement T, const Address &a) {   \
2360     ld_st(Vt, T, a, op1, op2, 1);                                       \
2361  }
2362 
2363 #define INSN2(NAME, op1, op2)                                           \
2364   void NAME(FloatRegister Vt, FloatRegister Vt2, SIMD_Arrangement T, const Address &a) { \
2365     assert(Vt->successor() == Vt2, "Registers must be ordered");        \
2366     ld_st(Vt, T, a, op1, op2, 2);                                       \
2367   }
2368 
2369 #define INSN3(NAME, op1, op2)                                           \
2370   void NAME(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3,     \
2371             SIMD_Arrangement T, const Address &a) {                     \
2372     assert(Vt->successor() == Vt2 && Vt2->successor() == Vt3,           \
2373            "Registers must be ordered");                                \
2374     ld_st(Vt, T, a, op1, op2, 3);                                       \
2375   }
2376 
2377 #define INSN4(NAME, op1, op2)                                           \
2378   void NAME(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3,     \
2379             FloatRegister Vt4, SIMD_Arrangement T, const Address &a) {  \
2380     assert(Vt->successor() == Vt2 && Vt2->successor() == Vt3 &&         \
2381            Vt3->successor() == Vt4, "Registers must be ordered");       \
2382     ld_st(Vt, T, a, op1, op2, 4);                                       \
2383   }
2384 
2385   INSN1(ld1,  0b001100010, 0b0111);
2386   INSN2(ld1,  0b001100010, 0b1010);
2387   INSN3(ld1,  0b001100010, 0b0110);
2388   INSN4(ld1,  0b001100010, 0b0010);
2389 
2390   INSN2(ld2,  0b001100010, 0b1000);
2391   INSN3(ld3,  0b001100010, 0b0100);
2392   INSN4(ld4,  0b001100010, 0b0000);
2393 
2394   INSN1(st1,  0b001100000, 0b0111);
2395   INSN2(st1,  0b001100000, 0b1010);
2396   INSN3(st1,  0b001100000, 0b0110);
2397   INSN4(st1,  0b001100000, 0b0010);
2398 
2399   INSN2(st2,  0b001100000, 0b1000);
2400   INSN3(st3,  0b001100000, 0b0100);
2401   INSN4(st4,  0b001100000, 0b0000);
2402 
2403   INSN1(ld1r, 0b001101010, 0b1100);
2404   INSN2(ld2r, 0b001101011, 0b1100);
2405   INSN3(ld3r, 0b001101010, 0b1110);
2406   INSN4(ld4r, 0b001101011, 0b1110);
2407 
2408 #undef INSN1
2409 #undef INSN2
2410 #undef INSN3
2411 #undef INSN4
2412 
2413 #define INSN(NAME, opc)                                                                 \
2414   void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { \
2415     starti;                                                                             \
2416     assert(T == T8B || T == T16B, "must be T8B or T16B");                               \
2417     f(0, 31), f((int)T & 1, 30), f(opc, 29, 21);                                        \
2418     rf(Vm, 16), f(0b000111, 15, 10), rf(Vn, 5), rf(Vd, 0);                              \
2419   }
2420 
2421   INSN(eor,  0b101110001);
2422   INSN(orr,  0b001110101);
2423   INSN(andr, 0b001110001);
2424   INSN(bic,  0b001110011);
2425   INSN(bif,  0b101110111);
2426   INSN(bit,  0b101110101);
2427   INSN(bsl,  0b101110011);
2428   INSN(orn,  0b001110111);
2429 
2430 #undef INSN
2431 
2432 #define INSN(NAME, opc, opc2, acceptT2D)                                                \
2433   void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { \
2434     guarantee(T != T1Q && T != T1D, "incorrect arrangement");                           \
2435     if (!acceptT2D) guarantee(T != T2D, "incorrect arrangement");                       \
2436     starti;                                                                             \
2437     f(0, 31), f((int)T & 1, 30), f(opc, 29), f(0b01110, 28, 24);                        \
2438     f((int)T >> 1, 23, 22), f(1, 21), rf(Vm, 16), f(opc2, 15, 10);                      \
2439     rf(Vn, 5), rf(Vd, 0);                                                               \
2440   }
2441 
2442   INSN(addv,   0, 0b100001, true);  // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
2443   INSN(subv,   1, 0b100001, true);  // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
2444   INSN(uqsubv, 1, 0b001011, true);  // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
2445   INSN(mulv,   0, 0b100111, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
2446   INSN(mlav,   0, 0b100101, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
2447   INSN(mlsv,   1, 0b100101, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
2448   INSN(sshl,   0, 0b010001, true);  // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
2449   INSN(ushl,   1, 0b010001, true);  // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
2450   INSN(addpv,  0, 0b101111, true);  // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
2451   INSN(smullv, 0, 0b110000, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
2452   INSN(umullv, 1, 0b110000, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
2453   INSN(umlalv, 1, 0b100000, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
2454   INSN(maxv,   0, 0b011001, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
2455   INSN(minv,   0, 0b011011, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
2456   INSN(smaxp,  0, 0b101001, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
2457   INSN(sminp,  0, 0b101011, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
2458   INSN(cmeq,   1, 0b100011, true);  // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
2459   INSN(cmgt,   0, 0b001101, true);  // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
2460   INSN(cmge,   0, 0b001111, true);  // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
2461   INSN(cmhi,   1, 0b001101, true);  // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
2462   INSN(cmhs,   1, 0b001111, true);  // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
2463 
2464 #undef INSN
2465 
2466 #define INSN(NAME, opc, opc2, accepted) \
2467   void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) {                   \
2468     guarantee(T != T1Q && T != T1D, "incorrect arrangement");                           \
2469     if (accepted < 3) guarantee(T != T2D, "incorrect arrangement");                     \
2470     if (accepted < 2) guarantee(T != T2S, "incorrect arrangement");                     \
2471     if (accepted < 1) guarantee(T == T8B || T == T16B, "incorrect arrangement");        \
2472     starti;                                                                             \
2473     f(0, 31), f((int)T & 1, 30), f(opc, 29), f(0b01110, 28, 24);                        \
2474     f((int)T >> 1, 23, 22), f(opc2, 21, 10);                                            \
2475     rf(Vn, 5), rf(Vd, 0);                                                               \
2476   }
2477 
2478   INSN(absr,   0, 0b100000101110, 3); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
2479   INSN(negr,   1, 0b100000101110, 3); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
2480   INSN(notr,   1, 0b100000010110, 0); // accepted arrangements: T8B, T16B
2481   INSN(addv,   0, 0b110001101110, 1); // accepted arrangements: T8B, T16B, T4H, T8H,      T4S
2482   INSN(smaxv,  0, 0b110000101010, 1); // accepted arrangements: T8B, T16B, T4H, T8H,      T4S
2483   INSN(umaxv,  1, 0b110000101010, 1); // accepted arrangements: T8B, T16B, T4H, T8H,      T4S
2484   INSN(sminv,  0, 0b110001101010, 1); // accepted arrangements: T8B, T16B, T4H, T8H,      T4S
2485   INSN(uminv,  1, 0b110001101010, 1); // accepted arrangements: T8B, T16B, T4H, T8H,      T4S
2486   INSN(cls,    0, 0b100000010010, 2); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
2487   INSN(clz,    1, 0b100000010010, 2); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
2488   INSN(cnt,    0, 0b100000010110, 0); // accepted arrangements: T8B, T16B
2489   INSN(uaddlp, 1, 0b100000001010, 2); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
2490   INSN(uaddlv, 1, 0b110000001110, 1); // accepted arrangements: T8B, T16B, T4H, T8H,      T4S
2491   // Zero compare.
2492   INSN(cmeq,   0, 0b100000100110, 3); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
2493   INSN(cmge,   1, 0b100000100010, 3); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
2494   INSN(cmgt,   0, 0b100000100010, 3); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
2495   INSN(cmle,   1, 0b100000100110, 3); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
2496   INSN(cmlt,   0, 0b100000101010, 3); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
2497 
2498 #undef INSN
2499 
2500 #define INSN(NAME, opc) \
2501   void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) {                  \
2502     starti;                                                                            \
2503     assert(T == T4S, "arrangement must be T4S");                                       \
2504     f(0, 31), f((int)T & 1, 30), f(0b101110, 29, 24), f(opc, 23),                      \
2505     f(T == T4S ? 0 : 1, 22), f(0b110000111110, 21, 10); rf(Vn, 5), rf(Vd, 0);          \
2506   }
2507 
2508   INSN(fmaxv, 0);
2509   INSN(fminv, 1);
2510 
2511 #undef INSN
2512 
2513 // Advanced SIMD modified immediate
2514 #define INSN(NAME, op0, cmode0) \
2515   void NAME(FloatRegister Vd, SIMD_Arrangement T, unsigned imm8, unsigned lsl = 0) {   \
2516     unsigned cmode = cmode0;                                                           \
2517     unsigned op = op0;                                                                 \
2518     starti;                                                                            \
2519     assert(lsl == 0 ||                                                                 \
2520            ((T == T4H || T == T8H) && lsl == 8) ||                                     \
2521            ((T == T2S || T == T4S) && ((lsl >> 3) < 4) && ((lsl & 7) == 0)), "invalid shift");\
2522     cmode |= lsl >> 2;                                                                 \
2523     if (T == T4H || T == T8H) cmode |= 0b1000;                                         \
2524     if (!(T == T4H || T == T8H || T == T2S || T == T4S)) {                             \
2525       assert(op == 0 && cmode0 == 0, "must be MOVI");                                  \
2526       cmode = 0b1110;                                                                  \
2527       if (T == T1D || T == T2D) op = 1;                                                \
2528     }                                                                                  \
2529     f(0, 31), f((int)T & 1, 30), f(op, 29), f(0b0111100000, 28, 19);                   \
2530     f(imm8 >> 5, 18, 16), f(cmode, 15, 12), f(0x01, 11, 10), f(imm8 & 0b11111, 9, 5);  \
2531     rf(Vd, 0);                                                                         \
2532   }
2533 
2534   INSN(movi, 0, 0);
2535   INSN(orri, 0, 1);
2536   INSN(mvni, 1, 0);
2537   INSN(bici, 1, 1);
2538 
2539 #undef INSN
2540 
2541 #define INSN(NAME, op, cmode)                                           \
2542   void NAME(FloatRegister Vd, SIMD_Arrangement T, double imm) {         \
2543     unsigned imm8 = pack(imm);                                          \
2544     starti;                                                             \
2545     f(0, 31), f((int)T & 1, 30), f(op, 29), f(0b0111100000, 28, 19);    \
2546     f(imm8 >> 5, 18, 16), f(cmode, 15, 12), f(0x01, 11, 10), f(imm8 & 0b11111, 9, 5); \
2547     rf(Vd, 0);                                                          \
2548   }
2549 
2550   INSN(fmovs, 0, 0b1111);
2551   INSN(fmovd, 1, 0b1111);
2552 
2553 #undef INSN
2554 
2555 // Advanced SIMD three same
2556 #define INSN(NAME, op1, op2, op3)                                                       \
2557   void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { \
2558     starti;                                                                             \
2559     assert(T == T2S || T == T4S || T == T2D, "invalid arrangement");                    \
2560     f(0, 31), f((int)T & 1, 30), f(op1, 29), f(0b01110, 28, 24), f(op2, 23);            \
2561     f(T==T2D ? 1:0, 22); f(1, 21), rf(Vm, 16), f(op3, 15, 10), rf(Vn, 5), rf(Vd, 0);    \
2562   }
2563 
2564   INSN(fabd, 1, 1, 0b110101);
2565   INSN(fadd, 0, 0, 0b110101);
2566   INSN(fdiv, 1, 0, 0b111111);
2567   INSN(fmul, 1, 0, 0b110111);
2568   INSN(fsub, 0, 1, 0b110101);
2569   INSN(fmla, 0, 0, 0b110011);
2570   INSN(fmls, 0, 1, 0b110011);
2571   INSN(fmax, 0, 0, 0b111101);
2572   INSN(fmin, 0, 1, 0b111101);
2573   INSN(fcmeq, 0, 0, 0b111001);
2574   INSN(fcmgt, 1, 1, 0b111001);
2575   INSN(fcmge, 1, 0, 0b111001);
2576 
2577 #undef INSN
2578 
2579 #define INSN(NAME, opc)                                                                 \
2580   void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { \
2581     starti;                                                                             \
2582     assert(T == T4S, "arrangement must be T4S");                                        \
2583     f(0b01011110000, 31, 21), rf(Vm, 16), f(opc, 15, 10), rf(Vn, 5), rf(Vd, 0);         \
2584   }
2585 
2586   INSN(sha1c,     0b000000);
2587   INSN(sha1m,     0b001000);
2588   INSN(sha1p,     0b000100);
2589   INSN(sha1su0,   0b001100);
2590   INSN(sha256h2,  0b010100);
2591   INSN(sha256h,   0b010000);
2592   INSN(sha256su1, 0b011000);
2593 
2594 #undef INSN
2595 
2596 #define INSN(NAME, opc)                                                                 \
2597   void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) {                   \
2598     starti;                                                                             \
2599     assert(T == T4S, "arrangement must be T4S");                                        \
2600     f(0b0101111000101000, 31, 16), f(opc, 15, 10), rf(Vn, 5), rf(Vd, 0);                \
2601   }
2602 
2603   INSN(sha1h,     0b000010);
2604   INSN(sha1su1,   0b000110);
2605   INSN(sha256su0, 0b001010);
2606 
2607 #undef INSN
2608 
2609 #define INSN(NAME, opc)                                                                 \
2610   void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { \
2611     starti;                                                                             \
2612     assert(T == T2D, "arrangement must be T2D");                                        \
2613     f(0b11001110011, 31, 21), rf(Vm, 16), f(opc, 15, 10), rf(Vn, 5), rf(Vd, 0);         \
2614   }
2615 
2616   INSN(sha512h,   0b100000);
2617   INSN(sha512h2,  0b100001);
2618   INSN(sha512su1, 0b100010);
2619 
2620 #undef INSN
2621 
2622 #define INSN(NAME, opc)                                                                 \
2623   void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) {                   \
2624     starti;                                                                             \
2625     assert(T == T2D, "arrangement must be T2D");                                        \
2626     f(opc, 31, 10), rf(Vn, 5), rf(Vd, 0);                                               \
2627   }
2628 
2629   INSN(sha512su0, 0b1100111011000000100000);
2630 
2631 #undef INSN
2632 
2633 #define INSN(NAME, opc)                                                                                   \
2634   void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm, FloatRegister Va) { \
2635     starti;                                                                                               \
2636     assert(T == T16B, "arrangement must be T16B");                                                        \
2637     f(0b11001110, 31, 24), f(opc, 23, 21), rf(Vm, 16), f(0b0, 15, 15), rf(Va, 10), rf(Vn, 5), rf(Vd, 0);  \
2638   }
2639 
2640   INSN(eor3, 0b000);
2641   INSN(bcax, 0b001);
2642 
2643 #undef INSN
2644 
2645 #define INSN(NAME, opc)                                                                               \
2646   void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm, unsigned imm) { \
2647     starti;                                                                                           \
2648     assert(T == T2D, "arrangement must be T2D");                                                      \
2649     f(0b11001110, 31, 24), f(opc, 23, 21), rf(Vm, 16), f(imm, 15, 10), rf(Vn, 5), rf(Vd, 0);          \
2650   }
2651 
2652   INSN(xar, 0b100);
2653 
2654 #undef INSN
2655 
2656 #define INSN(NAME, opc)                                                                           \
2657   void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) {           \
2658     starti;                                                                                       \
2659     assert(T == T2D, "arrangement must be T2D");                                                  \
2660     f(0b11001110, 31, 24), f(opc, 23, 21), rf(Vm, 16), f(0b100011, 15, 10), rf(Vn, 5), rf(Vd, 0); \
2661   }
2662 
2663   INSN(rax1, 0b011);
2664 
2665 #undef INSN
2666 
2667 #define INSN(NAME, opc)                           \
2668   void NAME(FloatRegister Vd, FloatRegister Vn) { \
2669     starti;                                       \
2670     f(opc, 31, 10), rf(Vn, 5), rf(Vd, 0);         \
2671   }
2672 
2673   INSN(aese, 0b0100111000101000010010);
2674   INSN(aesd, 0b0100111000101000010110);
2675   INSN(aesmc, 0b0100111000101000011010);
2676   INSN(aesimc, 0b0100111000101000011110);
2677 
2678 #undef INSN
2679 
2680 #define INSN(NAME, op1, op2) \
2681   void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm, int index = 0) { \
2682     starti;                                                                                            \
2683     assert(T == T2S || T == T4S || T == T2D, "invalid arrangement");                                   \
2684     assert(index >= 0 && ((T == T2D && index <= 1) || (T != T2D && index <= 3)), "invalid index");     \
2685     f(0, 31), f((int)T & 1, 30), f(op1, 29); f(0b011111, 28, 23);                                      \
2686     f(T == T2D ? 1 : 0, 22), f(T == T2D ? 0 : index & 1, 21), rf(Vm, 16);                              \
2687     f(op2, 15, 12), f(T == T2D ? index : (index >> 1), 11), f(0, 10);                                  \
2688     rf(Vn, 5), rf(Vd, 0);                                                                              \
2689   }
2690 
2691   // FMLA/FMLS - Vector - Scalar
2692   INSN(fmlavs, 0, 0b0001);
2693   INSN(fmlsvs, 0, 0b0101);
2694   // FMULX - Vector - Scalar
2695   INSN(fmulxvs, 1, 0b1001);
2696 
2697 #undef INSN
2698 
2699   // Floating-point Reciprocal Estimate
2700   void frecpe(FloatRegister Vd, FloatRegister Vn, SIMD_RegVariant type) {
2701     assert(type == D || type == S, "Wrong type for frecpe");
2702     starti;
2703     f(0b010111101, 31, 23);
2704     f(type == D ? 1 : 0, 22);
2705     f(0b100001110110, 21, 10);
2706     rf(Vn, 5), rf(Vd, 0);
2707   }
2708 
2709   // (long) {a, b} -> (a + b)
2710   void addpd(FloatRegister Vd, FloatRegister Vn) {
2711     starti;
2712     f(0b0101111011110001101110, 31, 10);
2713     rf(Vn, 5), rf(Vd, 0);
2714   }
2715 
2716   // Floating-point AdvSIMD scalar pairwise
2717 #define INSN(NAME, op1, op2) \
2718   void NAME(FloatRegister Vd, FloatRegister Vn, SIMD_RegVariant type) {                 \
2719     starti;                                                                             \
2720     assert(type == D || type == S, "Wrong type for faddp/fmaxp/fminp");                 \
2721     f(0b0111111, 31, 25), f(op1, 24, 23),                                               \
2722     f(type == S ? 0 : 1, 22), f(0b11000, 21, 17), f(op2, 16, 10), rf(Vn, 5), rf(Vd, 0); \
2723   }
2724 
2725   INSN(faddp, 0b00, 0b0110110);
2726   INSN(fmaxp, 0b00, 0b0111110);
2727   INSN(fminp, 0b01, 0b0111110);
2728 
2729 #undef INSN
2730 
2731   void ins(FloatRegister Vd, SIMD_RegVariant T, FloatRegister Vn, int didx, int sidx) {
2732     starti;
2733     assert(T != Q, "invalid register variant");
2734     f(0b01101110000, 31, 21), f(((didx<<1)|1)<<(int)T, 20, 16), f(0, 15);
2735     f(sidx<<(int)T, 14, 11), f(1, 10), rf(Vn, 5), rf(Vd, 0);
2736   }
2737 
2738 #define INSN(NAME, cond, op1, op2)                                                      \
2739   void NAME(Register Rd, FloatRegister Vn, SIMD_RegVariant T, int idx) {                \
2740     starti;                                                                             \
2741     assert(cond, "invalid register variant");                                           \
2742     f(0, 31), f(op1, 30), f(0b001110000, 29, 21);                                       \
2743     f(((idx << 1) | 1) << (int)T, 20, 16), f(op2, 15, 10);                              \
2744     rf(Vn, 5), rf(Rd, 0);                                                               \
2745   }
2746 
2747   INSN(umov, (T != Q), (T == D ? 1 : 0), 0b001111);
2748   INSN(smov, (T < D),  1,                0b001011);
2749 
2750 #undef INSN
2751 
2752 #define INSN(NAME, opc, opc2, isSHR)                                    \
2753   void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, int shift){ \
2754     starti;                                                             \
2755     /* The encodings for the immh:immb fields (bits 22:16) in *SHR are  \
2756      *   0001 xxx       8B/16B, shift = 16  - UInt(immh:immb)           \
2757      *   001x xxx       4H/8H,  shift = 32  - UInt(immh:immb)           \
2758      *   01xx xxx       2S/4S,  shift = 64  - UInt(immh:immb)           \
2759      *   1xxx xxx       1D/2D,  shift = 128 - UInt(immh:immb)           \
2760      *   (1D is RESERVED)                                               \
2761      * for SHL shift is calculated as:                                  \
2762      *   0001 xxx       8B/16B, shift = UInt(immh:immb) - 8             \
2763      *   001x xxx       4H/8H,  shift = UInt(immh:immb) - 16            \
2764      *   01xx xxx       2S/4S,  shift = UInt(immh:immb) - 32            \
2765      *   1xxx xxx       1D/2D,  shift = UInt(immh:immb) - 64            \
2766      *   (1D is RESERVED)                                               \
2767      */                                                                 \
2768     guarantee(!isSHR || (isSHR && (shift != 0)), "impossible encoding");\
2769     assert((1 << ((T>>1)+3)) > shift, "Invalid Shift value");           \
2770     int cVal = (1 << (((T >> 1) + 3) + (isSHR ? 1 : 0)));               \
2771     int encodedShift = isSHR ? cVal - shift : cVal + shift;             \
2772     f(0, 31), f(T & 1, 30), f(opc, 29), f(0b011110, 28, 23),            \
2773     f(encodedShift, 22, 16); f(opc2, 15, 10), rf(Vn, 5), rf(Vd, 0);     \
2774   }
2775 
2776   INSN(shl,  0, 0b010101, /* isSHR = */ false);
2777   INSN(sshr, 0, 0b000001, /* isSHR = */ true);
2778   INSN(ushr, 1, 0b000001, /* isSHR = */ true);
2779   INSN(usra, 1, 0b000101, /* isSHR = */ true);
2780   INSN(ssra, 0, 0b000101, /* isSHR = */ true);
2781 
2782 #undef INSN
2783 
2784 #define INSN(NAME, opc, opc2, isSHR)                                    \
2785   void NAME(FloatRegister Vd, FloatRegister Vn, int shift){             \
2786     starti;                                                             \
2787     int encodedShift = isSHR ? 128 - shift : 64 + shift;                \
2788     f(0b01, 31, 30), f(opc, 29), f(0b111110, 28, 23),                   \
2789     f(encodedShift, 22, 16); f(opc2, 15, 10), rf(Vn, 5), rf(Vd, 0);     \
2790   }
2791 
2792   INSN(shld,  0, 0b010101, /* isSHR = */ false);
2793   INSN(sshrd, 0, 0b000001, /* isSHR = */ true);
2794   INSN(ushrd, 1, 0b000001, /* isSHR = */ true);
2795 
2796 #undef INSN
2797 
2798 private:
2799   void _xshll(sign_kind sign, FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) {
2800     starti;
2801     /* The encodings for the immh:immb fields (bits 22:16) are
2802      *   0001 xxx       8H, 8B/16B shift = xxx
2803      *   001x xxx       4S, 4H/8H  shift = xxxx
2804      *   01xx xxx       2D, 2S/4S  shift = xxxxx
2805      *   1xxx xxx       RESERVED
2806      */
2807     assert((Tb >> 1) + 1 == (Ta >> 1), "Incompatible arrangement");
2808     assert((1 << ((Tb>>1)+3)) > shift, "Invalid shift value");
2809     f(0, 31), f(Tb & 1, 30), f(sign == SIGNED ? 0 : 1, 29), f(0b011110, 28, 23);
2810     f((1 << ((Tb>>1)+3))|shift, 22, 16);
2811     f(0b101001, 15, 10), rf(Vn, 5), rf(Vd, 0);
2812   }
2813 
2814 public:
2815   void ushll(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn,  SIMD_Arrangement Tb, int shift) {
2816     assert(Tb == T8B || Tb == T4H || Tb == T2S, "invalid arrangement");
2817     _xshll(UNSIGNED, Vd, Ta, Vn, Tb, shift);
2818   }
2819 
2820   void ushll2(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn,  SIMD_Arrangement Tb, int shift) {
2821     assert(Tb == T16B || Tb == T8H || Tb == T4S, "invalid arrangement");
2822     _xshll(UNSIGNED, Vd, Ta, Vn, Tb, shift);
2823   }
2824 
2825   void uxtl(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn,  SIMD_Arrangement Tb) {
2826     ushll(Vd, Ta, Vn, Tb, 0);
2827   }
2828 
2829   void sshll(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn,  SIMD_Arrangement Tb, int shift) {
2830     assert(Tb == T8B || Tb == T4H || Tb == T2S, "invalid arrangement");
2831     _xshll(SIGNED, Vd, Ta, Vn, Tb, shift);
2832   }
2833 
2834   void sshll2(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn,  SIMD_Arrangement Tb, int shift) {
2835     assert(Tb == T16B || Tb == T8H || Tb == T4S, "invalid arrangement");
2836     _xshll(SIGNED, Vd, Ta, Vn, Tb, shift);
2837   }
2838 
2839   void sxtl(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn,  SIMD_Arrangement Tb) {
2840     sshll(Vd, Ta, Vn, Tb, 0);
2841   }
2842 
2843   // Move from general purpose register
2844   //   mov  Vd.T[index], Rn
2845   void mov(FloatRegister Vd, SIMD_RegVariant T, int index, Register Xn) {
2846     guarantee(T != Q, "invalid register variant");
2847     starti;
2848     f(0b01001110000, 31, 21), f(((1 << T) | (index << (T + 1))), 20, 16);
2849     f(0b000111, 15, 10), zrf(Xn, 5), rf(Vd, 0);
2850   }
2851 
2852   // Move to general purpose register
2853   //   mov  Rd, Vn.T[index]
2854   void mov(Register Xd, FloatRegister Vn, SIMD_RegVariant T, int index) {
2855     guarantee(T == S || T == D, "invalid register variant");
2856     umov(Xd, Vn, T, index);
2857   }
2858 
2859 private:
2860   void _pmull(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, FloatRegister Vm, SIMD_Arrangement Tb) {
2861     starti;
2862     assert((Ta == T1Q && (Tb == T1D || Tb == T2D)) ||
2863            (Ta == T8H && (Tb == T8B || Tb == T16B)), "Invalid Size specifier");
2864     int size = (Ta == T1Q) ? 0b11 : 0b00;
2865     f(0, 31), f(Tb & 1, 30), f(0b001110, 29, 24), f(size, 23, 22);
2866     f(1, 21), rf(Vm, 16), f(0b111000, 15, 10), rf(Vn, 5), rf(Vd, 0);
2867   }
2868 
2869 public:
2870   void pmull(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, FloatRegister Vm, SIMD_Arrangement Tb) {
2871     assert(Tb == T1D || Tb == T8B, "pmull assumes T1D or T8B as the second size specifier");
2872     _pmull(Vd, Ta, Vn, Vm, Tb);
2873   }
2874 
2875   void pmull2(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, FloatRegister Vm, SIMD_Arrangement Tb) {
2876     assert(Tb == T2D || Tb == T16B, "pmull2 assumes T2D or T16B as the second size specifier");
2877     _pmull(Vd, Ta, Vn, Vm, Tb);
2878   }
2879 
2880   void uqxtn(FloatRegister Vd, SIMD_Arrangement Tb, FloatRegister Vn, SIMD_Arrangement Ta) {
2881     starti;
2882     int size_b = (int)Tb >> 1;
2883     int size_a = (int)Ta >> 1;
2884     assert(size_b < 3 && size_b == size_a - 1, "Invalid size specifier");
2885     f(0, 31), f(Tb & 1, 30), f(0b101110, 29, 24), f(size_b, 23, 22);
2886     f(0b100001010010, 21, 10), rf(Vn, 5), rf(Vd, 0);
2887   }
2888 
2889   void xtn(FloatRegister Vd, SIMD_Arrangement Tb, FloatRegister Vn, SIMD_Arrangement Ta) {
2890     starti;
2891     int size_b = (int)Tb >> 1;
2892     int size_a = (int)Ta >> 1;
2893     assert(size_b < 3 && size_b == size_a - 1, "Invalid size specifier");
2894     f(0, 31), f(Tb & 1, 30), f(0b001110, 29, 24), f(size_b, 23, 22);
2895     f(0b100001001010, 21, 10), rf(Vn, 5), rf(Vd, 0);
2896   }
2897 
2898   void dup(FloatRegister Vd, SIMD_Arrangement T, Register Xs)
2899   {
2900     starti;
2901     assert(T != T1D, "reserved encoding");
2902     f(0,31), f((int)T & 1, 30), f(0b001110000, 29, 21);
2903     f((1 << (T >> 1)), 20, 16), f(0b000011, 15, 10), zrf(Xs, 5), rf(Vd, 0);
2904   }
2905 
2906   void dup(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, int index = 0)
2907   {
2908     starti;
2909     assert(T != T1D, "reserved encoding");
2910     f(0, 31), f((int)T & 1, 30), f(0b001110000, 29, 21);
2911     f(((1 << (T >> 1)) | (index << ((T >> 1) + 1))), 20, 16);
2912     f(0b000001, 15, 10), rf(Vn, 5), rf(Vd, 0);
2913   }
2914 
2915   // Advanced SIMD scalar copy
2916   void dup(FloatRegister Vd, SIMD_RegVariant T, FloatRegister Vn, int index = 0)
2917   {
2918     starti;
2919     assert(T != Q, "invalid size");
2920     f(0b01011110000, 31, 21);
2921     f((1 << T) | (index << (T + 1)), 20, 16);
2922     f(0b000001, 15, 10), rf(Vn, 5), rf(Vd, 0);
2923   }
2924 
2925   // AdvSIMD ZIP/UZP/TRN
2926 #define INSN(NAME, opcode)                                              \
2927   void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { \
2928     guarantee(T != T1D && T != T1Q, "invalid arrangement");             \
2929     starti;                                                             \
2930     f(0, 31), f(0b001110, 29, 24), f(0, 21), f(0, 15);                  \
2931     f(opcode, 14, 12), f(0b10, 11, 10);                                 \
2932     rf(Vm, 16), rf(Vn, 5), rf(Vd, 0);                                   \
2933     f(T & 1, 30), f(T >> 1, 23, 22);                                    \
2934   }
2935 
2936   INSN(uzp1, 0b001);
2937   INSN(trn1, 0b010);
2938   INSN(zip1, 0b011);
2939   INSN(uzp2, 0b101);
2940   INSN(trn2, 0b110);
2941   INSN(zip2, 0b111);
2942 
2943 #undef INSN
2944 
2945   // CRC32 instructions
2946 #define INSN(NAME, c, sf, sz)                                             \
2947   void NAME(Register Rd, Register Rn, Register Rm) {                      \
2948     starti;                                                               \
2949     f(sf, 31), f(0b0011010110, 30, 21), f(0b010, 15, 13), f(c, 12);       \
2950     f(sz, 11, 10), rf(Rm, 16), rf(Rn, 5), rf(Rd, 0);                      \
2951   }
2952 
2953   INSN(crc32b,  0, 0, 0b00);
2954   INSN(crc32h,  0, 0, 0b01);
2955   INSN(crc32w,  0, 0, 0b10);
2956   INSN(crc32x,  0, 1, 0b11);
2957   INSN(crc32cb, 1, 0, 0b00);
2958   INSN(crc32ch, 1, 0, 0b01);
2959   INSN(crc32cw, 1, 0, 0b10);
2960   INSN(crc32cx, 1, 1, 0b11);
2961 
2962 #undef INSN
2963 
2964   // Table vector lookup
2965 #define INSN(NAME, op)                                                  \
2966   void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, unsigned registers, FloatRegister Vm) { \
2967     starti;                                                             \
2968     assert(T == T8B || T == T16B, "invalid arrangement");               \
2969     assert(0 < registers && registers <= 4, "invalid number of registers"); \
2970     f(0, 31), f((int)T & 1, 30), f(0b001110000, 29, 21), rf(Vm, 16), f(0, 15); \
2971     f(registers - 1, 14, 13), f(op, 12),f(0b00, 11, 10), rf(Vn, 5), rf(Vd, 0); \
2972   }
2973 
2974   INSN(tbl, 0);
2975   INSN(tbx, 1);
2976 
2977 #undef INSN
2978 
2979   // AdvSIMD two-reg misc
2980   // In this instruction group, the 2 bits in the size field ([23:22]) may be
2981   // fixed or determined by the "SIMD_Arrangement T", or both. The additional
2982   // parameter "tmask" is a 2-bit mask used to indicate which bits in the size
2983   // field are determined by the SIMD_Arrangement. The bit of "tmask" should be
2984   // set to 1 if corresponding bit marked as "x" in the ArmARM.
2985 #define INSN(NAME, U, size, tmask, opcode)                                          \
2986   void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) {               \
2987        starti;                                                                      \
2988        assert((ASSERTION), MSG);                                                    \
2989        f(0, 31), f((int)T & 1, 30), f(U, 29), f(0b01110, 28, 24);                   \
2990        f(size | ((int)(T >> 1) & tmask), 23, 22), f(0b10000, 21, 17);               \
2991        f(opcode, 16, 12), f(0b10, 11, 10), rf(Vn, 5), rf(Vd, 0);                    \
2992  }
2993 
2994 #define MSG "invalid arrangement"
2995 
2996 #define ASSERTION (T == T2S || T == T4S || T == T2D)
2997   INSN(fsqrt,  1, 0b10, 0b01, 0b11111);
2998   INSN(fabs,   0, 0b10, 0b01, 0b01111);
2999   INSN(fneg,   1, 0b10, 0b01, 0b01111);
3000   INSN(frintn, 0, 0b00, 0b01, 0b11000);
3001   INSN(frintm, 0, 0b00, 0b01, 0b11001);
3002   INSN(frintp, 0, 0b10, 0b01, 0b11000);
3003   INSN(fcvtas, 0, 0b00, 0b01, 0b11100);
3004   INSN(fcvtzs, 0, 0b10, 0b01, 0b11011);
3005   INSN(fcvtms, 0, 0b00, 0b01, 0b11011);
3006 #undef ASSERTION
3007 
3008 #define ASSERTION (T == T8B || T == T16B || T == T4H || T == T8H || T == T2S || T == T4S)
3009   INSN(rev64, 0, 0b00, 0b11, 0b00000);
3010 #undef ASSERTION
3011 
3012 #define ASSERTION (T == T8B || T == T16B || T == T4H || T == T8H)
3013   INSN(rev32, 1, 0b00, 0b11, 0b00000);
3014 #undef ASSERTION
3015 
3016 #define ASSERTION (T == T8B || T == T16B)
3017   INSN(rev16, 0, 0b00, 0b11, 0b00001);
3018   INSN(rbit,  1, 0b01, 0b00, 0b00101);
3019 #undef ASSERTION
3020 
3021 #undef MSG
3022 
3023 #undef INSN
3024 
3025   void ext(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm, int index)
3026   {
3027     starti;
3028     assert(T == T8B || T == T16B, "invalid arrangement");
3029     assert((T == T8B && index <= 0b0111) || (T == T16B && index <= 0b1111), "Invalid index value");
3030     f(0, 31), f((int)T & 1, 30), f(0b101110000, 29, 21);
3031     rf(Vm, 16), f(0, 15), f(index, 14, 11);
3032     f(0, 10), rf(Vn, 5), rf(Vd, 0);
3033   }
3034 
3035 // SVE arithmetic - unpredicated
3036 #define INSN(NAME, opcode)                                                             \
3037   void NAME(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn, FloatRegister Zm) { \
3038     starti;                                                                            \
3039     assert(T != Q, "invalid register variant");                                        \
3040     f(0b00000100, 31, 24), f(T, 23, 22), f(1, 21),                                     \
3041     rf(Zm, 16), f(0, 15, 13), f(opcode, 12, 10), rf(Zn, 5), rf(Zd, 0);                 \
3042   }
3043   INSN(sve_add, 0b000);
3044   INSN(sve_sub, 0b001);
3045 #undef INSN
3046 
3047 // SVE integer add/subtract immediate (unpredicated)
3048 #define INSN(NAME, op)                                                  \
3049   void NAME(FloatRegister Zd, SIMD_RegVariant T, unsigned imm8) {       \
3050     starti;                                                             \
3051     /* The immediate is an unsigned value in the range 0 to 255, and    \
3052      * for element width of 16 bits or higher it may also be a          \
3053      * positive multiple of 256 in the range 256 to 65280.              \
3054      */                                                                 \
3055     assert(T != Q, "invalid size");                                     \
3056     int sh = 0;                                                         \
3057     if (imm8 <= 0xff) {                                                 \
3058       sh = 0;                                                           \
3059     } else if (T != B && imm8 <= 0xff00 && (imm8 & 0xff) == 0) {        \
3060       sh = 1;                                                           \
3061       imm8 = (imm8 >> 8);                                               \
3062     } else {                                                            \
3063       guarantee(false, "invalid immediate");                            \
3064     }                                                                   \
3065     f(0b00100101, 31, 24), f(T, 23, 22), f(0b10000, 21, 17);            \
3066     f(op, 16, 14), f(sh, 13), f(imm8, 12, 5), rf(Zd, 0);                \
3067   }
3068 
3069   INSN(sve_add, 0b011);
3070   INSN(sve_sub, 0b111);
3071 #undef INSN
3072 
3073 // SVE floating-point arithmetic - unpredicated
3074 #define INSN(NAME, opcode)                                                             \
3075   void NAME(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn, FloatRegister Zm) { \
3076     starti;                                                                            \
3077     assert(T == S || T == D, "invalid register variant");                              \
3078     f(0b01100101, 31, 24), f(T, 23, 22), f(0, 21),                                     \
3079     rf(Zm, 16), f(0, 15, 13), f(opcode, 12, 10), rf(Zn, 5), rf(Zd, 0);                 \
3080   }
3081 
3082   INSN(sve_fadd, 0b000);
3083   INSN(sve_fmul, 0b010);
3084   INSN(sve_fsub, 0b001);
3085 #undef INSN
3086 
3087 private:
3088   void sve_predicate_reg_insn(unsigned op24, unsigned op13,
3089                               FloatRegister Zd_or_Vd, SIMD_RegVariant T,
3090                               PRegister Pg, FloatRegister Zn_or_Vn) {
3091     starti;
3092     f(op24, 31, 24), f(T, 23, 22), f(op13, 21, 13);
3093     pgrf(Pg, 10), rf(Zn_or_Vn, 5), rf(Zd_or_Vd, 0);
3094   }
3095 
3096   void sve_shift_imm_encoding(SIMD_RegVariant T, int shift, bool isSHR,
3097                               int& tszh, int& tszl_imm) {
3098     /* The encodings for the tszh:tszl:imm3 fields
3099      * for shift right is calculated as:
3100      *   0001 xxx       B, shift = 16  - UInt(tszh:tszl:imm3)
3101      *   001x xxx       H, shift = 32  - UInt(tszh:tszl:imm3)
3102      *   01xx xxx       S, shift = 64  - UInt(tszh:tszl:imm3)
3103      *   1xxx xxx       D, shift = 128 - UInt(tszh:tszl:imm3)
3104      * for shift left is calculated as:
3105      *   0001 xxx       B, shift = UInt(tszh:tszl:imm3) - 8
3106      *   001x xxx       H, shift = UInt(tszh:tszl:imm3) - 16
3107      *   01xx xxx       S, shift = UInt(tszh:tszl:imm3) - 32
3108      *   1xxx xxx       D, shift = UInt(tszh:tszl:imm3) - 64
3109      */
3110     assert(T != Q, "Invalid register variant");
3111     if (isSHR) {
3112       assert(((1 << (T + 3)) >= shift) && (shift > 0) , "Invalid shift value");
3113     } else {
3114       assert(((1 << (T + 3)) > shift) && (shift >= 0) , "Invalid shift value");
3115     }
3116     int cVal = (1 << ((T + 3) + (isSHR ? 1 : 0)));
3117     int encodedShift = isSHR ? cVal - shift : cVal + shift;
3118     tszh = encodedShift >> 5;
3119     tszl_imm = encodedShift & 0x1f;
3120   }
3121 
3122 public:
3123 
3124 // SVE integer arithmetic - predicate
     // INSN(NAME, op1, op2) defines NAME(Zdn_or_Zd_or_Vd, T, Pg, Znm_or_Vn): the generated
     // method asserts T != Q and then forwards op1, op2 and its operands, unchanged, to
     // sve_predicate_reg_insn(). The trailing comment on each instantiation names the
     // instruction that is generated.
3125 #define INSN(NAME, op1, op2)                                                                            \
3126   void NAME(FloatRegister Zdn_or_Zd_or_Vd, SIMD_RegVariant T, PRegister Pg, FloatRegister Znm_or_Vn) {  \
3127     assert(T != Q, "invalid register variant");                                                         \
3128     sve_predicate_reg_insn(op1, op2, Zdn_or_Zd_or_Vd, T, Pg, Znm_or_Vn);                                \
3129   }
3130 
3131   INSN(sve_abs,  0b00000100, 0b010110101); // vector abs, unary
3132   INSN(sve_add,  0b00000100, 0b000000000); // vector add
3133   INSN(sve_and,  0b00000100, 0b011010000); // vector and
3134   INSN(sve_andv, 0b00000100, 0b011010001); // bitwise and reduction to scalar
3135   INSN(sve_asr,  0b00000100, 0b010000100); // vector arithmetic shift right
3136   INSN(sve_bic,  0b00000100, 0b011011000); // vector bitwise clear
3137   INSN(sve_cnt,  0b00000100, 0b011010101); // count non-zero bits
3138   INSN(sve_cpy,  0b00000101, 0b100000100); // copy scalar to each active vector element
3139   INSN(sve_eor,  0b00000100, 0b011001000); // vector eor
3140   INSN(sve_eorv, 0b00000100, 0b011001001); // bitwise xor reduction to scalar
3141   INSN(sve_lsl,  0b00000100, 0b010011100); // vector logical shift left
3142   INSN(sve_lsr,  0b00000100, 0b010001100); // vector logical shift right
3143   INSN(sve_mul,  0b00000100, 0b010000000); // vector mul
3144   INSN(sve_neg,  0b00000100, 0b010111101); // vector neg, unary
3145   INSN(sve_not,  0b00000100, 0b011110101); // bitwise invert vector, unary
3146   INSN(sve_orr,  0b00000100, 0b011000000); // vector or
3147   INSN(sve_orv,  0b00000100, 0b011000001); // bitwise or reduction to scalar
3148   INSN(sve_smax, 0b00000100, 0b001000000); // signed maximum vectors
3149   INSN(sve_smaxv, 0b00000100, 0b001000001); // signed maximum reduction to scalar
3150   INSN(sve_smin,  0b00000100, 0b001010000); // signed minimum vectors
3151   INSN(sve_sminv, 0b00000100, 0b001010001); // signed minimum reduction to scalar
3152   INSN(sve_sub,   0b00000100, 0b000001000); // vector sub
3153   INSN(sve_uaddv, 0b00000100, 0b000001001); // unsigned add reduction to scalar
3154 #undef INSN
3155 
3156 // SVE floating-point arithmetic - predicate
     // INSN(NAME, op1, op2) defines NAME(Zd_or_Zdn_or_Vd, T, Pg, Zn_or_Zm): the generated
     // method asserts that T is S or D and then forwards op1, op2 and its operands to
     // sve_predicate_reg_insn().
3157 #define INSN(NAME, op1, op2)                                                                          \
3158   void NAME(FloatRegister Zd_or_Zdn_or_Vd, SIMD_RegVariant T, PRegister Pg, FloatRegister Zn_or_Zm) { \
3159     assert(T == S || T == D, "invalid register variant");                                             \
3160     sve_predicate_reg_insn(op1, op2, Zd_or_Zdn_or_Vd, T, Pg, Zn_or_Zm);                               \
3161   }
3162 
3163   INSN(sve_fabs,    0b00000100, 0b011100101);
3164   INSN(sve_fadd,    0b01100101, 0b000000100);
3165   INSN(sve_fadda,   0b01100101, 0b011000001); // add strictly-ordered reduction to scalar Vd
3166   INSN(sve_fdiv,    0b01100101, 0b001101100);
3167   INSN(sve_fmax,    0b01100101, 0b000110100); // floating-point maximum
3168   INSN(sve_fmaxv,   0b01100101, 0b000110001); // floating-point maximum recursive reduction to scalar
3169   INSN(sve_fmin,    0b01100101, 0b000111100); // floating-point minimum
3170   INSN(sve_fminv,   0b01100101, 0b000111001); // floating-point minimum recursive reduction to scalar
3171   INSN(sve_fmul,    0b01100101, 0b000010100);
3172   INSN(sve_fneg,    0b00000100, 0b011101101);
3173   INSN(sve_frintm,  0b01100101, 0b000010101); // floating-point round to integral value, toward minus infinity
3174   INSN(sve_frintn,  0b01100101, 0b000000101); // floating-point round to integral value, nearest with ties to even
3175   INSN(sve_frinta,  0b01100101, 0b000100101); // floating-point round to integral value, nearest with ties to away
3176   INSN(sve_frintp,  0b01100101, 0b000001101); // floating-point round to integral value, toward plus infinity
3177   INSN(sve_fsqrt,   0b01100101, 0b001101101);
3178   INSN(sve_fsub,    0b01100101, 0b000001100);
3179 #undef INSN
3180 
3181   // SVE multiple-add/sub - predicated
       // INSN(NAME, op0, op1, op2) defines NAME(Zda, T, Pg, Zn, Zm). Fields as emitted:
       // op0 -> bits 31:24, T -> bits 23:22, op1 -> bit 21, Zm at bit 16, op2 -> bits 15:13,
       // Pg at bit 10, Zn at bit 5, Zda at bit 0. T == Q is rejected by the assert.
       // The formulas in the trailing comments are written in terms of the C++ parameter
       // names (Zda, Zn, Zm) of the generated method.
3182 #define INSN(NAME, op0, op1, op2)                                                                     \
3183   void NAME(FloatRegister Zda, SIMD_RegVariant T, PRegister Pg, FloatRegister Zn, FloatRegister Zm) { \
3184     starti;                                                                                           \
3185     assert(T != Q, "invalid size");                                                                   \
3186     f(op0, 31, 24), f(T, 23, 22), f(op1, 21), rf(Zm, 16);                                             \
3187     f(op2, 15, 13), pgrf(Pg, 10), rf(Zn, 5), rf(Zda, 0);                                              \
3188   }
3189 
3190   INSN(sve_fmla,  0b01100101, 1, 0b000); // floating-point fused multiply-add, writing addend: Zda = Zda + Zn * Zm
3191   INSN(sve_fmls,  0b01100101, 1, 0b001); // floating-point fused multiply-subtract: Zda = Zda + -Zn * Zm
3192   INSN(sve_fnmla, 0b01100101, 1, 0b010); // floating-point negated fused multiply-add: Zda = -Zda + -Zn * Zm
3193   INSN(sve_fnmls, 0b01100101, 1, 0b011); // floating-point negated fused multiply-subtract: Zda = -Zda + Zn * Zm
3194   INSN(sve_fmad,  0b01100101, 1, 0b100); // floating-point fused multiply-add, writing multiplicand: Zda = Zm + Zda * Zn
3195   INSN(sve_fmsb,  0b01100101, 1, 0b101); // floating-point fused multiply-subtract, writing multiplicand: Zda = Zm + -Zda * Zn
3196   INSN(sve_fnmad, 0b01100101, 1, 0b110); // floating-point negated fused multiply-add, writing multiplicand: Zda = -Zm + -Zda * Zn
3197   INSN(sve_fnmsb, 0b01100101, 1, 0b111); // floating-point negated fused multiply-subtract, writing multiplicand: Zda = -Zm + Zda * Zn
3198   INSN(sve_mla,   0b00000100, 0, 0b010); // multiply-add, writing addend: Zda = Zda + Zn*Zm
3199   INSN(sve_mls,   0b00000100, 0, 0b011); // multiply-subtract, writing addend: Zda = Zda + -Zn*Zm
3200 #undef INSN
3201 
3202 // SVE bitwise logical - unpredicated
     // INSN(NAME, opc) defines NAME(Zd, Zn, Zm). opc (bits 23:22) is the only field that
     // differs between the four instructions; the generated methods take no element-size
     // argument. Zm, Zn and Zd are placed at bits 16, 5 and 0.
3203 #define INSN(NAME, opc)                                              \
3204   void NAME(FloatRegister Zd, FloatRegister Zn, FloatRegister Zm) {  \
3205     starti;                                                          \
3206     f(0b00000100, 31, 24), f(opc, 23, 22), f(1, 21),                 \
3207     rf(Zm, 16), f(0b001100, 15, 10), rf(Zn, 5), rf(Zd, 0);           \
3208   }
3209   INSN(sve_and, 0b00);
3210   INSN(sve_eor, 0b10);
3211   INSN(sve_orr, 0b01);
3212   INSN(sve_bic, 0b11);
3213 #undef INSN
3214 
3215 // SVE bitwise logical with immediate (unpredicated)
     // INSN(NAME, opc) defines NAME(Zd, T, imm). The element width returned by
     // regVariant_to_elemBits(T) and the 64-bit imm are handed to
     // encode_sve_logical_immediate(); its result is emitted into bits 17:5.
     // opc goes into bits 23:22 and Zd is the only register field (bit 0).
3216 #define INSN(NAME, opc)                                                      \
3217   void NAME(FloatRegister Zd, SIMD_RegVariant T, uint64_t imm) {             \
3218     starti;                                                                  \
3219     unsigned elembits = regVariant_to_elemBits(T);                           \
3220     uint32_t val = encode_sve_logical_immediate(elembits, imm);              \
3221     f(0b00000101, 31, 24), f(opc, 23, 22), f(0b0000, 21, 18);                \
3222     f(val, 17, 5), rf(Zd, 0);                                                \
3223   }
3224   INSN(sve_and, 0b10);
3225   INSN(sve_eor, 0b01);
3226   INSN(sve_orr, 0b00);
3227 #undef INSN
3228 
3229 // SVE shift immediate - unpredicated
     // INSN(NAME, opc, isSHR) defines NAME(Zd, T, Zn, shift). sve_shift_imm_encoding()
     // converts (T, shift, isSHR) into tszh and tszl_imm, which are emitted into
     // bits 23:22 and bits 20:16 respectively; opc goes into bits 12:10.
     // isSHR is true for the right shifts (asr, lsr) and false for lsl.
3230 #define INSN(NAME, opc, isSHR)                                                  \
3231   void NAME(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn, int shift) { \
3232     starti;                                                                     \
3233     int tszh, tszl_imm;                                                         \
3234     sve_shift_imm_encoding(T, shift, isSHR, tszh, tszl_imm);                    \
3235     f(0b00000100, 31, 24);                                                      \
3236     f(tszh, 23, 22), f(1,21), f(tszl_imm, 20, 16);                              \
3237     f(0b100, 15, 13), f(opc, 12, 10), rf(Zn, 5), rf(Zd, 0);                     \
3238   }
3239 
3240   INSN(sve_asr, 0b100, /* isSHR = */ true);
3241   INSN(sve_lsl, 0b111, /* isSHR = */ false);
3242   INSN(sve_lsr, 0b101, /* isSHR = */ true);
3243 #undef INSN
3244 
3245 // SVE bitwise shift by immediate (predicated)
     // INSN(NAME, opc, isSHR) defines NAME(Zdn, T, Pg, shift). As in the unpredicated form,
     // sve_shift_imm_encoding() produces tszh / tszl_imm from (T, shift, isSHR); here tszh is
     // emitted into bits 23:22 and tszl_imm into bits 9:5, with opc in bits 19:16.
     // Zdn is the only vector register field.
3246 #define INSN(NAME, opc, isSHR)                                                  \
3247   void NAME(FloatRegister Zdn, SIMD_RegVariant T, PRegister Pg, int shift) {    \
3248     starti;                                                                     \
3249     int tszh, tszl_imm;                                                         \
3250     sve_shift_imm_encoding(T, shift, isSHR, tszh, tszl_imm);                    \
3251     f(0b00000100, 31, 24), f(tszh, 23, 22), f(0b00, 21, 20), f(opc, 19, 16);    \
3252     f(0b100, 15, 13), pgrf(Pg, 10), f(tszl_imm, 9, 5), rf(Zdn, 0);              \
3253   }
3254 
3255   INSN(sve_asr, 0b0000, /* isSHR = */ true);
3256   INSN(sve_lsl, 0b0011, /* isSHR = */ false);
3257   INSN(sve_lsr, 0b0001, /* isSHR = */ true);
3258 #undef INSN
3259 
3260 private:
3261 
3262   // Scalar base + immediate index
       // Emits one load/store word for the "Xn + immediate" addressing form. The caller
       // supplies the opcode pieces (op1, type, op2); imm is written as a signed field.
3263   void sve_ld_st1(FloatRegister Zt, Register Xn, int imm, PRegister Pg,
3264               SIMD_RegVariant T, int op1, int type, int op2) {
3265     starti;
3266     assert_cond(T >= type);
         // Opcode / size part of the word.
         f(op1, 31, 25);
         f(type, 24, 23);
         f(T, 22, 21);
         // Bit 20 is zero in this form; the immediate occupies bits 19:16.
         f(0, 20);
         sf(imm, 19, 16);
         f(op2, 15, 13);
         // Register operands.
         pgrf(Pg, 10);
         srf(Xn, 5);
         rf(Zt, 0);
3270   }
3271 
3272   // Scalar base + scalar index
       // Emits one load/store word for the "Xn + Xm" addressing form. The caller supplies
       // the opcode pieces (op1, type, op2); Xm takes the place of the immediate field.
3273   void sve_ld_st1(FloatRegister Zt, Register Xn, Register Xm, PRegister Pg,
3274               SIMD_RegVariant T, int op1, int type, int op2) {
3275     starti;
3276     assert_cond(T >= type);
         // Opcode / size part of the word.
         f(op1, 31, 25);
         f(type, 24, 23);
         f(T, 22, 21);
         // Index register and secondary opcode.
         rf(Xm, 16);
         f(op2, 15, 13);
         // Remaining register operands.
         pgrf(Pg, 10);
         srf(Xn, 5);
         rf(Zt, 0);
3280   }
3281 
3282   void sve_ld_st1(FloatRegister Zt, PRegister Pg,
3283               SIMD_RegVariant T, const Address &a,
3284               int op1, int type, int imm_op2, int scalar_op2) {
         // Select the encoder overload matching the addressing mode of 'a':
         //  - base + offset          -> immediate form, using imm_op2
         //  - base + index register  -> register form, using scalar_op2
         // Every other mode is a caller error and ends up in ShouldNotReachHere().
3285     switch (a.getMode()) {
3286     case Address::base_plus_offset:
3287       sve_ld_st1(Zt, a.base(), a.offset(), Pg, T, op1, type, imm_op2);
           return;
3289     case Address::base_plus_offset_reg:
3290       sve_ld_st1(Zt, a.base(), a.index(), Pg, T, op1, type, scalar_op2);
           return;
3292     default:
           break;
3294     }
         ShouldNotReachHere();
3295   }
3296 
3297 public:
3298 
3299 // SVE contiguous load/store
     // INSN(NAME, op1, type, imm_op2, scalar_op2) defines NAME(Zt, T, Pg, a). The generated
     // method asserts T != Q and delegates to the Address-taking sve_ld_st1() overload,
     // passing both imm_op2 and scalar_op2 so that overload can use the one matching the
     // addressing mode of 'a'.
3300 #define INSN(NAME, op1, type, imm_op2, scalar_op2)                                   \
3301   void NAME(FloatRegister Zt, SIMD_RegVariant T, PRegister Pg, const Address &a) {   \
3302     assert(T != Q, "invalid register variant");                                      \
3303     sve_ld_st1(Zt, Pg, T, a, op1, type, imm_op2, scalar_op2);                        \
3304   }
3305 
3306   INSN(sve_ld1b, 0b1010010, 0b00, 0b101, 0b010);
3307   INSN(sve_st1b, 0b1110010, 0b00, 0b111, 0b010);
3308   INSN(sve_ld1h, 0b1010010, 0b01, 0b101, 0b010);
3309   INSN(sve_st1h, 0b1110010, 0b01, 0b111, 0b010);
3310   INSN(sve_ld1w, 0b1010010, 0b10, 0b101, 0b010);
3311   INSN(sve_st1w, 0b1110010, 0b10, 0b111, 0b010);
3312   INSN(sve_ld1d, 0b1010010, 0b11, 0b101, 0b010);
3313   INSN(sve_st1d, 0b1110010, 0b11, 0b111, 0b010);
3314 #undef INSN
3315 
3316 // Gather/scatter load/store (SVE) - scalar plus vector
     // INSN(NAME, op1, type, op2, op3) defines NAME(Zt, Pg, Xn, Zm). Fields as emitted:
     // op1 -> bits 31:25, type -> bits 24:23, op2 -> bits 22:21, Zm (the vector of offsets)
     // at bit 16, op3 -> bits 15:13, Pg at bit 10, Xn at bit 5, Zt at bit 0.
     // The generated methods take no element-size argument; the variant is fixed by the
     // constants of each instantiation.
3317 #define INSN(NAME, op1, type, op2, op3)                                         \
3318   void NAME(FloatRegister Zt, PRegister Pg, Register Xn, FloatRegister Zm) {    \
3319     starti;                                                                     \
3320     f(op1, 31, 25), f(type, 24, 23), f(op2, 22, 21), rf(Zm, 16);                \
3321     f(op3, 15, 13), pgrf(Pg, 10), srf(Xn, 5), rf(Zt, 0);                        \
3322   }
3323   // SVE 32-bit gather load words (scalar plus 32-bit scaled offsets)
3324   INSN(sve_ld1w_gather,  0b1000010, 0b10, 0b01, 0b010);
3325   // SVE 64-bit gather load (scalar plus 32-bit unpacked scaled offsets)
3326   INSN(sve_ld1d_gather,  0b1100010, 0b11, 0b01, 0b010);
3327   // SVE 32-bit scatter store (scalar plus 32-bit scaled offsets)
3328   INSN(sve_st1w_scatter, 0b1110010, 0b10, 0b11, 0b100);
3329   // SVE 64-bit scatter store (scalar plus unpacked 32-bit scaled offsets)
3330   INSN(sve_st1d_scatter, 0b1110010, 0b11, 0b01, 0b100);
3331 #undef INSN
3332 
3333 // SVE load/store - unpredicated
     // INSN(NAME, op1) defines NAME(Zt, a). Only addresses without an index register are
     // accepted (assert). a.offset() is split across two fields: the bits above the low
     // three go, as a signed value, into bits 21:16 and the low three bits into bits 12:10.
     // NOTE(review): the unit of a.offset() is not visible here - confirm against callers.
3334 #define INSN(NAME, op1)                                                         \
3335   void NAME(FloatRegister Zt, const Address &a)  {                              \
3336     starti;                                                                     \
3337     assert(a.index() == noreg, "invalid address variant");                      \
3338     f(op1, 31, 29), f(0b0010110, 28, 22), sf(a.offset() >> 3, 21, 16),          \
3339     f(0b010, 15, 13), f(a.offset() & 0x7, 12, 10), srf(a.base(), 5), rf(Zt, 0); \
3340   }
3341 
3342   INSN(sve_ldr, 0b100); // LDR (vector)
3343   INSN(sve_str, 0b111); // STR (vector)
3344 #undef INSN
3345 
3346 // SVE stack frame adjustment
     // INSN(NAME, op) defines NAME(Xd, Xn, imm6). op goes into bits 22:21, Xn at bit 16,
     // the signed multiplier imm6 into bits 10:5 and Xd at bit 0. Both registers are
     // emitted with srf().
3347 #define INSN(NAME, op) \
3348   void NAME(Register Xd, Register Xn, int imm6) {                 \
3349     starti;                                                       \
3350     f(0b000001000, 31, 23), f(op, 22, 21);                        \
3351     srf(Xn, 16), f(0b01010, 15, 11), sf(imm6, 10, 5), srf(Xd, 0); \
3352   }
3353 
3354   INSN(sve_addvl, 0b01); // Add multiple of vector register size to scalar register
3355   INSN(sve_addpl, 0b11); // Add multiple of predicate register size to scalar register
3356 #undef INSN
3357 
3358 // SVE inc/dec register by element count
     // INSN(NAME, op) defines NAME(Xdn, T, imm4 = 1, pattern = 0b11111). T (not Q) selects
     // the element size in bits 23:22. The multiplier is stored biased by one: imm4 - 1 is
     // emitted into the 4-bit field 19:16. pattern goes into bits 9:5 and op (bit 10)
     // distinguishes increment (0) from decrement (1).
3359 #define INSN(NAME, op) \
3360   void NAME(Register Xdn, SIMD_RegVariant T, unsigned imm4 = 1, int pattern = 0b11111) { \
3361     starti;                                                                              \
3362     assert(T != Q, "invalid size");                                                      \
3363     f(0b00000100,31, 24), f(T, 23, 22), f(0b11, 21, 20);                                 \
3364     f(imm4 - 1, 19, 16), f(0b11100, 15, 11), f(op, 10), f(pattern, 9, 5), rf(Xdn, 0);    \
3365   }
3366 
3367   INSN(sve_inc, 0);
3368   INSN(sve_dec, 1);
3369 #undef INSN
3370 
3371 // SVE predicate logical operations
     // INSN(NAME, op1, op2, op3) defines NAME(Pd, Pg, Pn, Pm). The three selector values
     // land in bits 23:22 (op1), bit 9 (op2) and bit 4 (op3); the predicate registers are
     // placed at bit 16 (Pm), bit 10 (Pg), bit 5 (Pn) and bit 0 (Pd).
3372 #define INSN(NAME, op1, op2, op3) \
3373   void NAME(PRegister Pd, PRegister Pg, PRegister Pn, PRegister Pm) { \
3374     starti;                                                           \
3375     f(0b00100101, 31, 24), f(op1, 23, 22), f(0b00, 21, 20);           \
3376     prf(Pm, 16), f(0b01, 15, 14), prf(Pg, 10), f(op2, 9);             \
3377     prf(Pn, 5), f(op3, 4), prf(Pd, 0);                                \
3378   }
3379 
3380   INSN(sve_and,  0b00, 0b0, 0b0);
3381   INSN(sve_ands, 0b01, 0b0, 0b0);
3382   INSN(sve_eor,  0b00, 0b1, 0b0);
3383   INSN(sve_eors, 0b01, 0b1, 0b0);
3384   INSN(sve_orr,  0b10, 0b0, 0b0);
3385   INSN(sve_orrs, 0b11, 0b0, 0b0);
3386   INSN(sve_bic,  0b00, 0b0, 0b1);
3387 #undef INSN
3388 
3389   // SVE increment register by predicate count
       // rd is the scalar register that is updated, pg the predicate whose element
       // count is used; T (not Q) gives the element size.
3390   void sve_incp(const Register rd, SIMD_RegVariant T, PRegister pg) {
3391     starti;
3392     assert(T != Q, "invalid size");
         f(0b00100101, 31, 24);
         f(T, 23, 22);
         f(0b1011001000100, 21, 9);
         prf(pg, 5);
         rf(rd, 0);
3395   }
3396 
3397   // SVE broadcast general-purpose register to vector elements (unpredicated)
       // Rn is emitted with srf(); T (not Q) gives the element size of Zd.
3398   void sve_dup(FloatRegister Zd, SIMD_RegVariant T, Register Rn) {
3399     starti;
3400     assert(T != Q, "invalid size");
         f(0b00000101, 31, 24);
         f(T, 23, 22);
         f(0b100000001110, 21, 10);
         srf(Rn, 5);
         rf(Zd, 0);
3403   }
3404 
3405   // SVE broadcast signed immediate to vector elements (unpredicated)
       // Two immediate shapes are accepted:
       //  - any value in [-128, 127], encoded directly (sh = 0);
       //  - for T != B, a multiple of 256 in [-32768, 32512], encoded as value >> 8 with sh = 1.
       // Anything else trips the guarantee.
3406   void sve_dup(FloatRegister Zd, SIMD_RegVariant T, int imm8) {
3407     starti;
3408     assert(T != Q, "invalid size");
         const bool fits_plain   = (-128 <= imm8) && (imm8 <= 127);
         const bool fits_shifted = (T != B) && (-32768 <= imm8) && (imm8 <= 32512) &&
                                   ((imm8 & 0xff) == 0);
3409     int sh = 0;
         if (!fits_plain) {
           if (fits_shifted) {
             sh = 1;
             imm8 >>= 8;
           } else {
3416       guarantee(false, "invalid immediate");
           }
         }
         f(0b00100101, 31, 24);
         f(T, 23, 22);
         f(0b11100011, 21, 14);
         f(sh, 13);
         sf(imm8, 12, 5);
         rf(Zd, 0);
3420   }
3421 
3422   // SVE predicate test
       // Encodes Pg at bit 10 and Pn at bit 5; bit 9 and the low five bits are zero,
       // i.e. there is no destination register field.
3423   void sve_ptest(PRegister Pg, PRegister Pn) {
3424     starti;
         f(0b001001010101000011, 31, 14);
         prf(Pg, 10);
         f(0, 9);
         prf(Pn, 5);
         f(0, 4, 0);
3426   }
3427 
3428   // SVE predicate initialize
       // esize is emitted into bits 23:22 and pattern (default 0b11111) into bits 9:5.
3429   void sve_ptrue(PRegister pd, SIMD_RegVariant esize, int pattern = 0b11111) {
3430     starti;
         f(0b00100101, 31, 24);
         f(esize, 23, 22);
         f(0b011000111000, 21, 10);
         f(pattern, 9, 5);
         f(0b0, 4);
         prf(pd, 0);
3433   }
3434 
3435   // SVE predicate zero
       // The encoding is constant apart from the destination predicate at bit 0.
3436   void sve_pfalse(PRegister pd) {
3437     starti;
         f(0b00100101, 31, 24);
         f(0b00, 23, 22);
         f(0b011000111001, 21, 10);
         f(0b000000, 9, 4);
         prf(pd, 0);
3440   }
3441 
3442 // SVE load/store predicate register
     // INSN(NAME, op1) defines NAME(Pt, a). Only addresses without an index register are
     // accepted (assert). a.offset() is split across two fields: the bits above the low
     // three go, as a signed value, into bits 21:16 and the low three bits into bits 12:10.
     // Bit 4 is zero and Pt is placed at bit 0.
     // NOTE(review): the unit of a.offset() is not visible here - confirm against callers.
3443 #define INSN(NAME, op1)                                                  \
3444   void NAME(PRegister Pt, const Address &a)  {                           \
3445     starti;                                                              \
3446     assert(a.index() == noreg, "invalid address variant");               \
3447     f(op1, 31, 29), f(0b0010110, 28, 22), sf(a.offset() >> 3, 21, 16),   \
3448     f(0b000, 15, 13), f(a.offset() & 0x7, 12, 10), srf(a.base(), 5),     \
3449     f(0, 4), prf(Pt, 0);                                                 \
3450   }
3451 
3452   INSN(sve_ldr, 0b100); // LDR (predicate)
3453   INSN(sve_str, 0b111); // STR (predicate)
3454 #undef INSN
3455 
3456   // SVE move predicate register
       // The source predicate Pn is written into all three source positions of the
       // word (bits 16, 10 and 5); Pd is the destination at bit 0.
3457   void sve_mov(PRegister Pd, PRegister Pn) {
3458     starti;
         f(0b001001011000, 31, 20);
         prf(Pn, 16);
         f(0b01, 15, 14);
         prf(Pn, 10);
         f(0, 9);
         prf(Pn, 5);
         f(0, 4);
         prf(Pd, 0);
3461   }
3462 
3463   // SVE copy general-purpose register to vector elements (predicated)
       // Rn is emitted with srf(); Pg governs the copy and T (not Q) gives the element size.
3464   void sve_cpy(FloatRegister Zd, SIMD_RegVariant T, PRegister Pg, Register Rn) {
3465     starti;
3466     assert(T != Q, "invalid size");
         f(0b00000101, 31, 24);
         f(T, 23, 22);
         f(0b101000101, 21, 13);
         pgrf(Pg, 10);
         srf(Rn, 5);
         rf(Zd, 0);
3469   }
3470 
3471 private:
3472   void sve_cpy(FloatRegister Zd, SIMD_RegVariant T, PRegister Pg, int imm8,
3473                bool isMerge, bool isFloat) {
         // Shared encoder behind the public immediate forms of sve_cpy.
         // Accepted immediates:
         //  - any value in [-128, 127], encoded directly (sh = 0);
         //  - for T != B, a multiple of 256 in [-32768, 32512], encoded as value >> 8 with sh = 1.
         // isFloat sets bit 15 and isMerge sets bit 14.
3474     starti;
3475     assert(T != Q, "invalid size");
         const bool fits_plain   = (-128 <= imm8) && (imm8 <= 127);
         const bool fits_shifted = (T != B) && (-32768 <= imm8) && (imm8 <= 32512) &&
                                   ((imm8 & 0xff) == 0);
3476     int sh = 0;
         if (!fits_plain) {
           if (fits_shifted) {
             sh = 1;
             imm8 >>= 8;
           } else {
3483       guarantee(false, "invalid immediate");
           }
         }
         const int merge_bit = isMerge ? 1 : 0;
         const int float_bit = isFloat ? 1 : 0;
         f(0b00000101, 31, 24);
         f(T, 23, 22);
         f(0b01, 21, 20);
         prf(Pg, 16);
         f(float_bit, 15);
         f(merge_bit, 14);
         f(sh, 13);
         sf(imm8, 12, 5);
         rf(Zd, 0);
3488   }
3489 
3490 public:
3491   // SVE copy signed integer immediate to vector elements (predicated)
       // Thin wrapper: forwards to the private six-argument encoder with isFloat = false.
3492   void sve_cpy(FloatRegister Zd, SIMD_RegVariant T, PRegister Pg, int imm8, bool isMerge) {
         const bool is_float = false;
         sve_cpy(Zd, T, Pg, imm8, isMerge, is_float);
3494   }
3495   // SVE copy floating-point immediate to vector elements (predicated)
       // The double is packed with pack(), narrowed with checked_cast<int8_t>, and then
       // handed to the private encoder; this form always merges (isMerge = true).
3496   void sve_cpy(FloatRegister Zd, SIMD_RegVariant T, PRegister Pg, double d) {
         const int packed_imm = checked_cast<int8_t>(pack(d));
         const bool is_merge = true;
         const bool is_float = true;
         sve_cpy(Zd, T, Pg, packed_imm, is_merge, is_float);
3498   }
3499 
3500   // SVE conditionally select elements from two vectors
       // Pg is emitted with prf() at bit 10; Zm, Zn and Zd sit at bits 16, 5 and 0.
3501   void sve_sel(FloatRegister Zd, SIMD_RegVariant T, PRegister Pg,
3502                FloatRegister Zn, FloatRegister Zm) {
3503     starti;
3504     assert(T != Q, "invalid size");
         f(0b00000101, 31, 24);
         f(T, 23, 22);
         f(0b1, 21);
         rf(Zm, 16);
         f(0b11, 15, 14);
         prf(Pg, 10);
         rf(Zn, 5);
         rf(Zd, 0);
3507   }
3508 
3509   // SVE Permute Vector - Extract
       // imm8 is split across two fields: imm8 >> 3 goes into bits 20:16 and the low
       // three bits into bits 12:10.
3510   void sve_ext(FloatRegister Zdn, FloatRegister Zm, int imm8) {
3511     starti;
         const int imm_upper = imm8 >> 3;
         const int imm_lower = imm8 & 0b111;
         f(0b00000101001, 31, 21);
         f(imm_upper, 20, 16);
         f(0b000, 15, 13);
         f(imm_lower, 12, 10);
         rf(Zm, 5);
         rf(Zdn, 0);
3514   }
3515 
3516 // SVE Integer/Floating-Point Compare - Vectors
     // INSN(NAME, op1, op2, fp) defines NAME(cond, Pd, T, Pg, Zn, Zm).
     //  - fp == 0 (sve_cmp): any T except Q; conditions EQ, NE, GE, GT, HI, HS.
     //  - fp != 0 (sve_fcm): T must not be B or Q, and HI / HS are rejected by an assert.
     // The condition is turned into a 4-bit cond_op: for EQ/NE/GE/GT it is op2 in the two
     // upper bits plus a per-condition low pair, for HI/HS it is a fixed value. The upper
     // three bits of cond_op are emitted into bits 15:13 and its lowest bit into bit 4.
     // Any other condition reaches ShouldNotReachHere().
3517 #define INSN(NAME, op1, op2, fp)  \
3518   void NAME(Condition cond, PRegister Pd, SIMD_RegVariant T, PRegister Pg,             \
3519             FloatRegister Zn, FloatRegister Zm) {                                      \
3520     starti;                                                                            \
3521     if (fp == 0) {                                                                     \
3522       assert(T != Q, "invalid size");                                                  \
3523     } else {                                                                           \
3524       assert(T != B && T != Q, "invalid size");                                        \
3525       assert(cond != HI && cond != HS, "invalid condition for fcm");                   \
3526     }                                                                                  \
3527     int cond_op;                                                                       \
3528     switch(cond) {                                                                     \
3529       case EQ: cond_op = (op2 << 2) | 0b10; break;                                     \
3530       case NE: cond_op = (op2 << 2) | 0b11; break;                                     \
3531       case GE: cond_op = (op2 << 2) | 0b00; break;                                     \
3532       case GT: cond_op = (op2 << 2) | 0b01; break;                                     \
3533       case HI: cond_op = 0b0001; break;                                                \
3534       case HS: cond_op = 0b0000; break;                                                \
3535       default:                                                                         \
3536         ShouldNotReachHere();                                                          \
3537     }                                                                                  \
3538     f(op1, 31, 24), f(T, 23, 22), f(0, 21), rf(Zm, 16), f((cond_op >> 1) & 7, 15, 13); \
3539     pgrf(Pg, 10), rf(Zn, 5), f(cond_op & 1, 4), prf(Pd, 0);                            \
3540   }
3541 
3542   INSN(sve_cmp, 0b00100100, 0b10, 0);
3543   INSN(sve_fcm, 0b01100101, 0b01, 1);
3544 #undef INSN
3545 
3546 // SVE Integer Compare - Signed Immediate
     // Compares the elements of Zn against imm5, which must lie in [-16, 15].
     // Accepted conditions: EQ, NE, GE, GT, LE, LT; anything else reaches
     // ShouldNotReachHere(). The condition maps to a 4-bit selector whose upper three
     // bits are emitted into bits 15:13 and whose lowest bit is emitted into bit 4.
3547 void sve_cmp(Condition cond, PRegister Pd, SIMD_RegVariant T,
3548              PRegister Pg, FloatRegister Zn, int imm5) {
3549   starti;
3550   assert(T != Q, "invalid size");
3551   guarantee(-16 <= imm5 && imm5 <= 15, "invalid immediate");
       int selector;
3553   switch(cond) {
         case EQ: selector = 0b1000; break;
         case NE: selector = 0b1001; break;
         case GE: selector = 0b0000; break;
         case GT: selector = 0b0001; break;
         case LE: selector = 0b0011; break;
         case LT: selector = 0b0010; break;
3560     default:
3561       ShouldNotReachHere();
3562   }
       const int selector_hi = (selector >> 1) & 0x7;
       const int selector_lo = selector & 0x1;
       f(0b00100101, 31, 24);
       f(T, 23, 22);
       f(0b0, 21);
       sf(imm5, 20, 16);
       f(selector_hi, 15, 13);
       pgrf(Pg, 10);
       rf(Zn, 5);
       f(selector_lo, 4);
       prf(Pd, 0);
3566 }
3567 
3568 // SVE Floating-point compare vector with zero
     // Compares the elements of Zn against 0.0 under predicate Pg and writes the result
     // predicate Pd. d exists only to select this overload and must be exactly 0.0.
     // Accepted conditions: EQ, GT, GE, LT, LE, NE; anything else reaches
     // ShouldNotReachHere(). The condition maps to a 3-bit selector whose upper two bits
     // are emitted into bits 17:16 and whose lowest bit is emitted into bit 4.
3569 void sve_fcm(Condition cond, PRegister Pd, SIMD_RegVariant T,
3570              PRegister Pg, FloatRegister Zn, double d) {
3571   starti;
       // Byte elements are not a floating-point size: reject B as well as Q, matching
       // the size check of the vector-vector sve_fcm form.
       assert(T != B && T != Q, "invalid size");
3573   guarantee(d == 0.0, "invalid immediate");
3574   int cond_op;
3575   switch(cond) {
3576     case EQ: cond_op = 0b100; break;
3577     case GT: cond_op = 0b001; break;
3578     case GE: cond_op = 0b000; break;
3579     case LT: cond_op = 0b010; break;
3580     case LE: cond_op = 0b011; break;
3581     case NE: cond_op = 0b110; break;
3582     default:
3583       ShouldNotReachHere();
3584   }
3585   f(0b01100101, 31, 24), f(T, 23, 22), f(0b0100, 21, 18),
3586   f((cond_op >> 1) & 0x3, 17, 16), f(0b001, 15, 13),
3587   pgrf(Pg, 10), rf(Zn, 5);
3588   f(cond_op & 0x1, 4), prf(Pd, 0);
3589 }
3590 
3591 // SVE unpack vector elements
     // INSN(NAME, op) defines NAME(Zd, T, Zn). T must not be B or Q (assert); op goes
     // into bits 17:16 and selects signedness and which half of Zn is unpacked.
3592 #define INSN(NAME, op) \
3593   void NAME(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn) { \
3594     starti;                                                          \
3595     assert(T != B && T != Q, "invalid size");                        \
3596     f(0b00000101, 31, 24), f(T, 23, 22), f(0b1100, 21, 18);          \
3597     f(op, 17, 16), f(0b001110, 15, 10), rf(Zn, 5), rf(Zd, 0);        \
3598   }
3599 
3600   INSN(sve_uunpkhi, 0b11); // Unsigned unpack and extend half of vector - high half
3601   INSN(sve_uunpklo, 0b10); // Unsigned unpack and extend half of vector - low half
3602   INSN(sve_sunpkhi, 0b01); // Signed unpack and extend half of vector - high half
3603   INSN(sve_sunpklo, 0b00); // Signed unpack and extend half of vector - low half
3604 #undef INSN
3605 
3606 // SVE unpack predicate elements
     // INSN(NAME, op) defines NAME(Pd, Pn). op (bit 16) selects the high (1) or low (0)
     // half of Pn; the generated methods take no element-size argument.
3607 #define INSN(NAME, op) \
3608   void NAME(PRegister Pd, PRegister Pn) { \
3609     starti;                                                          \
3610     f(0b000001010011000, 31, 17), f(op, 16), f(0b0100000, 15, 9);    \
3611     prf(Pn, 5), f(0b0, 4), prf(Pd, 0);                               \
3612   }
3613 
3614   INSN(sve_punpkhi, 0b1); // Unpack and widen high half of predicate
3615   INSN(sve_punpklo, 0b0); // Unpack and widen low half of predicate
3616 #undef INSN
3617 
3618 // SVE permute vector elements
     // INSN(NAME, op) defines NAME(Zd, T, Zn, Zm). T must not be Q (assert); op (bit 10)
     // selects between the even-element (0) and odd-element (1) variant.
3619 #define INSN(NAME, op) \
3620   void NAME(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn, FloatRegister Zm) { \
3621     starti;                                                                            \
3622     assert(T != Q, "invalid size");                                                    \
3623     f(0b00000101, 31, 24), f(T, 23, 22), f(0b1, 21), rf(Zm, 16);                       \
3624     f(0b01101, 15, 11), f(op, 10), rf(Zn, 5), rf(Zd, 0);                               \
3625   }
3626 
3627   INSN(sve_uzp1, 0b0); // Concatenate even elements from two vectors
3628   INSN(sve_uzp2, 0b1); // Concatenate odd elements from two vectors
3629 #undef INSN
3630 
3631 // SVE permute predicate elements
     // INSN(NAME, op) defines NAME(Pd, T, Pn, Pm), the predicate-register counterpart of
     // the vector uzp1/uzp2 methods. T must not be Q (assert); op (bit 10) selects between
     // the even-element (0) and odd-element (1) variant.
3632 #define INSN(NAME, op) \
3633   void NAME(PRegister Pd, SIMD_RegVariant T, PRegister Pn, PRegister Pm) {             \
3634     starti;                                                                            \
3635     assert(T != Q, "invalid size");                                                    \
3636     f(0b00000101, 31, 24), f(T, 23, 22), f(0b10, 21, 20), prf(Pm, 16);                 \
3637     f(0b01001, 15, 11), f(op, 10), f(0b0, 9), prf(Pn, 5), f(0b0, 4), prf(Pd, 0);       \
3638   }
3639 
3640   INSN(sve_uzp1, 0b0); // Concatenate even elements from two predicates
3641   INSN(sve_uzp2, 0b1); // Concatenate odd elements from two predicates
3642 #undef INSN
3643 
3644 // Predicate counted loop (SVE) (32-bit variants are not included)
     // INSN(NAME, decode) defines NAME(Pd, T, Rn, Rm). T must not be Q (assert). The 3-bit
     // decode value is split: its upper two bits go into bits 11:10 and its lowest bit into
     // bit 4. Rm and Rn are emitted with zrf() at bits 16 and 5.
3645 #define INSN(NAME, decode)                                                \
3646   void NAME(PRegister Pd, SIMD_RegVariant T, Register Rn, Register Rm) {  \
3647     starti;                                                               \
3648     assert(T != Q, "invalid register variant");                           \
3649     f(0b00100101, 31, 24), f(T, 23, 22), f(1, 21),                        \
3650     zrf(Rm, 16), f(0, 15, 13), f(1, 12), f(decode >> 1, 11, 10),          \
3651     zrf(Rn, 5), f(decode & 1, 4), prf(Pd, 0);                             \
3652   }
3653 
3654   INSN(sve_whilelt, 0b010);  // While incrementing signed scalar less than scalar
3655   INSN(sve_whilele, 0b011);  // While incrementing signed scalar less than or equal to scalar
3656   INSN(sve_whilelo, 0b110);  // While incrementing unsigned scalar lower than scalar
3657   INSN(sve_whilels, 0b111);  // While incrementing unsigned scalar lower than or the same as scalar
3658 #undef INSN
3659 
3660   // SVE predicate reverse
       // Pn is the source and Pd the destination predicate; T (not Q) gives the element size.
3661   void sve_rev(PRegister Pd, SIMD_RegVariant T, PRegister Pn) {
3662     starti;
3663     assert(T != Q, "invalid size");
         f(0b00000101, 31, 24);
         f(T, 23, 22);
         f(0b1101000100000, 21, 9);
         prf(Pn, 5);
         f(0, 4);
         prf(Pd, 0);
3666   }
3667 
3668 // SVE partition break condition
     // INSN(NAME, op) defines NAME(Pd, Pg, Pn, isMerge). op goes into bits 23:22 and
     // isMerge is emitted as a single bit at position 4 (1 when true, 0 when false).
3669 #define INSN(NAME, op) \
3670   void NAME(PRegister Pd, PRegister Pg, PRegister Pn, bool isMerge) {      \
3671     starti;                                                                \
3672     f(0b00100101, 31, 24), f(op, 23, 22), f(0b01000001, 21, 14);           \
3673     prf(Pg, 10), f(0b0, 9), prf(Pn, 5), f(isMerge ? 1 : 0, 4), prf(Pd, 0); \
3674   }
3675 
3676   INSN(sve_brka, 0b00); // Break after first true condition
3677   INSN(sve_brkb, 0b10); // Break before first true condition
3678 #undef INSN
3679 
3680 // Element count and increment scalar (SVE)
     // INSN(NAME, TYPE) defines NAME(Xdn, imm4 = 1, pattern = 0b11111). TYPE (B, H, S or D)
     // is emitted into bits 23:22. The multiplier is stored biased by one: imm4 - 1 is
     // emitted into the 4-bit field 19:16. pattern goes into bits 9:5.
3681 #define INSN(NAME, TYPE)                                                             \
3682   void NAME(Register Xdn, unsigned imm4 = 1, int pattern = 0b11111) {                \
3683     starti;                                                                          \
3684     f(0b00000100, 31, 24), f(TYPE, 23, 22), f(0b10, 21, 20);                         \
3685     f(imm4 - 1, 19, 16), f(0b11100, 15, 11), f(0, 10), f(pattern, 9, 5), rf(Xdn, 0); \
3686   }
3687 
3688   INSN(sve_cntb, B);  // Set scalar to multiple of 8-bit predicate constraint element count
3689   INSN(sve_cnth, H);  // Set scalar to multiple of 16-bit predicate constraint element count
3690   INSN(sve_cntw, S);  // Set scalar to multiple of 32-bit predicate constraint element count
3691   INSN(sve_cntd, D);  // Set scalar to multiple of 64-bit predicate constraint element count
3692 #undef INSN
3693 
3694   // Set scalar to active predicate element count
       // Pg is placed at bit 10, Pn at bit 5 and the destination Xd at bit 0;
       // T (not Q) gives the element size.
3695   void sve_cntp(Register Xd, SIMD_RegVariant T, PRegister Pg, PRegister Pn) {
3696     starti;
3697     assert(T != Q, "invalid size");
         f(0b00100101, 31, 24);
         f(T, 23, 22);
         f(0b10000010, 21, 14);
         prf(Pg, 10);
         f(0, 9);
         prf(Pn, 5);
         rf(Xd, 0);
3700   }
3701 
3702   // SVE convert signed integer to floating-point (predicated)
       // T_src is the size of the integer source elements, T_dst the size of the
       // floating-point results. B and Q are rejected, and an H source requires an H
       // destination.
3703   void sve_scvtf(FloatRegister Zd, SIMD_RegVariant T_dst, PRegister Pg,
3704                  FloatRegister Zn, SIMD_RegVariant T_src) {
3705     starti;
3706     assert(T_src != B && T_dst != B && T_src != Q && T_dst != Q &&
3707            (T_src != H || T_dst == T_src), "invalid register variant");
3710     // In most cases we can treat T_dst, T_src as opc, opc2,
3711     // except for the following two combinations.
3712     // +-----+------+---+------------------------------------+
3713     // | opc | opc2 | U |        Instruction Details         |
3714     // +-----+------+---+------------------------------------+
3715     // |  11 |   00 | 0 | SCVTF - 32-bit to double-precision |
3716     // |  11 |   10 | 0 | SCVTF - 64-bit to single-precision |
3717     // +-----+------+---+------------------------------------+
         const bool word_to_double  = (T_src == S && T_dst == D);
         const bool dword_to_single = (T_src == D && T_dst == S);
         int opc;
         int opc2;
         if (word_to_double) {
           opc  = 0b11;
           opc2 = 0b00;
         } else if (dword_to_single) {
           opc  = 0b11;
           opc2 = 0b10;
         } else {
           opc  = T_dst;
           opc2 = T_src;
         }
         f(0b01100101, 31, 24);
         f(opc, 23, 22);
         f(0b010, 21, 19);
         f(opc2, 18, 17);
         f(0b0101, 16, 13);
         pgrf(Pg, 10);
         rf(Zn, 5);
         rf(Zd, 0);
3728   }
3729 
3730   // SVE floating-point convert to signed integer, rounding toward zero (predicated)
       // T_src is the size of the floating-point source elements, T_dst the size of the
       // integer results. B and Q are rejected, and an H destination requires an H source.
3731   void sve_fcvtzs(FloatRegister Zd, SIMD_RegVariant T_dst, PRegister Pg,
3732                   FloatRegister Zn, SIMD_RegVariant T_src) {
3733     starti;
3734     assert(T_src != B && T_dst != B && T_src != Q && T_dst != Q &&
3735            (T_dst != H || T_src == H), "invalid register variant");
3738     // In most cases we can treat T_src, T_dst as opc, opc2,
3739     // except for the following two combinations.
3740     // +-----+------+---+-------------------------------------+
3741     // | opc | opc2 | U |         Instruction Details         |
3742     // +-----+------+---+-------------------------------------+
3743     // |  11 |  10  | 0 | FCVTZS - single-precision to 64-bit |
3744     // |  11 |  00  | 0 | FCVTZS - double-precision to 32-bit |
3745     // +-----+------+---+-------------------------------------+
         const bool single_to_dword = (T_src == S && T_dst == D);
         const bool double_to_word  = (T_src == D && T_dst == S);
         int opc;
         int opc2;
         if (single_to_dword) {
           opc  = 0b11;
           opc2 = 0b10;
         } else if (double_to_word) {
           opc  = 0b11;
           opc2 = 0b00;
         } else {
           opc  = T_src;
           opc2 = T_dst;
         }
         f(0b01100101, 31, 24);
         f(opc, 23, 22);
         f(0b011, 21, 19);
         f(opc2, 18, 17);
         f(0b0101, 16, 13);
         pgrf(Pg, 10);
         rf(Zn, 5);
         rf(Zd, 0);
3756   }
3757 
3758   // SVE floating-point convert precision (predicated)
       // Source and destination sizes must differ, and B and Q are rejected. Half
       // precision is not supported by this encoder (guarantee). Only T_dst is encoded,
       // in bits 17:16.
3759   void sve_fcvt(FloatRegister Zd, SIMD_RegVariant T_dst, PRegister Pg,
3760                 FloatRegister Zn, SIMD_RegVariant T_src) {
3761     starti;
3762     assert(T_src != B && T_dst != B && T_src != Q && T_dst != Q &&
3763            T_src != T_dst, "invalid register variant");
3764     guarantee(T_src != H && T_dst != H, "half-precision unsupported");
         f(0b01100101, 31, 24);
         f(0b11, 23, 22);
         f(0b0010, 21, 18);
         f(T_dst, 17, 16);
         f(0b101, 15, 13);
         pgrf(Pg, 10);
         rf(Zn, 5);
         rf(Zd, 0);
3768   }
3769 
3770 // SVE extract element to general-purpose register
3771 #define INSN(NAME, before)                                                      \
3772   void NAME(Register Rd, SIMD_RegVariant T, PRegister Pg,  FloatRegister Zn) {  \
3773     starti;                                                                     \
3774     f(0b00000101, 31, 24), f(T, 23, 22), f(0b10000, 21, 17);                    \
3775     f(before, 16), f(0b101, 15, 13);                                            \
3776     pgrf(Pg, 10), rf(Zn, 5), rf(Rd, 0);                                         \
3777   }
3778 
3779   INSN(sve_lasta, 0b0);
3780   INSN(sve_lastb, 0b1);
3781 #undef INSN
3782 
3783 // SVE extract element to SIMD&FP scalar register
3784 #define INSN(NAME, before)                                                           \
3785   void NAME(FloatRegister Vd, SIMD_RegVariant T, PRegister Pg,  FloatRegister Zn) {  \
3786     starti;                                                                          \
3787     f(0b00000101, 31, 24), f(T, 23, 22), f(0b10001, 21, 17);                         \
3788     f(before, 16), f(0b100, 15, 13);                                                 \
3789     pgrf(Pg, 10), rf(Zn, 5), rf(Vd, 0);                                              \
3790   }
3791 
3792   INSN(sve_lasta, 0b0);
3793   INSN(sve_lastb, 0b1);
3794 #undef INSN
3795 
3796   // SVE create index starting from and incremented by immediate
3797   void sve_index(FloatRegister Zd, SIMD_RegVariant T, int imm1, int imm2) {
3798     starti;
3799     f(0b00000100, 31, 24), f(T, 23, 22), f(0b1, 21);
3800     sf(imm2, 20, 16), f(0b010000, 15, 10);
3801     sf(imm1, 9, 5), rf(Zd, 0);
3802   }
3803 
3804   // SVE programmable table lookup/permute using vector of element indices
3805   void sve_tbl(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn, FloatRegister Zm) {
3806     starti;
3807     assert(T != Q, "invalid size");
3808     f(0b00000101, 31, 24), f(T, 23, 22), f(0b1, 21), rf(Zm, 16);
3809     f(0b001100, 15, 10), rf(Zn, 5), rf(Zd, 0);
3810   }
3811 
3812   Assembler(CodeBuffer* code) : AbstractAssembler(code) {
3813   }
3814 
  // Stack overflow checking
  // Virtual hook taking a stack offset; only the declaration is visible in
  // this header.
  virtual void bang_stack_with_offset(int offset);

  // Immediate-operand validity checks. Only the declarations are visible
  // here; judging by the names, each reports whether the given immediate
  // can be encoded directly in the corresponding instruction form --
  // confirm the exact rules against the definitions.
  static bool operand_valid_for_logical_immediate(bool is32, uint64_t imm);
  static bool operand_valid_for_sve_logical_immediate(unsigned elembits, uint64_t imm);
  static bool operand_valid_for_add_sub_immediate(int64_t imm);
  static bool operand_valid_for_sve_add_sub_immediate(int64_t imm);
  static bool operand_valid_for_float_immediate(double imm);
  // NOTE(review): unlike its siblings this one returns int, not bool. The
  // meaning of the returned value is not visible here -- check the
  // definition before treating it as a plain yes/no answer.
  static int  operand_valid_for_movi_immediate(uint64_t imm64, SIMD_Arrangement T);

  // Two overloads that take a 64-bit datum plus either a relocation type or
  // a relocation holder, and an optional format (default 0). Presumably
  // they emit the datum into the code stream with the relocation attached
  // -- confirm against the definitions.
  void emit_data64(jlong data, relocInfo::relocType rtype, int format = 0);
  void emit_data64(jlong data, RelocationHolder const& rspec, int format = 0);
3827 };
3828 
3829 inline Assembler::Membar_mask_bits operator|(Assembler::Membar_mask_bits a,
3830                                              Assembler::Membar_mask_bits b) {
3831   return Assembler::Membar_mask_bits(unsigned(a)|unsigned(b));
3832 }
3833 
// Destructor of the per-instruction encoding helper: on destruction it hands
// the accumulated 32-bit word `insn` to the owning assembler and then checks
// the value reported by get_bits().
// NOTE(review): this out-of-class definition lives in a header without an
// `inline` specifier here; presumably the in-class declaration carries
// `inline` -- confirm, otherwise including this header from several
// translation units would produce multiple definitions.
Instruction_aarch64::~Instruction_aarch64() {
  // Emit the finished instruction word through the assembler.
  assem->emit_int32(insn);
  // Presumably verifies that every one of the 32 bit positions was written
  // by a field setter before emission -- confirm against get_bits().
  assert_cond(get_bits() == 0xffffffff);
}
3838 
3839 #undef f
3840 #undef sf
3841 #undef rf
3842 #undef srf
3843 #undef zrf
3844 #undef prf
3845 #undef pgrf
3846 #undef fixed
3847 
3848 #undef starti
3849 
3850 // Invert a condition
3851 inline const Assembler::Condition operator~(const Assembler::Condition cond) {
3852   return Assembler::Condition(int(cond) ^ 1);
3853 }
3854 
// C-linkage helper defined elsewhere.
// NOTE(review): presumably a debugging aid that disassembles `len`
// instructions starting at address `start` -- confirm against the definition.
extern "C" void das(uint64_t start, int len);
3856 
3857 #endif // CPU_AARCH64_ASSEMBLER_AARCH64_HPP