1 /*
   2  * Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved.
   3  * Copyright (c) 2014, 2021, Red Hat Inc. All rights reserved.
   4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5  *
   6  * This code is free software; you can redistribute it and/or modify it
   7  * under the terms of the GNU General Public License version 2 only, as
   8  * published by the Free Software Foundation.
   9  *
  10  * This code is distributed in the hope that it will be useful, but WITHOUT
  11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13  * version 2 for more details (a copy is included in the LICENSE file that
  14  * accompanied this code).
  15  *
  16  * You should have received a copy of the GNU General Public License version
  17  * 2 along with this work; if not, write to the Free Software Foundation,
  18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19  *
  20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21  * or visit www.oracle.com if you need additional information or have any
  22  * questions.
  23  *
  24  */
  25 
  26 #ifndef CPU_AARCH64_ASSEMBLER_AARCH64_HPP
  27 #define CPU_AARCH64_ASSEMBLER_AARCH64_HPP
  28 
  29 #include "asm/register.hpp"
  30 
#ifdef __GNUC__

// NOP() expands to a single "nop" instruction written as inline asm.
// The asm statement needs volatile so that the compiler doesn't optimize
// it away.  Note that the expansion already ends in ';'.
#define NOP() asm volatile ("nop");

#elif defined(_MSC_VER)

// Use the MSVC intrinsic: https://docs.microsoft.com/en-us/cpp/intrinsics/arm64-intrinsics?view=vs-2019#I
// As above, the expansion already ends in ';'.
#define NOP() __nop();

#endif
  42 
  43 
  44 // definitions of various symbolic names for machine registers
  45 
// First, calls between C and Java, which pass arguments in 8 general
// registers and 8 floating-point registers
  48 
  49 // we also have to copy between x86 and ARM registers but that's a
  50 // secondary complication -- not all code employing C call convention
  51 // executes as x86 code though -- we generate some of it
  52 
  53 class Argument {
  54  public:
  55   enum {
  56     n_int_register_parameters_c   = 8,  // r0, r1, ... r7 (c_rarg0, c_rarg1, ...)
  57     n_float_register_parameters_c = 8,  // v0, v1, ... v7 (c_farg0, c_farg1, ... )
  58 
  59     n_int_register_parameters_j   = 8, // r1, ... r7, r0 (rj_rarg0, j_rarg1, ...
  60     n_float_register_parameters_j = 8  // v0, v1, ... v7 (j_farg0, j_farg1, ...
  61   };
  62 };
  63 
// Integer argument registers of the C convention: c_rargN names rN, N = 0..7.
REGISTER_DECLARATION(Register, c_rarg0, r0);
REGISTER_DECLARATION(Register, c_rarg1, r1);
REGISTER_DECLARATION(Register, c_rarg2, r2);
REGISTER_DECLARATION(Register, c_rarg3, r3);
REGISTER_DECLARATION(Register, c_rarg4, r4);
REGISTER_DECLARATION(Register, c_rarg5, r5);
REGISTER_DECLARATION(Register, c_rarg6, r6);
REGISTER_DECLARATION(Register, c_rarg7, r7);

// Floating-point argument registers of the C convention: c_fargN names vN, N = 0..7.
REGISTER_DECLARATION(FloatRegister, c_farg0, v0);
REGISTER_DECLARATION(FloatRegister, c_farg1, v1);
REGISTER_DECLARATION(FloatRegister, c_farg2, v2);
REGISTER_DECLARATION(FloatRegister, c_farg3, v3);
REGISTER_DECLARATION(FloatRegister, c_farg4, v4);
REGISTER_DECLARATION(FloatRegister, c_farg5, v5);
REGISTER_DECLARATION(FloatRegister, c_farg6, v6);
REGISTER_DECLARATION(FloatRegister, c_farg7, v7);
  81 
  82 // Symbolically name the register arguments used by the Java calling convention.
  83 // We have control over the convention for java so we can do what we please.
  84 // What pleases us is to offset the java calling convention so that when
  85 // we call a suitable jni method the arguments are lined up and we don't
  86 // have to do much shuffling. A suitable jni method is non-static and a
  87 // small number of arguments
  88 //
  89 //  |--------------------------------------------------------------------|
  90 //  | c_rarg0  c_rarg1  c_rarg2 c_rarg3 c_rarg4 c_rarg5 c_rarg6 c_rarg7  |
  91 //  |--------------------------------------------------------------------|
  92 //  | r0       r1       r2      r3      r4      r5      r6      r7       |
  93 //  |--------------------------------------------------------------------|
  94 //  | j_rarg7  j_rarg0  j_rarg1 j_rarg2 j_rarg3 j_rarg4 j_rarg5 j_rarg6  |
  95 //  |--------------------------------------------------------------------|
  96 
  97 
// Java integer argument registers: j_rargN names c_rarg(N+1) for N = 0..6,
// and j_rarg7 wraps around to c_rarg0.
REGISTER_DECLARATION(Register, j_rarg0, c_rarg1);
REGISTER_DECLARATION(Register, j_rarg1, c_rarg2);
REGISTER_DECLARATION(Register, j_rarg2, c_rarg3);
REGISTER_DECLARATION(Register, j_rarg3, c_rarg4);
REGISTER_DECLARATION(Register, j_rarg4, c_rarg5);
REGISTER_DECLARATION(Register, j_rarg5, c_rarg6);
REGISTER_DECLARATION(Register, j_rarg6, c_rarg7);
REGISTER_DECLARATION(Register, j_rarg7, c_rarg0);

// Java floating args are passed as per C: j_fargN names vN, N = 0..7.

REGISTER_DECLARATION(FloatRegister, j_farg0, v0);
REGISTER_DECLARATION(FloatRegister, j_farg1, v1);
REGISTER_DECLARATION(FloatRegister, j_farg2, v2);
REGISTER_DECLARATION(FloatRegister, j_farg3, v3);
REGISTER_DECLARATION(FloatRegister, j_farg4, v4);
REGISTER_DECLARATION(FloatRegister, j_farg5, v5);
REGISTER_DECLARATION(FloatRegister, j_farg6, v6);
REGISTER_DECLARATION(FloatRegister, j_farg7, v7);
 117 
 118 // registers used to hold VM data either temporarily within a method
 119 // or across method calls
 120 
 121 // volatile (caller-save) registers
 122 
 123 // r8 is used for indirect result location return
 124 // we use it and r9 as scratch registers
// scratch registers
REGISTER_DECLARATION(Register, rscratch1, r8);
REGISTER_DECLARATION(Register, rscratch2, r9);

// current method -- must be in a call-clobbered register
REGISTER_DECLARATION(Register, rmethod,   r12);

// non-volatile (callee-save) registers are r16-29
// of which the following are dedicated global state
// NOTE(review): AAPCS64 lists r19-r28 as the callee-saved general
// registers -- confirm the range quoted above.

// link register
REGISTER_DECLARATION(Register, lr,        r30);
// frame pointer
REGISTER_DECLARATION(Register, rfp,       r29);
// current thread
REGISTER_DECLARATION(Register, rthread,   r28);
// base of heap
REGISTER_DECLARATION(Register, rheapbase, r27);
// constant pool cache
REGISTER_DECLARATION(Register, rcpool,    r26);
// monitors allocated on stack
REGISTER_DECLARATION(Register, rmonitors, r25);
// locals on stack
REGISTER_DECLARATION(Register, rlocals,   r24);
// bytecode pointer
REGISTER_DECLARATION(Register, rbcp,      r22);
// Dispatch table base
REGISTER_DECLARATION(Register, rdispatch, r21);
// Java stack pointer
REGISTER_DECLARATION(Register, esp,      r20);

// Preserved predicate register with all elements set TRUE.
REGISTER_DECLARATION(PRegister, ptrue, p7);
 157 
// assert_cond(c): assert c, using the source text of c itself as the message.
#define assert_cond(ARG1) assert(ARG1, #ARG1)
 159 
// Immediate-encoding helpers.  Only the declarations live here; the
// definitions are elsewhere.
namespace asm_util {
  // Used by the logical-immediate emitters in this file, which place the
  // returned value in instruction bits 22..10.
  uint32_t encode_logical_immediate(bool is32, uint64_t imm);
  // SVE counterpart, parameterised by element width in bits.
  uint32_t encode_sve_logical_immediate(unsigned elembits, uint64_t imm);
  // presumably reports whether imm fits in nbits -- definition not visible
  // here, confirm before relying on signedness.
  bool operand_valid_for_immediate_bits(int64_t imm, unsigned nbits);
};

// Makes the helpers above usable unqualified by everything that includes
// this header.
using namespace asm_util;
 167 
 168 
 169 class Assembler;
 170 
 171 class Instruction_aarch64 {
 172   unsigned insn;
 173 #ifdef ASSERT
 174   unsigned bits;
 175 #endif
 176   Assembler *assem;
 177 
 178 public:
 179 
 180   Instruction_aarch64(class Assembler *as) {
 181 #ifdef ASSERT
 182     bits = 0;
 183 #endif
 184     insn = 0;
 185     assem = as;
 186   }
 187 
 188   inline ~Instruction_aarch64();
 189 
 190   unsigned &get_insn() { return insn; }
 191 #ifdef ASSERT
 192   unsigned &get_bits() { return bits; }
 193 #endif
 194 
 195   static inline int32_t extend(unsigned val, int hi = 31, int lo = 0) {
 196     union {
 197       unsigned u;
 198       int n;
 199     };
 200 
 201     u = val << (31 - hi);
 202     n = n >> (31 - hi + lo);
 203     return n;
 204   }
 205 
 206   static inline uint32_t extract(uint32_t val, int msb, int lsb) {
 207     int nbits = msb - lsb + 1;
 208     assert_cond(msb >= lsb);
 209     uint32_t mask = checked_cast<uint32_t>(right_n_bits(nbits));
 210     uint32_t result = val >> lsb;
 211     result &= mask;
 212     return result;
 213   }
 214 
 215   static inline int32_t sextract(uint32_t val, int msb, int lsb) {
 216     uint32_t uval = extract(val, msb, lsb);
 217     return extend(uval, msb - lsb);
 218   }
 219 
 220   static void patch(address a, int msb, int lsb, uint64_t val) {
 221     int nbits = msb - lsb + 1;
 222     guarantee(val < (1ULL << nbits), "Field too big for insn");
 223     assert_cond(msb >= lsb);
 224     unsigned mask = checked_cast<unsigned>(right_n_bits(nbits));
 225     val <<= lsb;
 226     mask <<= lsb;
 227     unsigned target = *(unsigned *)a;
 228     target &= ~mask;
 229     target |= val;
 230     *(unsigned *)a = target;
 231   }
 232 
 233   static void spatch(address a, int msb, int lsb, int64_t val) {
 234     int nbits = msb - lsb + 1;
 235     int64_t chk = val >> (nbits - 1);
 236     guarantee (chk == -1 || chk == 0, "Field too big for insn");
 237     unsigned uval = val;
 238     unsigned mask = checked_cast<unsigned>(right_n_bits(nbits));
 239     uval &= mask;
 240     uval <<= lsb;
 241     mask <<= lsb;
 242     unsigned target = *(unsigned *)a;
 243     target &= ~mask;
 244     target |= uval;
 245     *(unsigned *)a = target;
 246   }
 247 
 248   void f(unsigned val, int msb, int lsb) {
 249     int nbits = msb - lsb + 1;
 250     guarantee(val < (1ULL << nbits), "Field too big for insn");
 251     assert_cond(msb >= lsb);
 252     val <<= lsb;
 253     insn |= val;
 254 #ifdef ASSERT
 255     unsigned mask = checked_cast<unsigned>(right_n_bits(nbits));
 256     mask <<= lsb;
 257     assert_cond((bits & mask) == 0);
 258     bits |= mask;
 259 #endif
 260   }
 261 
 262   void f(unsigned val, int bit) {
 263     f(val, bit, bit);
 264   }
 265 
 266   void sf(int64_t val, int msb, int lsb) {
 267     int nbits = msb - lsb + 1;
 268     int64_t chk = val >> (nbits - 1);
 269     guarantee (chk == -1 || chk == 0, "Field too big for insn");
 270     unsigned uval = val;
 271     unsigned mask = checked_cast<unsigned>(right_n_bits(nbits));
 272     uval &= mask;
 273     f(uval, lsb + nbits - 1, lsb);
 274   }
 275 
 276   void rf(Register r, int lsb) {
 277     f(r->encoding_nocheck(), lsb + 4, lsb);
 278   }
 279 
 280   // reg|ZR
 281   void zrf(Register r, int lsb) {
 282     f(r->encoding_nocheck() - (r == zr), lsb + 4, lsb);
 283   }
 284 
 285   // reg|SP
 286   void srf(Register r, int lsb) {
 287     f(r == sp ? 31 : r->encoding_nocheck(), lsb + 4, lsb);
 288   }
 289 
 290   void rf(FloatRegister r, int lsb) {
 291     f(r->encoding_nocheck(), lsb + 4, lsb);
 292   }
 293 
 294   void prf(PRegister r, int lsb) {
 295     f(r->encoding_nocheck(), lsb + 3, lsb);
 296   }
 297 
 298   void pgrf(PRegister r, int lsb) {
 299     f(r->encoding_nocheck(), lsb + 2, lsb);
 300   }
 301 
 302   unsigned get(int msb = 31, int lsb = 0) {
 303     int nbits = msb - lsb + 1;
 304     unsigned mask = checked_cast<unsigned>(right_n_bits(nbits)) << lsb;
 305     assert_cond((bits & mask) == mask);
 306     return (insn & mask) >> lsb;
 307   }
 308 
 309   void fixed(unsigned value, unsigned mask) {
 310     assert_cond ((mask & bits) == 0);
 311 #ifdef ASSERT
 312     bits |= mask;
 313 #endif
 314     insn |= value;
 315   }
 316 };
 317 
// starti: declares the local Instruction_aarch64 named 'current_insn',
// bound to the enclosing object via 'this'.  The expansion already ends
// in ';'.
#define starti Instruction_aarch64 current_insn(this);
 319 
 320 class PrePost {
 321   int _offset;
 322   Register _r;
 323 public:
 324   PrePost(Register reg, int o) : _offset(o), _r(reg) { }
 325   int offset() { return _offset; }
 326   Register reg() { return _r; }
 327 };
 328 
 329 class Pre : public PrePost {
 330 public:
 331   Pre(Register reg, int o) : PrePost(reg, o) { }
 332 };
 333 class Post : public PrePost {
 334   Register _idx;
 335   bool _is_postreg;
 336 public:
 337   Post(Register reg, int o) : PrePost(reg, o) { _idx = NULL; _is_postreg = false; }
 338   Post(Register reg, Register idx) : PrePost(reg, 0) { _idx = idx; _is_postreg = true; }
 339   Register idx_reg() { return _idx; }
 340   bool is_postreg() {return _is_postreg; }
 341 };
 342 
 343 namespace ext
 344 {
 345   enum operation { uxtb, uxth, uxtw, uxtx, sxtb, sxth, sxtw, sxtx };
 346 };
 347 
 348 // Addressing modes
 349 class Address {
 350  public:
 351 
 352   enum mode { no_mode, base_plus_offset, pre, post, post_reg, pcrel,
 353               base_plus_offset_reg, literal };
 354 
 355   // Shift and extend for base reg + reg offset addressing
 356   class extend {
 357     int _option, _shift;
 358     ext::operation _op;
 359   public:
 360     extend() { }
 361     extend(int s, int o, ext::operation op) : _option(o), _shift(s), _op(op) { }
 362     int option() const{ return _option; }
 363     int shift() const { return _shift; }
 364     ext::operation op() const { return _op; }
 365   };
 366   class uxtw : public extend {
 367   public:
 368     uxtw(int shift = -1): extend(shift, 0b010, ext::uxtw) { }
 369   };
 370   class lsl : public extend {
 371   public:
 372     lsl(int shift = -1): extend(shift, 0b011, ext::uxtx) { }
 373   };
 374   class sxtw : public extend {
 375   public:
 376     sxtw(int shift = -1): extend(shift, 0b110, ext::sxtw) { }
 377   };
 378   class sxtx : public extend {
 379   public:
 380     sxtx(int shift = -1): extend(shift, 0b111, ext::sxtx) { }
 381   };
 382 
 383  private:
 384   Register _base;
 385   Register _index;
 386   int64_t _offset;
 387   enum mode _mode;
 388   extend _ext;
 389 
 390   RelocationHolder _rspec;
 391 
 392   // Typically we use AddressLiterals we want to use their rval
 393   // However in some situations we want the lval (effect address) of
 394   // the item.  We provide a special factory for making those lvals.
 395   bool _is_lval;
 396 
 397   // If the target is far we'll need to load the ea of this to a
 398   // register to reach it. Otherwise if near we can do PC-relative
 399   // addressing.
 400   address          _target;
 401 
 402  public:
 403   Address()
 404     : _mode(no_mode) { }
 405   Address(Register r)
 406     : _base(r), _index(noreg), _offset(0), _mode(base_plus_offset), _target(0) { }
 407   Address(Register r, int o)
 408     : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(0) { }
 409   Address(Register r, long o)
 410     : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(0) { }
 411   Address(Register r, long long o)
 412     : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(0) { }
 413   Address(Register r, unsigned int o)
 414     : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(0) { }
 415   Address(Register r, unsigned long o)
 416     : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(0) { }
 417   Address(Register r, unsigned long long o)
 418     : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(0) { }
 419   Address(Register r, ByteSize disp)
 420     : Address(r, in_bytes(disp)) { }
 421   Address(Register r, Register r1, extend ext = lsl())
 422     : _base(r), _index(r1), _offset(0), _mode(base_plus_offset_reg),
 423       _ext(ext), _target(0) { }
 424   Address(Pre p)
 425     : _base(p.reg()), _offset(p.offset()), _mode(pre) { }
 426   Address(Post p)
 427     : _base(p.reg()),  _index(p.idx_reg()), _offset(p.offset()),
 428       _mode(p.is_postreg() ? post_reg : post), _target(0) { }
 429   Address(address target, RelocationHolder const& rspec)
 430     : _mode(literal),
 431       _rspec(rspec),
 432       _is_lval(false),
 433       _target(target)  { }
 434   Address(address target, relocInfo::relocType rtype = relocInfo::external_word_type);
 435   Address(Register base, RegisterOrConstant index, extend ext = lsl())
 436     : _base (base),
 437       _offset(0), _ext(ext), _target(0) {
 438     if (index.is_register()) {
 439       _mode = base_plus_offset_reg;
 440       _index = index.as_register();
 441     } else {
 442       guarantee(ext.option() == ext::uxtx, "should be");
 443       assert(index.is_constant(), "should be");
 444       _mode = base_plus_offset;
 445       _offset = index.as_constant() << ext.shift();
 446     }
 447   }
 448 
 449   Register base() const {
 450     guarantee((_mode == base_plus_offset || _mode == base_plus_offset_reg
 451                || _mode == post || _mode == post_reg),
 452               "wrong mode");
 453     return _base;
 454   }
 455   int64_t offset() const {
 456     return _offset;
 457   }
 458   Register index() const {
 459     return _index;
 460   }
 461   mode getMode() const {
 462     return _mode;
 463   }
 464   bool uses(Register reg) const { return _base == reg || _index == reg; }
 465   address target() const { return _target; }
 466   const RelocationHolder& rspec() const { return _rspec; }
 467 
 468   void encode(Instruction_aarch64 *i) const {
 469     i->f(0b111, 29, 27);
 470     i->srf(_base, 5);
 471 
 472     switch(_mode) {
 473     case base_plus_offset:
 474       {
 475         unsigned size = i->get(31, 30);
 476         if (i->get(26, 26) && i->get(23, 23)) {
 477           // SIMD Q Type - Size = 128 bits
 478           assert(size == 0, "bad size");
 479           size = 0b100;
 480         }
 481         unsigned mask = (1 << size) - 1;
 482         if (_offset < 0 || _offset & mask)
 483           {
 484             i->f(0b00, 25, 24);
 485             i->f(0, 21), i->f(0b00, 11, 10);
 486             i->sf(_offset, 20, 12);
 487           } else {
 488             i->f(0b01, 25, 24);
 489             i->f(_offset >> size, 21, 10);
 490           }
 491       }
 492       break;
 493 
 494     case base_plus_offset_reg:
 495       {
 496         i->f(0b00, 25, 24);
 497         i->f(1, 21);
 498         i->rf(_index, 16);
 499         i->f(_ext.option(), 15, 13);
 500         unsigned size = i->get(31, 30);
 501         if (i->get(26, 26) && i->get(23, 23)) {
 502           // SIMD Q Type - Size = 128 bits
 503           assert(size == 0, "bad size");
 504           size = 0b100;
 505         }
 506         if (size == 0) // It's a byte
 507           i->f(_ext.shift() >= 0, 12);
 508         else {
 509           assert(_ext.shift() <= 0 || _ext.shift() == (int)size, "bad shift");
 510           i->f(_ext.shift() > 0, 12);
 511         }
 512         i->f(0b10, 11, 10);
 513       }
 514       break;
 515 
 516     case pre:
 517       i->f(0b00, 25, 24);
 518       i->f(0, 21), i->f(0b11, 11, 10);
 519       i->sf(_offset, 20, 12);
 520       break;
 521 
 522     case post:
 523       i->f(0b00, 25, 24);
 524       i->f(0, 21), i->f(0b01, 11, 10);
 525       i->sf(_offset, 20, 12);
 526       break;
 527 
 528     default:
 529       ShouldNotReachHere();
 530     }
 531   }
 532 
 533   void encode_pair(Instruction_aarch64 *i) const {
 534     switch(_mode) {
 535     case base_plus_offset:
 536       i->f(0b010, 25, 23);
 537       break;
 538     case pre:
 539       i->f(0b011, 25, 23);
 540       break;
 541     case post:
 542       i->f(0b001, 25, 23);
 543       break;
 544     default:
 545       ShouldNotReachHere();
 546     }
 547 
 548     unsigned size; // Operand shift in 32-bit words
 549 
 550     if (i->get(26, 26)) { // float
 551       switch(i->get(31, 30)) {
 552       case 0b10:
 553         size = 2; break;
 554       case 0b01:
 555         size = 1; break;
 556       case 0b00:
 557         size = 0; break;
 558       default:
 559         ShouldNotReachHere();
 560         size = 0;  // unreachable
 561       }
 562     } else {
 563       size = i->get(31, 31);
 564     }
 565 
 566     size = 4 << size;
 567     guarantee(_offset % size == 0, "bad offset");
 568     i->sf(_offset / size, 21, 15);
 569     i->srf(_base, 5);
 570   }
 571 
 572   void encode_nontemporal_pair(Instruction_aarch64 *i) const {
 573     // Only base + offset is allowed
 574     i->f(0b000, 25, 23);
 575     unsigned size = i->get(31, 31);
 576     size = 4 << size;
 577     guarantee(_offset % size == 0, "bad offset");
 578     i->sf(_offset / size, 21, 15);
 579     i->srf(_base, 5);
 580     guarantee(_mode == Address::base_plus_offset,
 581               "Bad addressing mode for non-temporal op");
 582   }
 583 
 584   void lea(MacroAssembler *, Register) const;
 585 
 586   static bool offset_ok_for_immed(int64_t offset, uint shift);
 587 
 588   static bool offset_ok_for_sve_immed(int64_t offset, int shift, int vl /* sve vector length */) {
 589     if (offset % vl == 0) {
 590       // Convert address offset into sve imm offset (MUL VL).
 591       int sve_offset = offset / vl;
 592       if (((-(1 << (shift - 1))) <= sve_offset) && (sve_offset < (1 << (shift - 1)))) {
 593         // sve_offset can be encoded
 594         return true;
 595       }
 596     }
 597     return false;
 598   }
 599 };
 600 
// Convenience classes
 602 class RuntimeAddress: public Address {
 603 
 604   public:
 605 
 606   RuntimeAddress(address target) : Address(target, relocInfo::runtime_call_type) {}
 607 
 608 };
 609 
 610 class OopAddress: public Address {
 611 
 612   public:
 613 
 614   OopAddress(address target) : Address(target, relocInfo::oop_type){}
 615 
 616 };
 617 
 618 class ExternalAddress: public Address {
 619  private:
 620   static relocInfo::relocType reloc_for_target(address target) {
 621     // Sometimes ExternalAddress is used for values which aren't
 622     // exactly addresses, like the card table base.
 623     // external_word_type can't be used for values in the first page
 624     // so just skip the reloc in that case.
 625     return external_word_Relocation::can_be_relocated(target) ? relocInfo::external_word_type : relocInfo::none;
 626   }
 627 
 628  public:
 629 
 630   ExternalAddress(address target) : Address(target, reloc_for_target(target)) {}
 631 
 632 };
 633 
 634 class InternalAddress: public Address {
 635 
 636   public:
 637 
 638   InternalAddress(address target) : Address(target, relocInfo::internal_word_type) {}
 639 };
 640 
 641 const int FPUStateSizeInWords = FloatRegisterImpl::number_of_registers *
 642                                 FloatRegisterImpl::save_slots_per_register;
 643 
 644 typedef enum {
 645   PLDL1KEEP = 0b00000, PLDL1STRM, PLDL2KEEP, PLDL2STRM, PLDL3KEEP, PLDL3STRM,
 646   PSTL1KEEP = 0b10000, PSTL1STRM, PSTL2KEEP, PSTL2STRM, PSTL3KEEP, PSTL3STRM,
 647   PLIL1KEEP = 0b01000, PLIL1STRM, PLIL2KEEP, PLIL2STRM, PLIL3KEEP, PLIL3STRM
 648 } prfop;
 649 
 650 class Assembler : public AbstractAssembler {
 651 
 652 public:
 653 
 654 #ifndef PRODUCT
 655   static const uintptr_t asm_bp;
 656 
 657   void emit_int32(jint x) {
 658     if ((uintptr_t)pc() == asm_bp)
 659       NOP();
 660     AbstractAssembler::emit_int32(x);
 661   }
 662 #else
 663   void emit_int32(jint x) {
 664     AbstractAssembler::emit_int32(x);
 665   }
 666 #endif
 667 
 668   enum { instruction_size = 4 };
 669 
 670   //---<  calculate length of instruction  >---
 671   // We just use the values set above.
 672   // instruction must start at passed address
 673   static unsigned int instr_len(unsigned char *instr) { return instruction_size; }
 674 
 675   //---<  longest instructions  >---
 676   static unsigned int instr_maxlen() { return instruction_size; }
 677 
 678   Address adjust(Register base, int offset, bool preIncrement) {
 679     if (preIncrement)
 680       return Address(Pre(base, offset));
 681     else
 682       return Address(Post(base, offset));
 683   }
 684 
 685   Address pre(Register base, int offset) {
 686     return adjust(base, offset, true);
 687   }
 688 
 689   Address post(Register base, int offset) {
 690     return adjust(base, offset, false);
 691   }
 692 
 693   Address post(Register base, Register idx) {
 694     return Address(Post(base, idx));
 695   }
 696 
  // Declared here; the definition is not part of this section.
  static address locate_next_instruction(address inst);

  // Shorthands used inside the instruction emitters: each one forwards to
  // the member of the same name on the local object 'current_insn', which
  // the 'starti' macro declares.
#define f current_insn.f
#define sf current_insn.sf
#define rf current_insn.rf
#define srf current_insn.srf
#define zrf current_insn.zrf
#define prf current_insn.prf
#define pgrf current_insn.pgrf
#define fixed current_insn.fixed

  // Pointer-to-member types for the emitters that take a destination
  // address, one per parameter list.
  typedef void (Assembler::* uncond_branch_insn)(address dest);
  typedef void (Assembler::* compare_and_branch_insn)(Register Rt, address dest);
  typedef void (Assembler::* test_and_branch_insn)(Register Rt, int bitpos, address dest);
  typedef void (Assembler::* prefetch_insn)(address target, prfop);

  // One wrap_label overload per member-pointer type above.  Each takes the
  // Label in place of the address, plus the emitter's other arguments.
  // Definitions are elsewhere.
  void wrap_label(Label &L, uncond_branch_insn insn);
  void wrap_label(Register r, Label &L, compare_and_branch_insn insn);
  void wrap_label(Register r, int bitpos, Label &L, test_and_branch_insn insn);
  void wrap_label(Label &L, prfop, prefetch_insn insn);
 717 
 718   // PC-rel. addressing
 719 
 720   void adr(Register Rd, address dest);
 721   void _adrp(Register Rd, address dest);
 722 
 723   void adr(Register Rd, const Address &dest);
 724   void _adrp(Register Rd, const Address &dest);
 725 
 726   void adr(Register Rd, Label &L) {
 727     wrap_label(Rd, L, &Assembler::Assembler::adr);
 728   }
 729   void _adrp(Register Rd, Label &L) {
 730     wrap_label(Rd, L, &Assembler::_adrp);
 731   }
 732 
 733   void adrp(Register Rd, const Address &dest, uint64_t &offset);
 734 
 735 #undef INSN
 736 
 737   void add_sub_immediate(Instruction_aarch64 &current_insn, Register Rd, Register Rn,
 738                          unsigned uimm, int op, int negated_op);
 739 
 740   // Add/subtract (immediate)
 741 #define INSN(NAME, decode, negated)                                     \
 742   void NAME(Register Rd, Register Rn, unsigned imm, unsigned shift) {   \
 743     starti;                                                             \
 744     f(decode, 31, 29), f(0b10001, 28, 24), f(shift, 23, 22), f(imm, 21, 10); \
 745     zrf(Rd, 0), srf(Rn, 5);                                             \
 746   }                                                                     \
 747                                                                         \
 748   void NAME(Register Rd, Register Rn, unsigned imm) {                   \
 749     starti;                                                             \
 750     add_sub_immediate(current_insn, Rd, Rn, imm, decode, negated);      \
 751   }
 752 
 753   INSN(addsw, 0b001, 0b011);
 754   INSN(subsw, 0b011, 0b001);
 755   INSN(adds,  0b101, 0b111);
 756   INSN(subs,  0b111, 0b101);
 757 
 758 #undef INSN
 759 
 760 #define INSN(NAME, decode, negated)                     \
 761   void NAME(Register Rd, Register Rn, unsigned imm) {   \
 762     starti;                                             \
 763     add_sub_immediate(current_insn, Rd, Rn, imm, decode, negated);     \
 764   }
 765 
 766   INSN(addw, 0b000, 0b010);
 767   INSN(subw, 0b010, 0b000);
 768   INSN(add,  0b100, 0b110);
 769   INSN(sub,  0b110, 0b100);
 770 
 771 #undef INSN
 772 
 773  // Logical (immediate)
 774 #define INSN(NAME, decode, is32)                                \
 775   void NAME(Register Rd, Register Rn, uint64_t imm) {           \
 776     starti;                                                     \
 777     uint32_t val = encode_logical_immediate(is32, imm);         \
 778     f(decode, 31, 29), f(0b100100, 28, 23), f(val, 22, 10);     \
 779     srf(Rd, 0), zrf(Rn, 5);                                     \
 780   }
 781 
 782   INSN(andw, 0b000, true);
 783   INSN(orrw, 0b001, true);
 784   INSN(eorw, 0b010, true);
 785   INSN(andr,  0b100, false);
 786   INSN(orr,  0b101, false);
 787   INSN(eor,  0b110, false);
 788 
 789 #undef INSN
 790 
 791 #define INSN(NAME, decode, is32)                                \
 792   void NAME(Register Rd, Register Rn, uint64_t imm) {           \
 793     starti;                                                     \
 794     uint32_t val = encode_logical_immediate(is32, imm);         \
 795     f(decode, 31, 29), f(0b100100, 28, 23), f(val, 22, 10);     \
 796     zrf(Rd, 0), zrf(Rn, 5);                                     \
 797   }
 798 
 799   INSN(ands, 0b111, false);
 800   INSN(andsw, 0b011, true);
 801 
 802 #undef INSN
 803 
 804   // Move wide (immediate)
 805 #define INSN(NAME, opcode)                                              \
 806   void NAME(Register Rd, unsigned imm, unsigned shift = 0) {            \
 807     assert_cond((shift/16)*16 == shift);                                \
 808     starti;                                                             \
 809     f(opcode, 31, 29), f(0b100101, 28, 23), f(shift/16, 22, 21),        \
 810       f(imm, 20, 5);                                                    \
 811     rf(Rd, 0);                                                          \
 812   }
 813 
 814   INSN(movnw, 0b000);
 815   INSN(movzw, 0b010);
 816   INSN(movkw, 0b011);
 817   INSN(movn, 0b100);
 818   INSN(movz, 0b110);
 819   INSN(movk, 0b111);
 820 
 821 #undef INSN
 822 
 823   // Bitfield
 824 #define INSN(NAME, opcode, size)                                        \
 825   void NAME(Register Rd, Register Rn, unsigned immr, unsigned imms) {   \
 826     starti;                                                             \
 827     guarantee(size == 1 || (immr < 32 && imms < 32), "incorrect immr/imms");\
 828     f(opcode, 31, 22), f(immr, 21, 16), f(imms, 15, 10);                \
 829     zrf(Rn, 5), rf(Rd, 0);                                              \
 830   }
 831 
 832   INSN(sbfmw, 0b0001001100, 0);
 833   INSN(bfmw,  0b0011001100, 0);
 834   INSN(ubfmw, 0b0101001100, 0);
 835   INSN(sbfm,  0b1001001101, 1);
 836   INSN(bfm,   0b1011001101, 1);
 837   INSN(ubfm,  0b1101001101, 1);
 838 
 839 #undef INSN
 840 
 841   // Extract
 842 #define INSN(NAME, opcode, size)                                        \
 843   void NAME(Register Rd, Register Rn, Register Rm, unsigned imms) {     \
 844     starti;                                                             \
 845     guarantee(size == 1 || imms < 32, "incorrect imms");                \
 846     f(opcode, 31, 21), f(imms, 15, 10);                                 \
 847     zrf(Rm, 16), zrf(Rn, 5), zrf(Rd, 0);                                \
 848   }
 849 
 850   INSN(extrw, 0b00010011100, 0);
 851   INSN(extr,  0b10010011110, 1);
 852 
 853 #undef INSN
 854 
 855   // The maximum range of a branch is fixed for the AArch64
 856   // architecture.  In debug mode we shrink it in order to test
 857   // trampolines, but not so small that branches in the interpreter
 858   // are out of range.
 859   static const uint64_t branch_range = NOT_DEBUG(128 * M) DEBUG_ONLY(2 * M);
 860 
 861   static bool reachable_from_branch_at(address branch, address target) {
 862     return uabs(target - branch) < branch_range;
 863   }
 864 
 865   // Unconditional branch (immediate)
 866 #define INSN(NAME, opcode)                                              \
 867   void NAME(address dest) {                                             \
 868     starti;                                                             \
 869     int64_t offset = (dest - pc()) >> 2;                                \
 870     DEBUG_ONLY(assert(reachable_from_branch_at(pc(), dest), "debug only")); \
 871     f(opcode, 31), f(0b00101, 30, 26), sf(offset, 25, 0);               \
 872   }                                                                     \
 873   void NAME(Label &L) {                                                 \
 874     wrap_label(L, &Assembler::NAME);                                    \
 875   }                                                                     \
 876   void NAME(const Address &dest);
 877 
 878   INSN(b, 0);
 879   INSN(bl, 1);
 880 
 881 #undef INSN
 882 
 883   // Compare & branch (immediate)
 884 #define INSN(NAME, opcode)                              \
 885   void NAME(Register Rt, address dest) {                \
 886     int64_t offset = (dest - pc()) >> 2;                \
 887     starti;                                             \
 888     f(opcode, 31, 24), sf(offset, 23, 5), rf(Rt, 0);    \
 889   }                                                     \
 890   void NAME(Register Rt, Label &L) {                    \
 891     wrap_label(Rt, L, &Assembler::NAME);                \
 892   }
 893 
 894   INSN(cbzw,  0b00110100);
 895   INSN(cbnzw, 0b00110101);
 896   INSN(cbz,   0b10110100);
 897   INSN(cbnz,  0b10110101);
 898 
 899 #undef INSN
 900 
 901   // Test & branch (immediate)
 902 #define INSN(NAME, opcode)                                              \
 903   void NAME(Register Rt, int bitpos, address dest) {                    \
 904     int64_t offset = (dest - pc()) >> 2;                                \
 905     int b5 = bitpos >> 5;                                               \
 906     bitpos &= 0x1f;                                                     \
 907     starti;                                                             \
 908     f(b5, 31), f(opcode, 30, 24), f(bitpos, 23, 19), sf(offset, 18, 5); \
 909     rf(Rt, 0);                                                          \
 910   }                                                                     \
 911   void NAME(Register Rt, int bitpos, Label &L) {                        \
 912     wrap_label(Rt, bitpos, L, &Assembler::NAME);                        \
 913   }
 914 
 915   INSN(tbz,  0b0110110);
 916   INSN(tbnz, 0b0110111);
 917 
 918 #undef INSN
 919 
 920   // Conditional branch (immediate)
 921   enum Condition
 922     {EQ, NE, HS, CS=HS, LO, CC=LO, MI, PL, VS, VC, HI, LS, GE, LT, GT, LE, AL, NV};
 923 
 924   void br(Condition  cond, address dest) {
 925     int64_t offset = (dest - pc()) >> 2;
 926     starti;
 927     f(0b0101010, 31, 25), f(0, 24), sf(offset, 23, 5), f(0, 4), f(cond, 3, 0);
 928   }
 929 
// Named conditional branches: each NAME(dest) simply forwards to
// br(cond, dest) with the Condition fixed by the instantiation below.
#define INSN(NAME, cond)                        \
  void NAME(address dest) {                     \
    br(cond, dest);                             \
  }

  INSN(beq, EQ);
  INSN(bne, NE);
  INSN(bhs, HS);
  INSN(bcs, CS);
  INSN(blo, LO);
  INSN(bcc, CC);
  INSN(bmi, MI);
  INSN(bpl, PL);
  INSN(bvs, VS);
  INSN(bvc, VC);
  INSN(bhi, HI);
  INSN(bls, LS);
  INSN(bge, GE);
  INSN(blt, LT);
  INSN(bgt, GT);
  INSN(ble, LE);
  INSN(bal, AL);
  INSN(bnv, NV);

  // Label-taking form of the conditional branch; only declared here.
  void br(Condition cc, Label &L);

#undef INSN
 957 
 958   // Exception generation
 959   void generate_exception(int opc, int op2, int LL, unsigned imm) {
 960     starti;
 961     f(0b11010100, 31, 24);
 962     f(opc, 23, 21), f(imm, 20, 5), f(op2, 4, 2), f(LL, 1, 0);
 963   }
 964 
// Each exception-generating instruction takes only the immediate; the
// opc, op2 and LL values are fixed per mnemonic and handed to
// generate_exception() together with the immediate.
#define INSN(NAME, opc, op2, LL)                \
  void NAME(unsigned imm) {                     \
    generate_exception(opc, op2, LL, imm);      \
  }

  INSN(svc, 0b000, 0, 0b01);
  INSN(hvc, 0b000, 0, 0b10);
  INSN(smc, 0b000, 0, 0b11);
  INSN(brk, 0b001, 0, 0b00);
  INSN(hlt, 0b010, 0, 0b00);
  INSN(dcps1, 0b101, 0, 0b01);
  INSN(dcps2, 0b101, 0, 0b10);
  INSN(dcps3, 0b101, 0, 0b11);

#undef INSN
 980 
 981   // System
 982   void system(int op0, int op1, int CRn, int CRm, int op2,
 983               Register rt = dummy_reg)
 984   {
 985     starti;
 986     f(0b11010101000, 31, 21);
 987     f(op0, 20, 19);
 988     f(op1, 18, 16);
 989     f(CRn, 15, 12);
 990     f(CRm, 11, 8);
 991     f(op2, 7, 5);
 992     rf(rt, 0);
 993   }
 994 
 995   void hint(int imm) {
 996     system(0b00, 0b011, 0b0010, 0b0000, imm);
 997   }
 998 
 999   void nop() {
1000     hint(0);
1001   }
1002 
1003   void yield() {
1004     hint(1);
1005   }
1006 
1007   void wfe() {
1008     hint(2);
1009   }
1010 
1011   void wfi() {
1012     hint(3);
1013   }
1014 
1015   void sev() {
1016     hint(4);
1017   }
1018 
1019   void sevl() {
1020     hint(5);
1021   }
1022 
  // We only provide mrs and msr for the special-purpose system
  // registers where op0 (instr[20:19]) == 0b11 and (currently) only
  // use them for FPSR.  N.B. msr has L (instr[21]) == 0; mrs has L == 1.
1026 
1027   void msr(int op1, int CRn, int CRm, int op2, Register rt) {
1028     starti;
1029     f(0b1101010100011, 31, 19);
1030     f(op1, 18, 16);
1031     f(CRn, 15, 12);
1032     f(CRm, 11, 8);
1033     f(op2, 7, 5);
1034     // writing zr is ok
1035     zrf(rt, 0);
1036   }
1037 
1038   void mrs(int op1, int CRn, int CRm, int op2, Register rt) {
1039     starti;
1040     f(0b1101010100111, 31, 19);
1041     f(op1, 18, 16);
1042     f(CRn, 15, 12);
1043     f(CRm, 11, 8);
1044     f(op2, 7, 5);
1045     // reading to zr is a mistake
1046     rf(rt, 0);
1047   }
1048 
1049   enum barrier {OSHLD = 0b0001, OSHST, OSH, NSHLD=0b0101, NSHST, NSH,
1050                 ISHLD = 0b1001, ISHST, ISH, LD=0b1101, ST, SY};
1051 
1052   void dsb(barrier imm) {
1053     system(0b00, 0b011, 0b00011, imm, 0b100);
1054   }
1055 
1056   void dmb(barrier imm) {
1057     system(0b00, 0b011, 0b00011, imm, 0b101);
1058   }
1059 
1060   void isb() {
1061     system(0b00, 0b011, 0b00011, SY, 0b110);
1062   }
1063 
  // Convenience wrapper around system() with op0 fixed to 0b01.  When
  // no register is supplied, rt defaults to register number 0b11111.
  void sys(int op1, int CRn, int CRm, int op2,
           Register rt = as_Register(0b11111)) {
    system(0b01, op1, CRn, CRm, op2, rt);
  }
1068 
1069   // Only implement operations accessible from EL0 or higher, i.e.,
1070   //            op1    CRn    CRm    op2
1071   // IC IVAU     3      7      5      1
1072   // DC CVAC     3      7      10     1
1073   // DC CVAP     3      7      12     1
1074   // DC CVAU     3      7      11     1
1075   // DC CIVAC    3      7      14     1
1076   // DC ZVA      3      7      4      1
1077   // So only deal with the CRm field.
1078   enum icache_maintenance {IVAU = 0b0101};
1079   enum dcache_maintenance {CVAC = 0b1010, CVAP = 0b1100, CVAU = 0b1011, CIVAC = 0b1110, ZVA = 0b100};
1080 
  // Data cache maintenance: sys() with op1 = 0b011, CRn = 0b0111,
  // op2 = 0b001 and the operation selector cm as CRm.
  void dc(dcache_maintenance cm, Register Rt) {
    sys(0b011, 0b0111, cm, 0b001, Rt);
  }

  // Instruction cache maintenance: same fixed fields as dc(), with the
  // icache operation selector as CRm.
  void ic(icache_maintenance cm, Register Rt) {
    sys(0b011, 0b0111, cm, 0b001, Rt);
  }
1088 
  // A more convenient access to dmb for our purposes
  // Each ordering constraint is mapped to one of the barrier kinds:
  // LoadStore and LoadLoad share ISHLD, StoreLoad and AnyAny share ISH.
  enum Membar_mask_bits {
    // We can use ISH for a barrier because the ARM ARM says "This
    // architecture assumes that all Processing Elements that use the
    // same operating system or hypervisor are in the same Inner
    // Shareable shareability domain."
    StoreStore = ISHST,
    LoadStore  = ISHLD,
    LoadLoad   = ISHLD,
    StoreLoad  = ISH,
    AnyAny     = ISH
  };
1101 
1102   void membar(Membar_mask_bits order_constraint) {
1103     dmb(Assembler::barrier(order_constraint));
1104   }
1105 
1106   // Unconditional branch (register)
1107   void branch_reg(Register R, int opc) {
1108     starti;
1109     f(0b1101011, 31, 25);
1110     f(opc, 24, 21);
1111     f(0b11111000000, 20, 10);
1112     rf(R, 5);
1113     f(0b00000, 4, 0);
1114   }
1115 
// Register-indirect branches: NAME(R) forwards to branch_reg() with the
// opc value fixed by the instantiation.
#define INSN(NAME, opc)                         \
  void NAME(Register R) {                       \
    branch_reg(R, opc);                         \
  }

  INSN(br, 0b0000);
  INSN(blr, 0b0001);
  INSN(ret, 0b0010);

  void ret(void *p); // This forces a compile-time error for ret(0)

#undef INSN
1128 
1129 #define INSN(NAME, opc)                         \
1130   void NAME() {                 \
1131     branch_reg(dummy_reg, opc);         \
1132   }
1133 
1134   INSN(eret, 0b0100);
1135   INSN(drps, 0b0101);
1136 
1137 #undef INSN
1138 
1139   // Load/store exclusive
1140   enum operand_size { byte, halfword, word, xword };
1141 
1142   void load_store_exclusive(Register Rs, Register Rt1, Register Rt2,
1143     Register Rn, enum operand_size sz, int op, bool ordered) {
1144     starti;
1145     f(sz, 31, 30), f(0b001000, 29, 24), f(op, 23, 21);
1146     rf(Rs, 16), f(ordered, 15), zrf(Rt2, 10), srf(Rn, 5), zrf(Rt1, 0);
1147   }
1148 
1149   void load_exclusive(Register dst, Register addr,
1150                       enum operand_size sz, bool ordered) {
1151     load_store_exclusive(dummy_reg, dst, dummy_reg, addr,
1152                          sz, 0b010, ordered);
1153   }
1154 
1155   void store_exclusive(Register status, Register new_val, Register addr,
1156                        enum operand_size sz, bool ordered) {
1157     load_store_exclusive(status, new_val, dummy_reg, addr,
1158                          sz, 0b000, ordered);
1159   }
1160 
1161 #define INSN4(NAME, sz, op, o0) /* Four registers */                    \
1162   void NAME(Register Rs, Register Rt1, Register Rt2, Register Rn) {     \
1163     guarantee(Rs != Rn && Rs != Rt1 && Rs != Rt2, "unpredictable instruction"); \
1164     load_store_exclusive(Rs, Rt1, Rt2, Rn, sz, op, o0);                 \
1165   }
1166 
1167 #define INSN3(NAME, sz, op, o0) /* Three registers */                   \
1168   void NAME(Register Rs, Register Rt, Register Rn) {                    \
1169     guarantee(Rs != Rn && Rs != Rt, "unpredictable instruction");       \
1170     load_store_exclusive(Rs, Rt, dummy_reg, Rn, sz, op, o0); \
1171   }
1172 
1173 #define INSN2(NAME, sz, op, o0) /* Two registers */                     \
1174   void NAME(Register Rt, Register Rn) {                                 \
1175     load_store_exclusive(dummy_reg, Rt, dummy_reg, \
1176                          Rn, sz, op, o0);                               \
1177   }
1178 
1179 #define INSN_FOO(NAME, sz, op, o0) /* Three registers, encoded differently */ \
1180   void NAME(Register Rt1, Register Rt2, Register Rn) {                  \
1181     guarantee(Rt1 != Rt2, "unpredictable instruction");                 \
1182     load_store_exclusive(dummy_reg, Rt1, Rt2, Rn, sz, op, o0);          \
1183   }
1184 
1185   // bytes
1186   INSN3(stxrb, byte, 0b000, 0);
1187   INSN3(stlxrb, byte, 0b000, 1);
1188   INSN2(ldxrb, byte, 0b010, 0);
1189   INSN2(ldaxrb, byte, 0b010, 1);
1190   INSN2(stlrb, byte, 0b100, 1);
1191   INSN2(ldarb, byte, 0b110, 1);
1192 
1193   // halfwords
1194   INSN3(stxrh, halfword, 0b000, 0);
1195   INSN3(stlxrh, halfword, 0b000, 1);
1196   INSN2(ldxrh, halfword, 0b010, 0);
1197   INSN2(ldaxrh, halfword, 0b010, 1);
1198   INSN2(stlrh, halfword, 0b100, 1);
1199   INSN2(ldarh, halfword, 0b110, 1);
1200 
1201   // words
1202   INSN3(stxrw, word, 0b000, 0);
1203   INSN3(stlxrw, word, 0b000, 1);
1204   INSN4(stxpw, word, 0b001, 0);
1205   INSN4(stlxpw, word, 0b001, 1);
1206   INSN2(ldxrw, word, 0b010, 0);
1207   INSN2(ldaxrw, word, 0b010, 1);
1208   INSN_FOO(ldxpw, word, 0b011, 0);
1209   INSN_FOO(ldaxpw, word, 0b011, 1);
1210   INSN2(stlrw, word, 0b100, 1);
1211   INSN2(ldarw, word, 0b110, 1);
1212 
1213   // xwords
1214   INSN3(stxr, xword, 0b000, 0);
1215   INSN3(stlxr, xword, 0b000, 1);
1216   INSN4(stxp, xword, 0b001, 0);
1217   INSN4(stlxp, xword, 0b001, 1);
1218   INSN2(ldxr, xword, 0b010, 0);
1219   INSN2(ldaxr, xword, 0b010, 1);
1220   INSN_FOO(ldxp, xword, 0b011, 0);
1221   INSN_FOO(ldaxp, xword, 0b011, 1);
1222   INSN2(stlr, xword, 0b100, 1);
1223   INSN2(ldar, xword, 0b110, 1);
1224 
1225 #undef INSN2
1226 #undef INSN3
1227 #undef INSN4
1228 #undef INSN_FOO
1229 
1230   // 8.1 Compare and swap extensions
1231   void lse_cas(Register Rs, Register Rt, Register Rn,
1232                         enum operand_size sz, bool a, bool r, bool not_pair) {
1233     starti;
1234     if (! not_pair) { // Pair
1235       assert(sz == word || sz == xword, "invalid size");
1236       /* The size bit is in bit 30, not 31 */
1237       sz = (operand_size)(sz == word ? 0b00:0b01);
1238     }
1239     f(sz, 31, 30), f(0b001000, 29, 24), f(not_pair ? 1 : 0, 23), f(a, 22), f(1, 21);
1240     zrf(Rs, 16), f(r, 15), f(0b11111, 14, 10), srf(Rn, 5), zrf(Rt, 0);
1241   }
1242 
  // CAS
  // Single-register compare and swap: forwards to lse_cas() with
  // not_pair = true.  The macro's a and r arguments become lse_cas's
  // a and r flags.  Rs must differ from both Rn and Rt (checked with
  // assert only).
#define INSN(NAME, a, r)                                                \
  void NAME(operand_size sz, Register Rs, Register Rt, Register Rn) {   \
    assert(Rs != Rn && Rs != Rt, "unpredictable instruction");          \
    lse_cas(Rs, Rt, Rn, sz, a, r, true);                                \
  }
  INSN(cas,    false, false)
  INSN(casa,   true,  false)
  INSN(casl,   false, true)
  INSN(casal,  true,  true)
#undef INSN
1254 
  // CASP
  // Pair compare and swap: forwards to lse_cas() with not_pair = false.
  // The assert requires Rs and Rt to have even encodings, Rs1 and Rt1
  // to be their successors, Rn to differ from Rs and Rs1, and Rs to
  // differ from Rt.  Only Rs, Rt and Rn are passed on to the encoder.
#define INSN(NAME, a, r)                                                \
  void NAME(operand_size sz, Register Rs, Register Rs1,                 \
            Register Rt, Register Rt1, Register Rn) {                   \
    assert((Rs->encoding() & 1) == 0 && (Rt->encoding() & 1) == 0 &&    \
           Rs->successor() == Rs1 && Rt->successor() == Rt1 &&          \
           Rs != Rn && Rs1 != Rn && Rs != Rt, "invalid registers");     \
    lse_cas(Rs, Rt, Rn, sz, a, r, false);                               \
  }
  INSN(casp,    false, false)
  INSN(caspa,   true,  false)
  INSN(caspl,   false, true)
  INSN(caspal,  true,  true)
#undef INSN
1269 
1270   // 8.1 Atomic operations
1271   void lse_atomic(Register Rs, Register Rt, Register Rn,
1272                   enum operand_size sz, int op1, int op2, bool a, bool r) {
1273     starti;
1274     f(sz, 31, 30), f(0b111000, 29, 24), f(a, 23), f(r, 22), f(1, 21);
1275     zrf(Rs, 16), f(op1, 15), f(op2, 14, 12), f(0, 11, 10), srf(Rn, 5), zrf(Rt, 0);
1276   }
1277 
// Each instantiation defines four methods that differ only in the
// (a, r) flags handed to lse_atomic(): NAME (false, false),
// NAME_A (true, false), NAME_L (false, true) and NAME_AL (true, true).
#define INSN(NAME, NAME_A, NAME_L, NAME_AL, op1, op2)                   \
  void NAME(operand_size sz, Register Rs, Register Rt, Register Rn) {   \
    lse_atomic(Rs, Rt, Rn, sz, op1, op2, false, false);                 \
  }                                                                     \
  void NAME_A(operand_size sz, Register Rs, Register Rt, Register Rn) { \
    lse_atomic(Rs, Rt, Rn, sz, op1, op2, true, false);                  \
  }                                                                     \
  void NAME_L(operand_size sz, Register Rs, Register Rt, Register Rn) { \
    lse_atomic(Rs, Rt, Rn, sz, op1, op2, false, true);                  \
  }                                                                     \
  void NAME_AL(operand_size sz, Register Rs, Register Rt, Register Rn) {\
    lse_atomic(Rs, Rt, Rn, sz, op1, op2, true, true);                   \
  }
  INSN(ldadd,  ldadda,  ldaddl,  ldaddal,  0, 0b000);
  INSN(ldbic,  ldbica,  ldbicl,  ldbical,  0, 0b001);
  INSN(ldeor,  ldeora,  ldeorl,  ldeoral,  0, 0b010);
  INSN(ldorr,  ldorra,  ldorrl,  ldorral,  0, 0b011);
  INSN(ldsmax, ldsmaxa, ldsmaxl, ldsmaxal, 0, 0b100);
  INSN(ldsmin, ldsmina, ldsminl, ldsminal, 0, 0b101);
  INSN(ldumax, ldumaxa, ldumaxl, ldumaxal, 0, 0b110);
  INSN(ldumin, ldumina, lduminl, lduminal, 0, 0b111);
  INSN(swp,    swpa,    swpl,    swpal,    1, 0b000);
#undef INSN
1301 
1302   // Load register (literal)
1303 #define INSN(NAME, opc, V)                                              \
1304   void NAME(Register Rt, address dest) {                                \
1305     int64_t offset = (dest - pc()) >> 2;                                \
1306     starti;                                                             \
1307     f(opc, 31, 30), f(0b011, 29, 27), f(V, 26), f(0b00, 25, 24),        \
1308       sf(offset, 23, 5);                                                \
1309     rf(Rt, 0);                                                          \
1310   }                                                                     \
1311   void NAME(Register Rt, address dest, relocInfo::relocType rtype) {    \
1312     InstructionMark im(this);                                           \
1313     guarantee(rtype == relocInfo::internal_word_type,                   \
1314               "only internal_word_type relocs make sense here");        \
1315     code_section()->relocate(inst_mark(), InternalAddress(dest).rspec()); \
1316     NAME(Rt, dest);                                                     \
1317   }                                                                     \
1318   void NAME(Register Rt, Label &L) {                                    \
1319     wrap_label(Rt, L, &Assembler::NAME);                                \
1320   }
1321 
1322   INSN(ldrw, 0b00, 0);
1323   INSN(ldr, 0b01, 0);
1324   INSN(ldrsw, 0b10, 0);
1325 
1326 #undef INSN
1327 
1328 #define INSN(NAME, opc, V)                                              \
1329   void NAME(FloatRegister Rt, address dest) {                           \
1330     int64_t offset = (dest - pc()) >> 2;                                \
1331     starti;                                                             \
1332     f(opc, 31, 30), f(0b011, 29, 27), f(V, 26), f(0b00, 25, 24),        \
1333       sf(offset, 23, 5);                                                \
1334     rf(as_Register(Rt), 0);                                             \
1335   }
1336 
1337   INSN(ldrs, 0b00, 1);
1338   INSN(ldrd, 0b01, 1);
1339   INSN(ldrq, 0b10, 1);
1340 
1341 #undef INSN
1342 
1343 #define INSN(NAME, size, opc)                                           \
1344   void NAME(FloatRegister Rt, Register Rn) {                            \
1345     starti;                                                             \
1346     f(size, 31, 30), f(0b111100, 29, 24), f(opc, 23, 22), f(0, 21);     \
1347     f(0, 20, 12), f(0b01, 11, 10);                                      \
1348     rf(Rn, 5), rf(as_Register(Rt), 0);                                  \
1349   }
1350 
1351   INSN(ldrs, 0b10, 0b01);
1352   INSN(ldrd, 0b11, 0b01);
1353   INSN(ldrq, 0b00, 0b11);
1354 
1355 #undef INSN
1356 
1357 
1358 #define INSN(NAME, opc, V)                                              \
1359   void NAME(address dest, prfop op = PLDL1KEEP) {                       \
1360     int64_t offset = (dest - pc()) >> 2;                                \
1361     starti;                                                             \
1362     f(opc, 31, 30), f(0b011, 29, 27), f(V, 26), f(0b00, 25, 24),        \
1363       sf(offset, 23, 5);                                                \
1364     f(op, 4, 0);                                                        \
1365   }                                                                     \
1366   void NAME(Label &L, prfop op = PLDL1KEEP) {                           \
1367     wrap_label(L, op, &Assembler::NAME);                                \
1368   }
1369 
1370   INSN(prfm, 0b11, 0);
1371 
1372 #undef INSN
1373 
1374   // Load/store
1375   void ld_st1(int opc, int p1, int V, int L,
1376               Register Rt1, Register Rt2, Address adr, bool no_allocate) {
1377     starti;
1378     f(opc, 31, 30), f(p1, 29, 27), f(V, 26), f(L, 22);
1379     zrf(Rt2, 10), zrf(Rt1, 0);
1380     if (no_allocate) {
1381       adr.encode_nontemporal_pair(&current_insn);
1382     } else {
1383       adr.encode_pair(&current_insn);
1384     }
1385   }
1386 
1387   // Load/store register pair (offset)
1388 #define INSN(NAME, size, p1, V, L, no_allocate)         \
1389   void NAME(Register Rt1, Register Rt2, Address adr) {  \
1390     ld_st1(size, p1, V, L, Rt1, Rt2, adr, no_allocate); \
1391    }
1392 
1393   INSN(stpw, 0b00, 0b101, 0, 0, false);
1394   INSN(ldpw, 0b00, 0b101, 0, 1, false);
1395   INSN(ldpsw, 0b01, 0b101, 0, 1, false);
1396   INSN(stp, 0b10, 0b101, 0, 0, false);
1397   INSN(ldp, 0b10, 0b101, 0, 1, false);
1398 
1399   // Load/store no-allocate pair (offset)
1400   INSN(stnpw, 0b00, 0b101, 0, 0, true);
1401   INSN(ldnpw, 0b00, 0b101, 0, 1, true);
1402   INSN(stnp, 0b10, 0b101, 0, 0, true);
1403   INSN(ldnp, 0b10, 0b101, 0, 1, true);
1404 
1405 #undef INSN
1406 
1407 #define INSN(NAME, size, p1, V, L, no_allocate)                         \
1408   void NAME(FloatRegister Rt1, FloatRegister Rt2, Address adr) {        \
1409     ld_st1(size, p1, V, L,                                              \
1410            as_Register(Rt1), as_Register(Rt2), adr, no_allocate);       \
1411    }
1412 
1413   INSN(stps, 0b00, 0b101, 1, 0, false);
1414   INSN(ldps, 0b00, 0b101, 1, 1, false);
1415   INSN(stpd, 0b01, 0b101, 1, 0, false);
1416   INSN(ldpd, 0b01, 0b101, 1, 1, false);
1417   INSN(stpq, 0b10, 0b101, 1, 0, false);
1418   INSN(ldpq, 0b10, 0b101, 1, 1, false);
1419 
1420 #undef INSN
1421 
1422   // Load/store register (all modes)
1423   void ld_st2(Register Rt, const Address &adr, int size, int op, int V = 0) {
1424     starti;
1425 
1426     f(V, 26); // general reg?
1427     zrf(Rt, 0);
1428 
1429     // Encoding for literal loads is done here (rather than pushed
1430     // down into Address::encode) because the encoding of this
1431     // instruction is too different from all of the other forms to
1432     // make it worth sharing.
1433     if (adr.getMode() == Address::literal) {
1434       assert(size == 0b10 || size == 0b11, "bad operand size in ldr");
1435       assert(op == 0b01, "literal form can only be used with loads");
1436       f(size & 0b01, 31, 30), f(0b011, 29, 27), f(0b00, 25, 24);
1437       int64_t offset = (adr.target() - pc()) >> 2;
1438       sf(offset, 23, 5);
1439       code_section()->relocate(pc(), adr.rspec());
1440       return;
1441     }
1442 
1443     f(size, 31, 30);
1444     f(op, 23, 22); // str
1445     adr.encode(&current_insn);
1446   }
1447 
1448 #define INSN(NAME, size, op)                            \
1449   void NAME(Register Rt, const Address &adr) {          \
1450     ld_st2(Rt, adr, size, op);                          \
1451   }                                                     \
1452 
1453   INSN(str, 0b11, 0b00);
1454   INSN(strw, 0b10, 0b00);
1455   INSN(strb, 0b00, 0b00);
1456   INSN(strh, 0b01, 0b00);
1457 
1458   INSN(ldr, 0b11, 0b01);
1459   INSN(ldrw, 0b10, 0b01);
1460   INSN(ldrb, 0b00, 0b01);
1461   INSN(ldrh, 0b01, 0b01);
1462 
1463   INSN(ldrsb, 0b00, 0b10);
1464   INSN(ldrsbw, 0b00, 0b11);
1465   INSN(ldrsh, 0b01, 0b10);
1466   INSN(ldrshw, 0b01, 0b11);
1467   INSN(ldrsw, 0b10, 0b10);
1468 
1469 #undef INSN
1470 
// Prefetch with a general Address: the prefetch operation is converted
// with as_Register() and passed to ld_st2() in the Rt position.
#define INSN(NAME, size, op)                                    \
  void NAME(const Address &adr, prfop pfop = PLDL1KEEP) {       \
    ld_st2(as_Register(pfop), adr, size, op);                   \
  }

  INSN(prfm, 0b11, 0b10); // FIXME: PRFM should not be used with
                          // writeback modes, but the assembler
                          // doesn't enforce that.

#undef INSN
1481 
1482 #define INSN(NAME, size, op)                            \
1483   void NAME(FloatRegister Rt, const Address &adr) {     \
1484     ld_st2(as_Register(Rt), adr, size, op, 1);          \
1485   }
1486 
1487   INSN(strd, 0b11, 0b00);
1488   INSN(strs, 0b10, 0b00);
1489   INSN(ldrd, 0b11, 0b01);
1490   INSN(ldrs, 0b10, 0b01);
1491   INSN(strq, 0b00, 0b10);
1492   INSN(ldrq, 0x00, 0b11);
1493 
1494 #undef INSN
1495 
1496 /* SIMD extensions
1497  *
1498  * We just use FloatRegister in the following. They are exactly the same
1499  * as SIMD registers.
1500  */
1501 public:
1502 
1503   enum SIMD_Arrangement {
1504     T8B, T16B, T4H, T8H, T2S, T4S, T1D, T2D, T1Q, INVALID_ARRANGEMENT
1505   };
1506 
1507   enum SIMD_RegVariant {
1508       B, H, S, D, Q, INVALID
1509   };
1510 
private:

  // Static tables declared here and defined elsewhere.
  // NOTE(review): presumably indexed by element size and, for the
  // arrangement table, by the Q flag -- confirm against the definitions.
  static SIMD_Arrangement _esize2arrangement_table[9][2];
  static SIMD_RegVariant _esize2regvariant[9];

public:

  // Conversion helpers; only declared here, defined elsewhere.
  static SIMD_Arrangement esize2arrangement(unsigned esize, bool isQ);
  static SIMD_RegVariant elemType_to_regVariant(BasicType bt);
  static SIMD_RegVariant elemBytes_to_regVariant(unsigned esize);
  // Return the corresponding bits for different SIMD_RegVariant value.
  static unsigned regVariant_to_elemBits(SIMD_RegVariant T);
1523 
1524   enum shift_kind { LSL, LSR, ASR, ROR };
1525 
1526   void op_shifted_reg(Instruction_aarch64 &current_insn, unsigned decode,
1527                       enum shift_kind kind, unsigned shift,
1528                       unsigned size, unsigned op) {
1529     f(size, 31);
1530     f(op, 30, 29);
1531     f(decode, 28, 24);
1532     f(shift, 15, 10);
1533     f(kind, 23, 22);
1534   }
1535 
1536   // Logical (shifted register)
1537 #define INSN(NAME, size, op, N)                                         \
1538   void NAME(Register Rd, Register Rn, Register Rm,                      \
1539             enum shift_kind kind = LSL, unsigned shift = 0) {           \
1540     starti;                                                             \
1541     guarantee(size == 1 || shift < 32, "incorrect shift");              \
1542     f(N, 21);                                                           \
1543     zrf(Rm, 16), zrf(Rn, 5), zrf(Rd, 0);                                \
1544     op_shifted_reg(current_insn, 0b01010, kind, shift, size, op);       \
1545   }
1546 
1547   INSN(andr, 1, 0b00, 0);
1548   INSN(orr, 1, 0b01, 0);
1549   INSN(eor, 1, 0b10, 0);
1550   INSN(ands, 1, 0b11, 0);
1551   INSN(andw, 0, 0b00, 0);
1552   INSN(orrw, 0, 0b01, 0);
1553   INSN(eorw, 0, 0b10, 0);
1554   INSN(andsw, 0, 0b11, 0);
1555 
1556 #undef INSN
1557 
1558 #define INSN(NAME, size, op, N)                                         \
1559   void NAME(Register Rd, Register Rn, Register Rm,                      \
1560             enum shift_kind kind = LSL, unsigned shift = 0) {           \
1561     starti;                                                             \
1562     f(N, 21);                                                           \
1563     zrf(Rm, 16), zrf(Rn, 5), zrf(Rd, 0);                                \
1564     op_shifted_reg(current_insn, 0b01010, kind, shift, size, op);       \
1565   }                                                                     \
1566                                                                         \
1567   /* These instructions have no immediate form. Provide an overload so  \
1568      that if anyone does try to use an immediate operand -- this has    \
1569      happened! -- we'll get a compile-time error. */                    \
1570   void NAME(Register Rd, Register Rn, unsigned imm,                     \
1571             enum shift_kind kind = LSL, unsigned shift = 0) {           \
1572     assert(false, " can't be used with immediate operand");             \
1573   }
1574 
1575   INSN(bic, 1, 0b00, 1);
1576   INSN(orn, 1, 0b01, 1);
1577   INSN(eon, 1, 0b10, 1);
1578   INSN(bics, 1, 0b11, 1);
1579   INSN(bicw, 0, 0b00, 1);
1580   INSN(ornw, 0, 0b01, 1);
1581   INSN(eonw, 0, 0b10, 1);
1582   INSN(bicsw, 0, 0b11, 1);
1583 
1584 #undef INSN
1585 
1586 #ifdef _WIN64
1587 // In MSVC, `mvn` is defined as a macro and it affects compilation
1588 #undef mvn
1589 #endif
1590 
1591   // Aliases for short forms of orn
1592 void mvn(Register Rd, Register Rm,
1593             enum shift_kind kind = LSL, unsigned shift = 0) {
1594   orn(Rd, zr, Rm, kind, shift);
1595 }
1596 
1597 void mvnw(Register Rd, Register Rm,
1598             enum shift_kind kind = LSL, unsigned shift = 0) {
1599   ornw(Rd, zr, Rm, kind, shift);
1600 }
1601 
1602   // Add/subtract (shifted register)
1603 #define INSN(NAME, size, op)                            \
1604   void NAME(Register Rd, Register Rn, Register Rm,      \
1605             enum shift_kind kind, unsigned shift = 0) { \
1606     starti;                                             \
1607     f(0, 21);                                           \
1608     assert_cond(kind != ROR);                           \
1609     guarantee(size == 1 || shift < 32, "incorrect shift");\
1610     zrf(Rd, 0), zrf(Rn, 5), zrf(Rm, 16);                \
1611     op_shifted_reg(current_insn, 0b01011, kind, shift, size, op);      \
1612   }
1613 
1614   INSN(add, 1, 0b000);
1615   INSN(sub, 1, 0b10);
1616   INSN(addw, 0, 0b000);
1617   INSN(subw, 0, 0b10);
1618 
1619   INSN(adds, 1, 0b001);
1620   INSN(subs, 1, 0b11);
1621   INSN(addsw, 0, 0b001);
1622   INSN(subsw, 0, 0b11);
1623 
1624 #undef INSN
1625 
1626   // Add/subtract (extended register)
1627 #define INSN(NAME, op)                                                  \
1628   void NAME(Register Rd, Register Rn, Register Rm,                      \
1629            ext::operation option, int amount = 0) {                     \
1630     starti;                                                             \
1631     zrf(Rm, 16), srf(Rn, 5), srf(Rd, 0);                                \
1632     add_sub_extended_reg(current_insn, op, 0b01011, Rd, Rn, Rm, 0b00, option, amount); \
1633   }
1634 
1635   void add_sub_extended_reg(Instruction_aarch64 &current_insn, unsigned op, unsigned decode,
1636     Register Rd, Register Rn, Register Rm,
1637     unsigned opt, ext::operation option, unsigned imm) {
1638     guarantee(imm <= 4, "shift amount must be <= 4");
1639     f(op, 31, 29), f(decode, 28, 24), f(opt, 23, 22), f(1, 21);
1640     f(option, 15, 13), f(imm, 12, 10);
1641   }
1642 
1643   INSN(addw, 0b000);
1644   INSN(subw, 0b010);
1645   INSN(add, 0b100);
1646   INSN(sub, 0b110);
1647 
1648 #undef INSN
1649 
1650 #define INSN(NAME, op)                                                  \
1651   void NAME(Register Rd, Register Rn, Register Rm,                      \
1652            ext::operation option, int amount = 0) {                     \
1653     starti;                                                             \
1654     zrf(Rm, 16), srf(Rn, 5), zrf(Rd, 0);                                \
1655     add_sub_extended_reg(current_insn, op, 0b01011, Rd, Rn, Rm, 0b00, option, amount); \
1656   }
1657 
1658   INSN(addsw, 0b001);
1659   INSN(subsw, 0b011);
1660   INSN(adds, 0b101);
1661   INSN(subs, 0b111);
1662 
1663 #undef INSN
1664 
1665   // Aliases for short forms of add and sub
1666 #define INSN(NAME)                                      \
1667   void NAME(Register Rd, Register Rn, Register Rm) {    \
1668     if (Rd == sp || Rn == sp)                           \
1669       NAME(Rd, Rn, Rm, ext::uxtx);                      \
1670     else                                                \
1671       NAME(Rd, Rn, Rm, LSL);                            \
1672   }
1673 
1674   INSN(addw);
1675   INSN(subw);
1676   INSN(add);
1677   INSN(sub);
1678 
1679   INSN(addsw);
1680   INSN(subsw);
1681   INSN(adds);
1682   INSN(subs);
1683 
1684 #undef INSN
1685 
1686   // Add/subtract (with carry)
1687   void add_sub_carry(unsigned op, Register Rd, Register Rn, Register Rm) {
1688     starti;
1689     f(op, 31, 29);
1690     f(0b11010000, 28, 21);
1691     f(0b000000, 15, 10);
1692     zrf(Rm, 16), zrf(Rn, 5), zrf(Rd, 0);
1693   }
1694 
1695   #define INSN(NAME, op)                                \
1696     void NAME(Register Rd, Register Rn, Register Rm) {  \
1697       add_sub_carry(op, Rd, Rn, Rm);                    \
1698     }
1699 
1700   INSN(adcw, 0b000);
1701   INSN(adcsw, 0b001);
1702   INSN(sbcw, 0b010);
1703   INSN(sbcsw, 0b011);
1704   INSN(adc, 0b100);
1705   INSN(adcs, 0b101);
1706   INSN(sbc,0b110);
1707   INSN(sbcs, 0b111);
1708 
1709 #undef INSN
1710 
1711   // Conditional compare (both kinds)
1712   void conditional_compare(unsigned op, int o1, int o2, int o3,
1713                            Register Rn, unsigned imm5, unsigned nzcv,
1714                            unsigned cond) {
1715     starti;
1716     f(op, 31, 29);
1717     f(0b11010010, 28, 21);
1718     f(cond, 15, 12);
1719     f(o1, 11);
1720     f(o2, 10);
1721     f(o3, 4);
1722     f(nzcv, 3, 0);
1723     f(imm5, 20, 16), zrf(Rn, 5);
1724   }
1725 
1726 #define INSN(NAME, op)                                                  \
1727   void NAME(Register Rn, Register Rm, int imm, Condition cond) {        \
1728     int regNumber = (Rm == zr ? 31 : (uintptr_t)Rm);                    \
1729     conditional_compare(op, 0, 0, 0, Rn, regNumber, imm, cond);         \
1730   }                                                                     \
1731                                                                         \
1732   void NAME(Register Rn, int imm5, int imm, Condition cond) {           \
1733     conditional_compare(op, 1, 0, 0, Rn, imm5, imm, cond);              \
1734   }
1735 
1736   INSN(ccmnw, 0b001);
1737   INSN(ccmpw, 0b011);
1738   INSN(ccmn, 0b101);
1739   INSN(ccmp, 0b111);
1740 
1741 #undef INSN
1742 
1743   // Conditional select
1744   void conditional_select(unsigned op, unsigned op2,
1745                           Register Rd, Register Rn, Register Rm,
1746                           unsigned cond) {
1747     starti;
1748     f(op, 31, 29);
1749     f(0b11010100, 28, 21);
1750     f(cond, 15, 12);
1751     f(op2, 11, 10);
1752     zrf(Rm, 16), zrf(Rn, 5), rf(Rd, 0);
1753   }
1754 
1755 #define INSN(NAME, op, op2)                                             \
1756   void NAME(Register Rd, Register Rn, Register Rm, Condition cond) {    \
1757     conditional_select(op, op2, Rd, Rn, Rm, cond);                      \
1758   }
1759 
1760   INSN(cselw, 0b000, 0b00);
1761   INSN(csincw, 0b000, 0b01);
1762   INSN(csinvw, 0b010, 0b00);
1763   INSN(csnegw, 0b010, 0b01);
1764   INSN(csel, 0b100, 0b00);
1765   INSN(csinc, 0b100, 0b01);
1766   INSN(csinv, 0b110, 0b00);
1767   INSN(csneg, 0b110, 0b01);
1768 
1769 #undef INSN
1770 
1771   // Data processing
1772   void data_processing(Instruction_aarch64 &current_insn, unsigned op29, unsigned opcode,
1773                        Register Rd, Register Rn) {
1774     f(op29, 31, 29), f(0b11010110, 28, 21);
1775     f(opcode, 15, 10);
1776     rf(Rn, 5), rf(Rd, 0);
1777   }
1778 
1779   // (1 source)
1780 #define INSN(NAME, op29, opcode2, opcode)                       \
1781   void NAME(Register Rd, Register Rn) {                         \
1782     starti;                                                     \
1783     f(opcode2, 20, 16);                                         \
1784     data_processing(current_insn, op29, opcode, Rd, Rn);        \
1785   }
1786 
1787   INSN(rbitw,  0b010, 0b00000, 0b00000);
1788   INSN(rev16w, 0b010, 0b00000, 0b00001);
1789   INSN(revw,   0b010, 0b00000, 0b00010);
1790   INSN(clzw,   0b010, 0b00000, 0b00100);
1791   INSN(clsw,   0b010, 0b00000, 0b00101);
1792 
1793   INSN(rbit,   0b110, 0b00000, 0b00000);
1794   INSN(rev16,  0b110, 0b00000, 0b00001);
1795   INSN(rev32,  0b110, 0b00000, 0b00010);
1796   INSN(rev,    0b110, 0b00000, 0b00011);
1797   INSN(clz,    0b110, 0b00000, 0b00100);
1798   INSN(cls,    0b110, 0b00000, 0b00101);
1799 
1800 #undef INSN
1801 
1802   // (2 sources)
1803 #define INSN(NAME, op29, opcode)                                \
1804   void NAME(Register Rd, Register Rn, Register Rm) {            \
1805     starti;                                                     \
1806     rf(Rm, 16);                                                 \
1807     data_processing(current_insn, op29, opcode, Rd, Rn);        \
1808   }
1809 
1810   INSN(udivw, 0b000, 0b000010);
1811   INSN(sdivw, 0b000, 0b000011);
1812   INSN(lslvw, 0b000, 0b001000);
1813   INSN(lsrvw, 0b000, 0b001001);
1814   INSN(asrvw, 0b000, 0b001010);
1815   INSN(rorvw, 0b000, 0b001011);
1816 
1817   INSN(udiv, 0b100, 0b000010);
1818   INSN(sdiv, 0b100, 0b000011);
1819   INSN(lslv, 0b100, 0b001000);
1820   INSN(lsrv, 0b100, 0b001001);
1821   INSN(asrv, 0b100, 0b001010);
1822   INSN(rorv, 0b100, 0b001011);
1823 
1824 #undef INSN
1825 
1826   // (3 sources)
1827   void data_processing(unsigned op54, unsigned op31, unsigned o0,
1828                        Register Rd, Register Rn, Register Rm,
1829                        Register Ra) {
1830     starti;
1831     f(op54, 31, 29), f(0b11011, 28, 24);
1832     f(op31, 23, 21), f(o0, 15);
1833     zrf(Rm, 16), zrf(Ra, 10), zrf(Rn, 5), zrf(Rd, 0);
1834   }
1835 
1836 #define INSN(NAME, op54, op31, o0)                                      \
1837   void NAME(Register Rd, Register Rn, Register Rm, Register Ra) {       \
1838     data_processing(op54, op31, o0, Rd, Rn, Rm, Ra);                    \
1839   }
1840 
1841   INSN(maddw, 0b000, 0b000, 0);
1842   INSN(msubw, 0b000, 0b000, 1);
1843   INSN(madd, 0b100, 0b000, 0);
1844   INSN(msub, 0b100, 0b000, 1);
1845   INSN(smaddl, 0b100, 0b001, 0);
1846   INSN(smsubl, 0b100, 0b001, 1);
1847   INSN(umaddl, 0b100, 0b101, 0);
1848   INSN(umsubl, 0b100, 0b101, 1);
1849 
1850 #undef INSN
1851 
1852 #define INSN(NAME, op54, op31, o0)                                      \
1853   void NAME(Register Rd, Register Rn, Register Rm) {                    \
1854     data_processing(op54, op31, o0, Rd, Rn, Rm, as_Register(31));       \
1855   }
1856 
1857   INSN(smulh, 0b100, 0b010, 0);
1858   INSN(umulh, 0b100, 0b110, 0);
1859 
1860 #undef INSN
1861 
1862   // Floating-point data-processing (1 source)
1863   void data_processing(unsigned op31, unsigned type, unsigned opcode,
1864                        FloatRegister Vd, FloatRegister Vn) {
1865     starti;
1866     f(op31, 31, 29);
1867     f(0b11110, 28, 24);
1868     f(type, 23, 22), f(1, 21), f(opcode, 20, 15), f(0b10000, 14, 10);
1869     rf(Vn, 5), rf(Vd, 0);
1870   }
1871 
1872 #define INSN(NAME, op31, type, opcode)                  \
1873   void NAME(FloatRegister Vd, FloatRegister Vn) {       \
1874     data_processing(op31, type, opcode, Vd, Vn);        \
1875   }
1876 
1877 private:
1878   INSN(i_fmovs, 0b000, 0b00, 0b000000);
1879 public:
1880   INSN(fabss, 0b000, 0b00, 0b000001);
1881   INSN(fnegs, 0b000, 0b00, 0b000010);
1882   INSN(fsqrts, 0b000, 0b00, 0b000011);
1883   INSN(fcvts, 0b000, 0b00, 0b000101);   // Single-precision to double-precision
1884 
1885 private:
1886   INSN(i_fmovd, 0b000, 0b01, 0b000000);
1887 public:
1888   INSN(fabsd, 0b000, 0b01, 0b000001);
1889   INSN(fnegd, 0b000, 0b01, 0b000010);
1890   INSN(fsqrtd, 0b000, 0b01, 0b000011);
1891   INSN(fcvtd, 0b000, 0b01, 0b000100);   // Double-precision to single-precision
1892 
1893   void fmovd(FloatRegister Vd, FloatRegister Vn) {
1894     assert(Vd != Vn, "should be");
1895     i_fmovd(Vd, Vn);
1896   }
1897 
1898   void fmovs(FloatRegister Vd, FloatRegister Vn) {
1899     assert(Vd != Vn, "should be");
1900     i_fmovs(Vd, Vn);
1901   }
1902 
1903 private:
1904   void _fcvt_narrow_extend(FloatRegister Vd, SIMD_Arrangement Ta,
1905                            FloatRegister Vn, SIMD_Arrangement Tb, bool do_extend) {
1906     assert((do_extend && (Tb >> 1) + 1 == (Ta >> 1))
1907            || (!do_extend && (Ta >> 1) + 1 == (Tb >> 1)), "Incompatible arrangement");
1908     starti;
1909     int op30 = (do_extend ? Tb : Ta) & 1;
1910     int op22 = ((do_extend ? Ta : Tb) >> 1) & 1;
1911     f(0, 31), f(op30, 30), f(0b0011100, 29, 23), f(op22, 22);
1912     f(0b100001011, 21, 13), f(do_extend ? 1 : 0, 12), f(0b10, 11, 10);
1913     rf(Vn, 5), rf(Vd, 0);
1914   }
1915 
1916 public:
1917   void fcvtl(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn,  SIMD_Arrangement Tb) {
1918     assert(Tb == T4H || Tb == T8H|| Tb == T2S || Tb == T4S, "invalid arrangement");
1919     _fcvt_narrow_extend(Vd, Ta, Vn, Tb, true);
1920   }
1921 
1922   void fcvtn(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn,  SIMD_Arrangement Tb) {
1923     assert(Ta == T4H || Ta == T8H|| Ta == T2S || Ta == T4S, "invalid arrangement");
1924     _fcvt_narrow_extend(Vd, Ta, Vn, Tb, false);
1925   }
1926 
1927 #undef INSN
1928 
1929   // Floating-point data-processing (2 source)
1930   void data_processing(unsigned op31, unsigned type, unsigned opcode,
1931                        FloatRegister Vd, FloatRegister Vn, FloatRegister Vm) {
1932     starti;
1933     f(op31, 31, 29);
1934     f(0b11110, 28, 24);
1935     f(type, 23, 22), f(1, 21), f(opcode, 15, 10);
1936     rf(Vm, 16), rf(Vn, 5), rf(Vd, 0);
1937   }
1938 
1939 #define INSN(NAME, op31, type, opcode)                  \
1940   void NAME(FloatRegister Vd, FloatRegister Vn, FloatRegister Vm) {     \
1941     data_processing(op31, type, opcode, Vd, Vn, Vm);    \
1942   }
1943 
1944   INSN(fabds,  0b011, 0b10, 0b110101);
1945   INSN(fmuls,  0b000, 0b00, 0b000010);
1946   INSN(fdivs,  0b000, 0b00, 0b000110);
1947   INSN(fadds,  0b000, 0b00, 0b001010);
1948   INSN(fsubs,  0b000, 0b00, 0b001110);
1949   INSN(fmaxs,  0b000, 0b00, 0b010010);
1950   INSN(fmins,  0b000, 0b00, 0b010110);
1951   INSN(fnmuls, 0b000, 0b00, 0b100010);
1952 
1953   INSN(fabdd,  0b011, 0b11, 0b110101);
1954   INSN(fmuld,  0b000, 0b01, 0b000010);
1955   INSN(fdivd,  0b000, 0b01, 0b000110);
1956   INSN(faddd,  0b000, 0b01, 0b001010);
1957   INSN(fsubd,  0b000, 0b01, 0b001110);
1958   INSN(fmaxd,  0b000, 0b01, 0b010010);
1959   INSN(fmind,  0b000, 0b01, 0b010110);
1960   INSN(fnmuld, 0b000, 0b01, 0b100010);
1961 
1962 #undef INSN
1963 
1964    // Floating-point data-processing (3 source)
1965   void data_processing(unsigned op31, unsigned type, unsigned o1, unsigned o0,
1966                        FloatRegister Vd, FloatRegister Vn, FloatRegister Vm,
1967                        FloatRegister Va) {
1968     starti;
1969     f(op31, 31, 29);
1970     f(0b11111, 28, 24);
1971     f(type, 23, 22), f(o1, 21), f(o0, 15);
1972     rf(Vm, 16), rf(Va, 10), rf(Vn, 5), rf(Vd, 0);
1973   }
1974 
1975 #define INSN(NAME, op31, type, o1, o0)                                  \
1976   void NAME(FloatRegister Vd, FloatRegister Vn, FloatRegister Vm,       \
1977             FloatRegister Va) {                                         \
1978     data_processing(op31, type, o1, o0, Vd, Vn, Vm, Va);                \
1979   }
1980 
1981   INSN(fmadds, 0b000, 0b00, 0, 0);
1982   INSN(fmsubs, 0b000, 0b00, 0, 1);
1983   INSN(fnmadds, 0b000, 0b00, 1, 0);
1984   INSN(fnmsubs, 0b000, 0b00, 1, 1);
1985 
1986   INSN(fmaddd, 0b000, 0b01, 0, 0);
1987   INSN(fmsubd, 0b000, 0b01, 0, 1);
1988   INSN(fnmaddd, 0b000, 0b01, 1, 0);
1989   INSN(fnmsub, 0b000, 0b01, 1, 1);
1990 
1991 #undef INSN
1992 
1993    // Floating-point conditional select
1994   void fp_conditional_select(unsigned op31, unsigned type,
1995                              unsigned op1, unsigned op2,
1996                              Condition cond, FloatRegister Vd,
1997                              FloatRegister Vn, FloatRegister Vm) {
1998     starti;
1999     f(op31, 31, 29);
2000     f(0b11110, 28, 24);
2001     f(type, 23, 22);
2002     f(op1, 21, 21);
2003     f(op2, 11, 10);
2004     f(cond, 15, 12);
2005     rf(Vm, 16), rf(Vn, 5), rf(Vd, 0);
2006   }
2007 
2008 #define INSN(NAME, op31, type, op1, op2)                                \
2009   void NAME(FloatRegister Vd, FloatRegister Vn,                         \
2010             FloatRegister Vm, Condition cond) {                         \
2011     fp_conditional_select(op31, type, op1, op2, cond, Vd, Vn, Vm);      \
2012   }
2013 
2014   INSN(fcsels, 0b000, 0b00, 0b1, 0b11);
2015   INSN(fcseld, 0b000, 0b01, 0b1, 0b11);
2016 
2017 #undef INSN
2018 
2019    // Floating-point<->integer conversions
2020   void float_int_convert(unsigned op31, unsigned type,
2021                          unsigned rmode, unsigned opcode,
2022                          Register Rd, Register Rn) {
2023     starti;
2024     f(op31, 31, 29);
2025     f(0b11110, 28, 24);
2026     f(type, 23, 22), f(1, 21), f(rmode, 20, 19);
2027     f(opcode, 18, 16), f(0b000000, 15, 10);
2028     zrf(Rn, 5), zrf(Rd, 0);
2029   }
2030 
2031 #define INSN(NAME, op31, type, rmode, opcode)                           \
2032   void NAME(Register Rd, FloatRegister Vn) {                            \
2033     float_int_convert(op31, type, rmode, opcode, Rd, as_Register(Vn));  \
2034   }
2035 
2036   INSN(fcvtzsw, 0b000, 0b00, 0b11, 0b000);
2037   INSN(fcvtzs,  0b100, 0b00, 0b11, 0b000);
2038   INSN(fcvtzdw, 0b000, 0b01, 0b11, 0b000);
2039   INSN(fcvtzd,  0b100, 0b01, 0b11, 0b000);
2040 
2041   INSN(fmovs, 0b000, 0b00, 0b00, 0b110);
2042   INSN(fmovd, 0b100, 0b01, 0b00, 0b110);
2043 
2044   INSN(fmovhid, 0b100, 0b10, 0b01, 0b110);
2045 
2046 #undef INSN
2047 
2048 #define INSN(NAME, op31, type, rmode, opcode)                           \
2049   void NAME(FloatRegister Vd, Register Rn) {                            \
2050     float_int_convert(op31, type, rmode, opcode, as_Register(Vd), Rn);  \
2051   }
2052 
2053   INSN(fmovs, 0b000, 0b00, 0b00, 0b111);
2054   INSN(fmovd, 0b100, 0b01, 0b00, 0b111);
2055 
2056   INSN(scvtfws, 0b000, 0b00, 0b00, 0b010);
2057   INSN(scvtfs,  0b100, 0b00, 0b00, 0b010);
2058   INSN(scvtfwd, 0b000, 0b01, 0b00, 0b010);
2059   INSN(scvtfd,  0b100, 0b01, 0b00, 0b010);
2060 
2061   // INSN(fmovhid, 0b100, 0b10, 0b01, 0b111);
2062 
2063 #undef INSN
2064 
2065   enum sign_kind { SIGNED, UNSIGNED };
2066 
2067 private:
2068   void _xcvtf_scalar_integer(sign_kind sign, unsigned sz,
2069                              FloatRegister Rd, FloatRegister Rn) {
2070     starti;
2071     f(0b01, 31, 30), f(sign == SIGNED ? 0 : 1, 29);
2072     f(0b111100, 27, 23), f((sz >> 1) & 1, 22), f(0b100001110110, 21, 10);
2073     rf(Rn, 5), rf(Rd, 0);
2074   }
2075 
2076 public:
2077 #define INSN(NAME, sign, sz)                        \
2078   void NAME(FloatRegister Rd, FloatRegister Rn) {   \
2079     _xcvtf_scalar_integer(sign, sz, Rd, Rn);        \
2080   }
2081 
2082   INSN(scvtfs, SIGNED, 0);
2083   INSN(scvtfd, SIGNED, 1);
2084 
2085 #undef INSN
2086 
2087 private:
2088   void _xcvtf_vector_integer(sign_kind sign, SIMD_Arrangement T,
2089                              FloatRegister Rd, FloatRegister Rn) {
2090     assert(T == T2S || T == T4S || T == T2D, "invalid arrangement");
2091     starti;
2092     f(0, 31), f(T & 1, 30), f(sign == SIGNED ? 0 : 1, 29);
2093     f(0b011100, 28, 23), f((T >> 1) & 1, 22), f(0b100001110110, 21, 10);
2094     rf(Rn, 5), rf(Rd, 0);
2095   }
2096 
2097 public:
2098   void scvtfv(SIMD_Arrangement T, FloatRegister Rd, FloatRegister Rn) {
2099     _xcvtf_vector_integer(SIGNED, T, Rd, Rn);
2100   }
2101 
2102   // Floating-point compare
2103   void float_compare(unsigned op31, unsigned type,
2104                      unsigned op, unsigned op2,
2105                      FloatRegister Vn, FloatRegister Vm = as_FloatRegister(0)) {
2106     starti;
2107     f(op31, 31, 29);
2108     f(0b11110, 28, 24);
2109     f(type, 23, 22), f(1, 21);
2110     f(op, 15, 14), f(0b1000, 13, 10), f(op2, 4, 0);
2111     rf(Vn, 5), rf(Vm, 16);
2112   }
2113 
2114 
2115 #define INSN(NAME, op31, type, op, op2)                 \
2116   void NAME(FloatRegister Vn, FloatRegister Vm) {       \
2117     float_compare(op31, type, op, op2, Vn, Vm);         \
2118   }
2119 
2120 #define INSN1(NAME, op31, type, op, op2)        \
2121   void NAME(FloatRegister Vn, double d) {       \
2122     assert_cond(d == 0.0);                      \
2123     float_compare(op31, type, op, op2, Vn);     \
2124   }
2125 
2126   INSN(fcmps, 0b000, 0b00, 0b00, 0b00000);
2127   INSN1(fcmps, 0b000, 0b00, 0b00, 0b01000);
2128   // INSN(fcmpes, 0b000, 0b00, 0b00, 0b10000);
2129   // INSN1(fcmpes, 0b000, 0b00, 0b00, 0b11000);
2130 
2131   INSN(fcmpd, 0b000,   0b01, 0b00, 0b00000);
2132   INSN1(fcmpd, 0b000,  0b01, 0b00, 0b01000);
2133   // INSN(fcmped, 0b000,  0b01, 0b00, 0b10000);
2134   // INSN1(fcmped, 0b000, 0b01, 0b00, 0b11000);
2135 
2136 #undef INSN
2137 #undef INSN1
2138 
2139 // Floating-point compare. 3-registers versions (scalar).
2140 #define INSN(NAME, sz, e)                                             \
2141   void NAME(FloatRegister Vd, FloatRegister Vn, FloatRegister Vm) {   \
2142     starti;                                                           \
2143     f(0b01111110, 31, 24), f(e, 23), f(sz, 22), f(1, 21), rf(Vm, 16); \
2144     f(0b111011, 15, 10), rf(Vn, 5), rf(Vd, 0);                        \
2145   }                                                                   \
2146 
2147   INSN(facged, 1, 0); // facge-double
2148   INSN(facges, 0, 0); // facge-single
2149   INSN(facgtd, 1, 1); // facgt-double
2150   INSN(facgts, 0, 1); // facgt-single
2151 
2152 #undef INSN
2153 
2154   // Floating-point Move (immediate)
2155 private:
2156   unsigned pack(double value);
2157 
2158   void fmov_imm(FloatRegister Vn, double value, unsigned size) {
2159     starti;
2160     f(0b00011110, 31, 24), f(size, 23, 22), f(1, 21);
2161     f(pack(value), 20, 13), f(0b10000000, 12, 5);
2162     rf(Vn, 0);
2163   }
2164 
2165 public:
2166 
2167   void fmovs(FloatRegister Vn, double value) {
2168     if (value)
2169       fmov_imm(Vn, value, 0b00);
2170     else
2171       movi(Vn, T2S, 0);
2172   }
2173   void fmovd(FloatRegister Vn, double value) {
2174     if (value)
2175       fmov_imm(Vn, value, 0b01);
2176     else
2177       movi(Vn, T1D, 0);
2178   }
2179 
2180    // Floating-point rounding
2181    // type: half-precision = 11
2182    //       single         = 00
2183    //       double         = 01
2184    // rmode: A = Away     = 100
2185    //        I = current  = 111
2186    //        M = MinusInf = 010
2187    //        N = eveN     = 000
2188    //        P = PlusInf  = 001
2189    //        X = eXact    = 110
2190    //        Z = Zero     = 011
2191   void float_round(unsigned type, unsigned rmode, FloatRegister Rd, FloatRegister Rn) {
2192     starti;
2193     f(0b00011110, 31, 24);
2194     f(type, 23, 22);
2195     f(0b1001, 21, 18);
2196     f(rmode, 17, 15);
2197     f(0b10000, 14, 10);
2198     rf(Rn, 5), rf(Rd, 0);
2199   }
2200 #define INSN(NAME, type, rmode)                   \
2201   void NAME(FloatRegister Vd, FloatRegister Vn) { \
2202     float_round(type, rmode, Vd, Vn);             \
2203   }
2204 
2205 public:
2206   INSN(frintah, 0b11, 0b100);
2207   INSN(frintih, 0b11, 0b111);
2208   INSN(frintmh, 0b11, 0b010);
2209   INSN(frintnh, 0b11, 0b000);
2210   INSN(frintph, 0b11, 0b001);
2211   INSN(frintxh, 0b11, 0b110);
2212   INSN(frintzh, 0b11, 0b011);
2213 
2214   INSN(frintas, 0b00, 0b100);
2215   INSN(frintis, 0b00, 0b111);
2216   INSN(frintms, 0b00, 0b010);
2217   INSN(frintns, 0b00, 0b000);
2218   INSN(frintps, 0b00, 0b001);
2219   INSN(frintxs, 0b00, 0b110);
2220   INSN(frintzs, 0b00, 0b011);
2221 
2222   INSN(frintad, 0b01, 0b100);
2223   INSN(frintid, 0b01, 0b111);
2224   INSN(frintmd, 0b01, 0b010);
2225   INSN(frintnd, 0b01, 0b000);
2226   INSN(frintpd, 0b01, 0b001);
2227   INSN(frintxd, 0b01, 0b110);
2228   INSN(frintzd, 0b01, 0b011);
2229 #undef INSN
2230 
2231 private:
  // Table indexed by SIMD_Arrangement. The post-index ld_st overload
  // multiplies an entry by the register count to obtain the immediate it
  // expects for non-replicating accesses. Declared here, defined elsewhere.
  static short SIMD_Size_in_bytes[];
2233 
2234 public:
2235 #define INSN(NAME, op)                                                  \
2236   void NAME(FloatRegister Rt, SIMD_RegVariant T, const Address &adr) {  \
2237     ld_st2(as_Register(Rt), adr, (int)T & 3, op + ((T==Q) ? 0b10:0b00), 1); \
2238   }
2239 
2240   INSN(ldr, 1);
2241   INSN(str, 0);
2242 
2243 #undef INSN
2244 
2245  private:
2246 
2247   void ld_st(FloatRegister Vt, SIMD_Arrangement T, Register Xn, int op1, int op2) {
2248     starti;
2249     f(0,31), f((int)T & 1, 30);
2250     f(op1, 29, 21), f(0, 20, 16), f(op2, 15, 12);
2251     f((int)T >> 1, 11, 10), srf(Xn, 5), rf(Vt, 0);
2252   }
2253   void ld_st(FloatRegister Vt, SIMD_Arrangement T, Register Xn,
2254              int imm, int op1, int op2, int regs) {
2255 
2256     bool replicate = op2 >> 2 == 3;
2257     // post-index value (imm) is formed differently for replicate/non-replicate ld* instructions
2258     int expectedImmediate = replicate ? regs * (1 << (T >> 1)) : SIMD_Size_in_bytes[T] * regs;
2259     guarantee(T < T1Q , "incorrect arrangement");
2260     guarantee(imm == expectedImmediate, "bad offset");
2261     starti;
2262     f(0,31), f((int)T & 1, 30);
2263     f(op1 | 0b100, 29, 21), f(0b11111, 20, 16), f(op2, 15, 12);
2264     f((int)T >> 1, 11, 10), srf(Xn, 5), rf(Vt, 0);
2265   }
2266   void ld_st(FloatRegister Vt, SIMD_Arrangement T, Register Xn,
2267              Register Xm, int op1, int op2) {
2268     starti;
2269     f(0,31), f((int)T & 1, 30);
2270     f(op1 | 0b100, 29, 21), rf(Xm, 16), f(op2, 15, 12);
2271     f((int)T >> 1, 11, 10), srf(Xn, 5), rf(Vt, 0);
2272   }
2273 
2274   void ld_st(FloatRegister Vt, SIMD_Arrangement T, Address a, int op1, int op2, int regs) {
2275     switch (a.getMode()) {
2276     case Address::base_plus_offset:
2277       guarantee(a.offset() == 0, "no offset allowed here");
2278       ld_st(Vt, T, a.base(), op1, op2);
2279       break;
2280     case Address::post:
2281       ld_st(Vt, T, a.base(), a.offset(), op1, op2, regs);
2282       break;
2283     case Address::post_reg:
2284       ld_st(Vt, T, a.base(), a.index(), op1, op2);
2285       break;
2286     default:
2287       ShouldNotReachHere();
2288     }
2289   }
2290 
2291  public:
2292 
2293 #define INSN1(NAME, op1, op2)                                           \
2294   void NAME(FloatRegister Vt, SIMD_Arrangement T, const Address &a) {   \
2295     ld_st(Vt, T, a, op1, op2, 1);                                       \
2296  }
2297 
2298 #define INSN2(NAME, op1, op2)                                           \
2299   void NAME(FloatRegister Vt, FloatRegister Vt2, SIMD_Arrangement T, const Address &a) { \
2300     assert(Vt->successor() == Vt2, "Registers must be ordered");        \
2301     ld_st(Vt, T, a, op1, op2, 2);                                       \
2302   }
2303 
2304 #define INSN3(NAME, op1, op2)                                           \
2305   void NAME(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3,     \
2306             SIMD_Arrangement T, const Address &a) {                     \
2307     assert(Vt->successor() == Vt2 && Vt2->successor() == Vt3,           \
2308            "Registers must be ordered");                                \
2309     ld_st(Vt, T, a, op1, op2, 3);                                       \
2310   }
2311 
2312 #define INSN4(NAME, op1, op2)                                           \
2313   void NAME(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3,     \
2314             FloatRegister Vt4, SIMD_Arrangement T, const Address &a) {  \
2315     assert(Vt->successor() == Vt2 && Vt2->successor() == Vt3 &&         \
2316            Vt3->successor() == Vt4, "Registers must be ordered");       \
2317     ld_st(Vt, T, a, op1, op2, 4);                                       \
2318   }
2319 
2320   INSN1(ld1,  0b001100010, 0b0111);
2321   INSN2(ld1,  0b001100010, 0b1010);
2322   INSN3(ld1,  0b001100010, 0b0110);
2323   INSN4(ld1,  0b001100010, 0b0010);
2324 
2325   INSN2(ld2,  0b001100010, 0b1000);
2326   INSN3(ld3,  0b001100010, 0b0100);
2327   INSN4(ld4,  0b001100010, 0b0000);
2328 
2329   INSN1(st1,  0b001100000, 0b0111);
2330   INSN2(st1,  0b001100000, 0b1010);
2331   INSN3(st1,  0b001100000, 0b0110);
2332   INSN4(st1,  0b001100000, 0b0010);
2333 
2334   INSN2(st2,  0b001100000, 0b1000);
2335   INSN3(st3,  0b001100000, 0b0100);
2336   INSN4(st4,  0b001100000, 0b0000);
2337 
2338   INSN1(ld1r, 0b001101010, 0b1100);
2339   INSN2(ld2r, 0b001101011, 0b1100);
2340   INSN3(ld3r, 0b001101010, 0b1110);
2341   INSN4(ld4r, 0b001101011, 0b1110);
2342 
2343 #undef INSN1
2344 #undef INSN2
2345 #undef INSN3
2346 #undef INSN4
2347 
2348 #define INSN(NAME, opc)                                                                 \
2349   void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { \
2350     starti;                                                                             \
2351     assert(T == T8B || T == T16B, "must be T8B or T16B");                               \
2352     f(0, 31), f((int)T & 1, 30), f(opc, 29, 21);                                        \
2353     rf(Vm, 16), f(0b000111, 15, 10), rf(Vn, 5), rf(Vd, 0);                              \
2354   }
2355 
2356   INSN(eor,  0b101110001);
2357   INSN(orr,  0b001110101);
2358   INSN(andr, 0b001110001);
2359   INSN(bic,  0b001110011);
2360   INSN(bif,  0b101110111);
2361   INSN(bit,  0b101110101);
2362   INSN(bsl,  0b101110011);
2363   INSN(orn,  0b001110111);
2364 
2365 #undef INSN
2366 
2367 #define INSN(NAME, opc, opc2, acceptT2D)                                                \
2368   void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { \
2369     guarantee(T != T1Q && T != T1D, "incorrect arrangement");                           \
2370     if (!acceptT2D) guarantee(T != T2D, "incorrect arrangement");                       \
2371     starti;                                                                             \
2372     f(0, 31), f((int)T & 1, 30), f(opc, 29), f(0b01110, 28, 24);                        \
2373     f((int)T >> 1, 23, 22), f(1, 21), rf(Vm, 16), f(opc2, 15, 10);                      \
2374     rf(Vn, 5), rf(Vd, 0);                                                               \
2375   }
2376 
2377   INSN(addv,   0, 0b100001, true);  // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
2378   INSN(subv,   1, 0b100001, true);  // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
2379   INSN(uqsubv, 1, 0b001011, true);  // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
2380   INSN(mulv,   0, 0b100111, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
2381   INSN(mlav,   0, 0b100101, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
2382   INSN(mlsv,   1, 0b100101, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
2383   INSN(sshl,   0, 0b010001, true);  // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
2384   INSN(ushl,   1, 0b010001, true);  // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
2385   INSN(addpv,  0, 0b101111, true);  // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
2386   INSN(smullv, 0, 0b110000, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
2387   INSN(umullv, 1, 0b110000, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
2388   INSN(umlalv, 1, 0b100000, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
2389   INSN(maxv,   0, 0b011001, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
2390   INSN(minv,   0, 0b011011, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
2391   INSN(smaxp,  0, 0b101001, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
2392   INSN(sminp,  0, 0b101011, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
2393   INSN(cmeq,   1, 0b100011, true);  // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
2394   INSN(cmgt,   0, 0b001101, true);  // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
2395   INSN(cmge,   0, 0b001111, true);  // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
2396   INSN(cmhi,   1, 0b001101, true);  // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
2397   INSN(cmhs,   1, 0b001111, true);  // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
2398 
2399 #undef INSN
2400 
2401 #define INSN(NAME, opc, opc2, accepted) \
2402   void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) {                   \
2403     guarantee(T != T1Q && T != T1D, "incorrect arrangement");                           \
2404     if (accepted < 3) guarantee(T != T2D, "incorrect arrangement");                     \
2405     if (accepted < 2) guarantee(T != T2S, "incorrect arrangement");                     \
2406     if (accepted < 1) guarantee(T == T8B || T == T16B, "incorrect arrangement");        \
2407     starti;                                                                             \
2408     f(0, 31), f((int)T & 1, 30), f(opc, 29), f(0b01110, 28, 24);                        \
2409     f((int)T >> 1, 23, 22), f(opc2, 21, 10);                                            \
2410     rf(Vn, 5), rf(Vd, 0);                                                               \
2411   }
2412 
2413   INSN(absr,   0, 0b100000101110, 3); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
2414   INSN(negr,   1, 0b100000101110, 3); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
2415   INSN(notr,   1, 0b100000010110, 0); // accepted arrangements: T8B, T16B
2416   INSN(addv,   0, 0b110001101110, 1); // accepted arrangements: T8B, T16B, T4H, T8H,      T4S
2417   INSN(smaxv,  0, 0b110000101010, 1); // accepted arrangements: T8B, T16B, T4H, T8H,      T4S
2418   INSN(umaxv,  1, 0b110000101010, 1); // accepted arrangements: T8B, T16B, T4H, T8H,      T4S
2419   INSN(sminv,  0, 0b110001101010, 1); // accepted arrangements: T8B, T16B, T4H, T8H,      T4S
2420   INSN(uminv,  1, 0b110001101010, 1); // accepted arrangements: T8B, T16B, T4H, T8H,      T4S
2421   INSN(cls,    0, 0b100000010010, 2); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
2422   INSN(clz,    1, 0b100000010010, 2); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
2423   INSN(cnt,    0, 0b100000010110, 0); // accepted arrangements: T8B, T16B
2424   INSN(uaddlp, 1, 0b100000001010, 2); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
2425   INSN(uaddlv, 1, 0b110000001110, 1); // accepted arrangements: T8B, T16B, T4H, T8H,      T4S
2426 
2427 #undef INSN
2428 
2429 #define INSN(NAME, opc) \
2430   void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) {                  \
2431     starti;                                                                            \
2432     assert(T == T4S, "arrangement must be T4S");                                       \
2433     f(0, 31), f((int)T & 1, 30), f(0b101110, 29, 24), f(opc, 23),                      \
2434     f(T == T4S ? 0 : 1, 22), f(0b110000111110, 21, 10); rf(Vn, 5), rf(Vd, 0);          \
2435   }
2436 
2437   INSN(fmaxv, 0);
2438   INSN(fminv, 1);
2439 
2440 #undef INSN
2441 
2442 #define INSN(NAME, op0, cmode0) \
2443   void NAME(FloatRegister Vd, SIMD_Arrangement T, unsigned imm8, unsigned lsl = 0) {   \
2444     unsigned cmode = cmode0;                                                           \
2445     unsigned op = op0;                                                                 \
2446     starti;                                                                            \
2447     assert(lsl == 0 ||                                                                 \
2448            ((T == T4H || T == T8H) && lsl == 8) ||                                     \
2449            ((T == T2S || T == T4S) && ((lsl >> 3) < 4) && ((lsl & 7) == 0)), "invalid shift");\
2450     cmode |= lsl >> 2;                                                                 \
2451     if (T == T4H || T == T8H) cmode |= 0b1000;                                         \
2452     if (!(T == T4H || T == T8H || T == T2S || T == T4S)) {                             \
2453       assert(op == 0 && cmode0 == 0, "must be MOVI");                                  \
2454       cmode = 0b1110;                                                                  \
2455       if (T == T1D || T == T2D) op = 1;                                                \
2456     }                                                                                  \
2457     f(0, 31), f((int)T & 1, 30), f(op, 29), f(0b0111100000, 28, 19);                   \
2458     f(imm8 >> 5, 18, 16), f(cmode, 15, 12), f(0x01, 11, 10), f(imm8 & 0b11111, 9, 5);  \
2459     rf(Vd, 0);                                                                         \
2460   }
2461 
2462   INSN(movi, 0, 0);
2463   INSN(orri, 0, 1);
2464   INSN(mvni, 1, 0);
2465   INSN(bici, 1, 1);
2466 
2467 #undef INSN
2468 
2469 #define INSN(NAME, op1, op2, op3) \
2470   void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { \
2471     starti;                                                                             \
2472     assert(T == T2S || T == T4S || T == T2D, "invalid arrangement");                    \
2473     f(0, 31), f((int)T & 1, 30), f(op1, 29), f(0b01110, 28, 24), f(op2, 23);            \
2474     f(T==T2D ? 1:0, 22); f(1, 21), rf(Vm, 16), f(op3, 15, 10), rf(Vn, 5), rf(Vd, 0);    \
2475   }
2476 
2477   INSN(fabd, 1, 1, 0b110101);
2478   INSN(fadd, 0, 0, 0b110101);
2479   INSN(fdiv, 1, 0, 0b111111);
2480   INSN(fmul, 1, 0, 0b110111);
2481   INSN(fsub, 0, 1, 0b110101);
2482   INSN(fmla, 0, 0, 0b110011);
2483   INSN(fmls, 0, 1, 0b110011);
2484   INSN(fmax, 0, 0, 0b111101);
2485   INSN(fmin, 0, 1, 0b111101);
2486   INSN(fcmeq, 0, 0, 0b111001);
2487   INSN(fcmgt, 1, 1, 0b111001);
2488   INSN(fcmge, 1, 0, 0b111001);
2489 
2490 #undef INSN
2491 
2492 #define INSN(NAME, opc)                                                                 \
2493   void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { \
2494     starti;                                                                             \
2495     assert(T == T4S, "arrangement must be T4S");                                        \
2496     f(0b01011110000, 31, 21), rf(Vm, 16), f(opc, 15, 10), rf(Vn, 5), rf(Vd, 0);         \
2497   }
2498 
2499   INSN(sha1c,     0b000000);
2500   INSN(sha1m,     0b001000);
2501   INSN(sha1p,     0b000100);
2502   INSN(sha1su0,   0b001100);
2503   INSN(sha256h2,  0b010100);
2504   INSN(sha256h,   0b010000);
2505   INSN(sha256su1, 0b011000);
2506 
2507 #undef INSN
2508 
2509 #define INSN(NAME, opc)                                                                 \
2510   void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) {                   \
2511     starti;                                                                             \
2512     assert(T == T4S, "arrangement must be T4S");                                        \
2513     f(0b0101111000101000, 31, 16), f(opc, 15, 10), rf(Vn, 5), rf(Vd, 0);                \
2514   }
2515 
2516   INSN(sha1h,     0b000010);
2517   INSN(sha1su1,   0b000110);
2518   INSN(sha256su0, 0b001010);
2519 
2520 #undef INSN
2521 
2522 #define INSN(NAME, opc)                                                                 \
2523   void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { \
2524     starti;                                                                             \
2525     assert(T == T2D, "arrangement must be T2D");                                        \
2526     f(0b11001110011, 31, 21), rf(Vm, 16), f(opc, 15, 10), rf(Vn, 5), rf(Vd, 0);         \
2527   }
2528 
2529   INSN(sha512h,   0b100000);
2530   INSN(sha512h2,  0b100001);
2531   INSN(sha512su1, 0b100010);
2532 
2533 #undef INSN
2534 
2535 #define INSN(NAME, opc)                                                                 \
2536   void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) {                   \
2537     starti;                                                                             \
2538     assert(T == T2D, "arrangement must be T2D");                                        \
2539     f(opc, 31, 10), rf(Vn, 5), rf(Vd, 0);                                               \
2540   }
2541 
2542   INSN(sha512su0, 0b1100111011000000100000);
2543 
2544 #undef INSN
2545 
2546 #define INSN(NAME, opc)                                                                                   \
2547   void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm, FloatRegister Va) { \
2548     starti;                                                                                               \
2549     assert(T == T16B, "arrangement must be T16B");                                                        \
2550     f(0b11001110, 31, 24), f(opc, 23, 21), rf(Vm, 16), f(0b0, 15, 15), rf(Va, 10), rf(Vn, 5), rf(Vd, 0);  \
2551   }
2552 
2553   INSN(eor3, 0b000);
2554   INSN(bcax, 0b001);
2555 
2556 #undef INSN
2557 
2558 #define INSN(NAME, opc)                                                                               \
2559   void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm, unsigned imm) { \
2560     starti;                                                                                           \
2561     assert(T == T2D, "arrangement must be T2D");                                                      \
2562     f(0b11001110, 31, 24), f(opc, 23, 21), rf(Vm, 16), f(imm, 15, 10), rf(Vn, 5), rf(Vd, 0);          \
2563   }
2564 
2565   INSN(xar, 0b100);
2566 
2567 #undef INSN
2568 
2569 #define INSN(NAME, opc)                                                                           \
2570   void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) {           \
2571     starti;                                                                                       \
2572     assert(T == T2D, "arrangement must be T2D");                                                  \
2573     f(0b11001110, 31, 24), f(opc, 23, 21), rf(Vm, 16), f(0b100011, 15, 10), rf(Vn, 5), rf(Vd, 0); \
2574   }
2575 
2576   INSN(rax1, 0b011);
2577 
2578 #undef INSN
2579 
2580 #define INSN(NAME, opc)                           \
2581   void NAME(FloatRegister Vd, FloatRegister Vn) { \
2582     starti;                                       \
2583     f(opc, 31, 10), rf(Vn, 5), rf(Vd, 0);         \
2584   }
2585 
2586   INSN(aese, 0b0100111000101000010010);
2587   INSN(aesd, 0b0100111000101000010110);
2588   INSN(aesmc, 0b0100111000101000011010);
2589   INSN(aesimc, 0b0100111000101000011110);
2590 
2591 #undef INSN
2592 
2593 #define INSN(NAME, op1, op2) \
2594   void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm, int index = 0) { \
2595     starti;                                                                                            \
2596     assert(T == T2S || T == T4S || T == T2D, "invalid arrangement");                                   \
2597     assert(index >= 0 && ((T == T2D && index <= 1) || (T != T2D && index <= 3)), "invalid index");     \
2598     f(0, 31), f((int)T & 1, 30), f(op1, 29); f(0b011111, 28, 23);                                      \
2599     f(T == T2D ? 1 : 0, 22), f(T == T2D ? 0 : index & 1, 21), rf(Vm, 16);                              \
2600     f(op2, 15, 12), f(T == T2D ? index : (index >> 1), 11), f(0, 10);                                  \
2601     rf(Vn, 5), rf(Vd, 0);                                                                              \
2602   }
2603 
2604   // FMLA/FMLS - Vector - Scalar
2605   INSN(fmlavs, 0, 0b0001);
2606   INSN(fmlsvs, 0, 0b0101);
2607   // FMULX - Vector - Scalar
2608   INSN(fmulxvs, 1, 0b1001);
2609 
2610 #undef INSN
2611 
2612   // Floating-point Reciprocal Estimate
2613   void frecpe(FloatRegister Vd, FloatRegister Vn, SIMD_RegVariant type) {
2614     assert(type == D || type == S, "Wrong type for frecpe");
2615     starti;
2616     f(0b010111101, 31, 23);
2617     f(type == D ? 1 : 0, 22);
2618     f(0b100001110110, 21, 10);
2619     rf(Vn, 5), rf(Vd, 0);
2620   }
2621 
2622   // (long) {a, b} -> (a + b)
2623   void addpd(FloatRegister Vd, FloatRegister Vn) {
2624     starti;
2625     f(0b0101111011110001101110, 31, 10);
2626     rf(Vn, 5), rf(Vd, 0);
2627   }
2628 
2629   // Floating-point AdvSIMD scalar pairwise
2630 #define INSN(NAME, op1, op2) \
2631   void NAME(FloatRegister Vd, FloatRegister Vn, SIMD_RegVariant type) {                 \
2632     starti;                                                                             \
2633     assert(type == D || type == S, "Wrong type for faddp/fmaxp/fminp");                 \
2634     f(0b0111111, 31, 25), f(op1, 24, 23),                                               \
2635     f(type == S ? 0 : 1, 22), f(0b11000, 21, 17), f(op2, 16, 10), rf(Vn, 5), rf(Vd, 0); \
2636   }
2637 
2638   INSN(faddp, 0b00, 0b0110110);
2639   INSN(fmaxp, 0b00, 0b0111110);
2640   INSN(fminp, 0b01, 0b0111110);
2641 
2642 #undef INSN
2643 
2644   void ins(FloatRegister Vd, SIMD_RegVariant T, FloatRegister Vn, int didx, int sidx) {
2645     starti;
2646     assert(T != Q, "invalid register variant");
2647     f(0b01101110000, 31, 21), f(((didx<<1)|1)<<(int)T, 20, 16), f(0, 15);
2648     f(sidx<<(int)T, 14, 11), f(1, 10), rf(Vn, 5), rf(Vd, 0);
2649   }
2650 
2651 #define INSN(NAME, cond, op1, op2)                                                      \
2652   void NAME(Register Rd, FloatRegister Vn, SIMD_RegVariant T, int idx) {                \
2653     starti;                                                                             \
2654     assert(cond, "invalid register variant");                                           \
2655     f(0, 31), f(op1, 30), f(0b001110000, 29, 21);                                       \
2656     f(((idx << 1) | 1) << (int)T, 20, 16), f(op2, 15, 10);                              \
2657     rf(Vn, 5), rf(Rd, 0);                                                               \
2658   }
2659 
2660   INSN(umov, (T != Q), (T == D ? 1 : 0), 0b001111);
2661   INSN(smov, (T < D),  1,                0b001011);
2662 
2663 #undef INSN
2664 
2665 #define INSN(NAME, opc, opc2, isSHR)                                    \
2666   void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, int shift){ \
2667     starti;                                                             \
2668     /* The encodings for the immh:immb fields (bits 22:16) in *SHR are  \
2669      *   0001 xxx       8B/16B, shift = 16  - UInt(immh:immb)           \
2670      *   001x xxx       4H/8H,  shift = 32  - UInt(immh:immb)           \
2671      *   01xx xxx       2S/4S,  shift = 64  - UInt(immh:immb)           \
2672      *   1xxx xxx       1D/2D,  shift = 128 - UInt(immh:immb)           \
2673      *   (1D is RESERVED)                                               \
2674      * for SHL shift is calculated as:                                  \
2675      *   0001 xxx       8B/16B, shift = UInt(immh:immb) - 8             \
2676      *   001x xxx       4H/8H,  shift = UInt(immh:immb) - 16            \
2677      *   01xx xxx       2S/4S,  shift = UInt(immh:immb) - 32            \
2678      *   1xxx xxx       1D/2D,  shift = UInt(immh:immb) - 64            \
2679      *   (1D is RESERVED)                                               \
2680      */                                                                 \
2681     guarantee(!isSHR || (isSHR && (shift != 0)), "impossible encoding");\
2682     assert((1 << ((T>>1)+3)) > shift, "Invalid Shift value");           \
2683     int cVal = (1 << (((T >> 1) + 3) + (isSHR ? 1 : 0)));               \
2684     int encodedShift = isSHR ? cVal - shift : cVal + shift;             \
2685     f(0, 31), f(T & 1, 30), f(opc, 29), f(0b011110, 28, 23),            \
2686     f(encodedShift, 22, 16); f(opc2, 15, 10), rf(Vn, 5), rf(Vd, 0);     \
2687   }
2688 
2689   INSN(shl,  0, 0b010101, /* isSHR = */ false);
2690   INSN(sshr, 0, 0b000001, /* isSHR = */ true);
2691   INSN(ushr, 1, 0b000001, /* isSHR = */ true);
2692   INSN(usra, 1, 0b000101, /* isSHR = */ true);
2693   INSN(ssra, 0, 0b000101, /* isSHR = */ true);
2694 
2695 #undef INSN
2696 
2697 #define INSN(NAME, opc, opc2, isSHR)                                    \
2698   void NAME(FloatRegister Vd, FloatRegister Vn, int shift){             \
2699     starti;                                                             \
2700     int encodedShift = isSHR ? 128 - shift : 64 + shift;                \
2701     f(0b01, 31, 30), f(opc, 29), f(0b111110, 28, 23),                   \
2702     f(encodedShift, 22, 16); f(opc2, 15, 10), rf(Vn, 5), rf(Vd, 0);     \
2703   }
2704 
2705   INSN(shld,  0, 0b010101, /* isSHR = */ false);
2706   INSN(sshrd, 0, 0b000001, /* isSHR = */ true);
2707   INSN(ushrd, 1, 0b000001, /* isSHR = */ true);
2708 
2709 #undef INSN
2710 
2711 private:
2712   void _xshll(sign_kind sign, FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) {
2713     starti;
2714     /* The encodings for the immh:immb fields (bits 22:16) are
2715      *   0001 xxx       8H, 8B/16B shift = xxx
2716      *   001x xxx       4S, 4H/8H  shift = xxxx
2717      *   01xx xxx       2D, 2S/4S  shift = xxxxx
2718      *   1xxx xxx       RESERVED
2719      */
2720     assert((Tb >> 1) + 1 == (Ta >> 1), "Incompatible arrangement");
2721     assert((1 << ((Tb>>1)+3)) > shift, "Invalid shift value");
2722     f(0, 31), f(Tb & 1, 30), f(sign == SIGNED ? 0 : 1, 29), f(0b011110, 28, 23);
2723     f((1 << ((Tb>>1)+3))|shift, 22, 16);
2724     f(0b101001, 15, 10), rf(Vn, 5), rf(Vd, 0);
2725   }
2726 
2727 public:
2728   void ushll(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn,  SIMD_Arrangement Tb, int shift) {
2729     assert(Tb == T8B || Tb == T4H || Tb == T2S, "invalid arrangement");
2730     _xshll(UNSIGNED, Vd, Ta, Vn, Tb, shift);
2731   }
2732 
2733   void ushll2(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn,  SIMD_Arrangement Tb, int shift) {
2734     assert(Tb == T16B || Tb == T8H || Tb == T4S, "invalid arrangement");
2735     _xshll(UNSIGNED, Vd, Ta, Vn, Tb, shift);
2736   }
2737 
2738   void uxtl(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn,  SIMD_Arrangement Tb) {
2739     ushll(Vd, Ta, Vn, Tb, 0);
2740   }
2741 
2742   void sshll(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn,  SIMD_Arrangement Tb, int shift) {
2743     assert(Tb == T8B || Tb == T4H || Tb == T2S, "invalid arrangement");
2744     _xshll(SIGNED, Vd, Ta, Vn, Tb, shift);
2745   }
2746 
2747   void sshll2(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn,  SIMD_Arrangement Tb, int shift) {
2748     assert(Tb == T16B || Tb == T8H || Tb == T4S, "invalid arrangement");
2749     _xshll(SIGNED, Vd, Ta, Vn, Tb, shift);
2750   }
2751 
2752   void sxtl(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn,  SIMD_Arrangement Tb) {
2753     sshll(Vd, Ta, Vn, Tb, 0);
2754   }
2755 
2756   // Move from general purpose register
2757   //   mov  Vd.T[index], Rn
2758   void mov(FloatRegister Vd, SIMD_Arrangement T, int index, Register Xn) {
2759     starti;
2760     f(0b01001110000, 31, 21), f(((1 << (T >> 1)) | (index << ((T >> 1) + 1))), 20, 16);
2761     f(0b000111, 15, 10), zrf(Xn, 5), rf(Vd, 0);
2762   }
2763 
2764   // Move to general purpose register
2765   //   mov  Rd, Vn.T[index]
2766   void mov(Register Xd, FloatRegister Vn, SIMD_Arrangement T, int index) {
2767     guarantee(T >= T2S && T < T1Q, "only D and S arrangements are supported");
2768     starti;
2769     f(0, 31), f((T >= T1D) ? 1:0, 30), f(0b001110000, 29, 21);
2770     f(((1 << (T >> 1)) | (index << ((T >> 1) + 1))), 20, 16);
2771     f(0b001111, 15, 10), rf(Vn, 5), rf(Xd, 0);
2772   }
2773 
2774 private:
2775   void _pmull(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, FloatRegister Vm, SIMD_Arrangement Tb) {
2776     starti;
2777     assert((Ta == T1Q && (Tb == T1D || Tb == T2D)) ||
2778            (Ta == T8H && (Tb == T8B || Tb == T16B)), "Invalid Size specifier");
2779     int size = (Ta == T1Q) ? 0b11 : 0b00;
2780     f(0, 31), f(Tb & 1, 30), f(0b001110, 29, 24), f(size, 23, 22);
2781     f(1, 21), rf(Vm, 16), f(0b111000, 15, 10), rf(Vn, 5), rf(Vd, 0);
2782   }
2783 
2784 public:
2785   void pmull(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, FloatRegister Vm, SIMD_Arrangement Tb) {
2786     assert(Tb == T1D || Tb == T8B, "pmull assumes T1D or T8B as the second size specifier");
2787     _pmull(Vd, Ta, Vn, Vm, Tb);
2788   }
2789 
2790   void pmull2(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, FloatRegister Vm, SIMD_Arrangement Tb) {
2791     assert(Tb == T2D || Tb == T16B, "pmull2 assumes T2D or T16B as the second size specifier");
2792     _pmull(Vd, Ta, Vn, Vm, Tb);
2793   }
2794 
2795   void uqxtn(FloatRegister Vd, SIMD_Arrangement Tb, FloatRegister Vn, SIMD_Arrangement Ta) {
2796     starti;
2797     int size_b = (int)Tb >> 1;
2798     int size_a = (int)Ta >> 1;
2799     assert(size_b < 3 && size_b == size_a - 1, "Invalid size specifier");
2800     f(0, 31), f(Tb & 1, 30), f(0b101110, 29, 24), f(size_b, 23, 22);
2801     f(0b100001010010, 21, 10), rf(Vn, 5), rf(Vd, 0);
2802   }
2803 
2804   void xtn(FloatRegister Vd, SIMD_Arrangement Tb, FloatRegister Vn, SIMD_Arrangement Ta) {
2805     starti;
2806     int size_b = (int)Tb >> 1;
2807     int size_a = (int)Ta >> 1;
2808     assert(size_b < 3 && size_b == size_a - 1, "Invalid size specifier");
2809     f(0, 31), f(Tb & 1, 30), f(0b001110, 29, 24), f(size_b, 23, 22);
2810     f(0b100001001010, 21, 10), rf(Vn, 5), rf(Vd, 0);
2811   }
2812 
2813   void dup(FloatRegister Vd, SIMD_Arrangement T, Register Xs)
2814   {
2815     starti;
2816     assert(T != T1D, "reserved encoding");
2817     f(0,31), f((int)T & 1, 30), f(0b001110000, 29, 21);
2818     f((1 << (T >> 1)), 20, 16), f(0b000011, 15, 10), zrf(Xs, 5), rf(Vd, 0);
2819   }
2820 
2821   void dup(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, int index = 0)
2822   {
2823     starti;
2824     assert(T != T1D, "reserved encoding");
2825     f(0, 31), f((int)T & 1, 30), f(0b001110000, 29, 21);
2826     f(((1 << (T >> 1)) | (index << ((T >> 1) + 1))), 20, 16);
2827     f(0b000001, 15, 10), rf(Vn, 5), rf(Vd, 0);
2828   }
2829 
2830   // Advanced SIMD scalar copy
2831   void dup(FloatRegister Vd, SIMD_RegVariant T, FloatRegister Vn, int index = 0)
2832   {
2833     starti;
2834     assert(T != Q, "invalid size");
2835     f(0b01011110000, 31, 21);
2836     f((1 << T) | (index << (T + 1)), 20, 16);
2837     f(0b000001, 15, 10), rf(Vn, 5), rf(Vd, 0);
2838   }
2839 
2840   // AdvSIMD ZIP/UZP/TRN
2841 #define INSN(NAME, opcode)                                              \
2842   void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { \
2843     guarantee(T != T1D && T != T1Q, "invalid arrangement");             \
2844     starti;                                                             \
2845     f(0, 31), f(0b001110, 29, 24), f(0, 21), f(0, 15);                  \
2846     f(opcode, 14, 12), f(0b10, 11, 10);                                 \
2847     rf(Vm, 16), rf(Vn, 5), rf(Vd, 0);                                   \
2848     f(T & 1, 30), f(T >> 1, 23, 22);                                    \
2849   }
2850 
2851   INSN(uzp1, 0b001);
2852   INSN(trn1, 0b010);
2853   INSN(zip1, 0b011);
2854   INSN(uzp2, 0b101);
2855   INSN(trn2, 0b110);
2856   INSN(zip2, 0b111);
2857 
2858 #undef INSN
2859 
2860   // CRC32 instructions
2861 #define INSN(NAME, c, sf, sz)                                             \
2862   void NAME(Register Rd, Register Rn, Register Rm) {                      \
2863     starti;                                                               \
2864     f(sf, 31), f(0b0011010110, 30, 21), f(0b010, 15, 13), f(c, 12);       \
2865     f(sz, 11, 10), rf(Rm, 16), rf(Rn, 5), rf(Rd, 0);                      \
2866   }
2867 
2868   INSN(crc32b,  0, 0, 0b00);
2869   INSN(crc32h,  0, 0, 0b01);
2870   INSN(crc32w,  0, 0, 0b10);
2871   INSN(crc32x,  0, 1, 0b11);
2872   INSN(crc32cb, 1, 0, 0b00);
2873   INSN(crc32ch, 1, 0, 0b01);
2874   INSN(crc32cw, 1, 0, 0b10);
2875   INSN(crc32cx, 1, 1, 0b11);
2876 
2877 #undef INSN
2878 
2879   // Table vector lookup
2880 #define INSN(NAME, op)                                                  \
2881   void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, unsigned registers, FloatRegister Vm) { \
2882     starti;                                                             \
2883     assert(T == T8B || T == T16B, "invalid arrangement");               \
2884     assert(0 < registers && registers <= 4, "invalid number of registers"); \
2885     f(0, 31), f((int)T & 1, 30), f(0b001110000, 29, 21), rf(Vm, 16), f(0, 15); \
2886     f(registers - 1, 14, 13), f(op, 12),f(0b00, 11, 10), rf(Vn, 5), rf(Vd, 0); \
2887   }
2888 
2889   INSN(tbl, 0);
2890   INSN(tbx, 1);
2891 
2892 #undef INSN
2893 
2894   // AdvSIMD two-reg misc
2895   // In this instruction group, the 2 bits in the size field ([23:22]) may be
2896   // fixed or determined by the "SIMD_Arrangement T", or both. The additional
2897   // parameter "tmask" is a 2-bit mask used to indicate which bits in the size
2898   // field are determined by the SIMD_Arrangement. The bit of "tmask" should be
2899   // set to 1 if corresponding bit marked as "x" in the ArmARM.
2900 #define INSN(NAME, U, size, tmask, opcode)                                          \
2901   void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) {               \
2902        starti;                                                                      \
2903        assert((ASSERTION), MSG);                                                    \
2904        f(0, 31), f((int)T & 1, 30), f(U, 29), f(0b01110, 28, 24);                   \
2905        f(size | ((int)(T >> 1) & tmask), 23, 22), f(0b10000, 21, 17);               \
2906        f(opcode, 16, 12), f(0b10, 11, 10), rf(Vn, 5), rf(Vd, 0);                    \
2907  }
2908 
2909 #define MSG "invalid arrangement"
2910 
2911 #define ASSERTION (T == T2S || T == T4S || T == T2D)
2912   INSN(fsqrt,  1, 0b10, 0b01, 0b11111);
2913   INSN(fabs,   0, 0b10, 0b01, 0b01111);
2914   INSN(fneg,   1, 0b10, 0b01, 0b01111);
2915   INSN(frintn, 0, 0b00, 0b01, 0b11000);
2916   INSN(frintm, 0, 0b00, 0b01, 0b11001);
2917   INSN(frintp, 0, 0b10, 0b01, 0b11000);
2918   INSN(fcvtzs, 0, 0b10, 0b01, 0b11011);
2919 #undef ASSERTION
2920 
2921 #define ASSERTION (T == T8B || T == T16B || T == T4H || T == T8H || T == T2S || T == T4S)
2922   INSN(rev64, 0, 0b00, 0b11, 0b00000);
2923 #undef ASSERTION
2924 
2925 #define ASSERTION (T == T8B || T == T16B || T == T4H || T == T8H)
2926   INSN(rev32, 1, 0b00, 0b11, 0b00000);
2927 #undef ASSERTION
2928 
2929 #define ASSERTION (T == T8B || T == T16B)
2930   INSN(rev16, 0, 0b00, 0b11, 0b00001);
2931   INSN(rbit,  1, 0b01, 0b00, 0b00101);
2932 #undef ASSERTION
2933 
2934 #undef MSG
2935 
2936 #undef INSN
2937 
2938   void ext(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm, int index)
2939   {
2940     starti;
2941     assert(T == T8B || T == T16B, "invalid arrangement");
2942     assert((T == T8B && index <= 0b0111) || (T == T16B && index <= 0b1111), "Invalid index value");
2943     f(0, 31), f((int)T & 1, 30), f(0b101110000, 29, 21);
2944     rf(Vm, 16), f(0, 15), f(index, 14, 11);
2945     f(0, 10), rf(Vn, 5), rf(Vd, 0);
2946   }
2947 
2948 // SVE arithmetic - unpredicated
2949 #define INSN(NAME, opcode)                                                             \
2950   void NAME(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn, FloatRegister Zm) { \
2951     starti;                                                                            \
2952     assert(T != Q, "invalid register variant");                                        \
2953     f(0b00000100, 31, 24), f(T, 23, 22), f(1, 21),                                     \
2954     rf(Zm, 16), f(0, 15, 13), f(opcode, 12, 10), rf(Zn, 5), rf(Zd, 0);                 \
2955   }
2956   INSN(sve_add, 0b000);
2957   INSN(sve_sub, 0b001);
2958 #undef INSN
2959 
2960 // SVE integer add/subtract immediate (unpredicated)
2961 #define INSN(NAME, op)                                                  \
2962   void NAME(FloatRegister Zd, SIMD_RegVariant T, unsigned imm8) {       \
2963     starti;                                                             \
2964     /* The immediate is an unsigned value in the range 0 to 255, and    \
2965      * for element width of 16 bits or higher it may also be a          \
2966      * positive multiple of 256 in the range 256 to 65280.              \
2967      */                                                                 \
2968     assert(T != Q, "invalid size");                                     \
2969     int sh = 0;                                                         \
2970     if (imm8 <= 0xff) {                                                 \
2971       sh = 0;                                                           \
2972     } else if (T != B && imm8 <= 0xff00 && (imm8 & 0xff) == 0) {        \
2973       sh = 1;                                                           \
2974       imm8 = (imm8 >> 8);                                               \
2975     } else {                                                            \
2976       guarantee(false, "invalid immediate");                            \
2977     }                                                                   \
2978     f(0b00100101, 31, 24), f(T, 23, 22), f(0b10000, 21, 17);            \
2979     f(op, 16, 14), f(sh, 13), f(imm8, 12, 5), rf(Zd, 0);                \
2980   }
2981 
2982   INSN(sve_add, 0b011);
2983   INSN(sve_sub, 0b111);
2984 #undef INSN
2985 
2986 // SVE floating-point arithmetic - unpredicated
2987 #define INSN(NAME, opcode)                                                             \
2988   void NAME(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn, FloatRegister Zm) { \
2989     starti;                                                                            \
2990     assert(T == S || T == D, "invalid register variant");                              \
2991     f(0b01100101, 31, 24), f(T, 23, 22), f(0, 21),                                     \
2992     rf(Zm, 16), f(0, 15, 13), f(opcode, 12, 10), rf(Zn, 5), rf(Zd, 0);                 \
2993   }
2994 
2995   INSN(sve_fadd, 0b000);
2996   INSN(sve_fmul, 0b010);
2997   INSN(sve_fsub, 0b001);
2998 #undef INSN
2999 
3000 private:
3001   void sve_predicate_reg_insn(unsigned op24, unsigned op13,
3002                               FloatRegister Zd_or_Vd, SIMD_RegVariant T,
3003                               PRegister Pg, FloatRegister Zn_or_Vn) {
3004     starti;
3005     f(op24, 31, 24), f(T, 23, 22), f(op13, 21, 13);
3006     pgrf(Pg, 10), rf(Zn_or_Vn, 5), rf(Zd_or_Vd, 0);
3007   }
3008 
3009   void sve_shift_imm_encoding(SIMD_RegVariant T, int shift, bool isSHR,
3010                               int& tszh, int& tszl_imm) {
3011     /* The encodings for the tszh:tszl:imm3 fields
3012      * for shift right is calculated as:
3013      *   0001 xxx       B, shift = 16  - UInt(tszh:tszl:imm3)
3014      *   001x xxx       H, shift = 32  - UInt(tszh:tszl:imm3)
3015      *   01xx xxx       S, shift = 64  - UInt(tszh:tszl:imm3)
3016      *   1xxx xxx       D, shift = 128 - UInt(tszh:tszl:imm3)
3017      * for shift left is calculated as:
3018      *   0001 xxx       B, shift = UInt(tszh:tszl:imm3) - 8
3019      *   001x xxx       H, shift = UInt(tszh:tszl:imm3) - 16
3020      *   01xx xxx       S, shift = UInt(tszh:tszl:imm3) - 32
3021      *   1xxx xxx       D, shift = UInt(tszh:tszl:imm3) - 64
3022      */
3023     assert(T != Q, "Invalid register variant");
3024     if (isSHR) {
3025       assert(((1 << (T + 3)) >= shift) && (shift > 0) , "Invalid shift value");
3026     } else {
3027       assert(((1 << (T + 3)) > shift) && (shift >= 0) , "Invalid shift value");
3028     }
3029     int cVal = (1 << ((T + 3) + (isSHR ? 1 : 0)));
3030     int encodedShift = isSHR ? cVal - shift : cVal + shift;
3031     tszh = encodedShift >> 5;
3032     tszl_imm = encodedShift & 0x1f;
3033   }
3034 
3035 public:
3036 
3037 // SVE integer arithmetic - predicate
3038 #define INSN(NAME, op1, op2)                                                                            \
3039   void NAME(FloatRegister Zdn_or_Zd_or_Vd, SIMD_RegVariant T, PRegister Pg, FloatRegister Znm_or_Vn) {  \
3040     assert(T != Q, "invalid register variant");                                                         \
3041     sve_predicate_reg_insn(op1, op2, Zdn_or_Zd_or_Vd, T, Pg, Znm_or_Vn);                                \
3042   }
3043 
3044   INSN(sve_abs,  0b00000100, 0b010110101); // vector abs, unary
3045   INSN(sve_add,  0b00000100, 0b000000000); // vector add
3046   INSN(sve_and,  0b00000100, 0b011010000); // vector and
3047   INSN(sve_andv, 0b00000100, 0b011010001); // bitwise and reduction to scalar
3048   INSN(sve_asr,  0b00000100, 0b010000100); // vector arithmetic shift right
3049   INSN(sve_cnt,  0b00000100, 0b011010101); // count non-zero bits
3050   INSN(sve_cpy,  0b00000101, 0b100000100); // copy scalar to each active vector element
3051   INSN(sve_eor,  0b00000100, 0b011001000); // vector eor
3052   INSN(sve_eorv, 0b00000100, 0b011001001); // bitwise xor reduction to scalar
3053   INSN(sve_lsl,  0b00000100, 0b010011100); // vector logical shift left
3054   INSN(sve_lsr,  0b00000100, 0b010001100); // vector logical shift right
3055   INSN(sve_mul,  0b00000100, 0b010000000); // vector mul
3056   INSN(sve_neg,  0b00000100, 0b010111101); // vector neg, unary
3057   INSN(sve_not,  0b00000100, 0b011110101); // bitwise invert vector, unary
3058   INSN(sve_orr,  0b00000100, 0b011000000); // vector or
3059   INSN(sve_orv,  0b00000100, 0b011000001); // bitwise or reduction to scalar
3060   INSN(sve_smax, 0b00000100, 0b001000000); // signed maximum vectors
3061   INSN(sve_smaxv, 0b00000100, 0b001000001); // signed maximum reduction to scalar
3062   INSN(sve_smin,  0b00000100, 0b001010000); // signed minimum vectors
3063   INSN(sve_sminv, 0b00000100, 0b001010001); // signed minimum reduction to scalar
3064   INSN(sve_sub,   0b00000100, 0b000001000); // vector sub
3065   INSN(sve_uaddv, 0b00000100, 0b000001001); // unsigned add reduction to scalar
3066 #undef INSN
3067 
3068 // SVE floating-point arithmetic - predicate
3069 #define INSN(NAME, op1, op2)                                                                          \
3070   void NAME(FloatRegister Zd_or_Zdn_or_Vd, SIMD_RegVariant T, PRegister Pg, FloatRegister Zn_or_Zm) { \
3071     assert(T == S || T == D, "invalid register variant");                                             \
3072     sve_predicate_reg_insn(op1, op2, Zd_or_Zdn_or_Vd, T, Pg, Zn_or_Zm);                               \
3073   }
3074 
3075   INSN(sve_fabs,    0b00000100, 0b011100101);
3076   INSN(sve_fadd,    0b01100101, 0b000000100);
3077   INSN(sve_fadda,   0b01100101, 0b011000001); // add strictly-ordered reduction to scalar Vd
3078   INSN(sve_fdiv,    0b01100101, 0b001101100);
3079   INSN(sve_fmax,    0b01100101, 0b000110100); // floating-point maximum
3080   INSN(sve_fmaxv,   0b01100101, 0b000110001); // floating-point maximum recursive reduction to scalar
3081   INSN(sve_fmin,    0b01100101, 0b000111100); // floating-point minimum
3082   INSN(sve_fminv,   0b01100101, 0b000111001); // floating-point minimum recursive reduction to scalar
3083   INSN(sve_fmul,    0b01100101, 0b000010100);
3084   INSN(sve_fneg,    0b00000100, 0b011101101);
3085   INSN(sve_frintm,  0b01100101, 0b000010101); // floating-point round to integral value, toward minus infinity
3086   INSN(sve_frintn,  0b01100101, 0b000000101); // floating-point round to integral value, nearest with ties to even
3087   INSN(sve_frintp,  0b01100101, 0b000001101); // floating-point round to integral value, toward plus infinity
3088   INSN(sve_fsqrt,   0b01100101, 0b001101101);
3089   INSN(sve_fsub,    0b01100101, 0b000001100);
3090 #undef INSN
3091 
3092   // SVE multiple-add/sub - predicated
3093 #define INSN(NAME, op0, op1, op2)                                                                     \
3094   void NAME(FloatRegister Zda, SIMD_RegVariant T, PRegister Pg, FloatRegister Zn, FloatRegister Zm) { \
3095     starti;                                                                                           \
3096     assert(T != Q, "invalid size");                                                                   \
3097     f(op0, 31, 24), f(T, 23, 22), f(op1, 21), rf(Zm, 16);                                             \
3098     f(op2, 15, 13), pgrf(Pg, 10), rf(Zn, 5), rf(Zda, 0);                                              \
3099   }
3100 
3101   INSN(sve_fmla,  0b01100101, 1, 0b000); // floating-point fused multiply-add, writing addend: Zda = Zda + Zn * Zm
3102   INSN(sve_fmls,  0b01100101, 1, 0b001); // floating-point fused multiply-subtract: Zda = Zda + -Zn * Zm
3103   INSN(sve_fnmla, 0b01100101, 1, 0b010); // floating-point negated fused multiply-add: Zda = -Zda + -Zn * Zm
3104   INSN(sve_fnmls, 0b01100101, 1, 0b011); // floating-point negated fused multiply-subtract: Zda = -Zda + Zn * Zm
3105   INSN(sve_fmad,  0b01100101, 1, 0b100); // floating-point fused multiply-add, writing multiplicand: Zda = Zm + Zda * Zn
3106   INSN(sve_mla,   0b00000100, 0, 0b010); // multiply-add: Zda = Zda + Zn*Zm
3107   INSN(sve_mls,   0b00000100, 0, 0b011); // multiply-subtract: Zda = Zda + -Zn*Zm
3108 #undef INSN
3109 
3110 // SVE bitwise logical - unpredicated
3111 #define INSN(NAME, opc)                                              \
3112   void NAME(FloatRegister Zd, FloatRegister Zn, FloatRegister Zm) {  \
3113     starti;                                                          \
3114     f(0b00000100, 31, 24), f(opc, 23, 22), f(1, 21),                 \
3115     rf(Zm, 16), f(0b001100, 15, 10), rf(Zn, 5), rf(Zd, 0);           \
3116   }
3117   INSN(sve_and, 0b00);
3118   INSN(sve_eor, 0b10);
3119   INSN(sve_orr, 0b01);
3120   INSN(sve_bic, 0b11);
3121 #undef INSN
3122 
3123 // SVE bitwise logical with immediate (unpredicated)
3124 #define INSN(NAME, opc)                                                      \
3125   void NAME(FloatRegister Zd, SIMD_RegVariant T, uint64_t imm) {             \
3126     starti;                                                                  \
3127     unsigned elembits = regVariant_to_elemBits(T);                           \
3128     uint32_t val = encode_sve_logical_immediate(elembits, imm);              \
3129     f(0b00000101, 31, 24), f(opc, 23, 22), f(0b0000, 21, 18);                \
3130     f(val, 17, 5), rf(Zd, 0);                                                \
3131   }
3132   INSN(sve_and, 0b10);
3133   INSN(sve_eor, 0b01);
3134   INSN(sve_orr, 0b00);
3135 #undef INSN
3136 
3137 // SVE shift immediate - unpredicated
3138 #define INSN(NAME, opc, isSHR)                                                  \
3139   void NAME(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn, int shift) { \
3140     starti;                                                                     \
3141     int tszh, tszl_imm;                                                         \
3142     sve_shift_imm_encoding(T, shift, isSHR, tszh, tszl_imm);                    \
3143     f(0b00000100, 31, 24);                                                      \
3144     f(tszh, 23, 22), f(1,21), f(tszl_imm, 20, 16);                              \
3145     f(0b100, 15, 13), f(opc, 12, 10), rf(Zn, 5), rf(Zd, 0);                     \
3146   }
3147 
3148   INSN(sve_asr, 0b100, /* isSHR = */ true);
3149   INSN(sve_lsl, 0b111, /* isSHR = */ false);
3150   INSN(sve_lsr, 0b101, /* isSHR = */ true);
3151 #undef INSN
3152 
3153 // SVE bitwise shift by immediate (predicated)
3154 #define INSN(NAME, opc, isSHR)                                                  \
3155   void NAME(FloatRegister Zdn, SIMD_RegVariant T, PRegister Pg, int shift) {    \
3156     starti;                                                                     \
3157     int tszh, tszl_imm;                                                         \
3158     sve_shift_imm_encoding(T, shift, isSHR, tszh, tszl_imm);                    \
3159     f(0b00000100, 31, 24), f(tszh, 23, 22), f(0b00, 21, 20), f(opc, 19, 16);    \
3160     f(0b100, 15, 13), pgrf(Pg, 10), f(tszl_imm, 9, 5), rf(Zdn, 0);              \
3161   }
3162 
3163   INSN(sve_asr, 0b0000, /* isSHR = */ true);
3164   INSN(sve_lsl, 0b0011, /* isSHR = */ false);
3165   INSN(sve_lsr, 0b0001, /* isSHR = */ true);
3166 #undef INSN
3167 
3168 private:
3169 
3170   // Scalar base + immediate index
3171   void sve_ld_st1(FloatRegister Zt, Register Xn, int imm, PRegister Pg,
3172               SIMD_RegVariant T, int op1, int type, int op2) {
3173     starti;
3174     assert_cond(T >= type);
3175     f(op1, 31, 25), f(type, 24, 23), f(T, 22, 21);
3176     f(0, 20), sf(imm, 19, 16), f(op2, 15, 13);
3177     pgrf(Pg, 10), srf(Xn, 5), rf(Zt, 0);
3178   }
3179 
3180   // Scalar base + scalar index
3181   void sve_ld_st1(FloatRegister Zt, Register Xn, Register Xm, PRegister Pg,
3182               SIMD_RegVariant T, int op1, int type, int op2) {
3183     starti;
3184     assert_cond(T >= type);
3185     f(op1, 31, 25), f(type, 24, 23), f(T, 22, 21);
3186     rf(Xm, 16), f(op2, 15, 13);
3187     pgrf(Pg, 10), srf(Xn, 5), rf(Zt, 0);
3188   }
3189 
3190   void sve_ld_st1(FloatRegister Zt, PRegister Pg,
3191               SIMD_RegVariant T, const Address &a,
3192               int op1, int type, int imm_op2, int scalar_op2) {
3193     switch (a.getMode()) {
3194     case Address::base_plus_offset:
3195       sve_ld_st1(Zt, a.base(), a.offset(), Pg, T, op1, type, imm_op2);
3196       break;
3197     case Address::base_plus_offset_reg:
3198       sve_ld_st1(Zt, a.base(), a.index(), Pg, T, op1, type, scalar_op2);
3199       break;
3200     default:
3201       ShouldNotReachHere();
3202     }
3203   }
3204 
3205 public:
3206 
3207 // SVE contiguous load/store
3208 #define INSN(NAME, op1, type, imm_op2, scalar_op2)                                   \
3209   void NAME(FloatRegister Zt, SIMD_RegVariant T, PRegister Pg, const Address &a) {   \
3210     assert(T != Q, "invalid register variant");                                      \
3211     sve_ld_st1(Zt, Pg, T, a, op1, type, imm_op2, scalar_op2);                        \
3212   }
3213 
3214   INSN(sve_ld1b, 0b1010010, 0b00, 0b101, 0b010);
3215   INSN(sve_st1b, 0b1110010, 0b00, 0b111, 0b010);
3216   INSN(sve_ld1h, 0b1010010, 0b01, 0b101, 0b010);
3217   INSN(sve_st1h, 0b1110010, 0b01, 0b111, 0b010);
3218   INSN(sve_ld1w, 0b1010010, 0b10, 0b101, 0b010);
3219   INSN(sve_st1w, 0b1110010, 0b10, 0b111, 0b010);
3220   INSN(sve_ld1d, 0b1010010, 0b11, 0b101, 0b010);
3221   INSN(sve_st1d, 0b1110010, 0b11, 0b111, 0b010);
3222 #undef INSN
3223 
3224 // Gather/scatter load/store (SVE) - scalar plus vector
3225 #define INSN(NAME, op1, type, op2, op3)                                         \
3226   void NAME(FloatRegister Zt, PRegister Pg, Register Xn, FloatRegister Zm) {    \
3227     starti;                                                                     \
3228     f(op1, 31, 25), f(type, 24, 23), f(op2, 22, 21), rf(Zm, 16);                \
3229     f(op3, 15, 13), pgrf(Pg, 10), srf(Xn, 5), rf(Zt, 0);                        \
3230   }
3231   // SVE 32-bit gather load words (scalar plus 32-bit scaled offsets)
3232   INSN(sve_ld1w_gather,  0b1000010, 0b10, 0b01, 0b010);
3233   // SVE 64-bit gather load (scalar plus 32-bit unpacked scaled offsets)
3234   INSN(sve_ld1d_gather,  0b1100010, 0b11, 0b01, 0b010);
3235   // SVE 32-bit scatter store (scalar plus 32-bit scaled offsets)
3236   INSN(sve_st1w_scatter, 0b1110010, 0b10, 0b11, 0b100);
3237   // SVE 64-bit scatter store (scalar plus unpacked 32-bit scaled offsets)
3238   INSN(sve_st1d_scatter, 0b1110010, 0b11, 0b01, 0b100);
3239 #undef INSN
3240 
3241 // SVE load/store - unpredicated
3242 #define INSN(NAME, op1)                                                         \
3243   void NAME(FloatRegister Zt, const Address &a)  {                              \
3244     starti;                                                                     \
3245     assert(a.index() == noreg, "invalid address variant");                      \
3246     f(op1, 31, 29), f(0b0010110, 28, 22), sf(a.offset() >> 3, 21, 16),          \
3247     f(0b010, 15, 13), f(a.offset() & 0x7, 12, 10), srf(a.base(), 5), rf(Zt, 0); \
3248   }
3249 
3250   INSN(sve_ldr, 0b100); // LDR (vector)
3251   INSN(sve_str, 0b111); // STR (vector)
3252 #undef INSN
3253 
3254 // SVE stack frame adjustment
3255 #define INSN(NAME, op) \
3256   void NAME(Register Xd, Register Xn, int imm6) {                 \
3257     starti;                                                       \
3258     f(0b000001000, 31, 23), f(op, 22, 21);                        \
3259     srf(Xn, 16), f(0b01010, 15, 11), sf(imm6, 10, 5), srf(Xd, 0); \
3260   }
3261 
3262   INSN(sve_addvl, 0b01); // Add multiple of vector register size to scalar register
3263   INSN(sve_addpl, 0b11); // Add multiple of predicate register size to scalar register
3264 #undef INSN
3265 
3266 // SVE inc/dec register by element count
3267 #define INSN(NAME, op) \
3268   void NAME(Register Xdn, SIMD_RegVariant T, unsigned imm4 = 1, int pattern = 0b11111) { \
3269     starti;                                                                              \
3270     assert(T != Q, "invalid size");                                                      \
3271     f(0b00000100,31, 24), f(T, 23, 22), f(0b11, 21, 20);                                 \
3272     f(imm4 - 1, 19, 16), f(0b11100, 15, 11), f(op, 10), f(pattern, 9, 5), rf(Xdn, 0);    \
3273   }
3274 
3275   INSN(sve_inc, 0);
3276   INSN(sve_dec, 1);
3277 #undef INSN
3278 
3279 // SVE predicate logical operations
3280 #define INSN(NAME, op1, op2, op3) \
3281   void NAME(PRegister Pd, PRegister Pg, PRegister Pn, PRegister Pm) { \
3282     starti;                                                           \
3283     f(0b00100101, 31, 24), f(op1, 23, 22), f(0b00, 21, 20);           \
3284     prf(Pm, 16), f(0b01, 15, 14), prf(Pg, 10), f(op2, 9);             \
3285     prf(Pn, 5), f(op3, 4), prf(Pd, 0);                                \
3286   }
3287 
3288   INSN(sve_and,  0b00, 0b0, 0b0);
3289   INSN(sve_ands, 0b01, 0b0, 0b0);
3290   INSN(sve_eor,  0b00, 0b1, 0b0);
3291   INSN(sve_eors, 0b01, 0b1, 0b0);
3292   INSN(sve_orr,  0b10, 0b0, 0b0);
3293   INSN(sve_orrs, 0b11, 0b0, 0b0);
3294   INSN(sve_bic,  0b00, 0b0, 0b1);
3295 #undef INSN
3296 
3297   // SVE increment register by predicate count
3298   void sve_incp(const Register rd, SIMD_RegVariant T, PRegister pg) {
3299     starti;
3300     assert(T != Q, "invalid size");
3301     f(0b00100101, 31, 24), f(T, 23, 22), f(0b1011001000100, 21, 9),
3302     prf(pg, 5), rf(rd, 0);
3303   }
3304 
3305   // SVE broadcast general-purpose register to vector elements (unpredicated)
3306   void sve_dup(FloatRegister Zd, SIMD_RegVariant T, Register Rn) {
3307     starti;
3308     assert(T != Q, "invalid size");
3309     f(0b00000101, 31, 24), f(T, 23, 22), f(0b100000001110, 21, 10);
3310     srf(Rn, 5), rf(Zd, 0);
3311   }
3312 
3313   // SVE broadcast signed immediate to vector elements (unpredicated)
3314   void sve_dup(FloatRegister Zd, SIMD_RegVariant T, int imm8) {
3315     starti;
3316     assert(T != Q, "invalid size");
3317     int sh = 0;
3318     if (imm8 <= 127 && imm8 >= -128) {
3319       sh = 0;
3320     } else if (T != B && imm8 <= 32512 && imm8 >= -32768 && (imm8 & 0xff) == 0) {
3321       sh = 1;
3322       imm8 = (imm8 >> 8);
3323     } else {
3324       guarantee(false, "invalid immediate");
3325     }
3326     f(0b00100101, 31, 24), f(T, 23, 22), f(0b11100011, 21, 14);
3327     f(sh, 13), sf(imm8, 12, 5), rf(Zd, 0);
3328   }
3329 
3330   // SVE predicate test
3331   void sve_ptest(PRegister Pg, PRegister Pn) {
3332     starti;
3333     f(0b001001010101000011, 31, 14), prf(Pg, 10), f(0, 9), prf(Pn, 5), f(0, 4, 0);
3334   }
3335 
3336   // SVE predicate initialize
3337   void sve_ptrue(PRegister pd, SIMD_RegVariant esize, int pattern = 0b11111) {
3338     starti;
3339     f(0b00100101, 31, 24), f(esize, 23, 22), f(0b011000111000, 21, 10);
3340     f(pattern, 9, 5), f(0b0, 4), prf(pd, 0);
3341   }
3342 
3343   // SVE predicate zero
3344   void sve_pfalse(PRegister pd) {
3345     starti;
3346     f(0b00100101, 31, 24), f(0b00, 23, 22), f(0b011000111001, 21, 10);
3347     f(0b000000, 9, 4), prf(pd, 0);
3348   }
3349 
3350 // SVE load/store predicate register
3351 #define INSN(NAME, op1)                                                  \
3352   void NAME(PRegister Pt, const Address &a)  {                           \
3353     starti;                                                              \
3354     assert(a.index() == noreg, "invalid address variant");               \
3355     f(op1, 31, 29), f(0b0010110, 28, 22), sf(a.offset() >> 3, 21, 16),   \
3356     f(0b000, 15, 13), f(a.offset() & 0x7, 12, 10), srf(a.base(), 5),     \
3357     f(0, 4), prf(Pt, 0);                                                 \
3358   }
3359 
3360   INSN(sve_ldr, 0b100); // LDR (predicate)
3361   INSN(sve_str, 0b111); // STR (predicate)
3362 #undef INSN
3363 
3364   // SVE move predicate register
3365   void sve_mov(PRegister Pd, PRegister Pn) {
3366     starti;
3367     f(0b001001011000, 31, 20), prf(Pn, 16), f(0b01, 15, 14), prf(Pn, 10);
3368     f(0, 9), prf(Pn, 5), f(0, 4), prf(Pd, 0);
3369   }
3370 
3371   // SVE copy general-purpose register to vector elements (predicated)
3372   void sve_cpy(FloatRegister Zd, SIMD_RegVariant T, PRegister Pg, Register Rn) {
3373     starti;
3374     assert(T != Q, "invalid size");
3375     f(0b00000101, 31, 24), f(T, 23, 22), f(0b101000101, 21, 13);
3376     pgrf(Pg, 10), srf(Rn, 5), rf(Zd, 0);
3377   }
3378 
3379   // SVE copy signed integer immediate to vector elements (predicated)
3380   void sve_cpy(FloatRegister Zd, SIMD_RegVariant T, PRegister Pg, int imm8, bool isMerge) {
3381     starti;
3382     assert(T != Q, "invalid size");
3383     int sh = 0;
3384     if (imm8 <= 127 && imm8 >= -128) {
3385       sh = 0;
3386     } else if (T != B && imm8 <= 32512 && imm8 >= -32768 && (imm8 & 0xff) == 0) {
3387       sh = 1;
3388       imm8 = (imm8 >> 8);
3389     } else {
3390       guarantee(false, "invalid immediate");
3391     }
3392     int m = isMerge ? 1 : 0;
3393     f(0b00000101, 31, 24), f(T, 23, 22), f(0b01, 21, 20);
3394     prf(Pg, 16), f(0b0, 15), f(m, 14), f(sh, 13), sf(imm8, 12, 5), rf(Zd, 0);
3395   }
3396 
3397   // SVE conditionally select elements from two vectors
3398   void sve_sel(FloatRegister Zd, SIMD_RegVariant T, PRegister Pg,
3399                FloatRegister Zn, FloatRegister Zm) {
3400     starti;
3401     assert(T != Q, "invalid size");
3402     f(0b00000101, 31, 24), f(T, 23, 22), f(0b1, 21), rf(Zm, 16);
3403     f(0b11, 15, 14), prf(Pg, 10), rf(Zn, 5), rf(Zd, 0);
3404   }
3405 
3406 // SVE Integer/Floating-Point Compare - Vectors
3407 #define INSN(NAME, op1, op2, fp)  \
3408   void NAME(Condition cond, PRegister Pd, SIMD_RegVariant T, PRegister Pg,             \
3409             FloatRegister Zn, FloatRegister Zm) {                                      \
3410     starti;                                                                            \
3411     if (fp == 0) {                                                                     \
3412       assert(T != Q, "invalid size");                                                  \
3413     } else {                                                                           \
3414       assert(T != B && T != Q, "invalid size");                                        \
3415       assert(cond != HI && cond != HS, "invalid condition for fcm");                   \
3416     }                                                                                  \
3417     int cond_op;                                                                       \
3418     switch(cond) {                                                                     \
3419       case EQ: cond_op = (op2 << 2) | 0b10; break;                                     \
3420       case NE: cond_op = (op2 << 2) | 0b11; break;                                     \
3421       case GE: cond_op = (op2 << 2) | 0b00; break;                                     \
3422       case GT: cond_op = (op2 << 2) | 0b01; break;                                     \
3423       case HI: cond_op = 0b0001; break;                                                \
3424       case HS: cond_op = 0b0000; break;                                                \
3425       default:                                                                         \
3426         ShouldNotReachHere();                                                          \
3427     }                                                                                  \
3428     f(op1, 31, 24), f(T, 23, 22), f(0, 21), rf(Zm, 16), f((cond_op >> 1) & 7, 15, 13); \
3429     pgrf(Pg, 10), rf(Zn, 5), f(cond_op & 1, 4), prf(Pd, 0);                            \
3430   }
3431 
3432   INSN(sve_cmp, 0b00100100, 0b10, 0);
3433   INSN(sve_fcm, 0b01100101, 0b01, 1);
3434 #undef INSN
3435 
3436 // SVE Integer Compare - Signed Immediate
3437 void sve_cmp(Condition cond, PRegister Pd, SIMD_RegVariant T,
3438              PRegister Pg, FloatRegister Zn, int imm5) {
3439   starti;
3440   assert(T != Q, "invalid size");
3441   guarantee(-16 <= imm5 && imm5 <= 15, "invalid immediate");
3442   int cond_op;
3443   switch(cond) {
3444     case EQ: cond_op = 0b1000; break;
3445     case NE: cond_op = 0b1001; break;
3446     case GE: cond_op = 0b0000; break;
3447     case GT: cond_op = 0b0001; break;
3448     case LE: cond_op = 0b0011; break;
3449     case LT: cond_op = 0b0010; break;
3450     default:
3451       ShouldNotReachHere();
3452   }
3453   f(0b00100101, 31, 24), f(T, 23, 22), f(0b0, 21), sf(imm5, 20, 16),
3454   f((cond_op >> 1) & 0x7, 15, 13), pgrf(Pg, 10), rf(Zn, 5);
3455   f(cond_op & 0x1, 4), prf(Pd, 0);
3456 }
3457 
3458 // SVE unpack vector elements
3459 #define INSN(NAME, op) \
3460   void NAME(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn) { \
3461     starti;                                                          \
3462     assert(T != B && T != Q, "invalid size");                        \
3463     f(0b00000101, 31, 24), f(T, 23, 22), f(0b1100, 21, 18);          \
3464     f(op, 17, 16), f(0b001110, 15, 10), rf(Zn, 5), rf(Zd, 0);        \
3465   }
3466 
3467   INSN(sve_uunpkhi, 0b11); // Signed unpack and extend half of vector - high half
3468   INSN(sve_uunpklo, 0b10); // Signed unpack and extend half of vector - low half
3469   INSN(sve_sunpkhi, 0b01); // Unsigned unpack and extend half of vector - high half
3470   INSN(sve_sunpklo, 0b00); // Unsigned unpack and extend half of vector - low half
3471 #undef INSN
3472 
3473 // SVE unpack predicate elements
3474 #define INSN(NAME, op) \
3475   void NAME(PRegister Pd, PRegister Pn) { \
3476     starti;                                                          \
3477     f(0b000001010011000, 31, 17), f(op, 16), f(0b0100000, 15, 9);    \
3478     prf(Pn, 5), f(0b0, 4), prf(Pd, 0);                               \
3479   }
3480 
3481   INSN(sve_punpkhi, 0b1); // Unpack and widen high half of predicate
3482   INSN(sve_punpklo, 0b0); // Unpack and widen low half of predicate
3483 #undef INSN
3484 
3485 // SVE permute vector elements
3486 #define INSN(NAME, op) \
3487   void NAME(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn, FloatRegister Zm) { \
3488     starti;                                                                            \
3489     assert(T != Q, "invalid size");                                                    \
3490     f(0b00000101, 31, 24), f(T, 23, 22), f(0b1, 21), rf(Zm, 16);                       \
3491     f(0b01101, 15, 11), f(op, 10), rf(Zn, 5), rf(Zd, 0);                               \
3492   }
3493 
3494   INSN(sve_uzp1, 0b0); // Concatenate even elements from two vectors
3495   INSN(sve_uzp2, 0b1); // Concatenate odd elements from two vectors
3496 #undef INSN
3497 
3498 // SVE permute predicate elements
3499 #define INSN(NAME, op) \
3500   void NAME(PRegister Pd, SIMD_RegVariant T, PRegister Pn, PRegister Pm) {             \
3501     starti;                                                                            \
3502     assert(T != Q, "invalid size");                                                    \
3503     f(0b00000101, 31, 24), f(T, 23, 22), f(0b10, 21, 20), prf(Pm, 16);                 \
3504     f(0b01001, 15, 11), f(op, 10), f(0b0, 9), prf(Pn, 5), f(0b0, 4), prf(Pd, 0);       \
3505   }
3506 
3507   INSN(sve_uzp1, 0b0); // Concatenate even elements from two predicates
3508   INSN(sve_uzp2, 0b1); // Concatenate odd elements from two predicates
3509 #undef INSN
3510 
3511 // Predicate counted loop (SVE) (32-bit variants are not included)
3512 #define INSN(NAME, decode)                                                \
3513   void NAME(PRegister Pd, SIMD_RegVariant T, Register Rn, Register Rm) {  \
3514     starti;                                                               \
3515     assert(T != Q, "invalid register variant");                           \
3516     f(0b00100101, 31, 24), f(T, 23, 22), f(1, 21),                        \
3517     zrf(Rm, 16), f(0, 15, 13), f(1, 12), f(decode >> 1, 11, 10),          \
3518     zrf(Rn, 5), f(decode & 1, 4), prf(Pd, 0);                             \
3519   }
3520 
3521   INSN(sve_whilelt, 0b010);  // While incrementing signed scalar less than scalar
3522   INSN(sve_whilele, 0b011);  // While incrementing signed scalar less than or equal to scalar
3523   INSN(sve_whilelo, 0b110);  // While incrementing unsigned scalar lower than scalar
3524   INSN(sve_whilels, 0b111);  // While incrementing unsigned scalar lower than or the same as scalar
3525 #undef INSN
3526 
3527   // SVE predicate reverse
3528   void sve_rev(PRegister Pd, SIMD_RegVariant T, PRegister Pn) {
3529     starti;
3530     assert(T != Q, "invalid size");
3531     f(0b00000101, 31, 24), f(T, 23, 22), f(0b1101000100000, 21, 9);
3532     prf(Pn, 5), f(0, 4), prf(Pd, 0);
3533   }
3534 
3535 // SVE partition break condition
3536 #define INSN(NAME, op) \
3537   void NAME(PRegister Pd, PRegister Pg, PRegister Pn, bool isMerge) {      \
3538     starti;                                                                \
3539     f(0b00100101, 31, 24), f(op, 23, 22), f(0b01000001, 21, 14);           \
3540     prf(Pg, 10), f(0b0, 9), prf(Pn, 5), f(isMerge ? 1 : 0, 4), prf(Pd, 0); \
3541   }
3542 
3543   INSN(sve_brka, 0b00); // Break after first true condition
3544   INSN(sve_brkb, 0b10); // Break before first true condition
3545 #undef INSN
3546 
3547 // Element count and increment scalar (SVE)
3548 #define INSN(NAME, TYPE)                                                             \
3549   void NAME(Register Xdn, unsigned imm4 = 1, int pattern = 0b11111) {                \
3550     starti;                                                                          \
3551     f(0b00000100, 31, 24), f(TYPE, 23, 22), f(0b10, 21, 20);                         \
3552     f(imm4 - 1, 19, 16), f(0b11100, 15, 11), f(0, 10), f(pattern, 9, 5), rf(Xdn, 0); \
3553   }
3554 
3555   INSN(sve_cntb, B);  // Set scalar to multiple of 8-bit predicate constraint element count
3556   INSN(sve_cnth, H);  // Set scalar to multiple of 16-bit predicate constraint element count
3557   INSN(sve_cntw, S);  // Set scalar to multiple of 32-bit predicate constraint element count
3558   INSN(sve_cntd, D);  // Set scalar to multiple of 64-bit predicate constraint element count
3559 #undef INSN
3560 
3561   // Set scalar to active predicate element count
3562   void sve_cntp(Register Xd, SIMD_RegVariant T, PRegister Pg, PRegister Pn) {
3563     starti;
3564     assert(T != Q, "invalid size");
3565     f(0b00100101, 31, 24), f(T, 23, 22), f(0b10000010, 21, 14);
3566     prf(Pg, 10), f(0, 9), prf(Pn, 5), rf(Xd, 0);
3567   }
3568 
3569   // SVE convert signed integer to floating-point (predicated)
3570   void sve_scvtf(FloatRegister Zd, SIMD_RegVariant T_dst, PRegister Pg,
3571                  FloatRegister Zn, SIMD_RegVariant T_src) {
3572     starti;
3573     assert(T_src != B && T_dst != B && T_src != Q && T_dst != Q &&
3574            (T_src != H || T_dst == T_src), "invalid register variant");
3575     int opc = T_dst;
3576     int opc2 = T_src;
3577     // In most cases we can treat T_dst, T_src as opc, opc2,
3578     // except for the following two combinations.
3579     // +-----+------+---+------------------------------------+
3580     // | opc | opc2 | U |        Instruction Details         |
3581     // +-----+------+---+------------------------------------+
3582     // |  11 |   00 | 0 | SCVTF - 32-bit to double-precision |
3583     // |  11 |   10 | 0 | SCVTF - 64-bit to single-precision |
3584     // +-----+------+---+------------------------------------+
3585     if (T_src == S && T_dst == D) {
3586       opc = 0b11;
3587       opc2 = 0b00;
3588     } else if (T_src == D && T_dst == S) {
3589       opc = 0b11;
3590       opc2 = 0b10;
3591     }
3592     f(0b01100101, 31, 24), f(opc, 23, 22), f(0b010, 21, 19);
3593     f(opc2, 18, 17), f(0b0101, 16, 13);
3594     pgrf(Pg, 10), rf(Zn, 5), rf(Zd, 0);
3595   }
3596 
3597   // SVE floating-point convert to signed integer, rounding toward zero (predicated)
3598   void sve_fcvtzs(FloatRegister Zd, SIMD_RegVariant T_dst, PRegister Pg,
3599                   FloatRegister Zn, SIMD_RegVariant T_src) {
3600     starti;
3601     assert(T_src != B && T_dst != B && T_src != Q && T_dst != Q &&
3602            (T_dst != H || T_src == H), "invalid register variant");
3603     int opc = T_src;
3604     int opc2 = T_dst;
3605     // In most cases we can treat T_src, T_dst as opc, opc2,
3606     // except for the following two combinations.
3607     // +-----+------+---+-------------------------------------+
3608     // | opc | opc2 | U |         Instruction Details         |
3609     // +-----+------+---+-------------------------------------+
3610     // |  11 |  10  | 0 | FCVTZS - single-precision to 64-bit |
3611     // |  11 |  00  | 0 | FCVTZS - double-precision to 32-bit |
3612     // +-----+------+---+-------------------------------------+
3613     if (T_src == S && T_dst == D) {
3614       opc = 0b11;
3615       opc2 = 0b10;
3616     } else if (T_src == D && T_dst == S) {
3617       opc = 0b11;
3618       opc2 = 0b00;
3619     }
3620     f(0b01100101, 31, 24), f(opc, 23, 22), f(0b011, 21, 19);
3621     f(opc2, 18, 17), f(0b0101, 16, 13);
3622     pgrf(Pg, 10), rf(Zn, 5), rf(Zd, 0);
3623   }
3624 
3625   // SVE floating-point convert precision (predicated)
3626   void sve_fcvt(FloatRegister Zd, SIMD_RegVariant T_dst, PRegister Pg,
3627                 FloatRegister Zn, SIMD_RegVariant T_src) {
3628     starti;
3629     assert(T_src != B && T_dst != B && T_src != Q && T_dst != Q &&
3630            T_src != T_dst, "invalid register variant");
3631     guarantee(T_src != H && T_dst != H, "half-precision unsupported");
3632     f(0b01100101, 31, 24), f(0b11, 23, 22), f(0b0010, 21, 18);
3633     f(T_dst, 17, 16), f(0b101, 15, 13);
3634     pgrf(Pg, 10), rf(Zn, 5), rf(Zd, 0);
3635   }
3636 
3637 // SVE extract element to general-purpose register
3638 #define INSN(NAME, before)                                                      \
3639   void NAME(Register Rd, SIMD_RegVariant T, PRegister Pg,  FloatRegister Zn) {  \
3640     starti;                                                                     \
3641     f(0b00000101, 31, 24), f(T, 23, 22), f(0b10000, 21, 17);                    \
3642     f(before, 16), f(0b101, 15, 13);                                            \
3643     pgrf(Pg, 10), rf(Zn, 5), rf(Rd, 0);                                         \
3644   }
3645 
3646   INSN(sve_lasta, 0b0);
3647   INSN(sve_lastb, 0b1);
3648 #undef INSN
3649 
3650 // SVE extract element to SIMD&FP scalar register
3651 #define INSN(NAME, before)                                                           \
3652   void NAME(FloatRegister Vd, SIMD_RegVariant T, PRegister Pg,  FloatRegister Zn) {  \
3653     starti;                                                                          \
3654     f(0b00000101, 31, 24), f(T, 23, 22), f(0b10001, 21, 17);                         \
3655     f(before, 16), f(0b100, 15, 13);                                                 \
3656     pgrf(Pg, 10), rf(Zn, 5), rf(Vd, 0);                                              \
3657   }
3658 
3659   INSN(sve_lasta, 0b0);
3660   INSN(sve_lastb, 0b1);
3661 #undef INSN
3662 
3663   // SVE create index starting from and incremented by immediate
3664   void sve_index(FloatRegister Zd, SIMD_RegVariant T, int imm1, int imm2) {
3665     starti;
3666     f(0b00000100, 31, 24), f(T, 23, 22), f(0b1, 21);
3667     sf(imm2, 20, 16), f(0b010000, 15, 10);
3668     sf(imm1, 9, 5), rf(Zd, 0);
3669   }
3670 
3671   // SVE programmable table lookup/permute using vector of element indices
3672   void sve_tbl(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn, FloatRegister Zm) {
3673     starti;
3674     assert(T != Q, "invalid size");
3675     f(0b00000101, 31, 24), f(T, 23, 22), f(0b1, 21), rf(Zm, 16);
3676     f(0b001100, 15, 10), rf(Zn, 5), rf(Zd, 0);
3677   }
3678 








3679   Assembler(CodeBuffer* code) : AbstractAssembler(code) {
3680   }
3681 
3682   // Stack overflow checking
3683   virtual void bang_stack_with_offset(int offset);
3684 
3685   static bool operand_valid_for_logical_immediate(bool is32, uint64_t imm);
3686   static bool operand_valid_for_sve_logical_immediate(unsigned elembits, uint64_t imm);
3687   static bool operand_valid_for_add_sub_immediate(int64_t imm);
3688   static bool operand_valid_for_sve_add_sub_immediate(int64_t imm);
3689   static bool operand_valid_for_float_immediate(double imm);
3690 
3691   void emit_data64(jlong data, relocInfo::relocType rtype, int format = 0);
3692   void emit_data64(jlong data, RelocationHolder const& rspec, int format = 0);
3693 };
3694 
3695 inline Assembler::Membar_mask_bits operator|(Assembler::Membar_mask_bits a,
3696                                              Assembler::Membar_mask_bits b) {
3697   return Assembler::Membar_mask_bits(unsigned(a)|unsigned(b));
3698 }
3699 
// Emits the accumulated 32-bit instruction word through the owning assembler
// when the instruction object goes out of scope, then checks the mask
// returned by get_bits().
// NOTE(review): the check presumably verifies that every one of the 32 bits
// was assigned by a field setter - confirm against the Instruction_aarch64
// class definition.
Instruction_aarch64::~Instruction_aarch64() {
  assem->emit_int32(insn);
  assert_cond(get_bits() == 0xffffffff);
}
3704 
3705 #undef f
3706 #undef sf
3707 #undef rf
3708 #undef srf
3709 #undef zrf
3710 #undef prf
3711 #undef pgrf
3712 #undef fixed
3713 
3714 #undef starti
3715 
3716 // Invert a condition
3717 inline const Assembler::Condition operator~(const Assembler::Condition cond) {
3718   return Assembler::Condition(int(cond) ^ 1);
3719 }
3720 
3721 extern "C" void das(uint64_t start, int len);
3722 
3723 #endif // CPU_AARCH64_ASSEMBLER_AARCH64_HPP
--- EOF ---