1 /*
   2  * Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved.
   3  * Copyright (c) 2014, 2021, Red Hat Inc. All rights reserved.
   4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   5  *
   6  * This code is free software; you can redistribute it and/or modify it
   7  * under the terms of the GNU General Public License version 2 only, as
   8  * published by the Free Software Foundation.
   9  *
  10  * This code is distributed in the hope that it will be useful, but WITHOUT
  11  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13  * version 2 for more details (a copy is included in the LICENSE file that
  14  * accompanied this code).
  15  *
  16  * You should have received a copy of the GNU General Public License version
  17  * 2 along with this work; if not, write to the Free Software Foundation,
  18  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  19  *
  20  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  21  * or visit www.oracle.com if you need additional information or have any
  22  * questions.
  23  *
  24  */
  25 
  26 #ifndef CPU_AARCH64_ASSEMBLER_AARCH64_HPP
  27 #define CPU_AARCH64_ASSEMBLER_AARCH64_HPP
  28 
  29 #include "asm/register.hpp"
  30 
// NOP() expands to a single no-op statement using whichever spelling the
// compiler provides. Both expansions already end in ';'.
#ifdef __GNUC__

// __nop needs volatile so that compiler doesn't optimize it away
#define NOP() asm volatile ("nop");

#elif defined(_MSC_VER)

// Use MSVC intrinsic: https://docs.microsoft.com/en-us/cpp/intrinsics/arm64-intrinsics?view=vs-2019#I
#define NOP() __nop();

#endif
  42 
  43 
  44 // definitions of various symbolic names for machine registers
  45 
  46 // First intercalls between C and Java which use 8 general registers
  47 // and 8 floating registers
  48 
  49 // we also have to copy between x86 and ARM registers but that's a
  50 // secondary complication -- not all code employing C call convention
  51 // executes as x86 code though -- we generate some of it
  52 
// Counts of the registers used to pass integer and floating-point
// arguments under the C (_c) and Java (_j) calling conventions.
class Argument {
 public:
  enum {
    n_int_register_parameters_c   = 8,  // r0, r1, ... r7 (c_rarg0, c_rarg1, ...)
    n_float_register_parameters_c = 8,  // v0, v1, ... v7 (c_farg0, c_farg1, ... )

    n_int_register_parameters_j   = 8, // r1, ... r7, r0 (j_rarg0, j_rarg1, ...)
    n_float_register_parameters_j = 8  // v0, v1, ... v7 (j_farg0, j_farg1, ...)
  };
};
  63 
// C calling convention: integer arguments c_rarg0..c_rarg7 are r0..r7.
REGISTER_DECLARATION(Register, c_rarg0, r0);
REGISTER_DECLARATION(Register, c_rarg1, r1);
REGISTER_DECLARATION(Register, c_rarg2, r2);
REGISTER_DECLARATION(Register, c_rarg3, r3);
REGISTER_DECLARATION(Register, c_rarg4, r4);
REGISTER_DECLARATION(Register, c_rarg5, r5);
REGISTER_DECLARATION(Register, c_rarg6, r6);
REGISTER_DECLARATION(Register, c_rarg7, r7);

// C calling convention: floating-point arguments c_farg0..c_farg7 are v0..v7.
REGISTER_DECLARATION(FloatRegister, c_farg0, v0);
REGISTER_DECLARATION(FloatRegister, c_farg1, v1);
REGISTER_DECLARATION(FloatRegister, c_farg2, v2);
REGISTER_DECLARATION(FloatRegister, c_farg3, v3);
REGISTER_DECLARATION(FloatRegister, c_farg4, v4);
REGISTER_DECLARATION(FloatRegister, c_farg5, v5);
REGISTER_DECLARATION(FloatRegister, c_farg6, v6);
REGISTER_DECLARATION(FloatRegister, c_farg7, v7);
  81 
  82 // Symbolically name the register arguments used by the Java calling convention.
  83 // We have control over the convention for java so we can do what we please.
  84 // What pleases us is to offset the java calling convention so that when
  85 // we call a suitable jni method the arguments are lined up and we don't
  86 // have to do much shuffling. A suitable jni method is non-static and a
  87 // small number of arguments
  88 //
  89 //  |--------------------------------------------------------------------|
  90 //  | c_rarg0  c_rarg1  c_rarg2 c_rarg3 c_rarg4 c_rarg5 c_rarg6 c_rarg7  |
  91 //  |--------------------------------------------------------------------|
  92 //  | r0       r1       r2      r3      r4      r5      r6      r7       |
  93 //  |--------------------------------------------------------------------|
  94 //  | j_rarg7  j_rarg0  j_rarg1 j_rarg2 j_rarg3 j_rarg4 j_rarg5 j_rarg6  |
  95 //  |--------------------------------------------------------------------|
  96 
  97 
// Java integer arguments are the C argument registers rotated by one:
// j_rarg0 aliases c_rarg1, ..., j_rarg6 aliases c_rarg7, and j_rarg7
// wraps around to c_rarg0.
REGISTER_DECLARATION(Register, j_rarg0, c_rarg1);
REGISTER_DECLARATION(Register, j_rarg1, c_rarg2);
REGISTER_DECLARATION(Register, j_rarg2, c_rarg3);
REGISTER_DECLARATION(Register, j_rarg3, c_rarg4);
REGISTER_DECLARATION(Register, j_rarg4, c_rarg5);
REGISTER_DECLARATION(Register, j_rarg5, c_rarg6);
REGISTER_DECLARATION(Register, j_rarg6, c_rarg7);
REGISTER_DECLARATION(Register, j_rarg7, c_rarg0);

// Java floating args are passed as per C

REGISTER_DECLARATION(FloatRegister, j_farg0, v0);
REGISTER_DECLARATION(FloatRegister, j_farg1, v1);
REGISTER_DECLARATION(FloatRegister, j_farg2, v2);
REGISTER_DECLARATION(FloatRegister, j_farg3, v3);
REGISTER_DECLARATION(FloatRegister, j_farg4, v4);
REGISTER_DECLARATION(FloatRegister, j_farg5, v5);
REGISTER_DECLARATION(FloatRegister, j_farg6, v6);
REGISTER_DECLARATION(FloatRegister, j_farg7, v7);
 117 
 118 // registers used to hold VM data either temporarily within a method
 119 // or across method calls
 120 
 121 // volatile (caller-save) registers
 122 
// Symbolic names for the registers the VM dedicates to a fixed purpose.

// r8 is used for indirect result location return
// we use it and r9 as scratch registers
REGISTER_DECLARATION(Register, rscratch1, r8);
REGISTER_DECLARATION(Register, rscratch2, r9);

// current method -- must be in a call-clobbered register
REGISTER_DECLARATION(Register, rmethod,   r12);

// non-volatile (callee-save) registers are r16-29
// of which the following are dedicated global state

// link register
REGISTER_DECLARATION(Register, lr,        r30);
// frame pointer
REGISTER_DECLARATION(Register, rfp,       r29);
// current thread
REGISTER_DECLARATION(Register, rthread,   r28);
// base of heap
REGISTER_DECLARATION(Register, rheapbase, r27);
// constant pool cache
REGISTER_DECLARATION(Register, rcpool,    r26);
// monitors allocated on stack
REGISTER_DECLARATION(Register, rmonitors, r25);
// locals on stack
REGISTER_DECLARATION(Register, rlocals,   r24);
// bytecode pointer
REGISTER_DECLARATION(Register, rbcp,      r22);
// Dispatch table base
REGISTER_DECLARATION(Register, rdispatch, r21);
// Java stack pointer
REGISTER_DECLARATION(Register, esp,      r20);

// Preserved predicate register with all elements set TRUE.
REGISTER_DECLARATION(PRegister, ptrue, p7);
 157 
// Assert a condition, using the text of the condition itself as the message.
#define assert_cond(ARG1) assert(ARG1, #ARG1)

namespace asm_util {
  // Declared here, defined elsewhere. Takes a flag selecting the 32-bit form
  // and the immediate, and returns a value that the logical-immediate
  // emitters place in instruction bits 22..10.
  uint32_t encode_logical_immediate(bool is32, uint64_t imm);
};

// Makes the asm_util names visible unqualified to everything that includes
// this header.
using namespace asm_util;


// Forward declaration; Instruction_aarch64 below holds an Assembler pointer.
class Assembler;
 168 
 169 class Instruction_aarch64 {
 170   unsigned insn;
 171 #ifdef ASSERT
 172   unsigned bits;
 173 #endif
 174   Assembler *assem;
 175 
 176 public:
 177 
 178   Instruction_aarch64(class Assembler *as) {
 179 #ifdef ASSERT
 180     bits = 0;
 181 #endif
 182     insn = 0;
 183     assem = as;
 184   }
 185 
 186   inline ~Instruction_aarch64();
 187 
 188   unsigned &get_insn() { return insn; }
 189 #ifdef ASSERT
 190   unsigned &get_bits() { return bits; }
 191 #endif
 192 
 193   static inline int32_t extend(unsigned val, int hi = 31, int lo = 0) {
 194     union {
 195       unsigned u;
 196       int n;
 197     };
 198 
 199     u = val << (31 - hi);
 200     n = n >> (31 - hi + lo);
 201     return n;
 202   }
 203 
 204   static inline uint32_t extract(uint32_t val, int msb, int lsb) {
 205     int nbits = msb - lsb + 1;
 206     assert_cond(msb >= lsb);
 207     uint32_t mask = checked_cast<uint32_t>(right_n_bits(nbits));
 208     uint32_t result = val >> lsb;
 209     result &= mask;
 210     return result;
 211   }
 212 
 213   static inline int32_t sextract(uint32_t val, int msb, int lsb) {
 214     uint32_t uval = extract(val, msb, lsb);
 215     return extend(uval, msb - lsb);
 216   }
 217 
 218   static void patch(address a, int msb, int lsb, uint64_t val) {
 219     int nbits = msb - lsb + 1;
 220     guarantee(val < (1ULL << nbits), "Field too big for insn");
 221     assert_cond(msb >= lsb);
 222     unsigned mask = checked_cast<unsigned>(right_n_bits(nbits));
 223     val <<= lsb;
 224     mask <<= lsb;
 225     unsigned target = *(unsigned *)a;
 226     target &= ~mask;
 227     target |= val;
 228     *(unsigned *)a = target;
 229   }
 230 
 231   static void spatch(address a, int msb, int lsb, int64_t val) {
 232     int nbits = msb - lsb + 1;
 233     int64_t chk = val >> (nbits - 1);
 234     guarantee (chk == -1 || chk == 0, "Field too big for insn");
 235     unsigned uval = val;
 236     unsigned mask = checked_cast<unsigned>(right_n_bits(nbits));
 237     uval &= mask;
 238     uval <<= lsb;
 239     mask <<= lsb;
 240     unsigned target = *(unsigned *)a;
 241     target &= ~mask;
 242     target |= uval;
 243     *(unsigned *)a = target;
 244   }
 245 
 246   void f(unsigned val, int msb, int lsb) {
 247     int nbits = msb - lsb + 1;
 248     guarantee(val < (1ULL << nbits), "Field too big for insn");
 249     assert_cond(msb >= lsb);
 250     val <<= lsb;
 251     insn |= val;
 252 #ifdef ASSERT
 253     unsigned mask = checked_cast<unsigned>(right_n_bits(nbits));
 254     mask <<= lsb;
 255     assert_cond((bits & mask) == 0);
 256     bits |= mask;
 257 #endif
 258   }
 259 
 260   void f(unsigned val, int bit) {
 261     f(val, bit, bit);
 262   }
 263 
 264   void sf(int64_t val, int msb, int lsb) {
 265     int nbits = msb - lsb + 1;
 266     int64_t chk = val >> (nbits - 1);
 267     guarantee (chk == -1 || chk == 0, "Field too big for insn");
 268     unsigned uval = val;
 269     unsigned mask = checked_cast<unsigned>(right_n_bits(nbits));
 270     uval &= mask;
 271     f(uval, lsb + nbits - 1, lsb);
 272   }
 273 
 274   void rf(Register r, int lsb) {
 275     f(r->encoding_nocheck(), lsb + 4, lsb);
 276   }
 277 
 278   // reg|ZR
 279   void zrf(Register r, int lsb) {
 280     f(r->encoding_nocheck() - (r == zr), lsb + 4, lsb);
 281   }
 282 
 283   // reg|SP
 284   void srf(Register r, int lsb) {
 285     f(r == sp ? 31 : r->encoding_nocheck(), lsb + 4, lsb);
 286   }
 287 
 288   void rf(FloatRegister r, int lsb) {
 289     f(r->encoding_nocheck(), lsb + 4, lsb);
 290   }
 291 
 292   void prf(PRegister r, int lsb) {
 293     f(r->encoding_nocheck(), lsb + 3, lsb);
 294   }
 295 
 296   void pgrf(PRegister r, int lsb) {
 297     f(r->encoding_nocheck(), lsb + 2, lsb);
 298   }
 299 
 300   unsigned get(int msb = 31, int lsb = 0) {
 301     int nbits = msb - lsb + 1;
 302     unsigned mask = checked_cast<unsigned>(right_n_bits(nbits)) << lsb;
 303     assert_cond((bits & mask) == mask);
 304     return (insn & mask) >> lsb;
 305   }
 306 
 307   void fixed(unsigned value, unsigned mask) {
 308     assert_cond ((mask & bits) == 0);
 309 #ifdef ASSERT
 310     bits |= mask;
 311 #endif
 312     insn |= value;
 313   }
 314 };
 315 
// Opens a new instruction inside an Assembler member function: declares the
// local 'current_insn', which the f/sf/rf/... macros further down refer to.
// The expansion already ends in ';'.
#define starti Instruction_aarch64 current_insn(this);
 317 
 318 class PrePost {
 319   int _offset;
 320   Register _r;
 321 public:
 322   PrePost(Register reg, int o) : _offset(o), _r(reg) { }
 323   int offset() { return _offset; }
 324   Register reg() { return _r; }
 325 };
 326 
 327 class Pre : public PrePost {
 328 public:
 329   Pre(Register reg, int o) : PrePost(reg, o) { }
 330 };
 331 class Post : public PrePost {
 332   Register _idx;
 333   bool _is_postreg;
 334 public:
 335   Post(Register reg, int o) : PrePost(reg, o) { _idx = NULL; _is_postreg = false; }
 336   Post(Register reg, Register idx) : PrePost(reg, 0) { _idx = idx; _is_postreg = true; }
 337   Register idx_reg() { return _idx; }
 338   bool is_postreg() {return _is_postreg; }
 339 };
 340 
 341 namespace ext
 342 {
 343   enum operation { uxtb, uxth, uxtw, uxtx, sxtb, sxth, sxtw, sxtx };
 344 };
 345 
 346 // Addressing modes
 347 class Address {
 348  public:
 349 
 350   enum mode { no_mode, base_plus_offset, pre, post, post_reg, pcrel,
 351               base_plus_offset_reg, literal };
 352 
 353   // Shift and extend for base reg + reg offset addressing
 354   class extend {
 355     int _option, _shift;
 356     ext::operation _op;
 357   public:
 358     extend() { }
 359     extend(int s, int o, ext::operation op) : _option(o), _shift(s), _op(op) { }
 360     int option() const{ return _option; }
 361     int shift() const { return _shift; }
 362     ext::operation op() const { return _op; }
 363   };
 364   class uxtw : public extend {
 365   public:
 366     uxtw(int shift = -1): extend(shift, 0b010, ext::uxtw) { }
 367   };
 368   class lsl : public extend {
 369   public:
 370     lsl(int shift = -1): extend(shift, 0b011, ext::uxtx) { }
 371   };
 372   class sxtw : public extend {
 373   public:
 374     sxtw(int shift = -1): extend(shift, 0b110, ext::sxtw) { }
 375   };
 376   class sxtx : public extend {
 377   public:
 378     sxtx(int shift = -1): extend(shift, 0b111, ext::sxtx) { }
 379   };
 380 
 381  private:
 382   Register _base;
 383   Register _index;
 384   int64_t _offset;
 385   enum mode _mode;
 386   extend _ext;
 387 
 388   RelocationHolder _rspec;
 389 
 390   // Typically we use AddressLiterals we want to use their rval
 391   // However in some situations we want the lval (effect address) of
 392   // the item.  We provide a special factory for making those lvals.
 393   bool _is_lval;
 394 
 395   // If the target is far we'll need to load the ea of this to a
 396   // register to reach it. Otherwise if near we can do PC-relative
 397   // addressing.
 398   address          _target;
 399 
 400  public:
 401   Address()
 402     : _mode(no_mode) { }
 403   Address(Register r)
 404     : _base(r), _index(noreg), _offset(0), _mode(base_plus_offset), _target(0) { }
 405   Address(Register r, int o)
 406     : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(0) { }
 407   Address(Register r, long o)
 408     : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(0) { }
 409   Address(Register r, long long o)
 410     : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(0) { }
 411   Address(Register r, unsigned int o)
 412     : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(0) { }
 413   Address(Register r, unsigned long o)
 414     : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(0) { }
 415   Address(Register r, unsigned long long o)
 416     : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(0) { }
 417   Address(Register r, ByteSize disp)
 418     : Address(r, in_bytes(disp)) { }
 419   Address(Register r, Register r1, extend ext = lsl())
 420     : _base(r), _index(r1), _offset(0), _mode(base_plus_offset_reg),
 421       _ext(ext), _target(0) { }
 422   Address(Pre p)
 423     : _base(p.reg()), _offset(p.offset()), _mode(pre) { }
 424   Address(Post p)
 425     : _base(p.reg()),  _index(p.idx_reg()), _offset(p.offset()),
 426       _mode(p.is_postreg() ? post_reg : post), _target(0) { }
 427   Address(address target, RelocationHolder const& rspec)
 428     : _mode(literal),
 429       _rspec(rspec),
 430       _is_lval(false),
 431       _target(target)  { }
 432   Address(address target, relocInfo::relocType rtype = relocInfo::external_word_type);
 433   Address(Register base, RegisterOrConstant index, extend ext = lsl())
 434     : _base (base),
 435       _offset(0), _ext(ext), _target(0) {
 436     if (index.is_register()) {
 437       _mode = base_plus_offset_reg;
 438       _index = index.as_register();
 439     } else {
 440       guarantee(ext.option() == ext::uxtx, "should be");
 441       assert(index.is_constant(), "should be");
 442       _mode = base_plus_offset;
 443       _offset = index.as_constant() << ext.shift();
 444     }
 445   }
 446 
 447   Register base() const {
 448     guarantee((_mode == base_plus_offset || _mode == base_plus_offset_reg
 449                || _mode == post || _mode == post_reg),
 450               "wrong mode");
 451     return _base;
 452   }
 453   int64_t offset() const {
 454     return _offset;
 455   }
 456   Register index() const {
 457     return _index;
 458   }
 459   mode getMode() const {
 460     return _mode;
 461   }
 462   bool uses(Register reg) const { return _base == reg || _index == reg; }
 463   address target() const { return _target; }
 464   const RelocationHolder& rspec() const { return _rspec; }
 465 
 466   void encode(Instruction_aarch64 *i) const {
 467     i->f(0b111, 29, 27);
 468     i->srf(_base, 5);
 469 
 470     switch(_mode) {
 471     case base_plus_offset:
 472       {
 473         unsigned size = i->get(31, 30);
 474         if (i->get(26, 26) && i->get(23, 23)) {
 475           // SIMD Q Type - Size = 128 bits
 476           assert(size == 0, "bad size");
 477           size = 0b100;
 478         }
 479         unsigned mask = (1 << size) - 1;
 480         if (_offset < 0 || _offset & mask)
 481           {
 482             i->f(0b00, 25, 24);
 483             i->f(0, 21), i->f(0b00, 11, 10);
 484             i->sf(_offset, 20, 12);
 485           } else {
 486             i->f(0b01, 25, 24);
 487             i->f(_offset >> size, 21, 10);
 488           }
 489       }
 490       break;
 491 
 492     case base_plus_offset_reg:
 493       {
 494         i->f(0b00, 25, 24);
 495         i->f(1, 21);
 496         i->rf(_index, 16);
 497         i->f(_ext.option(), 15, 13);
 498         unsigned size = i->get(31, 30);
 499         if (i->get(26, 26) && i->get(23, 23)) {
 500           // SIMD Q Type - Size = 128 bits
 501           assert(size == 0, "bad size");
 502           size = 0b100;
 503         }
 504         if (size == 0) // It's a byte
 505           i->f(_ext.shift() >= 0, 12);
 506         else {
 507           assert(_ext.shift() <= 0 || _ext.shift() == (int)size, "bad shift");
 508           i->f(_ext.shift() > 0, 12);
 509         }
 510         i->f(0b10, 11, 10);
 511       }
 512       break;
 513 
 514     case pre:
 515       i->f(0b00, 25, 24);
 516       i->f(0, 21), i->f(0b11, 11, 10);
 517       i->sf(_offset, 20, 12);
 518       break;
 519 
 520     case post:
 521       i->f(0b00, 25, 24);
 522       i->f(0, 21), i->f(0b01, 11, 10);
 523       i->sf(_offset, 20, 12);
 524       break;
 525 
 526     default:
 527       ShouldNotReachHere();
 528     }
 529   }
 530 
 531   void encode_pair(Instruction_aarch64 *i) const {
 532     switch(_mode) {
 533     case base_plus_offset:
 534       i->f(0b010, 25, 23);
 535       break;
 536     case pre:
 537       i->f(0b011, 25, 23);
 538       break;
 539     case post:
 540       i->f(0b001, 25, 23);
 541       break;
 542     default:
 543       ShouldNotReachHere();
 544     }
 545 
 546     unsigned size; // Operand shift in 32-bit words
 547 
 548     if (i->get(26, 26)) { // float
 549       switch(i->get(31, 30)) {
 550       case 0b10:
 551         size = 2; break;
 552       case 0b01:
 553         size = 1; break;
 554       case 0b00:
 555         size = 0; break;
 556       default:
 557         ShouldNotReachHere();
 558         size = 0;  // unreachable
 559       }
 560     } else {
 561       size = i->get(31, 31);
 562     }
 563 
 564     size = 4 << size;
 565     guarantee(_offset % size == 0, "bad offset");
 566     i->sf(_offset / size, 21, 15);
 567     i->srf(_base, 5);
 568   }
 569 
 570   void encode_nontemporal_pair(Instruction_aarch64 *i) const {
 571     // Only base + offset is allowed
 572     i->f(0b000, 25, 23);
 573     unsigned size = i->get(31, 31);
 574     size = 4 << size;
 575     guarantee(_offset % size == 0, "bad offset");
 576     i->sf(_offset / size, 21, 15);
 577     i->srf(_base, 5);
 578     guarantee(_mode == Address::base_plus_offset,
 579               "Bad addressing mode for non-temporal op");
 580   }
 581 
 582   void lea(MacroAssembler *, Register) const;
 583 
 584   static bool offset_ok_for_immed(int64_t offset, uint shift);
 585 
 586   static bool offset_ok_for_sve_immed(int64_t offset, int shift, int vl /* sve vector length */) {
 587     if (offset % vl == 0) {
 588       // Convert address offset into sve imm offset (MUL VL).
 589       int sve_offset = offset / vl;
 590       if (((-(1 << (shift - 1))) <= sve_offset) && (sve_offset < (1 << (shift - 1)))) {
 591         // sve_offset can be encoded
 592         return true;
 593       }
 594     }
 595     return false;
 596   }
 597 };
 598 
// Convenience classes
 600 class RuntimeAddress: public Address {
 601 
 602   public:
 603 
 604   RuntimeAddress(address target) : Address(target, relocInfo::runtime_call_type) {}
 605 
 606 };
 607 
 608 class OopAddress: public Address {
 609 
 610   public:
 611 
 612   OopAddress(address target) : Address(target, relocInfo::oop_type){}
 613 
 614 };
 615 
 616 class ExternalAddress: public Address {
 617  private:
 618   static relocInfo::relocType reloc_for_target(address target) {
 619     // Sometimes ExternalAddress is used for values which aren't
 620     // exactly addresses, like the card table base.
 621     // external_word_type can't be used for values in the first page
 622     // so just skip the reloc in that case.
 623     return external_word_Relocation::can_be_relocated(target) ? relocInfo::external_word_type : relocInfo::none;
 624   }
 625 
 626  public:
 627 
 628   ExternalAddress(address target) : Address(target, reloc_for_target(target)) {}
 629 
 630 };
 631 
 632 class InternalAddress: public Address {
 633 
 634   public:
 635 
 636   InternalAddress(address target) : Address(target, relocInfo::internal_word_type) {}
 637 };
 638 
// Number of floating-point registers multiplied by the save slots each one
// occupies.
const int FPUStateSizeInWords = FloatRegisterImpl::number_of_registers *
                                FloatRegisterImpl::save_slots_per_register;
 641 
 642 typedef enum {
 643   PLDL1KEEP = 0b00000, PLDL1STRM, PLDL2KEEP, PLDL2STRM, PLDL3KEEP, PLDL3STRM,
 644   PSTL1KEEP = 0b10000, PSTL1STRM, PSTL2KEEP, PSTL2STRM, PSTL3KEEP, PSTL3STRM,
 645   PLIL1KEEP = 0b01000, PLIL1STRM, PLIL2KEEP, PLIL2STRM, PLIL3KEEP, PLIL3STRM
 646 } prfop;
 647 
 648 class Assembler : public AbstractAssembler {
 649 
 650 public:
 651 
 652 #ifndef PRODUCT
 653   static const uintptr_t asm_bp;
 654 
 655   void emit_int32(jint x) {
 656     if ((uintptr_t)pc() == asm_bp)
 657       NOP();
 658     AbstractAssembler::emit_int32(x);
 659   }
 660 #else
 661   void emit_int32(jint x) {
 662     AbstractAssembler::emit_int32(x);
 663   }
 664 #endif
 665 
 666   enum { instruction_size = 4 };
 667 
 668   //---<  calculate length of instruction  >---
 669   // We just use the values set above.
 670   // instruction must start at passed address
 671   static unsigned int instr_len(unsigned char *instr) { return instruction_size; }
 672 
 673   //---<  longest instructions  >---
 674   static unsigned int instr_maxlen() { return instruction_size; }
 675 
 676   Address adjust(Register base, int offset, bool preIncrement) {
 677     if (preIncrement)
 678       return Address(Pre(base, offset));
 679     else
 680       return Address(Post(base, offset));
 681   }
 682 
 683   Address pre(Register base, int offset) {
 684     return adjust(base, offset, true);
 685   }
 686 
 687   Address post(Register base, int offset) {
 688     return adjust(base, offset, false);
 689   }
 690 
 691   Address post(Register base, Register idx) {
 692     return Address(Post(base, idx));
 693   }
 694 
  // Declared here, defined elsewhere.
  // NOTE(review): by its name this returns the address of the instruction
  // following 'inst' -- confirm against the definition.
  static address locate_next_instruction(address inst);
 696 
// Shorthands for the Instruction_aarch64 field writers. They refer to the
// local variable 'current_insn' that the 'starti' macro declares, so they
// can only be used after 'starti' in the same scope.
#define f current_insn.f
#define sf current_insn.sf
#define rf current_insn.rf
#define srf current_insn.srf
#define zrf current_insn.zrf
#define prf current_insn.prf
#define pgrf current_insn.pgrf
#define fixed current_insn.fixed
 705 
  // Pointer-to-member types for emitters that take a destination address,
  // one per operand shape.
  typedef void (Assembler::* uncond_branch_insn)(address dest);
  typedef void (Assembler::* compare_and_branch_insn)(Register Rt, address dest);
  typedef void (Assembler::* test_and_branch_insn)(Register Rt, int bitpos, address dest);
  typedef void (Assembler::* prefetch_insn)(address target, prfop);

  // Declared here, defined elsewhere. The Label-taking emitters in this
  // class forward to these, passing the matching address-taking emitter.
  // NOTE(review): presumably these resolve the Label to an address and then
  // call 'insn' -- confirm against the definitions.
  void wrap_label(Label &L, uncond_branch_insn insn);
  void wrap_label(Register r, Label &L, compare_and_branch_insn insn);
  void wrap_label(Register r, int bitpos, Label &L, test_and_branch_insn insn);
  void wrap_label(Label &L, prfop, prefetch_insn insn);
 715 
 716   // PC-rel. addressing
 717 
  // adr/_adrp with a raw destination address; defined elsewhere.
  void adr(Register Rd, address dest);
  void _adrp(Register Rd, address dest);

  // adr/_adrp with an Address destination; defined elsewhere.
  void adr(Register Rd, const Address &dest);
  void _adrp(Register Rd, const Address &dest);
 723 
 724   void adr(Register Rd, Label &L) {
 725     wrap_label(Rd, L, &Assembler::Assembler::adr);
 726   }
 727   void _adrp(Register Rd, Label &L) {
 728     wrap_label(Rd, L, &Assembler::_adrp);
 729   }
 730 
  // Declared here, defined elsewhere; 'offset' is passed by non-const
  // reference, so it is an output (or in/out) parameter.
  // NOTE(review): presumably it receives the part of the target address not
  // covered by the page address placed in Rd -- confirm against the definition.
  void adrp(Register Rd, const Address &dest, uint64_t &offset);
 732 
// Make sure no earlier INSN definition is still in effect.
#undef INSN

  // Shared helper behind the three-operand add/sub immediate emitters below;
  // defined elsewhere. The callers pass their own opcode bits as 'op' and
  // the opcode bits of the opposite operation as 'negated_op'.
  void add_sub_immediate(Instruction_aarch64 &current_insn, Register Rd, Register Rn,
                         unsigned uimm, int op, int negated_op);
 737 
  // Add/subtract (immediate)
  // Each INSN expansion defines two overloads of NAME:
  //  - (Rd, Rn, imm, shift) writes 'decode' to bits 31..29, 'shift' to bits
  //    23..22 and 'imm' to bits 21..10 directly;
  //  - (Rd, Rn, imm) delegates to add_sub_immediate(), passing 'decode' and
  //    'negated'.
  // In the list below 'negated' is the 'decode' value of the opposite
  // operation (e.g. addsw's negated value is subsw's decode value).
#define INSN(NAME, decode, negated)                                     \
  void NAME(Register Rd, Register Rn, unsigned imm, unsigned shift) {   \
    starti;                                                             \
    f(decode, 31, 29), f(0b10001, 28, 24), f(shift, 23, 22), f(imm, 21, 10); \
    zrf(Rd, 0), srf(Rn, 5);                                             \
  }                                                                     \
                                                                        \
  void NAME(Register Rd, Register Rn, unsigned imm) {                   \
    starti;                                                             \
    add_sub_immediate(current_insn, Rd, Rn, imm, decode, negated);      \
  }

  INSN(addsw, 0b001, 0b011);
  INSN(subsw, 0b011, 0b001);
  INSN(adds,  0b101, 0b111);
  INSN(subs,  0b111, 0b101);

#undef INSN
 757 
// Three-operand add/sub immediate forms: only the (Rd, Rn, imm) overload is
// generated, and it delegates to add_sub_immediate(). 'negated' is again the
// 'decode' value of the opposite operation.
#define INSN(NAME, decode, negated)                     \
  void NAME(Register Rd, Register Rn, unsigned imm) {   \
    starti;                                             \
    add_sub_immediate(current_insn, Rd, Rn, imm, decode, negated);     \
  }

  INSN(addw, 0b000, 0b010);
  INSN(subw, 0b010, 0b000);
  INSN(add,  0b100, 0b110);
  INSN(sub,  0b110, 0b100);

#undef INSN
 770 
 // Logical (immediate)
 // The immediate is converted with encode_logical_immediate() and the result
 // written to bits 22..10. Rd is written with srf (sp encoded as 31), Rn with
 // zrf. Names ending in 'w' pass is32 == true.
#define INSN(NAME, decode, is32)                                \
  void NAME(Register Rd, Register Rn, uint64_t imm) {           \
    starti;                                                     \
    uint32_t val = encode_logical_immediate(is32, imm);         \
    f(decode, 31, 29), f(0b100100, 28, 23), f(val, 22, 10);     \
    srf(Rd, 0), zrf(Rn, 5);                                     \
  }

  INSN(andw, 0b000, true);
  INSN(orrw, 0b001, true);
  INSN(eorw, 0b010, true);
  INSN(andr,  0b100, false);
  INSN(orr,  0b101, false);
  INSN(eor,  0b110, false);

#undef INSN
 788 
// Logical (immediate) forms ands/andsw. Same layout as the family above,
// except that Rd is written with zrf instead of srf.
#define INSN(NAME, decode, is32)                                \
  void NAME(Register Rd, Register Rn, uint64_t imm) {           \
    starti;                                                     \
    uint32_t val = encode_logical_immediate(is32, imm);         \
    f(decode, 31, 29), f(0b100100, 28, 23), f(val, 22, 10);     \
    zrf(Rd, 0), zrf(Rn, 5);                                     \
  }

  INSN(ands, 0b111, false);
  INSN(andsw, 0b011, true);

#undef INSN
 801 
  // Move wide (immediate)
  // 'shift' is given in bits and must be a multiple of 16 (asserted); it is
  // stored divided by 16 in bits 22..21. 'imm' goes to the 16-bit field at
  // bits 20..5.
#define INSN(NAME, opcode)                                              \
  void NAME(Register Rd, unsigned imm, unsigned shift = 0) {            \
    assert_cond((shift/16)*16 == shift);                                \
    starti;                                                             \
    f(opcode, 31, 29), f(0b100101, 28, 23), f(shift/16, 22, 21),        \
      f(imm, 20, 5);                                                    \
    rf(Rd, 0);                                                          \
  }

  INSN(movnw, 0b000);
  INSN(movzw, 0b010);
  INSN(movkw, 0b011);
  INSN(movn, 0b100);
  INSN(movz, 0b110);
  INSN(movk, 0b111);

#undef INSN
 820 
  // Bitfield
  // 'size' is 0 for the 'w' forms and 1 otherwise. When size is 0, immr and
  // imms must both be below 32 (guaranteed at run time); the six-bit fields
  // at 21..16 and 15..10 bound them in every case.
#define INSN(NAME, opcode, size)                                        \
  void NAME(Register Rd, Register Rn, unsigned immr, unsigned imms) {   \
    starti;                                                             \
    guarantee(size == 1 || (immr < 32 && imms < 32), "incorrect immr/imms");\
    f(opcode, 31, 22), f(immr, 21, 16), f(imms, 15, 10);                \
    zrf(Rn, 5), rf(Rd, 0);                                              \
  }

  INSN(sbfmw, 0b0001001100, 0);
  INSN(bfmw,  0b0011001100, 0);
  INSN(ubfmw, 0b0101001100, 0);
  INSN(sbfm,  0b1001001101, 1);
  INSN(bfm,   0b1011001101, 1);
  INSN(ubfm,  0b1101001101, 1);

#undef INSN
 838 
  // Extract
  // 'size' is 0 for extrw and 1 for extr. When size is 0, imms must be below
  // 32 (guaranteed at run time). All three registers are written with zrf.
#define INSN(NAME, opcode, size)                                        \
  void NAME(Register Rd, Register Rn, Register Rm, unsigned imms) {     \
    starti;                                                             \
    guarantee(size == 1 || imms < 32, "incorrect imms");                \
    f(opcode, 31, 21), f(imms, 15, 10);                                 \
    zrf(Rm, 16), zrf(Rn, 5), zrf(Rd, 0);                                \
  }

  INSN(extrw, 0b00010011100, 0);
  INSN(extr,  0b10010011110, 1);

#undef INSN
 852 
 853   // The maximum range of a branch is fixed for the AArch64
 854   // architecture.  In debug mode we shrink it in order to test
 855   // trampolines, but not so small that branches in the interpreter
 856   // are out of range.
 857   static const uint64_t branch_range = NOT_DEBUG(128 * M) DEBUG_ONLY(2 * M);
 858 
 859   static bool reachable_from_branch_at(address branch, address target) {
 860     return uabs(target - branch) < branch_range;
 861   }
 862 
  // Unconditional branch (immediate)
  // Each INSN expansion provides three overloads of NAME:
  //  - (address dest) encodes the distance from pc() to dest, divided by 4,
  //    as a signed value in bits 25..0; debug builds first assert that dest
  //    is within branch_range;
  //  - (Label &L) forwards to wrap_label with the address overload;
  //  - (const Address &dest) is only declared here.
#define INSN(NAME, opcode)                                              \
  void NAME(address dest) {                                             \
    starti;                                                             \
    int64_t offset = (dest - pc()) >> 2;                                \
    DEBUG_ONLY(assert(reachable_from_branch_at(pc(), dest), "debug only")); \
    f(opcode, 31), f(0b00101, 30, 26), sf(offset, 25, 0);               \
  }                                                                     \
  void NAME(Label &L) {                                                 \
    wrap_label(L, &Assembler::NAME);                                    \
  }                                                                     \
  void NAME(const Address &dest);

  INSN(b, 0);
  INSN(bl, 1);

#undef INSN
 880 
  // Compare & branch (immediate)
  // The address overload encodes the distance from pc() to dest, divided by
  // 4, as a signed value in bits 23..5 and the register in bits 4..0. The
  // Label overload forwards to wrap_label with the address overload.
#define INSN(NAME, opcode)                              \
  void NAME(Register Rt, address dest) {                \
    int64_t offset = (dest - pc()) >> 2;                \
    starti;                                             \
    f(opcode, 31, 24), sf(offset, 23, 5), rf(Rt, 0);    \
  }                                                     \
  void NAME(Register Rt, Label &L) {                    \
    wrap_label(Rt, L, &Assembler::NAME);                \
  }

  INSN(cbzw,  0b00110100);
  INSN(cbnzw, 0b00110101);
  INSN(cbz,   0b10110100);
  INSN(cbnz,  0b10110101);

#undef INSN
 898 
 899   // Test & branch (immediate)
 900 #define INSN(NAME, opcode)                                              \
 901   void NAME(Register Rt, int bitpos, address dest) {                    \
 902     int64_t offset = (dest - pc()) >> 2;                                \
 903     int b5 = bitpos >> 5;                                               \
 904     bitpos &= 0x1f;                                                     \
 905     starti;                                                             \
 906     f(b5, 31), f(opcode, 30, 24), f(bitpos, 23, 19), sf(offset, 18, 5); \
 907     rf(Rt, 0);                                                          \
 908   }                                                                     \
 909   void NAME(Register Rt, int bitpos, Label &L) {                        \
 910     wrap_label(Rt, bitpos, L, &Assembler::NAME);                        \
 911   }
 912 
 913   INSN(tbz,  0b0110110);
 914   INSN(tbnz, 0b0110111);
 915 
 916 #undef INSN
 917 
 918   // Conditional branch (immediate)
 919   enum Condition
 920     {EQ, NE, HS, CS=HS, LO, CC=LO, MI, PL, VS, VC, HI, LS, GE, LT, GT, LE, AL, NV};
 921 
 922   void br(Condition  cond, address dest) {
 923     int64_t offset = (dest - pc()) >> 2;
 924     starti;
 925     f(0b0101010, 31, 25), f(0, 24), sf(offset, 23, 5), f(0, 4), f(cond, 3, 0);
 926   }
 927 
 928 #define INSN(NAME, cond)                        \
 929   void NAME(address dest) {                     \
 930     br(cond, dest);                             \
 931   }
 932 
 933   INSN(beq, EQ);
 934   INSN(bne, NE);
 935   INSN(bhs, HS);
 936   INSN(bcs, CS);
 937   INSN(blo, LO);
 938   INSN(bcc, CC);
 939   INSN(bmi, MI);
 940   INSN(bpl, PL);
 941   INSN(bvs, VS);
 942   INSN(bvc, VC);
 943   INSN(bhi, HI);
 944   INSN(bls, LS);
 945   INSN(bge, GE);
 946   INSN(blt, LT);
 947   INSN(bgt, GT);
 948   INSN(ble, LE);
 949   INSN(bal, AL);
 950   INSN(bnv, NV);
 951 
 952   void br(Condition cc, Label &L);
 953 
 954 #undef INSN
 955 
 956   // Exception generation
 957   void generate_exception(int opc, int op2, int LL, unsigned imm) {
 958     starti;
 959     f(0b11010100, 31, 24);
 960     f(opc, 23, 21), f(imm, 20, 5), f(op2, 4, 2), f(LL, 1, 0);
 961   }
 962 
 963 #define INSN(NAME, opc, op2, LL)                \
 964   void NAME(unsigned imm) {                     \
 965     generate_exception(opc, op2, LL, imm);      \
 966   }
 967 
 968   INSN(svc, 0b000, 0, 0b01);
 969   INSN(hvc, 0b000, 0, 0b10);
 970   INSN(smc, 0b000, 0, 0b11);
 971   INSN(brk, 0b001, 0, 0b00);
 972   INSN(hlt, 0b010, 0, 0b00);
 973   INSN(dcps1, 0b101, 0, 0b01);
 974   INSN(dcps2, 0b101, 0, 0b10);
 975   INSN(dcps3, 0b101, 0, 0b11);
 976 
 977 #undef INSN
 978 
 979   // System
 980   void system(int op0, int op1, int CRn, int CRm, int op2,
 981               Register rt = dummy_reg)
 982   {
 983     starti;
 984     f(0b11010101000, 31, 21);
 985     f(op0, 20, 19);
 986     f(op1, 18, 16);
 987     f(CRn, 15, 12);
 988     f(CRm, 11, 8);
 989     f(op2, 7, 5);
 990     rf(rt, 0);
 991   }
 992 
 993   void hint(int imm) {
 994     system(0b00, 0b011, 0b0010, 0b0000, imm);
 995   }
 996 
 997   void nop() {
 998     hint(0);
 999   }
1000 
1001   void yield() {
1002     hint(1);
1003   }
1004 
1005   void wfe() {
1006     hint(2);
1007   }
1008 
1009   void wfi() {
1010     hint(3);
1011   }
1012 
1013   void sev() {
1014     hint(4);
1015   }
1016 
1017   void sevl() {
1018     hint(5);
1019   }
1020 
  // we only provide mrs and msr for the special purpose system
  // registers where op0 (instr[20:19]) == 11 and (currently) only
  // use them for FPSR. n.b. msr has L (instr[21]) == 0, mrs has L == 1
1024 
1025   void msr(int op1, int CRn, int CRm, int op2, Register rt) {
1026     starti;
1027     f(0b1101010100011, 31, 19);
1028     f(op1, 18, 16);
1029     f(CRn, 15, 12);
1030     f(CRm, 11, 8);
1031     f(op2, 7, 5);
1032     // writing zr is ok
1033     zrf(rt, 0);
1034   }
1035 
1036   void mrs(int op1, int CRn, int CRm, int op2, Register rt) {
1037     starti;
1038     f(0b1101010100111, 31, 19);
1039     f(op1, 18, 16);
1040     f(CRn, 15, 12);
1041     f(CRm, 11, 8);
1042     f(op2, 7, 5);
1043     // reading to zr is a mistake
1044     rf(rt, 0);
1045   }
1046 
1047   enum barrier {OSHLD = 0b0001, OSHST, OSH, NSHLD=0b0101, NSHST, NSH,
1048                 ISHLD = 0b1001, ISHST, ISH, LD=0b1101, ST, SY};
1049 
1050   void dsb(barrier imm) {
1051     system(0b00, 0b011, 0b00011, imm, 0b100);
1052   }
1053 
1054   void dmb(barrier imm) {
1055     system(0b00, 0b011, 0b00011, imm, 0b101);
1056   }
1057 
1058   void isb() {
1059     system(0b00, 0b011, 0b00011, SY, 0b110);
1060   }
1061 
1062   void sys(int op1, int CRn, int CRm, int op2,
1063            Register rt = as_Register(0b11111)) {
1064     system(0b01, op1, CRn, CRm, op2, rt);
1065   }
1066 
1067   // Only implement operations accessible from EL0 or higher, i.e.,
1068   //            op1    CRn    CRm    op2
1069   // IC IVAU     3      7      5      1
1070   // DC CVAC     3      7      10     1
1071   // DC CVAP     3      7      12     1
1072   // DC CVAU     3      7      11     1
1073   // DC CIVAC    3      7      14     1
1074   // DC ZVA      3      7      4      1
1075   // So only deal with the CRm field.
1076   enum icache_maintenance {IVAU = 0b0101};
1077   enum dcache_maintenance {CVAC = 0b1010, CVAP = 0b1100, CVAU = 0b1011, CIVAC = 0b1110, ZVA = 0b100};
1078 
1079   void dc(dcache_maintenance cm, Register Rt) {
1080     sys(0b011, 0b0111, cm, 0b001, Rt);
1081   }
1082 
1083   void ic(icache_maintenance cm, Register Rt) {
1084     sys(0b011, 0b0111, cm, 0b001, Rt);
1085   }
1086 
1087   // A more convenient access to dmb for our purposes
1088   enum Membar_mask_bits {
1089     // We can use ISH for a barrier because the ARM ARM says "This
1090     // architecture assumes that all Processing Elements that use the
1091     // same operating system or hypervisor are in the same Inner
1092     // Shareable shareability domain."
1093     StoreStore = ISHST,
1094     LoadStore  = ISHLD,
1095     LoadLoad   = ISHLD,
1096     StoreLoad  = ISH,
1097     AnyAny     = ISH
1098   };
1099 
1100   void membar(Membar_mask_bits order_constraint) {
1101     dmb(Assembler::barrier(order_constraint));
1102   }
1103 
1104   // Unconditional branch (register)
1105   void branch_reg(Register R, int opc) {
1106     starti;
1107     f(0b1101011, 31, 25);
1108     f(opc, 24, 21);
1109     f(0b11111000000, 20, 10);
1110     rf(R, 5);
1111     f(0b00000, 4, 0);
1112   }
1113 
1114 #define INSN(NAME, opc)                         \
1115   void NAME(Register R) {                       \
1116     branch_reg(R, opc);                         \
1117   }
1118 
1119   INSN(br, 0b0000);
1120   INSN(blr, 0b0001);
1121   INSN(ret, 0b0010);
1122 
1123   void ret(void *p); // This forces a compile-time error for ret(0)
1124 
1125 #undef INSN
1126 
1127 #define INSN(NAME, opc)                         \
1128   void NAME() {                 \
1129     branch_reg(dummy_reg, opc);         \
1130   }
1131 
1132   INSN(eret, 0b0100);
1133   INSN(drps, 0b0101);
1134 
1135 #undef INSN
1136 
1137   // Load/store exclusive
1138   enum operand_size { byte, halfword, word, xword };
1139 
1140   void load_store_exclusive(Register Rs, Register Rt1, Register Rt2,
1141     Register Rn, enum operand_size sz, int op, bool ordered) {
1142     starti;
1143     f(sz, 31, 30), f(0b001000, 29, 24), f(op, 23, 21);
1144     rf(Rs, 16), f(ordered, 15), zrf(Rt2, 10), srf(Rn, 5), zrf(Rt1, 0);
1145   }
1146 
1147   void load_exclusive(Register dst, Register addr,
1148                       enum operand_size sz, bool ordered) {
1149     load_store_exclusive(dummy_reg, dst, dummy_reg, addr,
1150                          sz, 0b010, ordered);
1151   }
1152 
1153   void store_exclusive(Register status, Register new_val, Register addr,
1154                        enum operand_size sz, bool ordered) {
1155     load_store_exclusive(status, new_val, dummy_reg, addr,
1156                          sz, 0b000, ordered);
1157   }
1158 
1159 #define INSN4(NAME, sz, op, o0) /* Four registers */                    \
1160   void NAME(Register Rs, Register Rt1, Register Rt2, Register Rn) {     \
1161     guarantee(Rs != Rn && Rs != Rt1 && Rs != Rt2, "unpredictable instruction"); \
1162     load_store_exclusive(Rs, Rt1, Rt2, Rn, sz, op, o0);                 \
1163   }
1164 
1165 #define INSN3(NAME, sz, op, o0) /* Three registers */                   \
1166   void NAME(Register Rs, Register Rt, Register Rn) {                    \
1167     guarantee(Rs != Rn && Rs != Rt, "unpredictable instruction");       \
1168     load_store_exclusive(Rs, Rt, dummy_reg, Rn, sz, op, o0); \
1169   }
1170 
1171 #define INSN2(NAME, sz, op, o0) /* Two registers */                     \
1172   void NAME(Register Rt, Register Rn) {                                 \
1173     load_store_exclusive(dummy_reg, Rt, dummy_reg, \
1174                          Rn, sz, op, o0);                               \
1175   }
1176 
1177 #define INSN_FOO(NAME, sz, op, o0) /* Three registers, encoded differently */ \
1178   void NAME(Register Rt1, Register Rt2, Register Rn) {                  \
1179     guarantee(Rt1 != Rt2, "unpredictable instruction");                 \
1180     load_store_exclusive(dummy_reg, Rt1, Rt2, Rn, sz, op, o0);          \
1181   }
1182 
1183   // bytes
1184   INSN3(stxrb, byte, 0b000, 0);
1185   INSN3(stlxrb, byte, 0b000, 1);
1186   INSN2(ldxrb, byte, 0b010, 0);
1187   INSN2(ldaxrb, byte, 0b010, 1);
1188   INSN2(stlrb, byte, 0b100, 1);
1189   INSN2(ldarb, byte, 0b110, 1);
1190 
1191   // halfwords
1192   INSN3(stxrh, halfword, 0b000, 0);
1193   INSN3(stlxrh, halfword, 0b000, 1);
1194   INSN2(ldxrh, halfword, 0b010, 0);
1195   INSN2(ldaxrh, halfword, 0b010, 1);
1196   INSN2(stlrh, halfword, 0b100, 1);
1197   INSN2(ldarh, halfword, 0b110, 1);
1198 
1199   // words
1200   INSN3(stxrw, word, 0b000, 0);
1201   INSN3(stlxrw, word, 0b000, 1);
1202   INSN4(stxpw, word, 0b001, 0);
1203   INSN4(stlxpw, word, 0b001, 1);
1204   INSN2(ldxrw, word, 0b010, 0);
1205   INSN2(ldaxrw, word, 0b010, 1);
1206   INSN_FOO(ldxpw, word, 0b011, 0);
1207   INSN_FOO(ldaxpw, word, 0b011, 1);
1208   INSN2(stlrw, word, 0b100, 1);
1209   INSN2(ldarw, word, 0b110, 1);
1210 
1211   // xwords
1212   INSN3(stxr, xword, 0b000, 0);
1213   INSN3(stlxr, xword, 0b000, 1);
1214   INSN4(stxp, xword, 0b001, 0);
1215   INSN4(stlxp, xword, 0b001, 1);
1216   INSN2(ldxr, xword, 0b010, 0);
1217   INSN2(ldaxr, xword, 0b010, 1);
1218   INSN_FOO(ldxp, xword, 0b011, 0);
1219   INSN_FOO(ldaxp, xword, 0b011, 1);
1220   INSN2(stlr, xword, 0b100, 1);
1221   INSN2(ldar, xword, 0b110, 1);
1222 
1223 #undef INSN2
1224 #undef INSN3
1225 #undef INSN4
1226 #undef INSN_FOO
1227 
1228   // 8.1 Compare and swap extensions
1229   void lse_cas(Register Rs, Register Rt, Register Rn,
1230                         enum operand_size sz, bool a, bool r, bool not_pair) {
1231     starti;
1232     if (! not_pair) { // Pair
1233       assert(sz == word || sz == xword, "invalid size");
1234       /* The size bit is in bit 30, not 31 */
1235       sz = (operand_size)(sz == word ? 0b00:0b01);
1236     }
1237     f(sz, 31, 30), f(0b001000, 29, 24), f(not_pair ? 1 : 0, 23), f(a, 22), f(1, 21);
1238     zrf(Rs, 16), f(r, 15), f(0b11111, 14, 10), srf(Rn, 5), zrf(Rt, 0);
1239   }
1240 
1241   // CAS
1242 #define INSN(NAME, a, r)                                                \
1243   void NAME(operand_size sz, Register Rs, Register Rt, Register Rn) {   \
1244     assert(Rs != Rn && Rs != Rt, "unpredictable instruction");          \
1245     lse_cas(Rs, Rt, Rn, sz, a, r, true);                                \
1246   }
1247   INSN(cas,    false, false)
1248   INSN(casa,   true,  false)
1249   INSN(casl,   false, true)
1250   INSN(casal,  true,  true)
1251 #undef INSN
1252 
1253   // CASP
1254 #define INSN(NAME, a, r)                                                \
1255   void NAME(operand_size sz, Register Rs, Register Rs1,                 \
1256             Register Rt, Register Rt1, Register Rn) {                   \
1257     assert((Rs->encoding() & 1) == 0 && (Rt->encoding() & 1) == 0 &&    \
1258            Rs->successor() == Rs1 && Rt->successor() == Rt1 &&          \
1259            Rs != Rn && Rs1 != Rn && Rs != Rt, "invalid registers");     \
1260     lse_cas(Rs, Rt, Rn, sz, a, r, false);                               \
1261   }
1262   INSN(casp,    false, false)
1263   INSN(caspa,   true,  false)
1264   INSN(caspl,   false, true)
1265   INSN(caspal,  true,  true)
1266 #undef INSN
1267 
1268   // 8.1 Atomic operations
1269   void lse_atomic(Register Rs, Register Rt, Register Rn,
1270                   enum operand_size sz, int op1, int op2, bool a, bool r) {
1271     starti;
1272     f(sz, 31, 30), f(0b111000, 29, 24), f(a, 23), f(r, 22), f(1, 21);
1273     zrf(Rs, 16), f(op1, 15), f(op2, 14, 12), f(0, 11, 10), srf(Rn, 5), zrf(Rt, 0);
1274   }
1275 
1276 #define INSN(NAME, NAME_A, NAME_L, NAME_AL, op1, op2)                   \
1277   void NAME(operand_size sz, Register Rs, Register Rt, Register Rn) {   \
1278     lse_atomic(Rs, Rt, Rn, sz, op1, op2, false, false);                 \
1279   }                                                                     \
1280   void NAME_A(operand_size sz, Register Rs, Register Rt, Register Rn) { \
1281     lse_atomic(Rs, Rt, Rn, sz, op1, op2, true, false);                  \
1282   }                                                                     \
1283   void NAME_L(operand_size sz, Register Rs, Register Rt, Register Rn) { \
1284     lse_atomic(Rs, Rt, Rn, sz, op1, op2, false, true);                  \
1285   }                                                                     \
1286   void NAME_AL(operand_size sz, Register Rs, Register Rt, Register Rn) {\
1287     lse_atomic(Rs, Rt, Rn, sz, op1, op2, true, true);                   \
1288   }
1289   INSN(ldadd,  ldadda,  ldaddl,  ldaddal,  0, 0b000);
1290   INSN(ldbic,  ldbica,  ldbicl,  ldbical,  0, 0b001);
1291   INSN(ldeor,  ldeora,  ldeorl,  ldeoral,  0, 0b010);
1292   INSN(ldorr,  ldorra,  ldorrl,  ldorral,  0, 0b011);
1293   INSN(ldsmax, ldsmaxa, ldsmaxl, ldsmaxal, 0, 0b100);
1294   INSN(ldsmin, ldsmina, ldsminl, ldsminal, 0, 0b101);
1295   INSN(ldumax, ldumaxa, ldumaxl, ldumaxal, 0, 0b110);
1296   INSN(ldumin, ldumina, lduminl, lduminal, 0, 0b111);
1297   INSN(swp,    swpa,    swpl,    swpal,    1, 0b000);
1298 #undef INSN
1299 
1300   // Load register (literal)
1301 #define INSN(NAME, opc, V)                                              \
1302   void NAME(Register Rt, address dest) {                                \
1303     int64_t offset = (dest - pc()) >> 2;                                \
1304     starti;                                                             \
1305     f(opc, 31, 30), f(0b011, 29, 27), f(V, 26), f(0b00, 25, 24),        \
1306       sf(offset, 23, 5);                                                \
1307     rf(Rt, 0);                                                          \
1308   }                                                                     \
1309   void NAME(Register Rt, address dest, relocInfo::relocType rtype) {    \
1310     InstructionMark im(this);                                           \
1311     guarantee(rtype == relocInfo::internal_word_type,                   \
1312               "only internal_word_type relocs make sense here");        \
1313     code_section()->relocate(inst_mark(), InternalAddress(dest).rspec()); \
1314     NAME(Rt, dest);                                                     \
1315   }                                                                     \
1316   void NAME(Register Rt, Label &L) {                                    \
1317     wrap_label(Rt, L, &Assembler::NAME);                                \
1318   }
1319 
1320   INSN(ldrw, 0b00, 0);
1321   INSN(ldr, 0b01, 0);
1322   INSN(ldrsw, 0b10, 0);
1323 
1324 #undef INSN
1325 
1326 #define INSN(NAME, opc, V)                                              \
1327   void NAME(FloatRegister Rt, address dest) {                           \
1328     int64_t offset = (dest - pc()) >> 2;                                \
1329     starti;                                                             \
1330     f(opc, 31, 30), f(0b011, 29, 27), f(V, 26), f(0b00, 25, 24),        \
1331       sf(offset, 23, 5);                                                \
1332     rf(as_Register(Rt), 0);                                             \
1333   }
1334 
1335   INSN(ldrs, 0b00, 1);
1336   INSN(ldrd, 0b01, 1);
1337   INSN(ldrq, 0b10, 1);
1338 
1339 #undef INSN
1340 
1341 #define INSN(NAME, size, opc)                                           \
1342   void NAME(FloatRegister Rt, Register Rn) {                            \
1343     starti;                                                             \
1344     f(size, 31, 30), f(0b111100, 29, 24), f(opc, 23, 22), f(0, 21);     \
1345     f(0, 20, 12), f(0b01, 11, 10);                                      \
1346     rf(Rn, 5), rf(as_Register(Rt), 0);                                  \
1347   }
1348 
1349   INSN(ldrs, 0b10, 0b01);
1350   INSN(ldrd, 0b11, 0b01);
1351   INSN(ldrq, 0b00, 0b11);
1352 
1353 #undef INSN
1354 
1355 
1356 #define INSN(NAME, opc, V)                                              \
1357   void NAME(address dest, prfop op = PLDL1KEEP) {                       \
1358     int64_t offset = (dest - pc()) >> 2;                                \
1359     starti;                                                             \
1360     f(opc, 31, 30), f(0b011, 29, 27), f(V, 26), f(0b00, 25, 24),        \
1361       sf(offset, 23, 5);                                                \
1362     f(op, 4, 0);                                                        \
1363   }                                                                     \
1364   void NAME(Label &L, prfop op = PLDL1KEEP) {                           \
1365     wrap_label(L, op, &Assembler::NAME);                                \
1366   }
1367 
1368   INSN(prfm, 0b11, 0);
1369 
1370 #undef INSN
1371 
1372   // Load/store
1373   void ld_st1(int opc, int p1, int V, int L,
1374               Register Rt1, Register Rt2, Address adr, bool no_allocate) {
1375     starti;
1376     f(opc, 31, 30), f(p1, 29, 27), f(V, 26), f(L, 22);
1377     zrf(Rt2, 10), zrf(Rt1, 0);
1378     if (no_allocate) {
1379       adr.encode_nontemporal_pair(&current_insn);
1380     } else {
1381       adr.encode_pair(&current_insn);
1382     }
1383   }
1384 
1385   // Load/store register pair (offset)
1386 #define INSN(NAME, size, p1, V, L, no_allocate)         \
1387   void NAME(Register Rt1, Register Rt2, Address adr) {  \
1388     ld_st1(size, p1, V, L, Rt1, Rt2, adr, no_allocate); \
1389    }
1390 
1391   INSN(stpw, 0b00, 0b101, 0, 0, false);
1392   INSN(ldpw, 0b00, 0b101, 0, 1, false);
1393   INSN(ldpsw, 0b01, 0b101, 0, 1, false);
1394   INSN(stp, 0b10, 0b101, 0, 0, false);
1395   INSN(ldp, 0b10, 0b101, 0, 1, false);
1396 
1397   // Load/store no-allocate pair (offset)
1398   INSN(stnpw, 0b00, 0b101, 0, 0, true);
1399   INSN(ldnpw, 0b00, 0b101, 0, 1, true);
1400   INSN(stnp, 0b10, 0b101, 0, 0, true);
1401   INSN(ldnp, 0b10, 0b101, 0, 1, true);
1402 
1403 #undef INSN
1404 
1405 #define INSN(NAME, size, p1, V, L, no_allocate)                         \
1406   void NAME(FloatRegister Rt1, FloatRegister Rt2, Address adr) {        \
1407     ld_st1(size, p1, V, L,                                              \
1408            as_Register(Rt1), as_Register(Rt2), adr, no_allocate);       \
1409    }
1410 
1411   INSN(stps, 0b00, 0b101, 1, 0, false);
1412   INSN(ldps, 0b00, 0b101, 1, 1, false);
1413   INSN(stpd, 0b01, 0b101, 1, 0, false);
1414   INSN(ldpd, 0b01, 0b101, 1, 1, false);
1415   INSN(stpq, 0b10, 0b101, 1, 0, false);
1416   INSN(ldpq, 0b10, 0b101, 1, 1, false);
1417 
1418 #undef INSN
1419 
1420   // Load/store register (all modes)
1421   void ld_st2(Register Rt, const Address &adr, int size, int op, int V = 0) {
1422     starti;
1423 
1424     f(V, 26); // general reg?
1425     zrf(Rt, 0);
1426 
1427     // Encoding for literal loads is done here (rather than pushed
1428     // down into Address::encode) because the encoding of this
1429     // instruction is too different from all of the other forms to
1430     // make it worth sharing.
1431     if (adr.getMode() == Address::literal) {
1432       assert(size == 0b10 || size == 0b11, "bad operand size in ldr");
1433       assert(op == 0b01, "literal form can only be used with loads");
1434       f(size & 0b01, 31, 30), f(0b011, 29, 27), f(0b00, 25, 24);
1435       int64_t offset = (adr.target() - pc()) >> 2;
1436       sf(offset, 23, 5);
1437       code_section()->relocate(pc(), adr.rspec());
1438       return;
1439     }
1440 
1441     f(size, 31, 30);
1442     f(op, 23, 22); // str
1443     adr.encode(&current_insn);
1444   }
1445 
1446 #define INSN(NAME, size, op)                            \
1447   void NAME(Register Rt, const Address &adr) {          \
1448     ld_st2(Rt, adr, size, op);                          \
1449   }                                                     \
1450 
1451   INSN(str, 0b11, 0b00);
1452   INSN(strw, 0b10, 0b00);
1453   INSN(strb, 0b00, 0b00);
1454   INSN(strh, 0b01, 0b00);
1455 
1456   INSN(ldr, 0b11, 0b01);
1457   INSN(ldrw, 0b10, 0b01);
1458   INSN(ldrb, 0b00, 0b01);
1459   INSN(ldrh, 0b01, 0b01);
1460 
1461   INSN(ldrsb, 0b00, 0b10);
1462   INSN(ldrsbw, 0b00, 0b11);
1463   INSN(ldrsh, 0b01, 0b10);
1464   INSN(ldrshw, 0b01, 0b11);
1465   INSN(ldrsw, 0b10, 0b10);
1466 
1467 #undef INSN
1468 
1469 #define INSN(NAME, size, op)                                    \
1470   void NAME(const Address &adr, prfop pfop = PLDL1KEEP) {       \
1471     ld_st2(as_Register(pfop), adr, size, op);                   \
1472   }
1473 
1474   INSN(prfm, 0b11, 0b10); // FIXME: PRFM should not be used with
1475                           // writeback modes, but the assembler
1476                           // doesn't enfore that.
1477 
1478 #undef INSN
1479 
1480 #define INSN(NAME, size, op)                            \
1481   void NAME(FloatRegister Rt, const Address &adr) {     \
1482     ld_st2(as_Register(Rt), adr, size, op, 1);          \
1483   }
1484 
1485   INSN(strd, 0b11, 0b00);
1486   INSN(strs, 0b10, 0b00);
1487   INSN(ldrd, 0b11, 0b01);
1488   INSN(ldrs, 0b10, 0b01);
1489   INSN(strq, 0b00, 0b10);
1490   INSN(ldrq, 0x00, 0b11);
1491 
1492 #undef INSN
1493 
1494 /* SIMD extensions
1495  *
1496  * We just use FloatRegister in the following. They are exactly the same
1497  * as SIMD registers.
1498  */
1499 public:
1500 
1501   enum SIMD_Arrangement {
1502     T8B, T16B, T4H, T8H, T2S, T4S, T1D, T2D, T1Q, INVALID_ARRANGEMENT
1503   };
1504 
1505   enum SIMD_RegVariant {
1506       B, H, S, D, Q, INVALID
1507   };
1508 
1509 private:
1510 
1511   static SIMD_Arrangement _esize2arrangement_table[9][2];
1512   static SIMD_RegVariant _esize2regvariant[9];
1513 
1514 public:
1515 
1516   static SIMD_Arrangement esize2arrangement(unsigned esize, bool isQ);
1517   static SIMD_RegVariant elemType_to_regVariant(BasicType bt);
1518   static SIMD_RegVariant elemBytes_to_regVariant(unsigned esize);
1519 
1520   enum shift_kind { LSL, LSR, ASR, ROR };
1521 
1522   void op_shifted_reg(Instruction_aarch64 &current_insn, unsigned decode,
1523                       enum shift_kind kind, unsigned shift,
1524                       unsigned size, unsigned op) {
1525     f(size, 31);
1526     f(op, 30, 29);
1527     f(decode, 28, 24);
1528     f(shift, 15, 10);
1529     f(kind, 23, 22);
1530   }
1531 
1532   // Logical (shifted register)
1533 #define INSN(NAME, size, op, N)                                         \
1534   void NAME(Register Rd, Register Rn, Register Rm,                      \
1535             enum shift_kind kind = LSL, unsigned shift = 0) {           \
1536     starti;                                                             \
1537     guarantee(size == 1 || shift < 32, "incorrect shift");              \
1538     f(N, 21);                                                           \
1539     zrf(Rm, 16), zrf(Rn, 5), zrf(Rd, 0);                                \
1540     op_shifted_reg(current_insn, 0b01010, kind, shift, size, op);       \
1541   }
1542 
1543   INSN(andr, 1, 0b00, 0);
1544   INSN(orr, 1, 0b01, 0);
1545   INSN(eor, 1, 0b10, 0);
1546   INSN(ands, 1, 0b11, 0);
1547   INSN(andw, 0, 0b00, 0);
1548   INSN(orrw, 0, 0b01, 0);
1549   INSN(eorw, 0, 0b10, 0);
1550   INSN(andsw, 0, 0b11, 0);
1551 
1552 #undef INSN
1553 
1554 #define INSN(NAME, size, op, N)                                         \
1555   void NAME(Register Rd, Register Rn, Register Rm,                      \
1556             enum shift_kind kind = LSL, unsigned shift = 0) {           \
1557     starti;                                                             \
1558     f(N, 21);                                                           \
1559     zrf(Rm, 16), zrf(Rn, 5), zrf(Rd, 0);                                \
1560     op_shifted_reg(current_insn, 0b01010, kind, shift, size, op);       \
1561   }                                                                     \
1562                                                                         \
1563   /* These instructions have no immediate form. Provide an overload so  \
1564      that if anyone does try to use an immediate operand -- this has    \
1565      happened! -- we'll get a compile-time error. */                    \
1566   void NAME(Register Rd, Register Rn, unsigned imm,                     \
1567             enum shift_kind kind = LSL, unsigned shift = 0) {           \
1568     assert(false, " can't be used with immediate operand");             \
1569   }
1570 
1571   INSN(bic, 1, 0b00, 1);
1572   INSN(orn, 1, 0b01, 1);
1573   INSN(eon, 1, 0b10, 1);
1574   INSN(bics, 1, 0b11, 1);
1575   INSN(bicw, 0, 0b00, 1);
1576   INSN(ornw, 0, 0b01, 1);
1577   INSN(eonw, 0, 0b10, 1);
1578   INSN(bicsw, 0, 0b11, 1);
1579 
1580 #undef INSN
1581 
1582 #ifdef _WIN64
1583 // In MSVC, `mvn` is defined as a macro and it affects compilation
1584 #undef mvn
1585 #endif
1586 
1587   // Aliases for short forms of orn
1588 void mvn(Register Rd, Register Rm,
1589             enum shift_kind kind = LSL, unsigned shift = 0) {
1590   orn(Rd, zr, Rm, kind, shift);
1591 }
1592 
1593 void mvnw(Register Rd, Register Rm,
1594             enum shift_kind kind = LSL, unsigned shift = 0) {
1595   ornw(Rd, zr, Rm, kind, shift);
1596 }
1597 
1598   // Add/subtract (shifted register)
1599 #define INSN(NAME, size, op)                            \
1600   void NAME(Register Rd, Register Rn, Register Rm,      \
1601             enum shift_kind kind, unsigned shift = 0) { \
1602     starti;                                             \
1603     f(0, 21);                                           \
1604     assert_cond(kind != ROR);                           \
1605     guarantee(size == 1 || shift < 32, "incorrect shift");\
1606     zrf(Rd, 0), zrf(Rn, 5), zrf(Rm, 16);                \
1607     op_shifted_reg(current_insn, 0b01011, kind, shift, size, op);      \
1608   }
1609 
1610   INSN(add, 1, 0b000);
1611   INSN(sub, 1, 0b10);
1612   INSN(addw, 0, 0b000);
1613   INSN(subw, 0, 0b10);
1614 
1615   INSN(adds, 1, 0b001);
1616   INSN(subs, 1, 0b11);
1617   INSN(addsw, 0, 0b001);
1618   INSN(subsw, 0, 0b11);
1619 
1620 #undef INSN
1621 
1622   // Add/subtract (extended register)
1623 #define INSN(NAME, op)                                                  \
1624   void NAME(Register Rd, Register Rn, Register Rm,                      \
1625            ext::operation option, int amount = 0) {                     \
1626     starti;                                                             \
1627     zrf(Rm, 16), srf(Rn, 5), srf(Rd, 0);                                \
1628     add_sub_extended_reg(current_insn, op, 0b01011, Rd, Rn, Rm, 0b00, option, amount); \
1629   }
1630 
1631   void add_sub_extended_reg(Instruction_aarch64 &current_insn, unsigned op, unsigned decode,
1632     Register Rd, Register Rn, Register Rm,
1633     unsigned opt, ext::operation option, unsigned imm) {
1634     guarantee(imm <= 4, "shift amount must be <= 4");
1635     f(op, 31, 29), f(decode, 28, 24), f(opt, 23, 22), f(1, 21);
1636     f(option, 15, 13), f(imm, 12, 10);
1637   }
1638 
1639   INSN(addw, 0b000);
1640   INSN(subw, 0b010);
1641   INSN(add, 0b100);
1642   INSN(sub, 0b110);
1643 
1644 #undef INSN
1645 
1646 #define INSN(NAME, op)                                                  \
1647   void NAME(Register Rd, Register Rn, Register Rm,                      \
1648            ext::operation option, int amount = 0) {                     \
1649     starti;                                                             \
1650     zrf(Rm, 16), srf(Rn, 5), zrf(Rd, 0);                                \
1651     add_sub_extended_reg(current_insn, op, 0b01011, Rd, Rn, Rm, 0b00, option, amount); \
1652   }
1653 
1654   INSN(addsw, 0b001);
1655   INSN(subsw, 0b011);
1656   INSN(adds, 0b101);
1657   INSN(subs, 0b111);
1658 
1659 #undef INSN
1660 
1661   // Aliases for short forms of add and sub
1662 #define INSN(NAME)                                      \
1663   void NAME(Register Rd, Register Rn, Register Rm) {    \
1664     if (Rd == sp || Rn == sp)                           \
1665       NAME(Rd, Rn, Rm, ext::uxtx);                      \
1666     else                                                \
1667       NAME(Rd, Rn, Rm, LSL);                            \
1668   }
1669 
1670   INSN(addw);
1671   INSN(subw);
1672   INSN(add);
1673   INSN(sub);
1674 
1675   INSN(addsw);
1676   INSN(subsw);
1677   INSN(adds);
1678   INSN(subs);
1679 
1680 #undef INSN
1681 
1682   // Add/subtract (with carry)
1683   void add_sub_carry(unsigned op, Register Rd, Register Rn, Register Rm) {
1684     starti;
1685     f(op, 31, 29);
1686     f(0b11010000, 28, 21);
1687     f(0b000000, 15, 10);
1688     zrf(Rm, 16), zrf(Rn, 5), zrf(Rd, 0);
1689   }
1690 
1691   #define INSN(NAME, op)                                \
1692     void NAME(Register Rd, Register Rn, Register Rm) {  \
1693       add_sub_carry(op, Rd, Rn, Rm);                    \
1694     }
1695 
1696   INSN(adcw, 0b000);
1697   INSN(adcsw, 0b001);
1698   INSN(sbcw, 0b010);
1699   INSN(sbcsw, 0b011);
1700   INSN(adc, 0b100);
1701   INSN(adcs, 0b101);
1702   INSN(sbc,0b110);
1703   INSN(sbcs, 0b111);
1704 
1705 #undef INSN
1706 
1707   // Conditional compare (both kinds)
1708   void conditional_compare(unsigned op, int o1, int o2, int o3,
1709                            Register Rn, unsigned imm5, unsigned nzcv,
1710                            unsigned cond) {
1711     starti;
1712     f(op, 31, 29);
1713     f(0b11010010, 28, 21);
1714     f(cond, 15, 12);
1715     f(o1, 11);
1716     f(o2, 10);
1717     f(o3, 4);
1718     f(nzcv, 3, 0);
1719     f(imm5, 20, 16), zrf(Rn, 5);
1720   }
1721 
1722 #define INSN(NAME, op)                                                  \
1723   void NAME(Register Rn, Register Rm, int imm, Condition cond) {        \
1724     int regNumber = (Rm == zr ? 31 : (uintptr_t)Rm);                    \
1725     conditional_compare(op, 0, 0, 0, Rn, regNumber, imm, cond);         \
1726   }                                                                     \
1727                                                                         \
1728   void NAME(Register Rn, int imm5, int imm, Condition cond) {           \
1729     conditional_compare(op, 1, 0, 0, Rn, imm5, imm, cond);              \
1730   }
1731 
1732   INSN(ccmnw, 0b001);
1733   INSN(ccmpw, 0b011);
1734   INSN(ccmn, 0b101);
1735   INSN(ccmp, 0b111);
1736 
1737 #undef INSN
1738 
1739   // Conditional select
1740   void conditional_select(unsigned op, unsigned op2,
1741                           Register Rd, Register Rn, Register Rm,
1742                           unsigned cond) {
1743     starti;
1744     f(op, 31, 29);
1745     f(0b11010100, 28, 21);
1746     f(cond, 15, 12);
1747     f(op2, 11, 10);
1748     zrf(Rm, 16), zrf(Rn, 5), rf(Rd, 0);
1749   }
1750 
1751 #define INSN(NAME, op, op2)                                             \
1752   void NAME(Register Rd, Register Rn, Register Rm, Condition cond) {    \
1753     conditional_select(op, op2, Rd, Rn, Rm, cond);                      \
1754   }
1755 
1756   INSN(cselw, 0b000, 0b00);
1757   INSN(csincw, 0b000, 0b01);
1758   INSN(csinvw, 0b010, 0b00);
1759   INSN(csnegw, 0b010, 0b01);
1760   INSN(csel, 0b100, 0b00);
1761   INSN(csinc, 0b100, 0b01);
1762   INSN(csinv, 0b110, 0b00);
1763   INSN(csneg, 0b110, 0b01);
1764 
1765 #undef INSN
1766 
1767   // Data processing
1768   void data_processing(Instruction_aarch64 &current_insn, unsigned op29, unsigned opcode,
1769                        Register Rd, Register Rn) {
1770     f(op29, 31, 29), f(0b11010110, 28, 21);
1771     f(opcode, 15, 10);
1772     rf(Rn, 5), rf(Rd, 0);
1773   }
1774 
1775   // (1 source)
1776 #define INSN(NAME, op29, opcode2, opcode)                       \
1777   void NAME(Register Rd, Register Rn) {                         \
1778     starti;                                                     \
1779     f(opcode2, 20, 16);                                         \
1780     data_processing(current_insn, op29, opcode, Rd, Rn);        \
1781   }
1782 
1783   INSN(rbitw,  0b010, 0b00000, 0b00000);
1784   INSN(rev16w, 0b010, 0b00000, 0b00001);
1785   INSN(revw,   0b010, 0b00000, 0b00010);
1786   INSN(clzw,   0b010, 0b00000, 0b00100);
1787   INSN(clsw,   0b010, 0b00000, 0b00101);
1788 
1789   INSN(rbit,   0b110, 0b00000, 0b00000);
1790   INSN(rev16,  0b110, 0b00000, 0b00001);
1791   INSN(rev32,  0b110, 0b00000, 0b00010);
1792   INSN(rev,    0b110, 0b00000, 0b00011);
1793   INSN(clz,    0b110, 0b00000, 0b00100);
1794   INSN(cls,    0b110, 0b00000, 0b00101);
1795 
1796 #undef INSN
1797 
1798   // (2 sources)
1799 #define INSN(NAME, op29, opcode)                                \
1800   void NAME(Register Rd, Register Rn, Register Rm) {            \
1801     starti;                                                     \
1802     rf(Rm, 16);                                                 \
1803     data_processing(current_insn, op29, opcode, Rd, Rn);        \
1804   }
1805 
1806   INSN(udivw, 0b000, 0b000010);
1807   INSN(sdivw, 0b000, 0b000011);
1808   INSN(lslvw, 0b000, 0b001000);
1809   INSN(lsrvw, 0b000, 0b001001);
1810   INSN(asrvw, 0b000, 0b001010);
1811   INSN(rorvw, 0b000, 0b001011);
1812 
1813   INSN(udiv, 0b100, 0b000010);
1814   INSN(sdiv, 0b100, 0b000011);
1815   INSN(lslv, 0b100, 0b001000);
1816   INSN(lsrv, 0b100, 0b001001);
1817   INSN(asrv, 0b100, 0b001010);
1818   INSN(rorv, 0b100, 0b001011);
1819 
1820 #undef INSN
1821 
1822   // (3 sources)
1823   void data_processing(unsigned op54, unsigned op31, unsigned o0,
1824                        Register Rd, Register Rn, Register Rm,
1825                        Register Ra) {
1826     starti;
1827     f(op54, 31, 29), f(0b11011, 28, 24);
1828     f(op31, 23, 21), f(o0, 15);
1829     zrf(Rm, 16), zrf(Ra, 10), zrf(Rn, 5), zrf(Rd, 0);
1830   }
1831 
1832 #define INSN(NAME, op54, op31, o0)                                      \
1833   void NAME(Register Rd, Register Rn, Register Rm, Register Ra) {       \
1834     data_processing(op54, op31, o0, Rd, Rn, Rm, Ra);                    \
1835   }
1836 
1837   INSN(maddw, 0b000, 0b000, 0);
1838   INSN(msubw, 0b000, 0b000, 1);
1839   INSN(madd, 0b100, 0b000, 0);
1840   INSN(msub, 0b100, 0b000, 1);
1841   INSN(smaddl, 0b100, 0b001, 0);
1842   INSN(smsubl, 0b100, 0b001, 1);
1843   INSN(umaddl, 0b100, 0b101, 0);
1844   INSN(umsubl, 0b100, 0b101, 1);
1845 
1846 #undef INSN
1847 
1848 #define INSN(NAME, op54, op31, o0)                                      \
1849   void NAME(Register Rd, Register Rn, Register Rm) {                    \
1850     data_processing(op54, op31, o0, Rd, Rn, Rm, as_Register(31));       \
1851   }
1852 
1853   INSN(smulh, 0b100, 0b010, 0);
1854   INSN(umulh, 0b100, 0b110, 0);
1855 
1856 #undef INSN
1857 
1858   // Floating-point data-processing (1 source)
1859   void data_processing(unsigned op31, unsigned type, unsigned opcode,
1860                        FloatRegister Vd, FloatRegister Vn) {
1861     starti;
1862     f(op31, 31, 29);
1863     f(0b11110, 28, 24);
1864     f(type, 23, 22), f(1, 21), f(opcode, 20, 15), f(0b10000, 14, 10);
1865     rf(Vn, 5), rf(Vd, 0);
1866   }
1867 
1868 #define INSN(NAME, op31, type, opcode)                  \
1869   void NAME(FloatRegister Vd, FloatRegister Vn) {       \
1870     data_processing(op31, type, opcode, Vd, Vn);        \
1871   }
1872 
1873 private:
1874   INSN(i_fmovs, 0b000, 0b00, 0b000000);
1875 public:
1876   INSN(fabss, 0b000, 0b00, 0b000001);
1877   INSN(fnegs, 0b000, 0b00, 0b000010);
1878   INSN(fsqrts, 0b000, 0b00, 0b000011);
1879   INSN(fcvts, 0b000, 0b00, 0b000101);   // Single-precision to double-precision
1880 
1881 private:
1882   INSN(i_fmovd, 0b000, 0b01, 0b000000);
1883 public:
1884   INSN(fabsd, 0b000, 0b01, 0b000001);
1885   INSN(fnegd, 0b000, 0b01, 0b000010);
1886   INSN(fsqrtd, 0b000, 0b01, 0b000011);
1887   INSN(fcvtd, 0b000, 0b01, 0b000100);   // Double-precision to single-precision
1888 
1889   void fmovd(FloatRegister Vd, FloatRegister Vn) {
1890     assert(Vd != Vn, "should be");
1891     i_fmovd(Vd, Vn);
1892   }
1893 
1894   void fmovs(FloatRegister Vd, FloatRegister Vn) {
1895     assert(Vd != Vn, "should be");
1896     i_fmovs(Vd, Vn);
1897   }
1898 
1899 private:
1900   void _fcvt_narrow_extend(FloatRegister Vd, SIMD_Arrangement Ta,
1901                            FloatRegister Vn, SIMD_Arrangement Tb, bool do_extend) {
1902     assert((do_extend && (Tb >> 1) + 1 == (Ta >> 1))
1903            || (!do_extend && (Ta >> 1) + 1 == (Tb >> 1)), "Incompatible arrangement");
1904     starti;
1905     int op30 = (do_extend ? Tb : Ta) & 1;
1906     int op22 = ((do_extend ? Ta : Tb) >> 1) & 1;
1907     f(0, 31), f(op30, 30), f(0b0011100, 29, 23), f(op22, 22);
1908     f(0b100001011, 21, 13), f(do_extend ? 1 : 0, 12), f(0b10, 11, 10);
1909     rf(Vn, 5), rf(Vd, 0);
1910   }
1911 
1912 public:
1913   void fcvtl(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn,  SIMD_Arrangement Tb) {
1914     assert(Tb == T4H || Tb == T8H|| Tb == T2S || Tb == T4S, "invalid arrangement");
1915     _fcvt_narrow_extend(Vd, Ta, Vn, Tb, true);
1916   }
1917 
1918   void fcvtn(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn,  SIMD_Arrangement Tb) {
1919     assert(Ta == T4H || Ta == T8H|| Ta == T2S || Ta == T4S, "invalid arrangement");
1920     _fcvt_narrow_extend(Vd, Ta, Vn, Tb, false);
1921   }
1922 
1923 #undef INSN
1924 
1925   // Floating-point data-processing (2 source)
1926   void data_processing(unsigned op31, unsigned type, unsigned opcode,
1927                        FloatRegister Vd, FloatRegister Vn, FloatRegister Vm) {
1928     starti;
1929     f(op31, 31, 29);
1930     f(0b11110, 28, 24);
1931     f(type, 23, 22), f(1, 21), f(opcode, 15, 10);
1932     rf(Vm, 16), rf(Vn, 5), rf(Vd, 0);
1933   }
1934 
1935 #define INSN(NAME, op31, type, opcode)                  \
1936   void NAME(FloatRegister Vd, FloatRegister Vn, FloatRegister Vm) {     \
1937     data_processing(op31, type, opcode, Vd, Vn, Vm);    \
1938   }
1939 
1940   INSN(fabds,  0b011, 0b10, 0b110101);
1941   INSN(fmuls,  0b000, 0b00, 0b000010);
1942   INSN(fdivs,  0b000, 0b00, 0b000110);
1943   INSN(fadds,  0b000, 0b00, 0b001010);
1944   INSN(fsubs,  0b000, 0b00, 0b001110);
1945   INSN(fmaxs,  0b000, 0b00, 0b010010);
1946   INSN(fmins,  0b000, 0b00, 0b010110);
1947   INSN(fnmuls, 0b000, 0b00, 0b100010);
1948 
1949   INSN(fabdd,  0b011, 0b11, 0b110101);
1950   INSN(fmuld,  0b000, 0b01, 0b000010);
1951   INSN(fdivd,  0b000, 0b01, 0b000110);
1952   INSN(faddd,  0b000, 0b01, 0b001010);
1953   INSN(fsubd,  0b000, 0b01, 0b001110);
1954   INSN(fmaxd,  0b000, 0b01, 0b010010);
1955   INSN(fmind,  0b000, 0b01, 0b010110);
1956   INSN(fnmuld, 0b000, 0b01, 0b100010);
1957 
1958 #undef INSN
1959 
1960    // Floating-point data-processing (3 source)
1961   void data_processing(unsigned op31, unsigned type, unsigned o1, unsigned o0,
1962                        FloatRegister Vd, FloatRegister Vn, FloatRegister Vm,
1963                        FloatRegister Va) {
1964     starti;
1965     f(op31, 31, 29);
1966     f(0b11111, 28, 24);
1967     f(type, 23, 22), f(o1, 21), f(o0, 15);
1968     rf(Vm, 16), rf(Va, 10), rf(Vn, 5), rf(Vd, 0);
1969   }
1970 
1971 #define INSN(NAME, op31, type, o1, o0)                                  \
1972   void NAME(FloatRegister Vd, FloatRegister Vn, FloatRegister Vm,       \
1973             FloatRegister Va) {                                         \
1974     data_processing(op31, type, o1, o0, Vd, Vn, Vm, Va);                \
1975   }
1976 
1977   INSN(fmadds, 0b000, 0b00, 0, 0);
1978   INSN(fmsubs, 0b000, 0b00, 0, 1);
1979   INSN(fnmadds, 0b000, 0b00, 1, 0);
1980   INSN(fnmsubs, 0b000, 0b00, 1, 1);
1981 
1982   INSN(fmaddd, 0b000, 0b01, 0, 0);
1983   INSN(fmsubd, 0b000, 0b01, 0, 1);
1984   INSN(fnmaddd, 0b000, 0b01, 1, 0);
1985   INSN(fnmsub, 0b000, 0b01, 1, 1);
1986 
1987 #undef INSN
1988 
1989    // Floating-point conditional select
1990   void fp_conditional_select(unsigned op31, unsigned type,
1991                              unsigned op1, unsigned op2,
1992                              Condition cond, FloatRegister Vd,
1993                              FloatRegister Vn, FloatRegister Vm) {
1994     starti;
1995     f(op31, 31, 29);
1996     f(0b11110, 28, 24);
1997     f(type, 23, 22);
1998     f(op1, 21, 21);
1999     f(op2, 11, 10);
2000     f(cond, 15, 12);
2001     rf(Vm, 16), rf(Vn, 5), rf(Vd, 0);
2002   }
2003 
2004 #define INSN(NAME, op31, type, op1, op2)                                \
2005   void NAME(FloatRegister Vd, FloatRegister Vn,                         \
2006             FloatRegister Vm, Condition cond) {                         \
2007     fp_conditional_select(op31, type, op1, op2, cond, Vd, Vn, Vm);      \
2008   }
2009 
2010   INSN(fcsels, 0b000, 0b00, 0b1, 0b11);
2011   INSN(fcseld, 0b000, 0b01, 0b1, 0b11);
2012 
2013 #undef INSN
2014 
2015    // Floating-point<->integer conversions
2016   void float_int_convert(unsigned op31, unsigned type,
2017                          unsigned rmode, unsigned opcode,
2018                          Register Rd, Register Rn) {
2019     starti;
2020     f(op31, 31, 29);
2021     f(0b11110, 28, 24);
2022     f(type, 23, 22), f(1, 21), f(rmode, 20, 19);
2023     f(opcode, 18, 16), f(0b000000, 15, 10);
2024     zrf(Rn, 5), zrf(Rd, 0);
2025   }
2026 
2027 #define INSN(NAME, op31, type, rmode, opcode)                           \
2028   void NAME(Register Rd, FloatRegister Vn) {                            \
2029     float_int_convert(op31, type, rmode, opcode, Rd, as_Register(Vn));  \
2030   }
2031 
2032   INSN(fcvtzsw, 0b000, 0b00, 0b11, 0b000);
2033   INSN(fcvtzs,  0b100, 0b00, 0b11, 0b000);
2034   INSN(fcvtzdw, 0b000, 0b01, 0b11, 0b000);
2035   INSN(fcvtzd,  0b100, 0b01, 0b11, 0b000);
2036 
2037   INSN(fmovs, 0b000, 0b00, 0b00, 0b110);
2038   INSN(fmovd, 0b100, 0b01, 0b00, 0b110);
2039 
2040   INSN(fmovhid, 0b100, 0b10, 0b01, 0b110);
2041 
2042 #undef INSN
2043 
2044 #define INSN(NAME, op31, type, rmode, opcode)                           \
2045   void NAME(FloatRegister Vd, Register Rn) {                            \
2046     float_int_convert(op31, type, rmode, opcode, as_Register(Vd), Rn);  \
2047   }
2048 
2049   INSN(fmovs, 0b000, 0b00, 0b00, 0b111);
2050   INSN(fmovd, 0b100, 0b01, 0b00, 0b111);
2051 
2052   INSN(scvtfws, 0b000, 0b00, 0b00, 0b010);
2053   INSN(scvtfs,  0b100, 0b00, 0b00, 0b010);
2054   INSN(scvtfwd, 0b000, 0b01, 0b00, 0b010);
2055   INSN(scvtfd,  0b100, 0b01, 0b00, 0b010);
2056 
2057   // INSN(fmovhid, 0b100, 0b10, 0b01, 0b111);
2058 
2059 #undef INSN
2060 
2061   enum sign_kind { SIGNED, UNSIGNED };
2062 
2063 private:
2064   void _xcvtf_scalar_integer(sign_kind sign, unsigned sz,
2065                              FloatRegister Rd, FloatRegister Rn) {
2066     starti;
2067     f(0b01, 31, 30), f(sign == SIGNED ? 0 : 1, 29);
2068     f(0b111100, 27, 23), f((sz >> 1) & 1, 22), f(0b100001110110, 21, 10);
2069     rf(Rn, 5), rf(Rd, 0);
2070   }
2071 
2072 public:
2073 #define INSN(NAME, sign, sz)                        \
2074   void NAME(FloatRegister Rd, FloatRegister Rn) {   \
2075     _xcvtf_scalar_integer(sign, sz, Rd, Rn);        \
2076   }
2077 
2078   INSN(scvtfs, SIGNED, 0);
2079   INSN(scvtfd, SIGNED, 1);
2080 
2081 #undef INSN
2082 
2083 private:
2084   void _xcvtf_vector_integer(sign_kind sign, SIMD_Arrangement T,
2085                              FloatRegister Rd, FloatRegister Rn) {
2086     assert(T == T2S || T == T4S || T == T2D, "invalid arrangement");
2087     starti;
2088     f(0, 31), f(T & 1, 30), f(sign == SIGNED ? 0 : 1, 29);
2089     f(0b011100, 28, 23), f((T >> 1) & 1, 22), f(0b100001110110, 21, 10);
2090     rf(Rn, 5), rf(Rd, 0);
2091   }
2092 
2093 public:
2094   void scvtfv(SIMD_Arrangement T, FloatRegister Rd, FloatRegister Rn) {
2095     _xcvtf_vector_integer(SIGNED, T, Rd, Rn);
2096   }
2097 
2098   // Floating-point compare
2099   void float_compare(unsigned op31, unsigned type,
2100                      unsigned op, unsigned op2,
2101                      FloatRegister Vn, FloatRegister Vm = as_FloatRegister(0)) {
2102     starti;
2103     f(op31, 31, 29);
2104     f(0b11110, 28, 24);
2105     f(type, 23, 22), f(1, 21);
2106     f(op, 15, 14), f(0b1000, 13, 10), f(op2, 4, 0);
2107     rf(Vn, 5), rf(Vm, 16);
2108   }
2109 
2110 
2111 #define INSN(NAME, op31, type, op, op2)                 \
2112   void NAME(FloatRegister Vn, FloatRegister Vm) {       \
2113     float_compare(op31, type, op, op2, Vn, Vm);         \
2114   }
2115 
2116 #define INSN1(NAME, op31, type, op, op2)        \
2117   void NAME(FloatRegister Vn, double d) {       \
2118     assert_cond(d == 0.0);                      \
2119     float_compare(op31, type, op, op2, Vn);     \
2120   }
2121 
2122   INSN(fcmps, 0b000, 0b00, 0b00, 0b00000);
2123   INSN1(fcmps, 0b000, 0b00, 0b00, 0b01000);
2124   // INSN(fcmpes, 0b000, 0b00, 0b00, 0b10000);
2125   // INSN1(fcmpes, 0b000, 0b00, 0b00, 0b11000);
2126 
2127   INSN(fcmpd, 0b000,   0b01, 0b00, 0b00000);
2128   INSN1(fcmpd, 0b000,  0b01, 0b00, 0b01000);
2129   // INSN(fcmped, 0b000,  0b01, 0b00, 0b10000);
2130   // INSN1(fcmped, 0b000, 0b01, 0b00, 0b11000);
2131 
2132 #undef INSN
2133 #undef INSN1
2134 
2135 // Floating-point compare. 3-registers versions (scalar).
2136 #define INSN(NAME, sz, e)                                             \
2137   void NAME(FloatRegister Vd, FloatRegister Vn, FloatRegister Vm) {   \
2138     starti;                                                           \
2139     f(0b01111110, 31, 24), f(e, 23), f(sz, 22), f(1, 21), rf(Vm, 16); \
2140     f(0b111011, 15, 10), rf(Vn, 5), rf(Vd, 0);                        \
2141   }                                                                   \
2142 
2143   INSN(facged, 1, 0); // facge-double
2144   INSN(facges, 0, 0); // facge-single
2145   INSN(facgtd, 1, 1); // facgt-double
2146   INSN(facgts, 0, 1); // facgt-single
2147 
2148 #undef INSN
2149 
2150   // Floating-point Move (immediate)
2151 private:
2152   unsigned pack(double value);
2153 
2154   void fmov_imm(FloatRegister Vn, double value, unsigned size) {
2155     starti;
2156     f(0b00011110, 31, 24), f(size, 23, 22), f(1, 21);
2157     f(pack(value), 20, 13), f(0b10000000, 12, 5);
2158     rf(Vn, 0);
2159   }
2160 
2161 public:
2162 
2163   void fmovs(FloatRegister Vn, double value) {
2164     if (value)
2165       fmov_imm(Vn, value, 0b00);
2166     else
2167       movi(Vn, T2S, 0);
2168   }
2169   void fmovd(FloatRegister Vn, double value) {
2170     if (value)
2171       fmov_imm(Vn, value, 0b01);
2172     else
2173       movi(Vn, T1D, 0);
2174   }
2175 
2176    // Floating-point rounding
2177    // type: half-precision = 11
2178    //       single         = 00
2179    //       double         = 01
2180    // rmode: A = Away     = 100
2181    //        I = current  = 111
2182    //        M = MinusInf = 010
2183    //        N = eveN     = 000
2184    //        P = PlusInf  = 001
2185    //        X = eXact    = 110
2186    //        Z = Zero     = 011
2187   void float_round(unsigned type, unsigned rmode, FloatRegister Rd, FloatRegister Rn) {
2188     starti;
2189     f(0b00011110, 31, 24);
2190     f(type, 23, 22);
2191     f(0b1001, 21, 18);
2192     f(rmode, 17, 15);
2193     f(0b10000, 14, 10);
2194     rf(Rn, 5), rf(Rd, 0);
2195   }
2196 #define INSN(NAME, type, rmode)                   \
2197   void NAME(FloatRegister Vd, FloatRegister Vn) { \
2198     float_round(type, rmode, Vd, Vn);             \
2199   }
2200 
2201 public:
2202   INSN(frintah, 0b11, 0b100);
2203   INSN(frintih, 0b11, 0b111);
2204   INSN(frintmh, 0b11, 0b010);
2205   INSN(frintnh, 0b11, 0b000);
2206   INSN(frintph, 0b11, 0b001);
2207   INSN(frintxh, 0b11, 0b110);
2208   INSN(frintzh, 0b11, 0b011);
2209 
2210   INSN(frintas, 0b00, 0b100);
2211   INSN(frintis, 0b00, 0b111);
2212   INSN(frintms, 0b00, 0b010);
2213   INSN(frintns, 0b00, 0b000);
2214   INSN(frintps, 0b00, 0b001);
2215   INSN(frintxs, 0b00, 0b110);
2216   INSN(frintzs, 0b00, 0b011);
2217 
2218   INSN(frintad, 0b01, 0b100);
2219   INSN(frintid, 0b01, 0b111);
2220   INSN(frintmd, 0b01, 0b010);
2221   INSN(frintnd, 0b01, 0b000);
2222   INSN(frintpd, 0b01, 0b001);
2223   INSN(frintxd, 0b01, 0b110);
2224   INSN(frintzd, 0b01, 0b011);
2225 #undef INSN
2226 
2227 private:
2228   static short SIMD_Size_in_bytes[];
2229 
2230 public:
2231 #define INSN(NAME, op)                                                  \
2232   void NAME(FloatRegister Rt, SIMD_RegVariant T, const Address &adr) {  \
2233     ld_st2(as_Register(Rt), adr, (int)T & 3, op + ((T==Q) ? 0b10:0b00), 1); \
2234   }
2235 
2236   INSN(ldr, 1);
2237   INSN(str, 0);
2238 
2239 #undef INSN
2240 
2241  private:
2242 
2243   void ld_st(FloatRegister Vt, SIMD_Arrangement T, Register Xn, int op1, int op2) {
2244     starti;
2245     f(0,31), f((int)T & 1, 30);
2246     f(op1, 29, 21), f(0, 20, 16), f(op2, 15, 12);
2247     f((int)T >> 1, 11, 10), srf(Xn, 5), rf(Vt, 0);
2248   }
2249   void ld_st(FloatRegister Vt, SIMD_Arrangement T, Register Xn,
2250              int imm, int op1, int op2, int regs) {
2251 
2252     bool replicate = op2 >> 2 == 3;
2253     // post-index value (imm) is formed differently for replicate/non-replicate ld* instructions
2254     int expectedImmediate = replicate ? regs * (1 << (T >> 1)) : SIMD_Size_in_bytes[T] * regs;
2255     guarantee(T < T1Q , "incorrect arrangement");
2256     guarantee(imm == expectedImmediate, "bad offset");
2257     starti;
2258     f(0,31), f((int)T & 1, 30);
2259     f(op1 | 0b100, 29, 21), f(0b11111, 20, 16), f(op2, 15, 12);
2260     f((int)T >> 1, 11, 10), srf(Xn, 5), rf(Vt, 0);
2261   }
2262   void ld_st(FloatRegister Vt, SIMD_Arrangement T, Register Xn,
2263              Register Xm, int op1, int op2) {
2264     starti;
2265     f(0,31), f((int)T & 1, 30);
2266     f(op1 | 0b100, 29, 21), rf(Xm, 16), f(op2, 15, 12);
2267     f((int)T >> 1, 11, 10), srf(Xn, 5), rf(Vt, 0);
2268   }
2269 
2270   void ld_st(FloatRegister Vt, SIMD_Arrangement T, Address a, int op1, int op2, int regs) {
2271     switch (a.getMode()) {
2272     case Address::base_plus_offset:
2273       guarantee(a.offset() == 0, "no offset allowed here");
2274       ld_st(Vt, T, a.base(), op1, op2);
2275       break;
2276     case Address::post:
2277       ld_st(Vt, T, a.base(), a.offset(), op1, op2, regs);
2278       break;
2279     case Address::post_reg:
2280       ld_st(Vt, T, a.base(), a.index(), op1, op2);
2281       break;
2282     default:
2283       ShouldNotReachHere();
2284     }
2285   }
2286 
2287  public:
2288 
2289 #define INSN1(NAME, op1, op2)                                           \
2290   void NAME(FloatRegister Vt, SIMD_Arrangement T, const Address &a) {   \
2291     ld_st(Vt, T, a, op1, op2, 1);                                       \
2292  }
2293 
2294 #define INSN2(NAME, op1, op2)                                           \
2295   void NAME(FloatRegister Vt, FloatRegister Vt2, SIMD_Arrangement T, const Address &a) { \
2296     assert(Vt->successor() == Vt2, "Registers must be ordered");        \
2297     ld_st(Vt, T, a, op1, op2, 2);                                       \
2298   }
2299 
2300 #define INSN3(NAME, op1, op2)                                           \
2301   void NAME(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3,     \
2302             SIMD_Arrangement T, const Address &a) {                     \
2303     assert(Vt->successor() == Vt2 && Vt2->successor() == Vt3,           \
2304            "Registers must be ordered");                                \
2305     ld_st(Vt, T, a, op1, op2, 3);                                       \
2306   }
2307 
2308 #define INSN4(NAME, op1, op2)                                           \
2309   void NAME(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3,     \
2310             FloatRegister Vt4, SIMD_Arrangement T, const Address &a) {  \
2311     assert(Vt->successor() == Vt2 && Vt2->successor() == Vt3 &&         \
2312            Vt3->successor() == Vt4, "Registers must be ordered");       \
2313     ld_st(Vt, T, a, op1, op2, 4);                                       \
2314   }
2315 
2316   INSN1(ld1,  0b001100010, 0b0111);
2317   INSN2(ld1,  0b001100010, 0b1010);
2318   INSN3(ld1,  0b001100010, 0b0110);
2319   INSN4(ld1,  0b001100010, 0b0010);
2320 
2321   INSN2(ld2,  0b001100010, 0b1000);
2322   INSN3(ld3,  0b001100010, 0b0100);
2323   INSN4(ld4,  0b001100010, 0b0000);
2324 
2325   INSN1(st1,  0b001100000, 0b0111);
2326   INSN2(st1,  0b001100000, 0b1010);
2327   INSN3(st1,  0b001100000, 0b0110);
2328   INSN4(st1,  0b001100000, 0b0010);
2329 
2330   INSN2(st2,  0b001100000, 0b1000);
2331   INSN3(st3,  0b001100000, 0b0100);
2332   INSN4(st4,  0b001100000, 0b0000);
2333 
2334   INSN1(ld1r, 0b001101010, 0b1100);
2335   INSN2(ld2r, 0b001101011, 0b1100);
2336   INSN3(ld3r, 0b001101010, 0b1110);
2337   INSN4(ld4r, 0b001101011, 0b1110);
2338 
2339 #undef INSN1
2340 #undef INSN2
2341 #undef INSN3
2342 #undef INSN4
2343 
2344 #define INSN(NAME, opc)                                                                 \
2345   void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { \
2346     starti;                                                                             \
2347     assert(T == T8B || T == T16B, "must be T8B or T16B");                               \
2348     f(0, 31), f((int)T & 1, 30), f(opc, 29, 21);                                        \
2349     rf(Vm, 16), f(0b000111, 15, 10), rf(Vn, 5), rf(Vd, 0);                              \
2350   }
2351 
2352   INSN(eor,  0b101110001);
2353   INSN(orr,  0b001110101);
2354   INSN(andr, 0b001110001);
2355   INSN(bic,  0b001110011);
2356   INSN(bif,  0b101110111);
2357   INSN(bit,  0b101110101);
2358   INSN(bsl,  0b101110011);
2359   INSN(orn,  0b001110111);
2360 
2361 #undef INSN
2362 
2363 #define INSN(NAME, opc, opc2, acceptT2D)                                                \
2364   void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { \
2365     guarantee(T != T1Q && T != T1D, "incorrect arrangement");                           \
2366     if (!acceptT2D) guarantee(T != T2D, "incorrect arrangement");                       \
2367     starti;                                                                             \
2368     f(0, 31), f((int)T & 1, 30), f(opc, 29), f(0b01110, 28, 24);                        \
2369     f((int)T >> 1, 23, 22), f(1, 21), rf(Vm, 16), f(opc2, 15, 10);                      \
2370     rf(Vn, 5), rf(Vd, 0);                                                               \
2371   }
2372 
2373   INSN(addv,   0, 0b100001, true);  // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
2374   INSN(subv,   1, 0b100001, true);  // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
2375   INSN(uqsubv, 1, 0b001011, true);  // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
2376   INSN(mulv,   0, 0b100111, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
2377   INSN(mlav,   0, 0b100101, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
2378   INSN(mlsv,   1, 0b100101, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
2379   INSN(sshl,   0, 0b010001, true);  // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
2380   INSN(ushl,   1, 0b010001, true);  // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
2381   INSN(addpv,  0, 0b101111, true);  // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
2382   INSN(smullv, 0, 0b110000, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
2383   INSN(umullv, 1, 0b110000, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
2384   INSN(umlalv, 1, 0b100000, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
2385   INSN(maxv,   0, 0b011001, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
2386   INSN(minv,   0, 0b011011, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
2387   INSN(smaxp,  0, 0b101001, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
2388   INSN(sminp,  0, 0b101011, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
2389   INSN(cmeq,   1, 0b100011, true);  // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
2390   INSN(cmgt,   0, 0b001101, true);  // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
2391   INSN(cmge,   0, 0b001111, true);  // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
2392   INSN(cmhi,   1, 0b001101, true);  // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
2393   INSN(cmhs,   1, 0b001111, true);  // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
2394 
2395 #undef INSN
2396 
2397 #define INSN(NAME, opc, opc2, accepted) \
2398   void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) {                   \
2399     guarantee(T != T1Q && T != T1D, "incorrect arrangement");                           \
2400     if (accepted < 3) guarantee(T != T2D, "incorrect arrangement");                     \
2401     if (accepted < 2) guarantee(T != T2S, "incorrect arrangement");                     \
2402     if (accepted < 1) guarantee(T == T8B || T == T16B, "incorrect arrangement");        \
2403     starti;                                                                             \
2404     f(0, 31), f((int)T & 1, 30), f(opc, 29), f(0b01110, 28, 24);                        \
2405     f((int)T >> 1, 23, 22), f(opc2, 21, 10);                                            \
2406     rf(Vn, 5), rf(Vd, 0);                                                               \
2407   }
2408 
2409   INSN(absr,   0, 0b100000101110, 3); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
2410   INSN(negr,   1, 0b100000101110, 3); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
2411   INSN(notr,   1, 0b100000010110, 0); // accepted arrangements: T8B, T16B
2412   INSN(addv,   0, 0b110001101110, 1); // accepted arrangements: T8B, T16B, T4H, T8H,      T4S
2413   INSN(smaxv,  0, 0b110000101010, 1); // accepted arrangements: T8B, T16B, T4H, T8H,      T4S
2414   INSN(umaxv,  1, 0b110000101010, 1); // accepted arrangements: T8B, T16B, T4H, T8H,      T4S
2415   INSN(sminv,  0, 0b110001101010, 1); // accepted arrangements: T8B, T16B, T4H, T8H,      T4S
2416   INSN(uminv,  1, 0b110001101010, 1); // accepted arrangements: T8B, T16B, T4H, T8H,      T4S
2417   INSN(cls,    0, 0b100000010010, 2); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
2418   INSN(clz,    1, 0b100000010010, 2); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
2419   INSN(cnt,    0, 0b100000010110, 0); // accepted arrangements: T8B, T16B
2420   INSN(uaddlp, 1, 0b100000001010, 2); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S
2421   INSN(uaddlv, 1, 0b110000001110, 1); // accepted arrangements: T8B, T16B, T4H, T8H,      T4S
2422 
2423 #undef INSN
2424 
2425 #define INSN(NAME, opc) \
2426   void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) {                  \
2427     starti;                                                                            \
2428     assert(T == T4S, "arrangement must be T4S");                                       \
2429     f(0, 31), f((int)T & 1, 30), f(0b101110, 29, 24), f(opc, 23),                      \
2430     f(T == T4S ? 0 : 1, 22), f(0b110000111110, 21, 10); rf(Vn, 5), rf(Vd, 0);          \
2431   }
2432 
2433   INSN(fmaxv, 0);
2434   INSN(fminv, 1);
2435 
2436 #undef INSN
2437 
2438 #define INSN(NAME, op0, cmode0) \
2439   void NAME(FloatRegister Vd, SIMD_Arrangement T, unsigned imm8, unsigned lsl = 0) {   \
2440     unsigned cmode = cmode0;                                                           \
2441     unsigned op = op0;                                                                 \
2442     starti;                                                                            \
2443     assert(lsl == 0 ||                                                                 \
2444            ((T == T4H || T == T8H) && lsl == 8) ||                                     \
2445            ((T == T2S || T == T4S) && ((lsl >> 3) < 4) && ((lsl & 7) == 0)), "invalid shift");\
2446     cmode |= lsl >> 2;                                                                 \
2447     if (T == T4H || T == T8H) cmode |= 0b1000;                                         \
2448     if (!(T == T4H || T == T8H || T == T2S || T == T4S)) {                             \
2449       assert(op == 0 && cmode0 == 0, "must be MOVI");                                  \
2450       cmode = 0b1110;                                                                  \
2451       if (T == T1D || T == T2D) op = 1;                                                \
2452     }                                                                                  \
2453     f(0, 31), f((int)T & 1, 30), f(op, 29), f(0b0111100000, 28, 19);                   \
2454     f(imm8 >> 5, 18, 16), f(cmode, 15, 12), f(0x01, 11, 10), f(imm8 & 0b11111, 9, 5);  \
2455     rf(Vd, 0);                                                                         \
2456   }
2457 
2458   INSN(movi, 0, 0);
2459   INSN(orri, 0, 1);
2460   INSN(mvni, 1, 0);
2461   INSN(bici, 1, 1);
2462 
2463 #undef INSN
2464 
2465 #define INSN(NAME, op1, op2, op3) \
2466   void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { \
2467     starti;                                                                             \
2468     assert(T == T2S || T == T4S || T == T2D, "invalid arrangement");                    \
2469     f(0, 31), f((int)T & 1, 30), f(op1, 29), f(0b01110, 28, 24), f(op2, 23);            \
2470     f(T==T2D ? 1:0, 22); f(1, 21), rf(Vm, 16), f(op3, 15, 10), rf(Vn, 5), rf(Vd, 0);    \
2471   }
2472 
2473   INSN(fabd, 1, 1, 0b110101);
2474   INSN(fadd, 0, 0, 0b110101);
2475   INSN(fdiv, 1, 0, 0b111111);
2476   INSN(fmul, 1, 0, 0b110111);
2477   INSN(fsub, 0, 1, 0b110101);
2478   INSN(fmla, 0, 0, 0b110011);
2479   INSN(fmls, 0, 1, 0b110011);
2480   INSN(fmax, 0, 0, 0b111101);
2481   INSN(fmin, 0, 1, 0b111101);
2482   INSN(fcmeq, 0, 0, 0b111001);
2483   INSN(fcmgt, 1, 1, 0b111001);
2484   INSN(fcmge, 1, 0, 0b111001);
2485 
2486 #undef INSN
2487 
2488 #define INSN(NAME, opc)                                                                 \
2489   void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { \
2490     starti;                                                                             \
2491     assert(T == T4S, "arrangement must be T4S");                                        \
2492     f(0b01011110000, 31, 21), rf(Vm, 16), f(opc, 15, 10), rf(Vn, 5), rf(Vd, 0);         \
2493   }
2494 
2495   INSN(sha1c,     0b000000);
2496   INSN(sha1m,     0b001000);
2497   INSN(sha1p,     0b000100);
2498   INSN(sha1su0,   0b001100);
2499   INSN(sha256h2,  0b010100);
2500   INSN(sha256h,   0b010000);
2501   INSN(sha256su1, 0b011000);
2502 
2503 #undef INSN
2504 
2505 #define INSN(NAME, opc)                                                                 \
2506   void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) {                   \
2507     starti;                                                                             \
2508     assert(T == T4S, "arrangement must be T4S");                                        \
2509     f(0b0101111000101000, 31, 16), f(opc, 15, 10), rf(Vn, 5), rf(Vd, 0);                \
2510   }
2511 
2512   INSN(sha1h,     0b000010);
2513   INSN(sha1su1,   0b000110);
2514   INSN(sha256su0, 0b001010);
2515 
2516 #undef INSN
2517 
2518 #define INSN(NAME, opc)                                                                 \
2519   void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { \
2520     starti;                                                                             \
2521     assert(T == T2D, "arrangement must be T2D");                                        \
2522     f(0b11001110011, 31, 21), rf(Vm, 16), f(opc, 15, 10), rf(Vn, 5), rf(Vd, 0);         \
2523   }
2524 
2525   INSN(sha512h,   0b100000);
2526   INSN(sha512h2,  0b100001);
2527   INSN(sha512su1, 0b100010);
2528 
2529 #undef INSN
2530 
2531 #define INSN(NAME, opc)                                                                 \
2532   void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) {                   \
2533     starti;                                                                             \
2534     assert(T == T2D, "arrangement must be T2D");                                        \
2535     f(opc, 31, 10), rf(Vn, 5), rf(Vd, 0);                                               \
2536   }
2537 
2538   INSN(sha512su0, 0b1100111011000000100000);
2539 
2540 #undef INSN
2541 
2542 #define INSN(NAME, opc)                                                                                   \
2543   void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm, FloatRegister Va) { \
2544     starti;                                                                                               \
2545     assert(T == T16B, "arrangement must be T16B");                                                        \
2546     f(0b11001110, 31, 24), f(opc, 23, 21), rf(Vm, 16), f(0b0, 15, 15), rf(Va, 10), rf(Vn, 5), rf(Vd, 0);  \
2547   }
2548 
2549   INSN(eor3, 0b000);
2550   INSN(bcax, 0b001);
2551 
2552 #undef INSN
2553 
2554 #define INSN(NAME, opc)                                                                               \
2555   void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm, unsigned imm) { \
2556     starti;                                                                                           \
2557     assert(T == T2D, "arrangement must be T2D");                                                      \
2558     f(0b11001110, 31, 24), f(opc, 23, 21), rf(Vm, 16), f(imm, 15, 10), rf(Vn, 5), rf(Vd, 0);          \
2559   }
2560 
2561   INSN(xar, 0b100);
2562 
2563 #undef INSN
2564 
2565 #define INSN(NAME, opc)                                                                           \
2566   void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) {           \
2567     starti;                                                                                       \
2568     assert(T == T2D, "arrangement must be T2D");                                                  \
2569     f(0b11001110, 31, 24), f(opc, 23, 21), rf(Vm, 16), f(0b100011, 15, 10), rf(Vn, 5), rf(Vd, 0); \
2570   }
2571 
2572   INSN(rax1, 0b011);
2573 
2574 #undef INSN
2575 
2576 #define INSN(NAME, opc)                           \
2577   void NAME(FloatRegister Vd, FloatRegister Vn) { \
2578     starti;                                       \
2579     f(opc, 31, 10), rf(Vn, 5), rf(Vd, 0);         \
2580   }
2581 
2582   INSN(aese, 0b0100111000101000010010);
2583   INSN(aesd, 0b0100111000101000010110);
2584   INSN(aesmc, 0b0100111000101000011010);
2585   INSN(aesimc, 0b0100111000101000011110);
2586 
2587 #undef INSN
2588 
2589 #define INSN(NAME, op1, op2) \
2590   void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm, int index = 0) { \
2591     starti;                                                                                            \
2592     assert(T == T2S || T == T4S || T == T2D, "invalid arrangement");                                   \
2593     assert(index >= 0 && ((T == T2D && index <= 1) || (T != T2D && index <= 3)), "invalid index");     \
2594     f(0, 31), f((int)T & 1, 30), f(op1, 29); f(0b011111, 28, 23);                                      \
2595     f(T == T2D ? 1 : 0, 22), f(T == T2D ? 0 : index & 1, 21), rf(Vm, 16);                              \
2596     f(op2, 15, 12), f(T == T2D ? index : (index >> 1), 11), f(0, 10);                                  \
2597     rf(Vn, 5), rf(Vd, 0);                                                                              \
2598   }
2599 
2600   // FMLA/FMLS - Vector - Scalar
2601   INSN(fmlavs, 0, 0b0001);
2602   INSN(fmlsvs, 0, 0b0101);
2603   // FMULX - Vector - Scalar
2604   INSN(fmulxvs, 1, 0b1001);
2605 
2606 #undef INSN
2607 
2608   // Floating-point Reciprocal Estimate
2609   void frecpe(FloatRegister Vd, FloatRegister Vn, SIMD_RegVariant type) {
2610     assert(type == D || type == S, "Wrong type for frecpe");
2611     starti;
2612     f(0b010111101, 31, 23);
2613     f(type == D ? 1 : 0, 22);
2614     f(0b100001110110, 21, 10);
2615     rf(Vn, 5), rf(Vd, 0);
2616   }
2617 
2618   // (long) {a, b} -> (a + b)
2619   void addpd(FloatRegister Vd, FloatRegister Vn) {
2620     starti;
2621     f(0b0101111011110001101110, 31, 10);
2622     rf(Vn, 5), rf(Vd, 0);
2623   }
2624 
2625   // Floating-point AdvSIMD scalar pairwise
2626 #define INSN(NAME, op1, op2) \
2627   void NAME(FloatRegister Vd, FloatRegister Vn, SIMD_RegVariant type) {                 \
2628     starti;                                                                             \
2629     assert(type == D || type == S, "Wrong type for faddp/fmaxp/fminp");                 \
2630     f(0b0111111, 31, 25), f(op1, 24, 23),                                               \
2631     f(type == S ? 0 : 1, 22), f(0b11000, 21, 17), f(op2, 16, 10), rf(Vn, 5), rf(Vd, 0); \
2632   }
2633 
2634   INSN(faddp, 0b00, 0b0110110);
2635   INSN(fmaxp, 0b00, 0b0111110);
2636   INSN(fminp, 0b01, 0b0111110);
2637 
2638 #undef INSN
2639 
2640   void ins(FloatRegister Vd, SIMD_RegVariant T, FloatRegister Vn, int didx, int sidx) {
2641     starti;
2642     assert(T != Q, "invalid register variant");
2643     f(0b01101110000, 31, 21), f(((didx<<1)|1)<<(int)T, 20, 16), f(0, 15);
2644     f(sidx<<(int)T, 14, 11), f(1, 10), rf(Vn, 5), rf(Vd, 0);
2645   }
2646 
2647 #define INSN(NAME, cond, op1, op2)                                                      \
2648   void NAME(Register Rd, FloatRegister Vn, SIMD_RegVariant T, int idx) {                \
2649     starti;                                                                             \
2650     assert(cond, "invalid register variant");                                           \
2651     f(0, 31), f(op1, 30), f(0b001110000, 29, 21);                                       \
2652     f(((idx << 1) | 1) << (int)T, 20, 16), f(op2, 15, 10);                              \
2653     rf(Vn, 5), rf(Rd, 0);                                                               \
2654   }
2655 
2656   INSN(umov, (T != Q), (T == D ? 1 : 0), 0b001111);
2657   INSN(smov, (T < D),  1,                0b001011);
2658 
2659 #undef INSN
2660 
2661 #define INSN(NAME, opc, opc2, isSHR)                                    \
2662   void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, int shift){ \
2663     starti;                                                             \
2664     /* The encodings for the immh:immb fields (bits 22:16) in *SHR are  \
2665      *   0001 xxx       8B/16B, shift = 16  - UInt(immh:immb)           \
2666      *   001x xxx       4H/8H,  shift = 32  - UInt(immh:immb)           \
2667      *   01xx xxx       2S/4S,  shift = 64  - UInt(immh:immb)           \
2668      *   1xxx xxx       1D/2D,  shift = 128 - UInt(immh:immb)           \
2669      *   (1D is RESERVED)                                               \
2670      * for SHL shift is calculated as:                                  \
2671      *   0001 xxx       8B/16B, shift = UInt(immh:immb) - 8             \
2672      *   001x xxx       4H/8H,  shift = UInt(immh:immb) - 16            \
2673      *   01xx xxx       2S/4S,  shift = UInt(immh:immb) - 32            \
2674      *   1xxx xxx       1D/2D,  shift = UInt(immh:immb) - 64            \
2675      *   (1D is RESERVED)                                               \
2676      */                                                                 \
2677     guarantee(!isSHR || (isSHR && (shift != 0)), "impossible encoding");\
2678     assert((1 << ((T>>1)+3)) > shift, "Invalid Shift value");           \
2679     int cVal = (1 << (((T >> 1) + 3) + (isSHR ? 1 : 0)));               \
2680     int encodedShift = isSHR ? cVal - shift : cVal + shift;             \
2681     f(0, 31), f(T & 1, 30), f(opc, 29), f(0b011110, 28, 23),            \
2682     f(encodedShift, 22, 16); f(opc2, 15, 10), rf(Vn, 5), rf(Vd, 0);     \
2683   }
2684 
2685   INSN(shl,  0, 0b010101, /* isSHR = */ false);
2686   INSN(sshr, 0, 0b000001, /* isSHR = */ true);
2687   INSN(ushr, 1, 0b000001, /* isSHR = */ true);
2688   INSN(usra, 1, 0b000101, /* isSHR = */ true);
2689   INSN(ssra, 0, 0b000101, /* isSHR = */ true);
2690 
2691 #undef INSN
2692 
2693 #define INSN(NAME, opc, opc2, isSHR)                                    \
2694   void NAME(FloatRegister Vd, FloatRegister Vn, int shift){             \
2695     starti;                                                             \
2696     int encodedShift = isSHR ? 128 - shift : 64 + shift;                \
2697     f(0b01, 31, 30), f(opc, 29), f(0b111110, 28, 23),                   \
2698     f(encodedShift, 22, 16); f(opc2, 15, 10), rf(Vn, 5), rf(Vd, 0);     \
2699   }
2700 
2701   INSN(shld,  0, 0b010101, /* isSHR = */ false);
2702   INSN(sshrd, 0, 0b000001, /* isSHR = */ true);
2703   INSN(ushrd, 1, 0b000001, /* isSHR = */ true);
2704 
2705 #undef INSN
2706 
2707 private:
2708   void _xshll(sign_kind sign, FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) {
2709     starti;
2710     /* The encodings for the immh:immb fields (bits 22:16) are
2711      *   0001 xxx       8H, 8B/16B shift = xxx
2712      *   001x xxx       4S, 4H/8H  shift = xxxx
2713      *   01xx xxx       2D, 2S/4S  shift = xxxxx
2714      *   1xxx xxx       RESERVED
2715      */
2716     assert((Tb >> 1) + 1 == (Ta >> 1), "Incompatible arrangement");
2717     assert((1 << ((Tb>>1)+3)) > shift, "Invalid shift value");
2718     f(0, 31), f(Tb & 1, 30), f(sign == SIGNED ? 0 : 1, 29), f(0b011110, 28, 23);
2719     f((1 << ((Tb>>1)+3))|shift, 22, 16);
2720     f(0b101001, 15, 10), rf(Vn, 5), rf(Vd, 0);
2721   }
2722 
2723 public:
2724   void ushll(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn,  SIMD_Arrangement Tb, int shift) {
2725     assert(Tb == T8B || Tb == T4H || Tb == T2S, "invalid arrangement");
2726     _xshll(UNSIGNED, Vd, Ta, Vn, Tb, shift);
2727   }
2728 
2729   void ushll2(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn,  SIMD_Arrangement Tb, int shift) {
2730     assert(Tb == T16B || Tb == T8H || Tb == T4S, "invalid arrangement");
2731     _xshll(UNSIGNED, Vd, Ta, Vn, Tb, shift);
2732   }
2733 
2734   void uxtl(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn,  SIMD_Arrangement Tb) {
2735     ushll(Vd, Ta, Vn, Tb, 0);
2736   }
2737 
2738   void sshll(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn,  SIMD_Arrangement Tb, int shift) {
2739     assert(Tb == T8B || Tb == T4H || Tb == T2S, "invalid arrangement");
2740     _xshll(SIGNED, Vd, Ta, Vn, Tb, shift);
2741   }
2742 
2743   void sshll2(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn,  SIMD_Arrangement Tb, int shift) {
2744     assert(Tb == T16B || Tb == T8H || Tb == T4S, "invalid arrangement");
2745     _xshll(SIGNED, Vd, Ta, Vn, Tb, shift);
2746   }
2747 
2748   void sxtl(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn,  SIMD_Arrangement Tb) {
2749     sshll(Vd, Ta, Vn, Tb, 0);
2750   }
2751 
2752   // Move from general purpose register
2753   //   mov  Vd.T[index], Rn
2754   void mov(FloatRegister Vd, SIMD_Arrangement T, int index, Register Xn) {
2755     starti;
2756     f(0b01001110000, 31, 21), f(((1 << (T >> 1)) | (index << ((T >> 1) + 1))), 20, 16);
2757     f(0b000111, 15, 10), zrf(Xn, 5), rf(Vd, 0);
2758   }
2759 
2760   // Move to general purpose register
2761   //   mov  Rd, Vn.T[index]
2762   void mov(Register Xd, FloatRegister Vn, SIMD_Arrangement T, int index) {
2763     guarantee(T >= T2S && T < T1Q, "only D and S arrangements are supported");
2764     starti;
2765     f(0, 31), f((T >= T1D) ? 1:0, 30), f(0b001110000, 29, 21);
2766     f(((1 << (T >> 1)) | (index << ((T >> 1) + 1))), 20, 16);
2767     f(0b001111, 15, 10), rf(Vn, 5), rf(Xd, 0);
2768   }
2769 
2770 private:
2771   void _pmull(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, FloatRegister Vm, SIMD_Arrangement Tb) {
2772     starti;
2773     assert((Ta == T1Q && (Tb == T1D || Tb == T2D)) ||
2774            (Ta == T8H && (Tb == T8B || Tb == T16B)), "Invalid Size specifier");
2775     int size = (Ta == T1Q) ? 0b11 : 0b00;
2776     f(0, 31), f(Tb & 1, 30), f(0b001110, 29, 24), f(size, 23, 22);
2777     f(1, 21), rf(Vm, 16), f(0b111000, 15, 10), rf(Vn, 5), rf(Vd, 0);
2778   }
2779 
2780 public:
2781   void pmull(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, FloatRegister Vm, SIMD_Arrangement Tb) {
2782     assert(Tb == T1D || Tb == T8B, "pmull assumes T1D or T8B as the second size specifier");
2783     _pmull(Vd, Ta, Vn, Vm, Tb);
2784   }
2785 
2786   void pmull2(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, FloatRegister Vm, SIMD_Arrangement Tb) {
2787     assert(Tb == T2D || Tb == T16B, "pmull2 assumes T2D or T16B as the second size specifier");
2788     _pmull(Vd, Ta, Vn, Vm, Tb);
2789   }
2790 
2791   void uqxtn(FloatRegister Vd, SIMD_Arrangement Tb, FloatRegister Vn, SIMD_Arrangement Ta) {
2792     starti;
2793     int size_b = (int)Tb >> 1;
2794     int size_a = (int)Ta >> 1;
2795     assert(size_b < 3 && size_b == size_a - 1, "Invalid size specifier");
2796     f(0, 31), f(Tb & 1, 30), f(0b101110, 29, 24), f(size_b, 23, 22);
2797     f(0b100001010010, 21, 10), rf(Vn, 5), rf(Vd, 0);
2798   }
2799 
2800   void xtn(FloatRegister Vd, SIMD_Arrangement Tb, FloatRegister Vn, SIMD_Arrangement Ta) {
2801     starti;
2802     int size_b = (int)Tb >> 1;
2803     int size_a = (int)Ta >> 1;
2804     assert(size_b < 3 && size_b == size_a - 1, "Invalid size specifier");
2805     f(0, 31), f(Tb & 1, 30), f(0b001110, 29, 24), f(size_b, 23, 22);
2806     f(0b100001001010, 21, 10), rf(Vn, 5), rf(Vd, 0);
2807   }
2808 
2809   void dup(FloatRegister Vd, SIMD_Arrangement T, Register Xs)
2810   {
2811     starti;
2812     assert(T != T1D, "reserved encoding");
2813     f(0,31), f((int)T & 1, 30), f(0b001110000, 29, 21);
2814     f((1 << (T >> 1)), 20, 16), f(0b000011, 15, 10), zrf(Xs, 5), rf(Vd, 0);
2815   }
2816 
2817   void dup(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, int index = 0)
2818   {
2819     starti;
2820     assert(T != T1D, "reserved encoding");
2821     f(0, 31), f((int)T & 1, 30), f(0b001110000, 29, 21);
2822     f(((1 << (T >> 1)) | (index << ((T >> 1) + 1))), 20, 16);
2823     f(0b000001, 15, 10), rf(Vn, 5), rf(Vd, 0);
2824   }
2825 
2826   // AdvSIMD ZIP/UZP/TRN
2827 #define INSN(NAME, opcode)                                              \
2828   void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { \
2829     guarantee(T != T1D && T != T1Q, "invalid arrangement");             \
2830     starti;                                                             \
2831     f(0, 31), f(0b001110, 29, 24), f(0, 21), f(0, 15);                  \
2832     f(opcode, 14, 12), f(0b10, 11, 10);                                 \
2833     rf(Vm, 16), rf(Vn, 5), rf(Vd, 0);                                   \
2834     f(T & 1, 30), f(T >> 1, 23, 22);                                    \
2835   }
2836 
2837   INSN(uzp1, 0b001);
2838   INSN(trn1, 0b010);
2839   INSN(zip1, 0b011);
2840   INSN(uzp2, 0b101);
2841   INSN(trn2, 0b110);
2842   INSN(zip2, 0b111);
2843 
2844 #undef INSN
2845 
2846   // CRC32 instructions
2847 #define INSN(NAME, c, sf, sz)                                             \
2848   void NAME(Register Rd, Register Rn, Register Rm) {                      \
2849     starti;                                                               \
2850     f(sf, 31), f(0b0011010110, 30, 21), f(0b010, 15, 13), f(c, 12);       \
2851     f(sz, 11, 10), rf(Rm, 16), rf(Rn, 5), rf(Rd, 0);                      \
2852   }
2853 
2854   INSN(crc32b,  0, 0, 0b00);
2855   INSN(crc32h,  0, 0, 0b01);
2856   INSN(crc32w,  0, 0, 0b10);
2857   INSN(crc32x,  0, 1, 0b11);
2858   INSN(crc32cb, 1, 0, 0b00);
2859   INSN(crc32ch, 1, 0, 0b01);
2860   INSN(crc32cw, 1, 0, 0b10);
2861   INSN(crc32cx, 1, 1, 0b11);
2862 
2863 #undef INSN
2864 
2865   // Table vector lookup
2866 #define INSN(NAME, op)                                                  \
2867   void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, unsigned registers, FloatRegister Vm) { \
2868     starti;                                                             \
2869     assert(T == T8B || T == T16B, "invalid arrangement");               \
2870     assert(0 < registers && registers <= 4, "invalid number of registers"); \
2871     f(0, 31), f((int)T & 1, 30), f(0b001110000, 29, 21), rf(Vm, 16), f(0, 15); \
2872     f(registers - 1, 14, 13), f(op, 12),f(0b00, 11, 10), rf(Vn, 5), rf(Vd, 0); \
2873   }
2874 
2875   INSN(tbl, 0);
2876   INSN(tbx, 1);
2877 
2878 #undef INSN
2879 
2880   // AdvSIMD two-reg misc
2881   // In this instruction group, the 2 bits in the size field ([23:22]) may be
2882   // fixed or determined by the "SIMD_Arrangement T", or both. The additional
2883   // parameter "tmask" is a 2-bit mask used to indicate which bits in the size
2884   // field are determined by the SIMD_Arrangement. The bit of "tmask" should be
2885   // set to 1 if corresponding bit marked as "x" in the ArmARM.
2886 #define INSN(NAME, U, size, tmask, opcode)                                          \
2887   void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) {               \
2888        starti;                                                                      \
2889        assert((ASSERTION), MSG);                                                    \
2890        f(0, 31), f((int)T & 1, 30), f(U, 29), f(0b01110, 28, 24);                   \
2891        f(size | ((int)(T >> 1) & tmask), 23, 22), f(0b10000, 21, 17);               \
2892        f(opcode, 16, 12), f(0b10, 11, 10), rf(Vn, 5), rf(Vd, 0);                    \
2893  }
2894 
2895 #define MSG "invalid arrangement"
2896 
2897 #define ASSERTION (T == T2S || T == T4S || T == T2D)
2898   INSN(fsqrt,  1, 0b10, 0b01, 0b11111);
2899   INSN(fabs,   0, 0b10, 0b01, 0b01111);
2900   INSN(fneg,   1, 0b10, 0b01, 0b01111);
2901   INSN(frintn, 0, 0b00, 0b01, 0b11000);
2902   INSN(frintm, 0, 0b00, 0b01, 0b11001);
2903   INSN(frintp, 0, 0b10, 0b01, 0b11000);
2904 #undef ASSERTION
2905 
2906 #define ASSERTION (T == T8B || T == T16B || T == T4H || T == T8H || T == T2S || T == T4S)
2907   INSN(rev64, 0, 0b00, 0b11, 0b00000);
2908 #undef ASSERTION
2909 
2910 #define ASSERTION (T == T8B || T == T16B || T == T4H || T == T8H)
2911   INSN(rev32, 1, 0b00, 0b11, 0b00000);
2912 #undef ASSERTION
2913 
2914 #define ASSERTION (T == T8B || T == T16B)
2915   INSN(rev16, 0, 0b00, 0b11, 0b00001);
2916   INSN(rbit,  1, 0b01, 0b00, 0b00101);
2917 #undef ASSERTION
2918 
2919 #undef MSG
2920 
2921 #undef INSN
2922 
2923   void ext(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm, int index)
2924   {
2925     starti;
2926     assert(T == T8B || T == T16B, "invalid arrangement");
2927     assert((T == T8B && index <= 0b0111) || (T == T16B && index <= 0b1111), "Invalid index value");
2928     f(0, 31), f((int)T & 1, 30), f(0b101110000, 29, 21);
2929     rf(Vm, 16), f(0, 15), f(index, 14, 11);
2930     f(0, 10), rf(Vn, 5), rf(Vd, 0);
2931   }
2932 
2933 // SVE arithmetic - unpredicated
2934 #define INSN(NAME, opcode)                                                             \
2935   void NAME(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn, FloatRegister Zm) { \
2936     starti;                                                                            \
2937     assert(T != Q, "invalid register variant");                                        \
2938     f(0b00000100, 31, 24), f(T, 23, 22), f(1, 21),                                     \
2939     rf(Zm, 16), f(0, 15, 13), f(opcode, 12, 10), rf(Zn, 5), rf(Zd, 0);                 \
2940   }
2941   INSN(sve_add, 0b000);
2942   INSN(sve_sub, 0b001);
2943 #undef INSN
2944 
2945 // SVE floating-point arithmetic - unpredicated
2946 #define INSN(NAME, opcode)                                                             \
2947   void NAME(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn, FloatRegister Zm) { \
2948     starti;                                                                            \
2949     assert(T == S || T == D, "invalid register variant");                              \
2950     f(0b01100101, 31, 24), f(T, 23, 22), f(0, 21),                                     \
2951     rf(Zm, 16), f(0, 15, 13), f(opcode, 12, 10), rf(Zn, 5), rf(Zd, 0);                 \
2952   }
2953 
2954   INSN(sve_fadd, 0b000);
2955   INSN(sve_fmul, 0b010);
2956   INSN(sve_fsub, 0b001);
2957 #undef INSN
2958 
2959 private:
2960   void sve_predicate_reg_insn(unsigned op24, unsigned op13,
2961                               FloatRegister Zd_or_Vd, SIMD_RegVariant T,
2962                               PRegister Pg, FloatRegister Zn_or_Vn) {
2963     starti;
2964     f(op24, 31, 24), f(T, 23, 22), f(op13, 21, 13);
2965     pgrf(Pg, 10), rf(Zn_or_Vn, 5), rf(Zd_or_Vd, 0);
2966   }
2967 
2968   void sve_shift_imm_encoding(SIMD_RegVariant T, int shift, bool isSHR,
2969                               int& tszh, int& tszl_imm) {
2970     /* The encodings for the tszh:tszl:imm3 fields
2971      * for shift right is calculated as:
2972      *   0001 xxx       B, shift = 16  - UInt(tszh:tszl:imm3)
2973      *   001x xxx       H, shift = 32  - UInt(tszh:tszl:imm3)
2974      *   01xx xxx       S, shift = 64  - UInt(tszh:tszl:imm3)
2975      *   1xxx xxx       D, shift = 128 - UInt(tszh:tszl:imm3)
2976      * for shift left is calculated as:
2977      *   0001 xxx       B, shift = UInt(tszh:tszl:imm3) - 8
2978      *   001x xxx       H, shift = UInt(tszh:tszl:imm3) - 16
2979      *   01xx xxx       S, shift = UInt(tszh:tszl:imm3) - 32
2980      *   1xxx xxx       D, shift = UInt(tszh:tszl:imm3) - 64
2981      */
2982     assert(T != Q, "Invalid register variant");
2983     if (isSHR) {
2984       assert(((1 << (T + 3)) >= shift) && (shift > 0) , "Invalid shift value");
2985     } else {
2986       assert(((1 << (T + 3)) > shift) && (shift >= 0) , "Invalid shift value");
2987     }
2988     int cVal = (1 << ((T + 3) + (isSHR ? 1 : 0)));
2989     int encodedShift = isSHR ? cVal - shift : cVal + shift;
2990     tszh = encodedShift >> 5;
2991     tszl_imm = encodedShift & 0x1f;
2992   }
2993 
2994 public:
2995 
2996 // SVE integer arithmetic - predicate
2997 #define INSN(NAME, op1, op2)                                                                            \
2998   void NAME(FloatRegister Zdn_or_Zd_or_Vd, SIMD_RegVariant T, PRegister Pg, FloatRegister Znm_or_Vn) {  \
2999     assert(T != Q, "invalid register variant");                                                         \
3000     sve_predicate_reg_insn(op1, op2, Zdn_or_Zd_or_Vd, T, Pg, Znm_or_Vn);                                \
3001   }
3002 
3003   INSN(sve_abs,  0b00000100, 0b010110101); // vector abs, unary
3004   INSN(sve_add,  0b00000100, 0b000000000); // vector add
3005   INSN(sve_and,  0b00000100, 0b011010000); // vector and
3006   INSN(sve_andv, 0b00000100, 0b011010001); // bitwise and reduction to scalar
3007   INSN(sve_asr,  0b00000100, 0b010000100); // vector arithmetic shift right
3008   INSN(sve_cnt,  0b00000100, 0b011010101); // count non-zero bits
3009   INSN(sve_cpy,  0b00000101, 0b100000100); // copy scalar to each active vector element
3010   INSN(sve_eor,  0b00000100, 0b011001000); // vector eor
3011   INSN(sve_eorv, 0b00000100, 0b011001001); // bitwise xor reduction to scalar
3012   INSN(sve_lsl,  0b00000100, 0b010011100); // vector logical shift left
3013   INSN(sve_lsr,  0b00000100, 0b010001100); // vector logical shift right
3014   INSN(sve_mul,  0b00000100, 0b010000000); // vector mul
3015   INSN(sve_neg,  0b00000100, 0b010111101); // vector neg, unary
3016   INSN(sve_not,  0b00000100, 0b011110101); // bitwise invert vector, unary
3017   INSN(sve_orr,  0b00000100, 0b011000000); // vector or
3018   INSN(sve_orv,  0b00000100, 0b011000001); // bitwise or reduction to scalar
3019   INSN(sve_smax, 0b00000100, 0b001000000); // signed maximum vectors
3020   INSN(sve_smaxv, 0b00000100, 0b001000001); // signed maximum reduction to scalar
3021   INSN(sve_smin,  0b00000100, 0b001010000); // signed minimum vectors
3022   INSN(sve_sminv, 0b00000100, 0b001010001); // signed minimum reduction to scalar
3023   INSN(sve_sub,   0b00000100, 0b000001000); // vector sub
3024   INSN(sve_uaddv, 0b00000100, 0b000001001); // unsigned add reduction to scalar
3025 #undef INSN
3026 
3027 // SVE floating-point arithmetic - predicate
3028 #define INSN(NAME, op1, op2)                                                                          \
3029   void NAME(FloatRegister Zd_or_Zdn_or_Vd, SIMD_RegVariant T, PRegister Pg, FloatRegister Zn_or_Zm) { \
3030     assert(T == S || T == D, "invalid register variant");                                             \
3031     sve_predicate_reg_insn(op1, op2, Zd_or_Zdn_or_Vd, T, Pg, Zn_or_Zm);                               \
3032   }
3033 
3034   INSN(sve_fabs,    0b00000100, 0b011100101);
3035   INSN(sve_fadd,    0b01100101, 0b000000100);
3036   INSN(sve_fadda,   0b01100101, 0b011000001); // add strictly-ordered reduction to scalar Vd
3037   INSN(sve_fdiv,    0b01100101, 0b001101100);
3038   INSN(sve_fmax,    0b01100101, 0b000110100); // floating-point maximum
3039   INSN(sve_fmaxv,   0b01100101, 0b000110001); // floating-point maximum recursive reduction to scalar
3040   INSN(sve_fmin,    0b01100101, 0b000111100); // floating-point minimum
3041   INSN(sve_fminv,   0b01100101, 0b000111001); // floating-point minimum recursive reduction to scalar
3042   INSN(sve_fmul,    0b01100101, 0b000010100);
3043   INSN(sve_fneg,    0b00000100, 0b011101101);
3044   INSN(sve_frintm,  0b01100101, 0b000010101); // floating-point round to integral value, toward minus infinity
3045   INSN(sve_frintn,  0b01100101, 0b000000101); // floating-point round to integral value, nearest with ties to even
3046   INSN(sve_frintp,  0b01100101, 0b000001101); // floating-point round to integral value, toward plus infinity
3047   INSN(sve_fsqrt,   0b01100101, 0b001101101);
3048   INSN(sve_fsub,    0b01100101, 0b000001100);
3049 #undef INSN
3050 
3051   // SVE multiple-add/sub - predicated
3052 #define INSN(NAME, op0, op1, op2)                                                                     \
3053   void NAME(FloatRegister Zda, SIMD_RegVariant T, PRegister Pg, FloatRegister Zn, FloatRegister Zm) { \
3054     starti;                                                                                           \
3055     assert(T != Q, "invalid size");                                                                   \
3056     f(op0, 31, 24), f(T, 23, 22), f(op1, 21), rf(Zm, 16);                                             \
3057     f(op2, 15, 13), pgrf(Pg, 10), rf(Zn, 5), rf(Zda, 0);                                              \
3058   }
3059 
3060   INSN(sve_fmla,  0b01100101, 1, 0b000); // floating-point fused multiply-add, writing addend: Zda = Zda + Zn * Zm
3061   INSN(sve_fmls,  0b01100101, 1, 0b001); // floating-point fused multiply-subtract: Zda = Zda + -Zn * Zm
3062   INSN(sve_fnmla, 0b01100101, 1, 0b010); // floating-point negated fused multiply-add: Zda = -Zda + -Zn * Zm
3063   INSN(sve_fnmls, 0b01100101, 1, 0b011); // floating-point negated fused multiply-subtract: Zda = -Zda + Zn * Zm
3064   INSN(sve_fmad,  0b01100101, 1, 0b100); // floating-point fused multiply-add, writing multiplicand: Zda = Zm + Zda * Zn
3065   INSN(sve_mla,   0b00000100, 0, 0b010); // multiply-add: Zda = Zda + Zn*Zm
3066   INSN(sve_mls,   0b00000100, 0, 0b011); // multiply-subtract: Zda = Zda + -Zn*Zm
3067 #undef INSN
3068 
3069 // SVE bitwise logical - unpredicated
3070 #define INSN(NAME, opc)                                              \
3071   void NAME(FloatRegister Zd, FloatRegister Zn, FloatRegister Zm) {  \
3072     starti;                                                          \
3073     f(0b00000100, 31, 24), f(opc, 23, 22), f(1, 21),                 \
3074     rf(Zm, 16), f(0b001100, 15, 10), rf(Zn, 5), rf(Zd, 0);           \
3075   }
3076   INSN(sve_and, 0b00);
3077   INSN(sve_eor, 0b10);
3078   INSN(sve_orr, 0b01);
3079   INSN(sve_bic, 0b11);
3080 #undef INSN
3081 
3082 // SVE shift immediate - unpredicated
3083 #define INSN(NAME, opc, isSHR)                                                  \
3084   void NAME(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn, int shift) { \
3085     starti;                                                                     \
3086     int tszh, tszl_imm;                                                         \
3087     sve_shift_imm_encoding(T, shift, isSHR, tszh, tszl_imm);                    \
3088     f(0b00000100, 31, 24);                                                      \
3089     f(tszh, 23, 22), f(1,21), f(tszl_imm, 20, 16);                              \
3090     f(0b100, 15, 13), f(opc, 12, 10), rf(Zn, 5), rf(Zd, 0);                     \
3091   }
3092 
3093   INSN(sve_asr, 0b100, /* isSHR = */ true);
3094   INSN(sve_lsl, 0b111, /* isSHR = */ false);
3095   INSN(sve_lsr, 0b101, /* isSHR = */ true);
3096 #undef INSN
3097 
3098 // SVE bitwise shift by immediate (predicated)
     // INSN(NAME, opc, isSHR) defines NAME(Zdn, T, Pg, shift). These overloads share
     // their names with the unpredicated shifts and are told apart by the PRegister
     // parameter. Zdn is the only vector operand that is encoded.
     // tszh / tszl_imm are produced by sve_shift_imm_encoding (defined outside this
     // chunk) from T, shift and isSHR; opc is passed to f(opc, 19, 16).
3099 #define INSN(NAME, opc, isSHR)                                                  \
3100   void NAME(FloatRegister Zdn, SIMD_RegVariant T, PRegister Pg, int shift) {    \
3101     starti;                                                                     \
3102     int tszh, tszl_imm;                                                         \
3103     sve_shift_imm_encoding(T, shift, isSHR, tszh, tszl_imm);                    \
3104     f(0b00000100, 31, 24), f(tszh, 23, 22), f(0b00, 21, 20), f(opc, 19, 16);    \
3105     f(0b100, 15, 13), pgrf(Pg, 10), f(tszl_imm, 9, 5), rf(Zdn, 0);              \
3106   }
3107 
3108   INSN(sve_asr, 0b0000, /* isSHR = */ true);
3109   INSN(sve_lsl, 0b0011, /* isSHR = */ false);
3110   INSN(sve_lsr, 0b0001, /* isSHR = */ true);
3111 #undef INSN
3112 
3113 private:
3114 
3115   // Scalar base + immediate index
       // Private encoder used by the contiguous load/store wrappers.
       //   Zt, Xn, Pg     - data register, base register, governing predicate
       //   imm            - index, emitted as a signed value via sf(imm, 19, 16)
       //   T              - element variant; must satisfy T >= type (assert_cond)
       //   op1, type, op2 - opcode field values chosen by the caller
3116   void sve_ld_st1(FloatRegister Zt, Register Xn, int imm, PRegister Pg,
3117               SIMD_RegVariant T, int op1, int type, int op2) {
3118     starti;
3119     assert_cond(T >= type);
3120     f(op1, 31, 25), f(type, 24, 23), f(T, 22, 21);
3121     f(0, 20), sf(imm, 19, 16), f(op2, 15, 13);
3122     pgrf(Pg, 10), srf(Xn, 5), rf(Zt, 0);
3123   }
3124 
3125   // Scalar base + scalar index
       // Private encoder used by the contiguous load/store wrappers.
       //   Zt, Xn, Pg     - data register, base register, governing predicate
       //   Xm             - index register, emitted with rf(Xm, 16)
       //   T              - element variant; must satisfy T >= type (assert_cond)
       //   op1, type, op2 - opcode field values chosen by the caller
3126   void sve_ld_st1(FloatRegister Zt, Register Xn, Register Xm, PRegister Pg,
3127               SIMD_RegVariant T, int op1, int type, int op2) {
3128     starti;
3129     assert_cond(T >= type);
3130     f(op1, 31, 25), f(type, 24, 23), f(T, 22, 21);
3131     rf(Xm, 16), f(op2, 15, 13);
3132     pgrf(Pg, 10), srf(Xn, 5), rf(Zt, 0);
3133   }
3134 
3135   void sve_ld_st1(FloatRegister Zt, PRegister Pg,
3136               SIMD_RegVariant T, const Address &a,
3137               int op1, int type, int imm_op2, int scalar_op2) {
         // Picks the encoder overload from the addressing mode of `a`:
         //   base_plus_offset     -> base + immediate form, using imm_op2
         //   base_plus_offset_reg -> base + index register form, using scalar_op2
         // Every other mode ends in ShouldNotReachHere().
3138     switch (a.getMode()) {
3139     case Address::base_plus_offset:
3140       sve_ld_st1(Zt, a.base(), a.offset(), Pg, T, op1, type, imm_op2);
3141       break;
3142     case Address::base_plus_offset_reg:
3143       sve_ld_st1(Zt, a.base(), a.index(), Pg, T, op1, type, scalar_op2);
3144       break;
3145     default:
3146       ShouldNotReachHere();
3147     }
3148   }
3149 
3150 public:
3151 
3152 // SVE contiguous load/store
     // INSN(NAME, op1, type, imm_op2, scalar_op2) defines NAME(Zt, T, Pg, a).
     // The wrapper rejects T == Q and forwards to the private sve_ld_st1 dispatcher,
     // which selects imm_op2 or scalar_op2 from the addressing mode of `a`.
     // In the table below type is 0b00/0b01/0b10/0b11 for the b/h/w/d mnemonics;
     // loads and stores differ in op1 and imm_op2.
3153 #define INSN(NAME, op1, type, imm_op2, scalar_op2)                                   \
3154   void NAME(FloatRegister Zt, SIMD_RegVariant T, PRegister Pg, const Address &a) {   \
3155     assert(T != Q, "invalid register variant");                                      \
3156     sve_ld_st1(Zt, Pg, T, a, op1, type, imm_op2, scalar_op2);                        \
3157   }
3158 
3159   INSN(sve_ld1b, 0b1010010, 0b00, 0b101, 0b010);
3160   INSN(sve_st1b, 0b1110010, 0b00, 0b111, 0b010);
3161   INSN(sve_ld1h, 0b1010010, 0b01, 0b101, 0b010);
3162   INSN(sve_st1h, 0b1110010, 0b01, 0b111, 0b010);
3163   INSN(sve_ld1w, 0b1010010, 0b10, 0b101, 0b010);
3164   INSN(sve_st1w, 0b1110010, 0b10, 0b111, 0b010);
3165   INSN(sve_ld1d, 0b1010010, 0b11, 0b101, 0b010);
3166   INSN(sve_st1d, 0b1110010, 0b11, 0b111, 0b010);
3167 #undef INSN
3168 
3169 // Gather/scatter load/store (SVE) - scalar plus vector
     // INSN(NAME, op1, type, op2, op3) defines NAME(Zt, Pg, Xn, Zm): Xn is the scalar
     // base and Zm the vector of offsets. No element variant is passed in; the
     // access width is fixed per mnemonic by the constants in each INSN line.
3170 #define INSN(NAME, op1, type, op2, op3)                                         \
3171   void NAME(FloatRegister Zt, PRegister Pg, Register Xn, FloatRegister Zm) {    \
3172     starti;                                                                     \
3173     f(op1, 31, 25), f(type, 24, 23), f(op2, 22, 21), rf(Zm, 16);                \
3174     f(op3, 15, 13), pgrf(Pg, 10), srf(Xn, 5), rf(Zt, 0);                        \
3175   }
3176   // SVE 32-bit gather load words (scalar plus 32-bit scaled offsets)
3177   INSN(sve_ld1w_gather,  0b1000010, 0b10, 0b01, 0b010);
3178   // SVE 64-bit gather load (scalar plus 32-bit unpacked scaled offsets)
3179   INSN(sve_ld1d_gather,  0b1100010, 0b11, 0b01, 0b010);
3180   // SVE 32-bit scatter store (scalar plus 32-bit scaled offsets)
3181   INSN(sve_st1w_scatter, 0b1110010, 0b10, 0b11, 0b100);
3182   // SVE 64-bit scatter store (scalar plus unpacked 32-bit scaled offsets)
3183   INSN(sve_st1d_scatter, 0b1110010, 0b11, 0b01, 0b100);
3184 #undef INSN
3185 
3186 // SVE load/store - unpredicated
     // INSN(NAME, op1) defines NAME(Zt, a) for a whole vector register.
     // Only base + offset addresses are accepted (a.index() must be noreg).
     // The offset is split across two fields: a.offset() >> 3 is emitted as a signed
     // value via sf(..., 21, 16) and the low three bits (a.offset() & 0x7) via
     // f(..., 12, 10).
     // NOTE(review): the offset is presumably counted in vector lengths, not bytes
     // -- confirm against the callers.
3187 #define INSN(NAME, op1)                                                         \
3188   void NAME(FloatRegister Zt, const Address &a)  {                              \
3189     starti;                                                                     \
3190     assert(a.index() == noreg, "invalid address variant");                      \
3191     f(op1, 31, 29), f(0b0010110, 28, 22), sf(a.offset() >> 3, 21, 16),          \
3192     f(0b010, 15, 13), f(a.offset() & 0x7, 12, 10), srf(a.base(), 5), rf(Zt, 0); \
3193   }
3194 
3195   INSN(sve_ldr, 0b100); // LDR (vector)
3196   INSN(sve_str, 0b111); // STR (vector)
3197 #undef INSN
3198 
3199 // SVE stack frame adjustment
     // INSN(NAME, op) defines NAME(Xd, Xn, imm6). imm6 is the signed multiplier,
     // emitted via sf(imm6, 10, 5); op (passed to f(op, 22, 21)) selects between the
     // vector-length and predicate-length variants listed below.
     // Both registers are emitted with srf.
     // NOTE(review): srf presumably lets register number 31 denote sp -- confirm.
3200 #define INSN(NAME, op) \
3201   void NAME(Register Xd, Register Xn, int imm6) {                 \
3202     starti;                                                       \
3203     f(0b000001000, 31, 23), f(op, 22, 21);                        \
3204     srf(Xn, 16), f(0b01010, 15, 11), sf(imm6, 10, 5), srf(Xd, 0); \
3205   }
3206 
3207   INSN(sve_addvl, 0b01); // Add multiple of vector register size to scalar register
3208   INSN(sve_addpl, 0b11); // Add multiple of predicate register size to scalar register
3209 #undef INSN
3210 
3211 // SVE inc/dec register by element count
     // INSN(NAME, op) defines NAME(Xdn, T, imm4 = 1, pattern = 0b11111).
     //   T       - element variant, must not be Q
     //   imm4    - multiplier; stored in the instruction as imm4 - 1
     //   pattern - 5-bit predicate constraint passed to f(pattern, 9, 5)
     //   op      - single bit passed to f(op, 10): 0 for sve_inc, 1 for sve_dec
3212 #define INSN(NAME, op) \
3213   void NAME(Register Xdn, SIMD_RegVariant T, unsigned imm4 = 1, int pattern = 0b11111) { \
3214     starti;                                                                              \
3215     assert(T != Q, "invalid size");                                                      \
3216     f(0b00000100,31, 24), f(T, 23, 22), f(0b11, 21, 20);                                 \
3217     f(imm4 - 1, 19, 16), f(0b11100, 15, 11), f(op, 10), f(pattern, 9, 5), rf(Xdn, 0);    \
3218   }
3219 
3220   INSN(sve_inc, 0);
3221   INSN(sve_dec, 1);
3222 #undef INSN
3223 
3224 // SVE predicate logical operations
     // INSN(NAME, op1, op2, op3) defines NAME(Pd, Pg, Pn, Pm) on predicate registers.
     // The three selectors go to f(op1, 23, 22), f(op2, 9) and f(op3, 4); together
     // they pick the operation, see the table below. The names ending in 's' differ
     // from their base form only in op1.
     // These overloads share names with the vector forms of sve_and/sve_eor/sve_orr/
     // sve_bic and are distinguished by their PRegister parameters.
3225 #define INSN(NAME, op1, op2, op3) \
3226   void NAME(PRegister Pd, PRegister Pg, PRegister Pn, PRegister Pm) { \
3227     starti;                                                           \
3228     f(0b00100101, 31, 24), f(op1, 23, 22), f(0b00, 21, 20);           \
3229     prf(Pm, 16), f(0b01, 15, 14), prf(Pg, 10), f(op2, 9);             \
3230     prf(Pn, 5), f(op3, 4), prf(Pd, 0);                                \
3231   }
3232 
3233   INSN(sve_and,  0b00, 0b0, 0b0);
3234   INSN(sve_ands, 0b01, 0b0, 0b0);
3235   INSN(sve_eor,  0b00, 0b1, 0b0);
3236   INSN(sve_eors, 0b01, 0b1, 0b0);
3237   INSN(sve_orr,  0b10, 0b0, 0b0);
3238   INSN(sve_orrs, 0b11, 0b0, 0b0);
3239   INSN(sve_bic,  0b00, 0b0, 0b1);
3240 #undef INSN
3241 
3242   // SVE increment register by predicate count
       //   rd - general-purpose register that is updated
       //   T  - element variant, must not be Q
       //   pg - predicate register emitted with prf(pg, 5)
3243   void sve_incp(const Register rd, SIMD_RegVariant T, PRegister pg) {
3244     starti;
3245     assert(T != Q, "invalid size");
3246     f(0b00100101, 31, 24), f(T, 23, 22), f(0b1011001000100, 21, 9),
3247     prf(pg, 5), rf(rd, 0);
3248   }
3249 
3250   // SVE broadcast general-purpose register to vector elements (unpredicated)
       //   Zd - destination vector register
       //   T  - element variant, must not be Q
       //   Rn - source register, emitted with srf
       //        (NOTE(review): srf presumably lets register number 31 denote sp -- confirm)
3251   void sve_dup(FloatRegister Zd, SIMD_RegVariant T, Register Rn) {
3252     starti;
3253     assert(T != Q, "invalid size");
3254     f(0b00000101, 31, 24), f(T, 23, 22), f(0b100000001110, 21, 10);
3255     srf(Rn, 5), rf(Zd, 0);
3256   }
3257 
3258   // SVE broadcast signed immediate to vector elements (unpredicated)
       // Accepted immediates:
       //   * -128..127, encoded directly with sh = 0
       //   * for T != B, multiples of 256 in -32768..32512, encoded as imm8 >> 8 with sh = 1
       // Anything else fails guarantee(false, "invalid immediate").
3259   void sve_dup(FloatRegister Zd, SIMD_RegVariant T, int imm8) {
3260     starti;
3261     assert(T != Q, "invalid size");
3262     int sh = 0;
3263     if (imm8 <= 127 && imm8 >= -128) {
3264       sh = 0;
3265     } else if (T != B && imm8 <= 32512 && imm8 >= -32768 && (imm8 & 0xff) == 0) {
3266       sh = 1;
3267       imm8 = (imm8 >> 8);
3268     } else {
3269       guarantee(false, "invalid immediate");
3270     }
3271     f(0b00100101, 31, 24), f(T, 23, 22), f(0b11100011, 21, 14);
3272     f(sh, 13), sf(imm8, 12, 5), rf(Zd, 0);
3273   }
3274 
3275   // SVE predicate test
       // Encodes the two predicate operands Pg and Pn; no destination register is
       // encoded (the low five bits are written as zero).
3276   void sve_ptest(PRegister Pg, PRegister Pn) {
3277     starti;
3278     f(0b001001010101000011, 31, 14), prf(Pg, 10), f(0, 9), prf(Pn, 5), f(0, 4, 0);
3279   }
3280 
3281   // SVE predicate initialize
       //   pd      - destination predicate register
       //   esize   - element variant, passed to f(esize, 23, 22)
       //   pattern - 5-bit predicate constraint, default 0b11111
3282   void sve_ptrue(PRegister pd, SIMD_RegVariant esize, int pattern = 0b11111) {
3283     starti;
3284     f(0b00100101, 31, 24), f(esize, 23, 22), f(0b011000111000, 21, 10);
3285     f(pattern, 9, 5), f(0b0, 4), prf(pd, 0);
3286   }
3287 
3288   // SVE predicate zero
       // Takes only the destination predicate pd; every other field is a constant.
3289   void sve_pfalse(PRegister pd) {
3290     starti;
3291     f(0b00100101, 31, 24), f(0b00, 23, 22), f(0b011000111001, 21, 10);
3292     f(0b000000, 9, 4), prf(pd, 0);
3293   }
3294 
3295 // SVE load/store predicate register
     // INSN(NAME, op1) defines NAME(Pt, a) for a predicate register.
     // Only base + offset addresses are accepted (a.index() must be noreg).
     // The offset is split exactly as in the vector-register form: a.offset() >> 3
     // as a signed value via sf(..., 21, 16) and a.offset() & 0x7 via f(..., 12, 10).
     // NOTE(review): the offset is presumably counted in predicate lengths, not
     // bytes -- confirm against the callers.
3296 #define INSN(NAME, op1)                                                  \
3297   void NAME(PRegister Pt, const Address &a)  {                           \
3298     starti;                                                              \
3299     assert(a.index() == noreg, "invalid address variant");               \
3300     f(op1, 31, 29), f(0b0010110, 28, 22), sf(a.offset() >> 3, 21, 16),   \
3301     f(0b000, 15, 13), f(a.offset() & 0x7, 12, 10), srf(a.base(), 5),     \
3302     f(0, 4), prf(Pt, 0);                                                 \
3303   }
3304 
3305   INSN(sve_ldr, 0b100); // LDR (predicate)
3306   INSN(sve_str, 0b111); // STR (predicate)
3307 #undef INSN
3308 
3309   // SVE move predicate register
       // Pn is written into three operand fields (prf(Pn, 16), prf(Pn, 10) and
       // prf(Pn, 5)); Pd is the destination.
       // NOTE(review): the constant fields match the sve_orr predicate encoding in
       // this file (op1 = 0b10), so this looks like the ORR-based move alias -- confirm.
3310   void sve_mov(PRegister Pd, PRegister Pn) {
3311     starti;
3312     f(0b001001011000, 31, 20), prf(Pn, 16), f(0b01, 15, 14), prf(Pn, 10);
3313     f(0, 9), prf(Pn, 5), f(0, 4), prf(Pd, 0);
3314   }
3315 
3316   // SVE copy general-purpose register to vector elements (predicated)
       //   Zd - destination vector register
       //   T  - element variant, must not be Q
       //   Pg - governing predicate, emitted with pgrf
       //   Rn - source register, emitted with srf
3317   void sve_cpy(FloatRegister Zd, SIMD_RegVariant T, PRegister Pg, Register Rn) {
3318     starti;
3319     assert(T != Q, "invalid size");
3320     f(0b00000101, 31, 24), f(T, 23, 22), f(0b101000101, 21, 13);
3321     pgrf(Pg, 10), srf(Rn, 5), rf(Zd, 0);
3322   }
3323 
3324   // SVE copy signed integer immediate to vector elements (predicated)
       // Accepted immediates (same rule as the immediate form of sve_dup):
       //   * -128..127, encoded directly with sh = 0
       //   * for T != B, multiples of 256 in -32768..32512, encoded as imm8 >> 8 with sh = 1
       // Anything else fails guarantee(false, "invalid immediate").
       // isMerge sets the single bit passed to f(m, 14).
       // NOTE(review): presumably m = 1 selects merging and m = 0 zeroing
       // predication -- confirm.
3325   void sve_cpy(FloatRegister Zd, SIMD_RegVariant T, PRegister Pg, int imm8, bool isMerge) {
3326     starti;
3327     assert(T != Q, "invalid size");
3328     int sh = 0;
3329     if (imm8 <= 127 && imm8 >= -128) {
3330       sh = 0;
3331     } else if (T != B && imm8 <= 32512 && imm8 >= -32768 && (imm8 & 0xff) == 0) {
3332       sh = 1;
3333       imm8 = (imm8 >> 8);
3334     } else {
3335       guarantee(false, "invalid immediate");
3336     }
3337     int m = isMerge ? 1 : 0;
3338     f(0b00000101, 31, 24), f(T, 23, 22), f(0b01, 21, 20);
3339     prf(Pg, 16), f(0b0, 15), f(m, 14), f(sh, 13), sf(imm8, 12, 5), rf(Zd, 0);
3340   }
3341 
3342   // SVE conditionally select elements from two vectors
       //   Zd     - destination vector register
       //   T      - element variant, must not be Q
       //   Pg     - selecting predicate; emitted with prf (not pgrf) at bit 10
       //   Zn, Zm - the two source vectors
3343   void sve_sel(FloatRegister Zd, SIMD_RegVariant T, PRegister Pg,
3344                FloatRegister Zn, FloatRegister Zm) {
3345     starti;
3346     assert(T != Q, "invalid size");
3347     f(0b00000101, 31, 24), f(T, 23, 22), f(0b1, 21), rf(Zm, 16);
3348     f(0b11, 15, 14), prf(Pg, 10), rf(Zn, 5), rf(Zd, 0);
3349   }
3350 
3351 // SVE Integer/Floating-Point Compare - Vectors
     // INSN(NAME, op1, op2, fp) defines NAME(cond, Pd, T, Pg, Zn, Zm).
     //   fp == 0 (sve_cmp): T must not be Q; EQ, NE, GE, GT, HI and HS are handled.
     //   fp != 0 (sve_fcm): T must be neither B nor Q, and HI / HS are rejected.
     // cond is turned into a 4-bit cond_op: for EQ/NE/GE/GT the upper two bits come
     // from op2, for HI/HS the value is a fixed constant. The upper three bits of
     // cond_op are emitted via f(..., 15, 13) and the lowest bit via f(..., 4).
     // Any other condition ends in ShouldNotReachHere().
3352 #define INSN(NAME, op1, op2, fp)  \
3353   void NAME(Condition cond, PRegister Pd, SIMD_RegVariant T, PRegister Pg,             \
3354             FloatRegister Zn, FloatRegister Zm) {                                      \
3355     starti;                                                                            \
3356     if (fp == 0) {                                                                     \
3357       assert(T != Q, "invalid size");                                                  \
3358     } else {                                                                           \
3359       assert(T != B && T != Q, "invalid size");                                        \
3360       assert(cond != HI && cond != HS, "invalid condition for fcm");                   \
3361     }                                                                                  \
3362     int cond_op;                                                                       \
3363     switch(cond) {                                                                     \
3364       case EQ: cond_op = (op2 << 2) | 0b10; break;                                     \
3365       case NE: cond_op = (op2 << 2) | 0b11; break;                                     \
3366       case GE: cond_op = (op2 << 2) | 0b00; break;                                     \
3367       case GT: cond_op = (op2 << 2) | 0b01; break;                                     \
3368       case HI: cond_op = 0b0001; break;                                                \
3369       case HS: cond_op = 0b0000; break;                                                \
3370       default:                                                                         \
3371         ShouldNotReachHere();                                                          \
3372     }                                                                                  \
3373     f(op1, 31, 24), f(T, 23, 22), f(0, 21), rf(Zm, 16), f((cond_op >> 1) & 7, 15, 13); \
3374     pgrf(Pg, 10), rf(Zn, 5), f(cond_op & 1, 4), prf(Pd, 0);                            \
3375   }
3376 
3377   INSN(sve_cmp, 0b00100100, 0b10, 0);
3378   INSN(sve_fcm, 0b01100101, 0b01, 1);
3379 #undef INSN
3380 
3381 // SVE Integer Compare - Signed Immediate
     // Compares the elements of Zn with imm5 under predicate Pg and writes Pd.
     //   T    - element variant, must not be Q
     //   imm5 - must lie in -16..15 (checked with guarantee, so in all build types)
     //   cond - EQ, NE, GE, GT, LE or LT; anything else ends in ShouldNotReachHere()
     // cond is mapped to a 4-bit cond_op whose upper three bits are emitted via
     // f(..., 15, 13) and whose lowest bit is emitted via f(..., 4).
3382 void sve_cmp(Condition cond, PRegister Pd, SIMD_RegVariant T,
3383              PRegister Pg, FloatRegister Zn, int imm5) {
3384   starti;
3385   assert(T != Q, "invalid size");
3386   guarantee(-16 <= imm5 && imm5 <= 15, "invalid immediate");
3387   int cond_op;
3388   switch(cond) {
3389     case EQ: cond_op = 0b1000; break;
3390     case NE: cond_op = 0b1001; break;
3391     case GE: cond_op = 0b0000; break;
3392     case GT: cond_op = 0b0001; break;
3393     case LE: cond_op = 0b0011; break;
3394     case LT: cond_op = 0b0010; break;
3395     default:
3396       ShouldNotReachHere();
3397   }
3398   f(0b00100101, 31, 24), f(T, 23, 22), f(0b0, 21), sf(imm5, 20, 16),
3399   f((cond_op >> 1) & 0x7, 15, 13), pgrf(Pg, 10), rf(Zn, 5);
3400   f(cond_op & 0x1, 4), prf(Pd, 0);
3401 }
3402 
3403 // SVE unpack vector elements
     // INSN(NAME, op) defines NAME(Zd, T, Zn). T must be neither B nor Q.
     // op is the 2-bit selector passed to f(op, 17, 16). In the table below its
     // upper bit is set for the unsigned (uunpk*) forms and its lower bit for the
     // high-half (*hi) forms.
3404 #define INSN(NAME, op) \
3405   void NAME(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn) { \
3406     starti;                                                          \
3407     assert(T != B && T != Q, "invalid size");                        \
3408     f(0b00000101, 31, 24), f(T, 23, 22), f(0b1100, 21, 18);          \
3409     f(op, 17, 16), f(0b001110, 15, 10), rf(Zn, 5), rf(Zd, 0);        \
3410   }
3411 
3412   INSN(sve_uunpkhi, 0b11); // Unsigned unpack and extend half of vector - high half
3413   INSN(sve_uunpklo, 0b10); // Unsigned unpack and extend half of vector - low half
3414   INSN(sve_sunpkhi, 0b01); // Signed unpack and extend half of vector - high half
3415   INSN(sve_sunpklo, 0b00); // Signed unpack and extend half of vector - low half
3416 #undef INSN
3417 
3418 // SVE unpack predicate elements
     // INSN(NAME, op) defines NAME(Pd, Pn). op is the single bit passed to
     // f(op, 16): 1 for the high half, 0 for the low half. No element variant is
     // taken.
3419 #define INSN(NAME, op) \
3420   void NAME(PRegister Pd, PRegister Pn) { \
3421     starti;                                                          \
3422     f(0b000001010011000, 31, 17), f(op, 16), f(0b0100000, 15, 9);    \
3423     prf(Pn, 5), f(0b0, 4), prf(Pd, 0);                               \
3424   }
3425 
3426   INSN(sve_punpkhi, 0b1); // Unpack and widen high half of predicate
3427   INSN(sve_punpklo, 0b0); // Unpack and widen low half of predicate
3428 #undef INSN
3429 
3430 // SVE permute vector elements
     // INSN(NAME, op) defines NAME(Zd, T, Zn, Zm). T must not be Q.
     // op is the single bit passed to f(op, 10): 0 for sve_uzp1, 1 for sve_uzp2.
3431 #define INSN(NAME, op) \
3432   void NAME(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn, FloatRegister Zm) { \
3433     starti;                                                                            \
3434     assert(T != Q, "invalid size");                                                    \
3435     f(0b00000101, 31, 24), f(T, 23, 22), f(0b1, 21), rf(Zm, 16);                       \
3436     f(0b01101, 15, 11), f(op, 10), rf(Zn, 5), rf(Zd, 0);                               \
3437   }
3438 
3439   INSN(sve_uzp1, 0b0); // Concatenate even elements from two vectors
3440   INSN(sve_uzp2, 0b1); // Concatenate odd elements from two vectors
3441 #undef INSN
3442 
3443 // SVE permute predicate elements
     // INSN(NAME, op) defines NAME(Pd, T, Pn, Pm): the predicate-register overloads
     // of sve_uzp1 / sve_uzp2. T must not be Q.
     // op is the single bit passed to f(op, 10): 0 for sve_uzp1, 1 for sve_uzp2.
3444 #define INSN(NAME, op) \
3445   void NAME(PRegister Pd, SIMD_RegVariant T, PRegister Pn, PRegister Pm) {             \
3446     starti;                                                                            \
3447     assert(T != Q, "invalid size");                                                    \
3448     f(0b00000101, 31, 24), f(T, 23, 22), f(0b10, 21, 20), prf(Pm, 16);                 \
3449     f(0b01001, 15, 11), f(op, 10), f(0b0, 9), prf(Pn, 5), f(0b0, 4), prf(Pd, 0);       \
3450   }
3451 
3452   INSN(sve_uzp1, 0b0); // Concatenate even elements from two predicates
3453   INSN(sve_uzp2, 0b1); // Concatenate odd elements from two predicates
3454 #undef INSN
3455 
3456 // Predicate counted loop (SVE) (32-bit variants are not included)
     // INSN(NAME, decode) defines NAME(Pd, T, Rn, Rm). T must not be Q.
     // decode is a 3-bit selector: its upper two bits are emitted via
     // f(decode >> 1, 11, 10) and its lowest bit via f(decode & 1, 4).
     // In the table below the top bit is set for the unsigned forms (lo/ls) and the
     // lowest bit for the "or equal / or the same" forms (le/ls).
     // Rn and Rm are emitted with zrf.
     // NOTE(review): zrf presumably lets register number 31 denote zr -- confirm.
3457 #define INSN(NAME, decode)                                                \
3458   void NAME(PRegister Pd, SIMD_RegVariant T, Register Rn, Register Rm) {  \
3459     starti;                                                               \
3460     assert(T != Q, "invalid register variant");                           \
3461     f(0b00100101, 31, 24), f(T, 23, 22), f(1, 21),                        \
3462     zrf(Rm, 16), f(0, 15, 13), f(1, 12), f(decode >> 1, 11, 10),          \
3463     zrf(Rn, 5), f(decode & 1, 4), prf(Pd, 0);                             \
3464   }
3465 
3466   INSN(sve_whilelt, 0b010);  // While incrementing signed scalar less than scalar
3467   INSN(sve_whilele, 0b011);  // While incrementing signed scalar less than or equal to scalar
3468   INSN(sve_whilelo, 0b110);  // While incrementing unsigned scalar lower than scalar
3469   INSN(sve_whilels, 0b111);  // While incrementing unsigned scalar lower than or the same as scalar
3470 #undef INSN
3471 
3472   // SVE predicate reverse
       //   Pd - destination predicate register
       //   T  - element variant, must not be Q
       //   Pn - source predicate register
3473   void sve_rev(PRegister Pd, SIMD_RegVariant T, PRegister Pn) {
3474     starti;
3475     assert(T != Q, "invalid size");
3476     f(0b00000101, 31, 24), f(T, 23, 22), f(0b1101000100000, 21, 9);
3477     prf(Pn, 5), f(0, 4), prf(Pd, 0);
3478   }
3479 
3480 // SVE partition break condition
     // INSN(NAME, op) defines NAME(Pd, Pg, Pn, isMerge).
     // op is passed to f(op, 23, 22) and selects break-after (0b00) or break-before
     // (0b10). isMerge sets the single bit passed to f(..., 4).
     // NOTE(review): presumably 1 selects merging and 0 zeroing predication -- confirm.
3481 #define INSN(NAME, op) \
3482   void NAME(PRegister Pd, PRegister Pg, PRegister Pn, bool isMerge) {      \
3483     starti;                                                                \
3484     f(0b00100101, 31, 24), f(op, 23, 22), f(0b01000001, 21, 14);           \
3485     prf(Pg, 10), f(0b0, 9), prf(Pn, 5), f(isMerge ? 1 : 0, 4), prf(Pd, 0); \
3486   }
3487 
3488   INSN(sve_brka, 0b00); // Break after first true condition
3489   INSN(sve_brkb, 0b10); // Break before first true condition
3490 #undef INSN
3491 
3492 // Element count and increment scalar (SVE)
     // INSN(NAME, TYPE) defines NAME(Xdn, imm4 = 1, pattern = 0b11111).
     //   TYPE    - element variant constant (B/H/S/D) baked into each mnemonic
     //   imm4    - multiplier; stored in the instruction as imm4 - 1
     //   pattern - 5-bit predicate constraint passed to f(pattern, 9, 5)
3493 #define INSN(NAME, TYPE)                                                             \
3494   void NAME(Register Xdn, unsigned imm4 = 1, int pattern = 0b11111) {                \
3495     starti;                                                                          \
3496     f(0b00000100, 31, 24), f(TYPE, 23, 22), f(0b10, 21, 20);                         \
3497     f(imm4 - 1, 19, 16), f(0b11100, 15, 11), f(0, 10), f(pattern, 9, 5), rf(Xdn, 0); \
3498   }
3499 
3500   INSN(sve_cntb, B);  // Set scalar to multiple of 8-bit predicate constraint element count
3501   INSN(sve_cnth, H);  // Set scalar to multiple of 16-bit predicate constraint element count
3502   INSN(sve_cntw, S);  // Set scalar to multiple of 32-bit predicate constraint element count
3503   INSN(sve_cntd, D);  // Set scalar to multiple of 64-bit predicate constraint element count
3504 #undef INSN
3505 
3506   // Set scalar to active predicate element count
       //   Xd - destination general-purpose register
       //   T  - element variant, must not be Q
       //   Pg - governing predicate, emitted with prf(Pg, 10)
       //   Pn - predicate whose elements are counted, emitted with prf(Pn, 5)
3507   void sve_cntp(Register Xd, SIMD_RegVariant T, PRegister Pg, PRegister Pn) {
3508     starti;
3509     assert(T != Q, "invalid size");
3510     f(0b00100101, 31, 24), f(T, 23, 22), f(0b10000010, 21, 14);
3511     prf(Pg, 10), f(0, 9), prf(Pn, 5), rf(Xd, 0);
3512   }
3513 
3514   // SVE convert signed integer to floating-point (predicated)
       //   Zd / T_dst - destination vector and its (floating-point) element variant
       //   Zn / T_src - source vector and its (integer) element variant
       //   Pg         - governing predicate
       // Neither variant may be B or Q, and an H source requires an H destination.
       // opc / opc2 start out as the numeric values of T_dst / T_src; the two
       // mixed S/D combinations are overridden explicitly below.
3515   void sve_scvtf(FloatRegister Zd, SIMD_RegVariant T_dst, PRegister Pg,
3516                  FloatRegister Zn, SIMD_RegVariant T_src) {
3517     starti;
3518     assert(T_src != B && T_dst != B && T_src != Q && T_dst != Q &&
3519            (T_src != H || T_dst == T_src), "invalid register variant");
3520     int opc = T_dst;
3521     int opc2 = T_src;
3522     // In most cases we can treat T_dst, T_src as opc, opc2,
3523     // except for the following two combinations.
3524     // +-----+------+---+------------------------------------+
3525     // | opc | opc2 | U |        Instruction Details         |
3526     // +-----+------+---+------------------------------------+
3527     // |  11 |   00 | 0 | SCVTF - 32-bit to double-precision |
3528     // |  11 |   10 | 0 | SCVTF - 64-bit to single-precision |
3529     // +-----+------+---+------------------------------------+
3530     if (T_src == S && T_dst == D) {
3531       opc = 0b11;
3532       opc2 = 0b00;
3533     } else if (T_src == D && T_dst == S) {
3534       opc = 0b11;
3535       opc2 = 0b10;
3536     }
3537     f(0b01100101, 31, 24), f(opc, 23, 22), f(0b010, 21, 19);
3538     f(opc2, 18, 17), f(0b0101, 16, 13);
3539     pgrf(Pg, 10), rf(Zn, 5), rf(Zd, 0);
3540   }
3541 
3542   // SVE floating-point convert to signed integer, rounding toward zero (predicated)
       //   Zd / T_dst - destination vector and its (integer) element variant
       //   Zn / T_src - source vector and its (floating-point) element variant
       //   Pg         - governing predicate
       // Neither variant may be B or Q, and an H destination requires an H source.
       // Here opc / opc2 start out as T_src / T_dst (the reverse of sve_scvtf); the
       // two mixed S/D combinations are overridden explicitly below.
3543   void sve_fcvtzs(FloatRegister Zd, SIMD_RegVariant T_dst, PRegister Pg,
3544                   FloatRegister Zn, SIMD_RegVariant T_src) {
3545     starti;
3546     assert(T_src != B && T_dst != B && T_src != Q && T_dst != Q &&
3547            (T_dst != H || T_src == H), "invalid register variant");
3548     int opc = T_src;
3549     int opc2 = T_dst;
3550     // In most cases we can treat T_src, T_dst as opc, opc2,
3551     // except for the following two combinations.
3552     // +-----+------+---+-------------------------------------+
3553     // | opc | opc2 | U |         Instruction Details         |
3554     // +-----+------+---+-------------------------------------+
3555     // |  11 |  10  | 0 | FCVTZS - single-precision to 64-bit |
3556     // |  11 |  00  | 0 | FCVTZS - double-precision to 32-bit |
3557     // +-----+------+---+-------------------------------------+
3558     if (T_src == S && T_dst == D) {
3559       opc = 0b11;
3560       opc2 = 0b10;
3561     } else if (T_src == D && T_dst == S) {
3562       opc = 0b11;
3563       opc2 = 0b00;
3564     }
3565     f(0b01100101, 31, 24), f(opc, 23, 22), f(0b011, 21, 19);
3566     f(opc2, 18, 17), f(0b0101, 16, 13);
3567     pgrf(Pg, 10), rf(Zn, 5), rf(Zd, 0);
3568   }
3569 
3570   // SVE floating-point convert precision (predicated)
       //   Zd / T_dst - destination vector and its element variant
       //   Zn / T_src - source vector and its element variant
       //   Pg         - governing predicate
       // The variants must differ and may not be B or Q (assert). Half precision is
       // rejected with guarantee, which leaves the S -> D and D -> S conversions.
       // Only T_dst is encoded (f(T_dst, 17, 16)); T_src is used for validation only.
3571   void sve_fcvt(FloatRegister Zd, SIMD_RegVariant T_dst, PRegister Pg,
3572                 FloatRegister Zn, SIMD_RegVariant T_src) {
3573     starti;
3574     assert(T_src != B && T_dst != B && T_src != Q && T_dst != Q &&
3575            T_src != T_dst, "invalid register variant");
3576     guarantee(T_src != H && T_dst != H, "half-precision unsupported");
3577     f(0b01100101, 31, 24), f(0b11, 23, 22), f(0b0010, 21, 18);
3578     f(T_dst, 17, 16), f(0b101, 15, 13);
3579     pgrf(Pg, 10), rf(Zn, 5), rf(Zd, 0);
3580   }
3581 
3582 // SVE extract element to general-purpose register
     // INSN(NAME, before) defines NAME(Rd, T, Pg, Zn).
     // `before` is the single bit passed to f(before, 16): 0 for sve_lasta and
     // 1 for sve_lastb.
     // NOTE(review): presumably lasta extracts the element after the last active
     // one and lastb the last active element itself -- confirm against the ISA.
3583 #define INSN(NAME, before)                                                      \
3584   void NAME(Register Rd, SIMD_RegVariant T, PRegister Pg,  FloatRegister Zn) {  \
3585     starti;                                                                     \
3586     f(0b00000101, 31, 24), f(T, 23, 22), f(0b10000, 21, 17);                    \
3587     f(before, 16), f(0b101, 15, 13);                                            \
3588     pgrf(Pg, 10), rf(Zn, 5), rf(Rd, 0);                                         \
3589   }
3590 
3591   INSN(sve_lasta, 0b0);
3592   INSN(sve_lastb, 0b1);
3593 #undef INSN
3594 
3595 // SVE extract element to SIMD&FP scalar register
     // INSN(NAME, before) defines NAME(Vd, T, Pg, Zn): the overloads of sve_lasta /
     // sve_lastb whose destination is a FloatRegister instead of a Register.
     // `before` is the single bit passed to f(before, 16): 0 for sve_lasta and
     // 1 for sve_lastb.
3596 #define INSN(NAME, before)                                                           \
3597   void NAME(FloatRegister Vd, SIMD_RegVariant T, PRegister Pg,  FloatRegister Zn) {  \
3598     starti;                                                                          \
3599     f(0b00000101, 31, 24), f(T, 23, 22), f(0b10001, 21, 17);                         \
3600     f(before, 16), f(0b100, 15, 13);                                                 \
3601     pgrf(Pg, 10), rf(Zn, 5), rf(Vd, 0);                                              \
3602   }
3603 
3604   INSN(sve_lasta, 0b0);
3605   INSN(sve_lastb, 0b1);
3606 #undef INSN
3607 
3608   // SVE create index starting from and incremented by immediate
       //   Zd   - destination vector register
       //   T    - element variant, must not be Q
       //   imm1 - start value, emitted as a signed value via sf(imm1, 9, 5)
       //   imm2 - increment, emitted as a signed value via sf(imm2, 20, 16)
3609   void sve_index(FloatRegister Zd, SIMD_RegVariant T, int imm1, int imm2) {
3610     starti;
         // Same size check as the neighbouring encoders that place T in a 2-bit field.
         assert(T != Q, "invalid size");
3611     f(0b00000100, 31, 24), f(T, 23, 22), f(0b1, 21);
3612     sf(imm2, 20, 16), f(0b010000, 15, 10);
3613     sf(imm1, 9, 5), rf(Zd, 0);
3614   }
3615 
3616   // SVE programmable table lookup/permute using vector of element indices
       //   Zd - destination vector register
       //   T  - element variant, must not be Q
       //   Zn - table (source) vector
       //   Zm - vector of element indices
3617   void sve_tbl(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn, FloatRegister Zm) {
3618     starti;
3619     assert(T != Q, "invalid size");
3620     f(0b00000101, 31, 24), f(T, 23, 22), f(0b1, 21), rf(Zm, 16);
3621     f(0b001100, 15, 10), rf(Zn, 5), rf(Zd, 0);
3622   }
3623 
3624   Assembler(CodeBuffer* code) : AbstractAssembler(code) {
         // Nothing to set up here: `code` is simply forwarded to AbstractAssembler.
3625   }
3626 
3627   // Stack overflow checking
3628   virtual void bang_stack_with_offset(int offset);
3629 
3630   static bool operand_valid_for_logical_immediate(bool is32, uint64_t imm);
3631   static bool operand_valid_for_add_sub_immediate(int64_t imm);
3632   static bool operand_valid_for_float_immediate(double imm);
3633 
3634   void emit_data64(jlong data, relocInfo::relocType rtype, int format = 0);
3635   void emit_data64(jlong data, RelocationHolder const& rspec, int format = 0);
3636 };
3637 
3638 inline Assembler::Membar_mask_bits operator|(Assembler::Membar_mask_bits a,
3639                                              Assembler::Membar_mask_bits b) {
       // Combine two membar masks: OR their unsigned values, then convert the
       // result back to the enum type.
3640   return static_cast<Assembler::Membar_mask_bits>(static_cast<unsigned>(a) | static_cast<unsigned>(b));
3641 }
3642 
3643 Instruction_aarch64::~Instruction_aarch64() {
       // The assembled 32-bit word is written to the assembler when the
       // instruction object is destroyed.
3644   assem->emit_int32(insn);
       // NOTE(review): get_bits() presumably tracks which bit positions were
       // assigned, so this checks that all 32 bits were covered -- confirm.
3645   assert_cond(get_bits() == 0xffffffff);
3646 }
3647 
3648 #undef f
3649 #undef sf
3650 #undef rf
3651 #undef srf
3652 #undef zrf
3653 #undef prf
3654 #undef pgrf
3655 #undef fixed
3656 
3657 #undef starti
3658 
3659 // Invert a condition
     // The result is the condition whose integer value differs from cond only in
     // the least-significant bit.
3660 inline const Assembler::Condition operator~(const Assembler::Condition cond) {
3661   return static_cast<Assembler::Condition>(static_cast<int>(cond) ^ 1);
3662 }
3663 
3664 extern "C" void das(uint64_t start, int len);
3665 
3666 #endif // CPU_AARCH64_ASSEMBLER_AARCH64_HPP