/*
 * Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2014, 2021, Red Hat Inc. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#ifndef CPU_AARCH64_ASSEMBLER_AARCH64_HPP
#define CPU_AARCH64_ASSEMBLER_AARCH64_HPP

#include "asm/register.hpp"
#include "metaprogramming/enableIf.hpp"

#ifdef __GNUC__

// __nop needs volatile so that compiler doesn't optimize it away
#define NOP() asm volatile ("nop");

#elif defined(_MSC_VER)

// Use MSVC intrinsic: https://docs.microsoft.com/en-us/cpp/intrinsics/arm64-intrinsics?view=vs-2019#I
#define NOP() __nop();

#endif


// definitions of various symbolic names for machine registers

// First intercalls between C and Java which use 8 general registers
// and 8 floating registers

// we also have to copy between x86 and ARM registers but that's a
// secondary complication -- not all code employing C call convention
// executes as x86 code though -- we generate some of it

// Argument register counts for the C/native ("_c") and Java ("_j")
// calling conventions on AArch64.
class Argument {
 public:
  enum {
    n_int_register_parameters_c   = 8,  // r0, r1, ... r7 (c_rarg0, c_rarg1, ...)
    n_float_register_parameters_c = 8,  // v0, v1, ... v7 (c_farg0, c_farg1, ... )

    n_int_register_parameters_j   = 8,  // r1, ... r7, r0 (j_rarg0, j_rarg1, ...
    n_float_register_parameters_j = 8   // v0, v1, ... v7 (j_farg0, j_farg1, ...
  };
};

REGISTER_DECLARATION(Register, c_rarg0, r0);
REGISTER_DECLARATION(Register, c_rarg1, r1);
REGISTER_DECLARATION(Register, c_rarg2, r2);
REGISTER_DECLARATION(Register, c_rarg3, r3);
REGISTER_DECLARATION(Register, c_rarg4, r4);
REGISTER_DECLARATION(Register, c_rarg5, r5);
REGISTER_DECLARATION(Register, c_rarg6, r6);
REGISTER_DECLARATION(Register, c_rarg7, r7);

REGISTER_DECLARATION(FloatRegister, c_farg0, v0);
REGISTER_DECLARATION(FloatRegister, c_farg1, v1);
REGISTER_DECLARATION(FloatRegister, c_farg2, v2);
REGISTER_DECLARATION(FloatRegister, c_farg3, v3);
REGISTER_DECLARATION(FloatRegister, c_farg4, v4);
REGISTER_DECLARATION(FloatRegister, c_farg5, v5);
REGISTER_DECLARATION(FloatRegister, c_farg6, v6);
REGISTER_DECLARATION(FloatRegister, c_farg7, v7);

// Symbolically name the register arguments used by the Java calling convention.
// We have control over the convention for java so we can do what we please.
// What pleases us is to offset the java calling convention so that when
// we call a suitable jni method the arguments are lined up and we don't
// have to do much shuffling. A suitable jni method is non-static and a
// small number of arguments
//
// |--------------------------------------------------------------------|
// | c_rarg0 c_rarg1 c_rarg2 c_rarg3 c_rarg4 c_rarg5 c_rarg6 c_rarg7 |
// |--------------------------------------------------------------------|
// | r0      r1      r2      r3      r4      r5      r6      r7      |
// |--------------------------------------------------------------------|
// | j_rarg7 j_rarg0 j_rarg1 j_rarg2 j_rarg3 j_rarg4 j_rarg5 j_rarg6 |
// |--------------------------------------------------------------------|


REGISTER_DECLARATION(Register, j_rarg0, c_rarg1);
REGISTER_DECLARATION(Register, j_rarg1, c_rarg2);
REGISTER_DECLARATION(Register, j_rarg2, c_rarg3);
REGISTER_DECLARATION(Register, j_rarg3, c_rarg4);
REGISTER_DECLARATION(Register, j_rarg4, c_rarg5);
REGISTER_DECLARATION(Register, j_rarg5, c_rarg6);
REGISTER_DECLARATION(Register, j_rarg6, c_rarg7);
REGISTER_DECLARATION(Register, j_rarg7, c_rarg0);

// Java floating args are passed as per C

REGISTER_DECLARATION(FloatRegister, j_farg0, v0);
REGISTER_DECLARATION(FloatRegister, j_farg1, v1);
REGISTER_DECLARATION(FloatRegister, j_farg2, v2);
REGISTER_DECLARATION(FloatRegister, j_farg3, v3);
REGISTER_DECLARATION(FloatRegister, j_farg4, v4);
REGISTER_DECLARATION(FloatRegister, j_farg5, v5);
REGISTER_DECLARATION(FloatRegister, j_farg6, v6);
REGISTER_DECLARATION(FloatRegister, j_farg7, v7);

// registers used to hold VM data either temporarily within a method
// or across method calls

// volatile (caller-save) registers

// r8 is used for indirect result location return
// we use it and r9 as scratch registers
REGISTER_DECLARATION(Register, rscratch1, r8);
REGISTER_DECLARATION(Register, rscratch2, r9);

// current method -- must be in a call-clobbered register
REGISTER_DECLARATION(Register, rmethod,   r12);

// non-volatile
// (callee-save) registers are r16-29
// of which the following are dedicated global state

// link register
REGISTER_DECLARATION(Register, lr,        r30);
// frame pointer
REGISTER_DECLARATION(Register, rfp,       r29);
// current thread
REGISTER_DECLARATION(Register, rthread,   r28);
// base of heap
REGISTER_DECLARATION(Register, rheapbase, r27);
// constant pool cache
REGISTER_DECLARATION(Register, rcpool,    r26);
// monitors allocated on stack
REGISTER_DECLARATION(Register, rmonitors, r25);
// locals on stack
REGISTER_DECLARATION(Register, rlocals,   r24);
// bytecode pointer
REGISTER_DECLARATION(Register, rbcp,      r22);
// Dispatch table base
REGISTER_DECLARATION(Register, rdispatch, r21);
// Java stack pointer
REGISTER_DECLARATION(Register, esp,       r20);

// Preserved predicate register with all elements set TRUE.
REGISTER_DECLARATION(PRegister, ptrue, p7);

#define assert_cond(ARG1) assert(ARG1, #ARG1)

namespace asm_util {
  // Helpers for encoding bitmask immediates used by logical
  // (immediate) and SVE logical instructions.
  uint32_t encode_logical_immediate(bool is32, uint64_t imm);
  uint32_t encode_sve_logical_immediate(unsigned elembits, uint64_t imm);
  bool operand_valid_for_immediate_bits(int64_t imm, unsigned nbits);
};

using namespace asm_util;


class Assembler;

// Accumulates one 32-bit instruction word. Field values are OR-ed in
// with f()/sf() and friends; in ASSERT builds a parallel "bits" mask
// records which bit positions have been written so that overlapping or
// unset fields can be detected.
class Instruction_aarch64 {
  unsigned insn;
#ifdef ASSERT
  unsigned bits;
#endif
  Assembler *assem;

public:

  Instruction_aarch64(class Assembler *as) {
#ifdef ASSERT
    bits = 0;
#endif
    insn = 0;
    assem = as;
  }

  inline ~Instruction_aarch64();

  unsigned &get_insn() { return insn; }
#ifdef ASSERT
  unsigned &get_bits() { return bits; }
#endif

  // Sign-extend bit field val[hi:lo] to 32 bits.
  static inline int32_t extend(unsigned val, int hi = 31, int lo = 0) {
    union {
      unsigned u;
      int n;
    };

    u = val << (31 - hi);
    n = n >> (31 - hi + lo);
    return n;
  }

  // Extract the unsigned bit field val[msb:lsb].
  static inline uint32_t extract(uint32_t val, int msb, int lsb) {
    int nbits = msb - lsb + 1;
    assert_cond(msb >= lsb);
    uint32_t mask = checked_cast<uint32_t>(right_n_bits(nbits));
    uint32_t result = val >> lsb;
    result &= mask;
    return result;
  }

  // Extract the bit field val[msb:lsb] and sign-extend it.
  static inline int32_t sextract(uint32_t val, int msb, int lsb) {
    uint32_t uval = extract(val, msb, lsb);
    return extend(uval, msb - lsb);
  }

  // Rewrite the unsigned bit field [msb:lsb] of the instruction word at a.
  static void patch(address a, int msb, int lsb, uint64_t val) {
    int nbits = msb - lsb + 1;
    guarantee(val < (1ULL << nbits), "Field too big for insn");
    assert_cond(msb >= lsb);
    unsigned mask = checked_cast<unsigned>(right_n_bits(nbits));
    val <<= lsb;
    mask <<= lsb;
    unsigned target = *(unsigned *)a;
    target &= ~mask;
    target |= val;
    *(unsigned *)a = target;
  }

  // Rewrite the signed bit field [msb:lsb] of the instruction word at a.
  static void spatch(address a, int msb, int lsb, int64_t val) {
    int nbits = msb - lsb + 1;
    // val must fit in nbits as a signed quantity: all the discarded
    // high bits must equal the sign bit.
    int64_t chk = val >> (nbits - 1);
    guarantee (chk == -1 || chk == 0, "Field too big for insn");
    unsigned uval = val;
    unsigned mask = checked_cast<unsigned>(right_n_bits(nbits));
    uval &= mask;
    uval <<= lsb;
    mask <<= lsb;
    unsigned target = *(unsigned *)a;
    target &= ~mask;
    target |= uval;
    *(unsigned *)a = target;
  }

  // Set the unsigned field insn[msb:lsb] to val.
  void f(unsigned val, int msb, int lsb) {
    int nbits = msb - lsb + 1;
    guarantee(val < (1ULL << nbits), "Field too big for insn");
    assert_cond(msb >= lsb);
    val <<= lsb;
    insn |= val;
#ifdef ASSERT
    // Each bit position may only be written once per instruction.
    unsigned mask = checked_cast<unsigned>(right_n_bits(nbits));
    mask <<= lsb;
    assert_cond((bits & mask) == 0);
    bits |= mask;
#endif
  }

  // Set the single bit insn[bit] to val.
  void f(unsigned val, int bit) {
    f(val, bit, bit);
  }

  // Set the signed field insn[msb:lsb] to val (two's-complement).
  void sf(int64_t val, int msb, int lsb) {
    int nbits = msb - lsb + 1;
    int64_t chk = val >> (nbits - 1);
    guarantee (chk == -1 || chk == 0, "Field too big for insn");
    unsigned uval = val;
    unsigned mask = checked_cast<unsigned>(right_n_bits(nbits));
    uval &= mask;
    f(uval, lsb + nbits - 1, lsb);
  }

  // 5-bit general register field at lsb.
  void rf(Register r, int lsb) {
    f(r->encoding_nocheck(), lsb + 4, lsb);
  }

  // reg|ZR
  void zrf(Register r, int lsb) {
    f(r->encoding_nocheck() - (r == zr), lsb + 4, lsb);
  }

  // reg|SP
  void srf(Register r, int lsb) {
    f(r == sp ? 31 : r->encoding_nocheck(), lsb + 4, lsb);
  }

  // 5-bit FP/SIMD register field at lsb.
  void rf(FloatRegister r, int lsb) {
    f(r->encoding_nocheck(), lsb + 4, lsb);
  }

  // 4-bit SVE predicate register field at lsb.
  void prf(PRegister r, int lsb) {
    f(r->encoding_nocheck(), lsb + 3, lsb);
  }

  // 3-bit SVE governing predicate field at lsb.
  void pgrf(PRegister r, int lsb) {
    f(r->encoding_nocheck(), lsb + 2, lsb);
  }

  // Read back field insn[msb:lsb]; in ASSERT builds the field must
  // already have been written.
  unsigned get(int msb = 31, int lsb = 0) {
    int nbits = msb - lsb + 1;
    unsigned mask = checked_cast<unsigned>(right_n_bits(nbits)) << lsb;
    assert_cond((bits & mask) == mask);
    return (insn & mask) >> lsb;
  }
};

#define starti Instruction_aarch64 current_insn(this);

// Base register plus immediate offset, shared by the pre- and
// post-indexed addressing helpers below.
class PrePost {
  int _offset;
  Register _r;
public:
  PrePost(Register reg, int o) : _offset(o), _r(reg) { }
  int offset() { return _offset; }
  Register reg() { return _r; }
};

class Pre : public PrePost {
public:
  Pre(Register reg, int o) : PrePost(reg, o) { }
};

class Post : public PrePost {
  Register _idx;
  bool _is_postreg;
public:
  Post(Register reg, int o) : PrePost(reg, o) { _idx = NULL; _is_postreg = false; }
  Post(Register reg, Register idx) : PrePost(reg, 0) { _idx = idx; _is_postreg = true; }
  Register idx_reg() { return _idx; }
  bool is_postreg() {return _is_postreg; }
};

namespace ext
{
  enum operation { uxtb, uxth, uxtw, uxtx, sxtb, sxth, sxtw, sxtx };
};

// Addressing modes
class Address {
 public:

  enum mode { no_mode, base_plus_offset, pre, post, post_reg, pcrel,
              base_plus_offset_reg, literal };

  // Shift and extend for base reg + reg offset addressing
  class extend {
    int _option, _shift;
    ext::operation _op;
  public:
    extend() { }
    extend(int s, int o, ext::operation op) : _option(o), _shift(s), _op(op) { }
    int option() const{ return _option; }
    int shift() const { return _shift; }
    ext::operation op() const { return _op; }
  };
  class uxtw : public extend {
  public:
    uxtw(int shift = -1): extend(shift, 0b010, ext::uxtw) { }
  };
  class lsl : public extend {
  public:
    lsl(int shift = -1): extend(shift, 0b011, ext::uxtx) { }
  };
  class sxtw : public extend {
  public:
    sxtw(int shift = -1): extend(shift, 0b110, ext::sxtw) { }
  };
  class sxtx : public extend {
  public:
    sxtx(int shift = -1): extend(shift, 0b111, ext::sxtx) { }
  };

 private:
  Register _base;
  Register _index;
  int64_t _offset;
  enum mode _mode;
  extend _ext;

  RelocationHolder _rspec;

  // Typically we use AddressLiterals we want to use their rval
  // However in some situations we want the lval (effect address) of
  // the item. We provide a special factory for making those lvals.
  bool _is_lval;

  // If the target is far we'll need to load the ea of this to a
  // register to reach it. Otherwise if near we can do PC-relative
  // addressing.
  address _target;

 public:
  Address()
    : _mode(no_mode) { }
  Address(Register r)
    : _base(r), _index(noreg), _offset(0), _mode(base_plus_offset), _target(0) { }

  template<typename T, ENABLE_IF(std::is_integral<T>::value)>
  Address(Register r, T o)
    : _base(r), _index(noreg), _offset(o), _mode(base_plus_offset), _target(0) {}

  Address(Register r, ByteSize disp)
    : Address(r, in_bytes(disp)) { }
  Address(Register r, Register r1, extend ext = lsl())
    : _base(r), _index(r1), _offset(0), _mode(base_plus_offset_reg),
      _ext(ext), _target(0) { }
  Address(Pre p)
    : _base(p.reg()), _offset(p.offset()), _mode(pre) { }
  Address(Post p)
    : _base(p.reg()), _index(p.idx_reg()), _offset(p.offset()),
      _mode(p.is_postreg() ? post_reg : post), _target(0) { }
  Address(address target, RelocationHolder const& rspec)
    : _mode(literal),
      _rspec(rspec),
      _is_lval(false),
      _target(target) { }
  Address(address target, relocInfo::relocType rtype = relocInfo::external_word_type);
  Address(Register base, RegisterOrConstant index, extend ext = lsl())
    : _base (base),
      _offset(0), _ext(ext), _target(0) {
    if (index.is_register()) {
      _mode = base_plus_offset_reg;
      _index = index.as_register();
    } else {
      guarantee(ext.option() == ext::uxtx, "should be");
      assert(index.is_constant(), "should be");
      _mode = base_plus_offset;
      _offset = index.as_constant() << ext.shift();
    }
  }

  Register base() const {
    guarantee((_mode == base_plus_offset || _mode == base_plus_offset_reg
               || _mode == post || _mode == post_reg),
              "wrong mode");
    return _base;
  }
  int64_t offset() const {
    return _offset;
  }
  Register index() const {
    return _index;
  }
  mode getMode() const {
    return _mode;
  }
  bool uses(Register reg) const { return _base == reg || _index == reg; }
  address target() const { return _target; }
  const RelocationHolder& rspec() const { return _rspec; }

  // Encode this address into the load/store instruction being built in
  // i. Size/type bits (31:30, 26, 23) must already have been set by the
  // caller; they are read back via i->get() to pick the operand size.
  void encode(Instruction_aarch64 *i) const {
    i->f(0b111, 29, 27);
    i->srf(_base, 5);

    switch(_mode) {
    case base_plus_offset:
      {
        unsigned size = i->get(31, 30);
        if (i->get(26, 26) && i->get(23, 23)) {
          // SIMD Q Type - Size = 128 bits
          assert(size == 0, "bad size");
          size = 0b100;
        }
        assert(offset_ok_for_immed(_offset, size),
               "must be, was: " INT64_FORMAT ", %d", _offset, size);
        unsigned mask = (1 << size) - 1;
        if (_offset < 0 || _offset & mask) {
          // Unscaled signed 9-bit offset form.
          i->f(0b00, 25, 24);
          i->f(0, 21), i->f(0b00, 11, 10);
          i->sf(_offset, 20, 12);
        } else {
          // Scaled unsigned 12-bit offset form.
          i->f(0b01, 25, 24);
          i->f(_offset >> size, 21, 10);
        }
      }
      break;

    case base_plus_offset_reg:
      {
        i->f(0b00, 25, 24);
        i->f(1, 21);
        i->rf(_index, 16);
        i->f(_ext.option(), 15, 13);
        unsigned size = i->get(31, 30);
        if (i->get(26, 26) && i->get(23, 23)) {
          // SIMD Q Type - Size = 128 bits
          assert(size == 0, "bad size");
          size = 0b100;
        }
        if (size == 0) // It's a byte
          i->f(_ext.shift() >= 0, 12);
        else {
          assert(_ext.shift() <= 0 || _ext.shift() == (int)size, "bad shift");
          i->f(_ext.shift() > 0, 12);
        }
        i->f(0b10, 11, 10);
      }
      break;

    case pre:
      i->f(0b00, 25, 24);
      i->f(0, 21), i->f(0b11, 11, 10);
      i->sf(_offset, 20, 12);
      break;

    case post:
      i->f(0b00, 25, 24);
      i->f(0, 21), i->f(0b01, 11, 10);
      i->sf(_offset, 20, 12);
      break;

    default:
      ShouldNotReachHere();
    }
  }

  // Encode this address for a load/store pair instruction; the 7-bit
  // immediate is scaled by the operand size derived from the already-set
  // size/type bits.
  void encode_pair(Instruction_aarch64 *i) const {
    switch(_mode) {
    case base_plus_offset:
      i->f(0b010, 25, 23);
      break;
    case pre:
      i->f(0b011, 25, 23);
      break;
    case post:
      i->f(0b001, 25, 23);
      break;
    default:
      ShouldNotReachHere();
    }

    unsigned size; // Operand shift in 32-bit words

    if (i->get(26, 26)) { // float
      switch(i->get(31, 30)) {
      case 0b10:
        size = 2; break;
      case 0b01:
        size = 1; break;
      case 0b00:
        size = 0; break;
      default:
        ShouldNotReachHere();
        size = 0;  // unreachable
      }
    } else {
      size = i->get(31, 31);
    }

    size = 4 << size;
    guarantee(_offset % size == 0, "bad offset");
    i->sf(_offset / size, 21, 15);
    i->srf(_base, 5);
  }

  void encode_nontemporal_pair(Instruction_aarch64 *i) const {
    // Only base + offset is allowed
    i->f(0b000, 25, 23);
    unsigned size = i->get(31, 31);
    size = 4 << size;
    guarantee(_offset % size == 0, "bad offset");
    i->sf(_offset / size, 21, 15);
    i->srf(_base, 5);
    guarantee(_mode == Address::base_plus_offset,
              "Bad addressing mode for non-temporal op");
  }

  void lea(MacroAssembler *, Register) const;

  static bool offset_ok_for_immed(int64_t offset, uint shift);

  // True if offset is a multiple of the vector length vl and the
  // resulting MUL VL quotient fits in a signed "shift"-bit field.
  static bool offset_ok_for_sve_immed(int64_t offset, int shift, int vl /* sve vector length */) {
    if (offset % vl == 0) {
      // Convert address offset into sve imm offset (MUL VL).
      int sve_offset = offset / vl;
      if (((-(1 << (shift - 1))) <= sve_offset) && (sve_offset < (1 << (shift - 1)))) {
        // sve_offset can be encoded
        return true;
      }
    }
    return false;
  }
};

// Convenience classes
class RuntimeAddress: public Address {

  public:

  RuntimeAddress(address target) : Address(target, relocInfo::runtime_call_type) {}

};

class OopAddress: public Address {

  public:

  OopAddress(address target) : Address(target, relocInfo::oop_type){}

};

class ExternalAddress: public Address {
 private:
  static relocInfo::relocType reloc_for_target(address target) {
    // Sometimes ExternalAddress is used for values which aren't
    // exactly addresses, like the card table base.
    // external_word_type can't be used for values in the first page
    // so just skip the reloc in that case.
612 return external_word_Relocation::can_be_relocated(target) ? relocInfo::external_word_type : relocInfo::none; 613 } 614 615 public: 616 617 ExternalAddress(address target) : Address(target, reloc_for_target(target)) {} 618 619 }; 620 621 class InternalAddress: public Address { 622 623 public: 624 625 InternalAddress(address target) : Address(target, relocInfo::internal_word_type) {} 626 }; 627 628 const int FPUStateSizeInWords = FloatRegisterImpl::number_of_registers * 629 FloatRegisterImpl::save_slots_per_register; 630 631 typedef enum { 632 PLDL1KEEP = 0b00000, PLDL1STRM, PLDL2KEEP, PLDL2STRM, PLDL3KEEP, PLDL3STRM, 633 PSTL1KEEP = 0b10000, PSTL1STRM, PSTL2KEEP, PSTL2STRM, PSTL3KEEP, PSTL3STRM, 634 PLIL1KEEP = 0b01000, PLIL1STRM, PLIL2KEEP, PLIL2STRM, PLIL3KEEP, PLIL3STRM 635 } prfop; 636 637 class Assembler : public AbstractAssembler { 638 639 public: 640 641 #ifndef PRODUCT 642 static const uintptr_t asm_bp; 643 644 void emit_int32(jint x) { 645 if ((uintptr_t)pc() == asm_bp) 646 NOP(); 647 AbstractAssembler::emit_int32(x); 648 } 649 #else 650 void emit_int32(jint x) { 651 AbstractAssembler::emit_int32(x); 652 } 653 #endif 654 655 enum { instruction_size = 4 }; 656 657 //---< calculate length of instruction >--- 658 // We just use the values set above. 
659 // instruction must start at passed address 660 static unsigned int instr_len(unsigned char *instr) { return instruction_size; } 661 662 //---< longest instructions >--- 663 static unsigned int instr_maxlen() { return instruction_size; } 664 665 Address adjust(Register base, int offset, bool preIncrement) { 666 if (preIncrement) 667 return Address(Pre(base, offset)); 668 else 669 return Address(Post(base, offset)); 670 } 671 672 Address pre(Register base, int offset) { 673 return adjust(base, offset, true); 674 } 675 676 Address post(Register base, int offset) { 677 return adjust(base, offset, false); 678 } 679 680 Address post(Register base, Register idx) { 681 return Address(Post(base, idx)); 682 } 683 684 static address locate_next_instruction(address inst); 685 686 #define f current_insn.f 687 #define sf current_insn.sf 688 #define rf current_insn.rf 689 #define srf current_insn.srf 690 #define zrf current_insn.zrf 691 #define prf current_insn.prf 692 #define pgrf current_insn.pgrf 693 694 typedef void (Assembler::* uncond_branch_insn)(address dest); 695 typedef void (Assembler::* compare_and_branch_insn)(Register Rt, address dest); 696 typedef void (Assembler::* test_and_branch_insn)(Register Rt, int bitpos, address dest); 697 typedef void (Assembler::* prefetch_insn)(address target, prfop); 698 699 void wrap_label(Label &L, uncond_branch_insn insn); 700 void wrap_label(Register r, Label &L, compare_and_branch_insn insn); 701 void wrap_label(Register r, int bitpos, Label &L, test_and_branch_insn insn); 702 void wrap_label(Label &L, prfop, prefetch_insn insn); 703 704 // PC-rel. 
addressing 705 706 void adr(Register Rd, address dest); 707 void _adrp(Register Rd, address dest); 708 709 void adr(Register Rd, const Address &dest); 710 void _adrp(Register Rd, const Address &dest); 711 712 void adr(Register Rd, Label &L) { 713 wrap_label(Rd, L, &Assembler::Assembler::adr); 714 } 715 void _adrp(Register Rd, Label &L) { 716 wrap_label(Rd, L, &Assembler::_adrp); 717 } 718 719 void adrp(Register Rd, const Address &dest, uint64_t &offset); 720 721 #undef INSN 722 723 void add_sub_immediate(Instruction_aarch64 ¤t_insn, Register Rd, Register Rn, 724 unsigned uimm, int op, int negated_op); 725 726 // Add/subtract (immediate) 727 #define INSN(NAME, decode, negated) \ 728 void NAME(Register Rd, Register Rn, unsigned imm, unsigned shift) { \ 729 starti; \ 730 f(decode, 31, 29), f(0b10001, 28, 24), f(shift, 23, 22), f(imm, 21, 10); \ 731 zrf(Rd, 0), srf(Rn, 5); \ 732 } \ 733 \ 734 void NAME(Register Rd, Register Rn, unsigned imm) { \ 735 starti; \ 736 add_sub_immediate(current_insn, Rd, Rn, imm, decode, negated); \ 737 } 738 739 INSN(addsw, 0b001, 0b011); 740 INSN(subsw, 0b011, 0b001); 741 INSN(adds, 0b101, 0b111); 742 INSN(subs, 0b111, 0b101); 743 744 #undef INSN 745 746 #define INSN(NAME, decode, negated) \ 747 void NAME(Register Rd, Register Rn, unsigned imm) { \ 748 starti; \ 749 add_sub_immediate(current_insn, Rd, Rn, imm, decode, negated); \ 750 } 751 752 INSN(addw, 0b000, 0b010); 753 INSN(subw, 0b010, 0b000); 754 INSN(add, 0b100, 0b110); 755 INSN(sub, 0b110, 0b100); 756 757 #undef INSN 758 759 // Logical (immediate) 760 #define INSN(NAME, decode, is32) \ 761 void NAME(Register Rd, Register Rn, uint64_t imm) { \ 762 starti; \ 763 uint32_t val = encode_logical_immediate(is32, imm); \ 764 f(decode, 31, 29), f(0b100100, 28, 23), f(val, 22, 10); \ 765 srf(Rd, 0), zrf(Rn, 5); \ 766 } 767 768 INSN(andw, 0b000, true); 769 INSN(orrw, 0b001, true); 770 INSN(eorw, 0b010, true); 771 INSN(andr, 0b100, false); 772 INSN(orr, 0b101, false); 773 INSN(eor, 0b110, 
false); 774 775 #undef INSN 776 777 #define INSN(NAME, decode, is32) \ 778 void NAME(Register Rd, Register Rn, uint64_t imm) { \ 779 starti; \ 780 uint32_t val = encode_logical_immediate(is32, imm); \ 781 f(decode, 31, 29), f(0b100100, 28, 23), f(val, 22, 10); \ 782 zrf(Rd, 0), zrf(Rn, 5); \ 783 } 784 785 INSN(ands, 0b111, false); 786 INSN(andsw, 0b011, true); 787 788 #undef INSN 789 790 // Move wide (immediate) 791 #define INSN(NAME, opcode) \ 792 void NAME(Register Rd, unsigned imm, unsigned shift = 0) { \ 793 assert_cond((shift/16)*16 == shift); \ 794 starti; \ 795 f(opcode, 31, 29), f(0b100101, 28, 23), f(shift/16, 22, 21), \ 796 f(imm, 20, 5); \ 797 rf(Rd, 0); \ 798 } 799 800 INSN(movnw, 0b000); 801 INSN(movzw, 0b010); 802 INSN(movkw, 0b011); 803 INSN(movn, 0b100); 804 INSN(movz, 0b110); 805 INSN(movk, 0b111); 806 807 #undef INSN 808 809 // Bitfield 810 #define INSN(NAME, opcode, size) \ 811 void NAME(Register Rd, Register Rn, unsigned immr, unsigned imms) { \ 812 starti; \ 813 guarantee(size == 1 || (immr < 32 && imms < 32), "incorrect immr/imms");\ 814 f(opcode, 31, 22), f(immr, 21, 16), f(imms, 15, 10); \ 815 zrf(Rn, 5), rf(Rd, 0); \ 816 } 817 818 INSN(sbfmw, 0b0001001100, 0); 819 INSN(bfmw, 0b0011001100, 0); 820 INSN(ubfmw, 0b0101001100, 0); 821 INSN(sbfm, 0b1001001101, 1); 822 INSN(bfm, 0b1011001101, 1); 823 INSN(ubfm, 0b1101001101, 1); 824 825 #undef INSN 826 827 // Extract 828 #define INSN(NAME, opcode, size) \ 829 void NAME(Register Rd, Register Rn, Register Rm, unsigned imms) { \ 830 starti; \ 831 guarantee(size == 1 || imms < 32, "incorrect imms"); \ 832 f(opcode, 31, 21), f(imms, 15, 10); \ 833 zrf(Rm, 16), zrf(Rn, 5), zrf(Rd, 0); \ 834 } 835 836 INSN(extrw, 0b00010011100, 0); 837 INSN(extr, 0b10010011110, 1); 838 839 #undef INSN 840 841 // The maximum range of a branch is fixed for the AArch64 842 // architecture. 
In debug mode we shrink it in order to test 843 // trampolines, but not so small that branches in the interpreter 844 // are out of range. 845 static const uint64_t branch_range = NOT_DEBUG(128 * M) DEBUG_ONLY(2 * M); 846 847 static bool reachable_from_branch_at(address branch, address target) { 848 return uabs(target - branch) < branch_range; 849 } 850 851 // Unconditional branch (immediate) 852 #define INSN(NAME, opcode) \ 853 void NAME(address dest) { \ 854 starti; \ 855 int64_t offset = (dest - pc()) >> 2; \ 856 DEBUG_ONLY(assert(reachable_from_branch_at(pc(), dest), "debug only")); \ 857 f(opcode, 31), f(0b00101, 30, 26), sf(offset, 25, 0); \ 858 } \ 859 void NAME(Label &L) { \ 860 wrap_label(L, &Assembler::NAME); \ 861 } \ 862 void NAME(const Address &dest); 863 864 INSN(b, 0); 865 INSN(bl, 1); 866 867 #undef INSN 868 869 // Compare & branch (immediate) 870 #define INSN(NAME, opcode) \ 871 void NAME(Register Rt, address dest) { \ 872 int64_t offset = (dest - pc()) >> 2; \ 873 starti; \ 874 f(opcode, 31, 24), sf(offset, 23, 5), rf(Rt, 0); \ 875 } \ 876 void NAME(Register Rt, Label &L) { \ 877 wrap_label(Rt, L, &Assembler::NAME); \ 878 } 879 880 INSN(cbzw, 0b00110100); 881 INSN(cbnzw, 0b00110101); 882 INSN(cbz, 0b10110100); 883 INSN(cbnz, 0b10110101); 884 885 #undef INSN 886 887 // Test & branch (immediate) 888 #define INSN(NAME, opcode) \ 889 void NAME(Register Rt, int bitpos, address dest) { \ 890 int64_t offset = (dest - pc()) >> 2; \ 891 int b5 = bitpos >> 5; \ 892 bitpos &= 0x1f; \ 893 starti; \ 894 f(b5, 31), f(opcode, 30, 24), f(bitpos, 23, 19), sf(offset, 18, 5); \ 895 rf(Rt, 0); \ 896 } \ 897 void NAME(Register Rt, int bitpos, Label &L) { \ 898 wrap_label(Rt, bitpos, L, &Assembler::NAME); \ 899 } 900 901 INSN(tbz, 0b0110110); 902 INSN(tbnz, 0b0110111); 903 904 #undef INSN 905 906 // Conditional branch (immediate) 907 enum Condition 908 {EQ, NE, HS, CS=HS, LO, CC=LO, MI, PL, VS, VC, HI, LS, GE, LT, GT, LE, AL, NV}; 909 910 void br(Condition cond, 
address dest) { 911 int64_t offset = (dest - pc()) >> 2; 912 starti; 913 f(0b0101010, 31, 25), f(0, 24), sf(offset, 23, 5), f(0, 4), f(cond, 3, 0); 914 } 915 916 #define INSN(NAME, cond) \ 917 void NAME(address dest) { \ 918 br(cond, dest); \ 919 } 920 921 INSN(beq, EQ); 922 INSN(bne, NE); 923 INSN(bhs, HS); 924 INSN(bcs, CS); 925 INSN(blo, LO); 926 INSN(bcc, CC); 927 INSN(bmi, MI); 928 INSN(bpl, PL); 929 INSN(bvs, VS); 930 INSN(bvc, VC); 931 INSN(bhi, HI); 932 INSN(bls, LS); 933 INSN(bge, GE); 934 INSN(blt, LT); 935 INSN(bgt, GT); 936 INSN(ble, LE); 937 INSN(bal, AL); 938 INSN(bnv, NV); 939 940 void br(Condition cc, Label &L); 941 942 #undef INSN 943 944 // Exception generation 945 void generate_exception(int opc, int op2, int LL, unsigned imm) { 946 starti; 947 f(0b11010100, 31, 24); 948 f(opc, 23, 21), f(imm, 20, 5), f(op2, 4, 2), f(LL, 1, 0); 949 } 950 951 #define INSN(NAME, opc, op2, LL) \ 952 void NAME(unsigned imm) { \ 953 generate_exception(opc, op2, LL, imm); \ 954 } 955 956 INSN(svc, 0b000, 0, 0b01); 957 INSN(hvc, 0b000, 0, 0b10); 958 INSN(smc, 0b000, 0, 0b11); 959 INSN(brk, 0b001, 0, 0b00); 960 INSN(hlt, 0b010, 0, 0b00); 961 INSN(dcps1, 0b101, 0, 0b01); 962 INSN(dcps2, 0b101, 0, 0b10); 963 INSN(dcps3, 0b101, 0, 0b11); 964 965 #undef INSN 966 967 // System 968 void system(int op0, int op1, int CRn, int CRm, int op2, 969 Register rt = dummy_reg) 970 { 971 starti; 972 f(0b11010101000, 31, 21); 973 f(op0, 20, 19); 974 f(op1, 18, 16); 975 f(CRn, 15, 12); 976 f(CRm, 11, 8); 977 f(op2, 7, 5); 978 rf(rt, 0); 979 } 980 981 // Hint instructions 982 983 #define INSN(NAME, crm, op2) \ 984 void NAME() { \ 985 system(0b00, 0b011, 0b0010, crm, op2); \ 986 } 987 988 INSN(nop, 0b000, 0b0000); 989 INSN(yield, 0b000, 0b0001); 990 INSN(wfe, 0b000, 0b0010); 991 INSN(wfi, 0b000, 0b0011); 992 INSN(sev, 0b000, 0b0100); 993 INSN(sevl, 0b000, 0b0101); 994 995 INSN(autia1716, 0b0001, 0b100); 996 INSN(autiasp, 0b0011, 0b101); 997 INSN(autiaz, 0b0011, 0b100); 998 INSN(autib1716, 
0b0001, 0b110); 999 INSN(autibsp, 0b0011, 0b111); 1000 INSN(autibz, 0b0011, 0b110); 1001 INSN(pacia1716, 0b0001, 0b000); 1002 INSN(paciasp, 0b0011, 0b001); 1003 INSN(paciaz, 0b0011, 0b000); 1004 INSN(pacib1716, 0b0001, 0b010); 1005 INSN(pacibsp, 0b0011, 0b011); 1006 INSN(pacibz, 0b0011, 0b010); 1007 INSN(xpaclri, 0b0000, 0b111); 1008 1009 #undef INSN 1010 1011 // we only provide mrs and msr for the special purpose system 1012 // registers where op1 (instr[20:19]) == 11 and, (currently) only 1013 // use it for FPSR n.b msr has L (instr[21]) == 0 mrs has L == 1 1014 1015 void msr(int op1, int CRn, int CRm, int op2, Register rt) { 1016 starti; 1017 f(0b1101010100011, 31, 19); 1018 f(op1, 18, 16); 1019 f(CRn, 15, 12); 1020 f(CRm, 11, 8); 1021 f(op2, 7, 5); 1022 // writing zr is ok 1023 zrf(rt, 0); 1024 } 1025 1026 void mrs(int op1, int CRn, int CRm, int op2, Register rt) { 1027 starti; 1028 f(0b1101010100111, 31, 19); 1029 f(op1, 18, 16); 1030 f(CRn, 15, 12); 1031 f(CRm, 11, 8); 1032 f(op2, 7, 5); 1033 // reading to zr is a mistake 1034 rf(rt, 0); 1035 } 1036 1037 enum barrier {OSHLD = 0b0001, OSHST, OSH, NSHLD=0b0101, NSHST, NSH, 1038 ISHLD = 0b1001, ISHST, ISH, LD=0b1101, ST, SY}; 1039 1040 void dsb(barrier imm) { 1041 system(0b00, 0b011, 0b00011, imm, 0b100); 1042 } 1043 1044 void dmb(barrier imm) { 1045 system(0b00, 0b011, 0b00011, imm, 0b101); 1046 } 1047 1048 void isb() { 1049 system(0b00, 0b011, 0b00011, SY, 0b110); 1050 } 1051 1052 void sys(int op1, int CRn, int CRm, int op2, 1053 Register rt = as_Register(0b11111)) { 1054 system(0b01, op1, CRn, CRm, op2, rt); 1055 } 1056 1057 // Only implement operations accessible from EL0 or higher, i.e., 1058 // op1 CRn CRm op2 1059 // IC IVAU 3 7 5 1 1060 // DC CVAC 3 7 10 1 1061 // DC CVAP 3 7 12 1 1062 // DC CVAU 3 7 11 1 1063 // DC CIVAC 3 7 14 1 1064 // DC ZVA 3 7 4 1 1065 // So only deal with the CRm field. 
1066 enum icache_maintenance {IVAU = 0b0101}; 1067 enum dcache_maintenance {CVAC = 0b1010, CVAP = 0b1100, CVAU = 0b1011, CIVAC = 0b1110, ZVA = 0b100}; 1068 1069 void dc(dcache_maintenance cm, Register Rt) { 1070 sys(0b011, 0b0111, cm, 0b001, Rt); 1071 } 1072 1073 void ic(icache_maintenance cm, Register Rt) { 1074 sys(0b011, 0b0111, cm, 0b001, Rt); 1075 } 1076 1077 // A more convenient access to dmb for our purposes 1078 enum Membar_mask_bits { 1079 // We can use ISH for a barrier because the Arm ARM says "This 1080 // architecture assumes that all Processing Elements that use the 1081 // same operating system or hypervisor are in the same Inner 1082 // Shareable shareability domain." 1083 StoreStore = ISHST, 1084 LoadStore = ISHLD, 1085 LoadLoad = ISHLD, 1086 StoreLoad = ISH, 1087 AnyAny = ISH 1088 }; 1089 1090 void membar(Membar_mask_bits order_constraint) { 1091 dmb(Assembler::barrier(order_constraint)); 1092 } 1093 1094 // Unconditional branch (register) 1095 1096 void branch_reg(int OP, int A, int M, Register RN, Register RM) { 1097 starti; 1098 f(0b1101011, 31, 25); 1099 f(OP, 24, 21); 1100 f(0b111110000, 20, 12); 1101 f(A, 11, 11); 1102 f(M, 10, 10); 1103 rf(RN, 5); 1104 rf(RM, 0); 1105 } 1106 1107 #define INSN(NAME, opc) \ 1108 void NAME(Register RN) { \ 1109 branch_reg(opc, 0, 0, RN, r0); \ 1110 } 1111 1112 INSN(br, 0b0000); 1113 INSN(blr, 0b0001); 1114 INSN(ret, 0b0010); 1115 1116 void ret(void *p); // This forces a compile-time error for ret(0) 1117 1118 #undef INSN 1119 1120 #define INSN(NAME, opc) \ 1121 void NAME() { \ 1122 branch_reg(opc, 0, 0, dummy_reg, r0); \ 1123 } 1124 1125 INSN(eret, 0b0100); 1126 INSN(drps, 0b0101); 1127 1128 #undef INSN 1129 1130 #define INSN(NAME, M) \ 1131 void NAME() { \ 1132 branch_reg(0b0010, 1, M, dummy_reg, dummy_reg); \ 1133 } 1134 1135 INSN(retaa, 0); 1136 INSN(retab, 1); 1137 1138 #undef INSN 1139 1140 #define INSN(NAME, OP, M) \ 1141 void NAME(Register rn) { \ 1142 branch_reg(OP, 1, M, rn, dummy_reg); \ 1143 } 1144 
  INSN(braaz, 0b0000, 0);
  INSN(brabz, 0b0000, 1);
  INSN(blraaz, 0b0001, 0);
  INSN(blrabz, 0b0001, 1);

#undef INSN

  // Authenticated branches with a register modifier in rm.
#define INSN(NAME, OP, M)                               \
  void NAME(Register rn, Register rm) {                 \
    branch_reg(OP, 1, M, rn, rm);                       \
  }

  INSN(braa, 0b1000, 0);
  INSN(brab, 0b1000, 1);
  INSN(blraa, 0b1001, 0);
  INSN(blrab, 0b1001, 1);

#undef INSN

  // Load/store exclusive
  enum operand_size { byte, halfword, word, xword };

  // Shared encoder for the load/store-exclusive class. sz goes in bits
  // 31:30, op in 23:21; `ordered` (bit 15) selects the acquire/release form.
  void load_store_exclusive(Register Rs, Register Rt1, Register Rt2,
    Register Rn, enum operand_size sz, int op, bool ordered) {
    starti;
    f(sz, 31, 30), f(0b001000, 29, 24), f(op, 23, 21);
    rf(Rs, 16), f(ordered, 15), zrf(Rt2, 10), srf(Rn, 5), zrf(Rt1, 0);
  }

  // Convenience wrapper: exclusive load of `sz` bytes from [addr] into dst.
  void load_exclusive(Register dst, Register addr,
                      enum operand_size sz, bool ordered) {
    load_store_exclusive(dummy_reg, dst, dummy_reg, addr,
                         sz, 0b010, ordered);
  }

  // Convenience wrapper: exclusive store of new_val to [addr]; status
  // receives the success/failure result.
  void store_exclusive(Register status, Register new_val, Register addr,
                       enum operand_size sz, bool ordered) {
    load_store_exclusive(status, new_val, dummy_reg, addr,
                         sz, 0b000, ordered);
  }

#define INSN4(NAME, sz, op, o0) /* Four registers */                    \
  void NAME(Register Rs, Register Rt1, Register Rt2, Register Rn) {     \
    guarantee(Rs != Rn && Rs != Rt1 && Rs != Rt2, "unpredictable instruction"); \
    load_store_exclusive(Rs, Rt1, Rt2, Rn, sz, op, o0);                 \
  }

#define INSN3(NAME, sz, op, o0) /* Three registers */                   \
  void NAME(Register Rs, Register Rt, Register Rn) {                    \
    guarantee(Rs != Rn && Rs != Rt, "unpredictable instruction");       \
    load_store_exclusive(Rs, Rt, dummy_reg, Rn, sz, op, o0);            \
  }

#define INSN2(NAME, sz, op, o0) /* Two registers */                     \
  void NAME(Register Rt, Register Rn) {                                 \
    load_store_exclusive(dummy_reg, Rt, dummy_reg,                      \
                         Rn, sz, op, o0);                               \
  }

#define INSN_FOO(NAME, sz, op, o0) /* Three registers, encoded differently */ \
  void NAME(Register Rt1, Register Rt2, Register Rn) {                  \
    guarantee(Rt1 != Rt2, "unpredictable instruction");                 \
    load_store_exclusive(dummy_reg, Rt1, Rt2, Rn, sz, op, o0);          \
  }

  // bytes
  INSN3(stxrb, byte, 0b000, 0);
  INSN3(stlxrb, byte, 0b000, 1);
  INSN2(ldxrb, byte, 0b010, 0);
  INSN2(ldaxrb, byte, 0b010, 1);
  INSN2(stlrb, byte, 0b100, 1);
  INSN2(ldarb, byte, 0b110, 1);

  // halfwords
  INSN3(stxrh, halfword, 0b000, 0);
  INSN3(stlxrh, halfword, 0b000, 1);
  INSN2(ldxrh, halfword, 0b010, 0);
  INSN2(ldaxrh, halfword, 0b010, 1);
  INSN2(stlrh, halfword, 0b100, 1);
  INSN2(ldarh, halfword, 0b110, 1);

  // words
  INSN3(stxrw, word, 0b000, 0);
  INSN3(stlxrw, word, 0b000, 1);
  INSN4(stxpw, word, 0b001, 0);
  INSN4(stlxpw, word, 0b001, 1);
  INSN2(ldxrw, word, 0b010, 0);
  INSN2(ldaxrw, word, 0b010, 1);
  INSN_FOO(ldxpw, word, 0b011, 0);
  INSN_FOO(ldaxpw, word, 0b011, 1);
  INSN2(stlrw, word, 0b100, 1);
  INSN2(ldarw, word, 0b110, 1);

  // xwords
  INSN3(stxr, xword, 0b000, 0);
  INSN3(stlxr, xword, 0b000, 1);
  INSN4(stxp, xword, 0b001, 0);
  INSN4(stlxp, xword, 0b001, 1);
  INSN2(ldxr, xword, 0b010, 0);
  INSN2(ldaxr, xword, 0b010, 1);
  INSN_FOO(ldxp, xword, 0b011, 0);
  INSN_FOO(ldaxp, xword, 0b011, 1);
  INSN2(stlr, xword, 0b100, 1);
  INSN2(ldar, xword, 0b110, 1);

#undef INSN2
#undef INSN3
#undef INSN4
#undef INSN_FOO

  // 8.1 Compare and swap extensions
  // Shared encoder for CAS and CASP. `a`/`r` are the acquire/release bits;
  // not_pair distinguishes single CAS (bit 23 set) from the pair form.
  void lse_cas(Register Rs, Register Rt, Register Rn,
               enum operand_size sz, bool a, bool r, bool not_pair) {
    starti;
    if (! not_pair) { // Pair
      assert(sz == word || sz == xword, "invalid size");
      /* The size bit is in bit 30, not 31 */
      sz = (operand_size)(sz == word ? 0b00:0b01);
    }
    f(sz, 31, 30), f(0b001000, 29, 24), f(not_pair ? 1 : 0, 23), f(a, 22), f(1, 21);
    zrf(Rs, 16), f(r, 15), f(0b11111, 14, 10), srf(Rn, 5), zrf(Rt, 0);
  }

  // CAS
#define INSN(NAME, a, r)                                                \
  void NAME(operand_size sz, Register Rs, Register Rt, Register Rn) {   \
    assert(Rs != Rn && Rs != Rt, "unpredictable instruction");          \
    lse_cas(Rs, Rt, Rn, sz, a, r, true);                                \
  }
  INSN(cas, false, false)
  INSN(casa, true, false)
  INSN(casl, false, true)
  INSN(casal, true, true)
#undef INSN

  // CASP
  // Pair form: Rs/Rs1 and Rt/Rt1 must be even/odd consecutive registers.
#define INSN(NAME, a, r)                                                \
  void NAME(operand_size sz, Register Rs, Register Rs1,                 \
            Register Rt, Register Rt1, Register Rn) {                   \
    assert((Rs->encoding() & 1) == 0 && (Rt->encoding() & 1) == 0 &&    \
           Rs->successor() == Rs1 && Rt->successor() == Rt1 &&          \
           Rs != Rn && Rs1 != Rn && Rs != Rt, "invalid registers");     \
    lse_cas(Rs, Rt, Rn, sz, a, r, false);                               \
  }
  INSN(casp, false, false)
  INSN(caspa, true, false)
  INSN(caspl, false, true)
  INSN(caspal, true, true)
#undef INSN

  // 8.1 Atomic operations
  // Shared encoder for the LSE atomic memory operations (LDADD, SWP, ...).
  // op1/op2 select the operation; a/r are the acquire/release bits.
  void lse_atomic(Register Rs, Register Rt, Register Rn,
                  enum operand_size sz, int op1, int op2, bool a, bool r) {
    starti;
    f(sz, 31, 30), f(0b111000, 29, 24), f(a, 23), f(r, 22), f(1, 21);
    zrf(Rs, 16), f(op1, 15), f(op2, 14, 12), f(0, 11, 10), srf(Rn, 5), zrf(Rt, 0);
  }

  // Each instantiation emits the plain, acquire, release, and
  // acquire+release variants of one atomic operation.
#define INSN(NAME, NAME_A, NAME_L, NAME_AL, op1, op2)                   \
  void NAME(operand_size sz, Register Rs, Register Rt, Register Rn) {   \
    lse_atomic(Rs, Rt, Rn, sz, op1, op2, false, false);                 \
  }                                                                     \
  void NAME_A(operand_size sz, Register Rs, Register Rt, Register Rn) { \
    lse_atomic(Rs, Rt, Rn, sz, op1, op2, true, false);                  \
  }                                                                     \
  void NAME_L(operand_size sz, Register Rs, Register Rt, Register Rn) { \
    lse_atomic(Rs, Rt, Rn, sz, op1, op2, false, true);                  \
  }                                                                     \
  void NAME_AL(operand_size sz, Register Rs, Register Rt, Register Rn) {\
    lse_atomic(Rs, Rt, Rn, sz, op1, op2, true, true);                   \
  }
  INSN(ldadd, ldadda, ldaddl, ldaddal, 0, 0b000);
  INSN(ldbic, ldbica, ldbicl, ldbical, 0, 0b001);
  INSN(ldeor, ldeora, ldeorl, ldeoral, 0, 0b010);
  INSN(ldorr, ldorra, ldorrl, ldorral, 0, 0b011);
  INSN(ldsmax, ldsmaxa, ldsmaxl, ldsmaxal, 0, 0b100);
  INSN(ldsmin, ldsmina, ldsminl, ldsminal, 0, 0b101);
  INSN(ldumax, ldumaxa, ldumaxl, ldumaxal, 0, 0b110);
  INSN(ldumin, ldumina, lduminl, lduminal, 0, 0b111);
  INSN(swp, swpa, swpl, swpal, 1, 0b000);
#undef INSN

  // Load register (literal)
  // PC-relative literal loads; the word offset is encoded in bits 23:5.
  // The reloc overload only accepts internal_word_type; the Label overload
  // defers encoding until the label is bound.
#define INSN(NAME, opc, V)                                              \
  void NAME(Register Rt, address dest) {                                \
    int64_t offset = (dest - pc()) >> 2;                                \
    starti;                                                             \
    f(opc, 31, 30), f(0b011, 29, 27), f(V, 26), f(0b00, 25, 24),        \
    sf(offset, 23, 5);                                                  \
    rf(Rt, 0);                                                          \
  }                                                                     \
  void NAME(Register Rt, address dest, relocInfo::relocType rtype) {    \
    InstructionMark im(this);                                           \
    guarantee(rtype == relocInfo::internal_word_type,                   \
              "only internal_word_type relocs make sense here");        \
    code_section()->relocate(inst_mark(), InternalAddress(dest).rspec()); \
    NAME(Rt, dest);                                                     \
  }                                                                     \
  void NAME(Register Rt, Label &L) {                                    \
    wrap_label(Rt, L, &Assembler::NAME);                                \
  }

  INSN(ldrw, 0b00, 0);
  INSN(ldr, 0b01, 0);
  INSN(ldrsw, 0b10, 0);

#undef INSN

  // Literal loads into FP/SIMD registers (V=1).
#define INSN(NAME, opc, V)                                              \
  void NAME(FloatRegister Rt, address dest) {                           \
    int64_t offset = (dest - pc()) >> 2;                                \
    starti;                                                             \
    f(opc, 31, 30), f(0b011, 29, 27), f(V, 26), f(0b00, 25, 24),        \
    sf(offset, 23, 5);                                                  \
    rf(as_Register(Rt), 0);                                             \
  }

  INSN(ldrs, 0b00, 1);
  INSN(ldrd, 0b01, 1);
  INSN(ldrq, 0b10, 1);

#undef INSN

  // FP/SIMD loads from a plain base register (no offset).
#define INSN(NAME, size, opc)                                           \
  void NAME(FloatRegister Rt, Register Rn) {                            \
    starti;                                                             \
    f(size, 31, 30), f(0b111100, 29, 24), f(opc, 23, 22), f(0, 21);     \
    f(0, 20, 12), f(0b01, 11, 10);                                      \
    rf(Rn, 5), rf(as_Register(Rt), 0);                                  \
  }

  INSN(ldrs, 0b10, 0b01);
  INSN(ldrd, 0b11, 0b01);
  INSN(ldrq, 0b00, 0b11);

#undef INSN

  // PC-relative prefetch; the prefetch operation goes in the Rt slot.
#define INSN(NAME, opc, V)                                              \
  void NAME(address dest, prfop op = PLDL1KEEP) {                       \
    int64_t offset = (dest - pc()) >> 2;                                \
    starti;                                                             \
    f(opc, 31, 30), f(0b011, 29, 27), f(V, 26), f(0b00, 25, 24),        \
    sf(offset, 23, 5);                                                  \
    f(op, 4, 0);                                                        \
  }                                                                     \
  void NAME(Label &L, prfop op = PLDL1KEEP) {                           \
    wrap_label(L, op, &Assembler::NAME);                                \
  }

  INSN(prfm, 0b11, 0);

#undef INSN

  // Load/store
  // Shared encoder for register-pair loads/stores; the addressing-mode
  // fields are filled in by Address::encode_pair / encode_nontemporal_pair.
  void ld_st1(int opc, int p1, int V, int L,
              Register Rt1, Register Rt2, Address adr, bool no_allocate) {
    starti;
    f(opc, 31, 30), f(p1, 29, 27), f(V, 26), f(L, 22);
    zrf(Rt2, 10), zrf(Rt1, 0);
    if (no_allocate) {
      adr.encode_nontemporal_pair(&current_insn);
    } else {
      adr.encode_pair(&current_insn);
    }
  }

  // Load/store register pair (offset)
#define INSN(NAME, size, p1, V, L, no_allocate)         \
  void NAME(Register Rt1, Register Rt2, Address adr) {  \
    ld_st1(size, p1, V, L, Rt1, Rt2, adr, no_allocate); \
  }

  INSN(stpw, 0b00, 0b101, 0, 0, false);
  INSN(ldpw, 0b00, 0b101, 0, 1, false);
  INSN(ldpsw, 0b01, 0b101, 0, 1, false);
  INSN(stp, 0b10, 0b101, 0, 0, false);
  INSN(ldp, 0b10, 0b101, 0, 1, false);

  // Load/store no-allocate pair (offset)
  INSN(stnpw, 0b00, 0b101, 0, 0, true);
  INSN(ldnpw, 0b00, 0b101, 0, 1, true);
  INSN(stnp, 0b10, 0b101, 0, 0, true);
  INSN(ldnp, 0b10, 0b101, 0, 1, true);

#undef INSN

  // FP/SIMD register-pair forms (V=1).
#define INSN(NAME, size, p1, V, L, no_allocate)                         \
  void NAME(FloatRegister Rt1, FloatRegister Rt2, Address adr) {        \
    ld_st1(size, p1, V, L,                                              \
           as_Register(Rt1), as_Register(Rt2), adr, no_allocate);       \
  }

  INSN(stps, 0b00, 0b101, 1, 0, false);
  INSN(ldps, 0b00, 0b101, 1, 1, false);
  INSN(stpd, 0b01, 0b101, 1, 0, false);
  INSN(ldpd, 0b01, 0b101, 1, 1, false);
  INSN(stpq, 0b10, 0b101, 1, 0, false);
  INSN(ldpq, 0b10, 0b101, 1, 1, false);

#undef INSN

  // Load/store register (all modes)
  // Shared encoder for single-register loads/stores in every addressing
  // mode; V=1 selects the FP/SIMD form. Literal (PC-relative) addresses
  // are handled inline, every other mode via Address::encode.
  void ld_st2(Register Rt, const Address &adr, int size, int op, int V = 0) {
    starti;

    f(V, 26); // general reg?
    zrf(Rt, 0);

    // Encoding for literal loads is done here (rather than pushed
    // down into Address::encode) because the encoding of this
    // instruction is too different from all of the other forms to
    // make it worth sharing.
    if (adr.getMode() == Address::literal) {
      assert(size == 0b10 || size == 0b11, "bad operand size in ldr");
      assert(op == 0b01, "literal form can only be used with loads");
      f(size & 0b01, 31, 30), f(0b011, 29, 27), f(0b00, 25, 24);
      int64_t offset = (adr.target() - pc()) >> 2;
      sf(offset, 23, 5);
      code_section()->relocate(pc(), adr.rspec());
      return;
    }

    f(size, 31, 30);
    f(op, 23, 22); // str
    adr.encode(&current_insn);
  }

  // Integer loads/stores of every width and sign-extension flavour.
#define INSN(NAME, size, op)                    \
  void NAME(Register Rt, const Address &adr) {  \
    ld_st2(Rt, adr, size, op);                  \
  }                                             \

  INSN(str, 0b11, 0b00);
  INSN(strw, 0b10, 0b00);
  INSN(strb, 0b00, 0b00);
  INSN(strh, 0b01, 0b00);

  INSN(ldr, 0b11, 0b01);
  INSN(ldrw, 0b10, 0b01);
  INSN(ldrb, 0b00, 0b01);
  INSN(ldrh, 0b01, 0b01);

  INSN(ldrsb, 0b00, 0b10);
  INSN(ldrsbw, 0b00, 0b11);
  INSN(ldrsh, 0b01, 0b10);
  INSN(ldrshw, 0b01, 0b11);
  INSN(ldrsw, 0b10, 0b10);

#undef INSN

  // Prefetch with a full Address; the prefetch op rides in the Rt field.
#define INSN(NAME, size, op)                                    \
  void NAME(const Address &adr, prfop pfop = PLDL1KEEP) {       \
    ld_st2(as_Register(pfop), adr, size, op);                   \
  }

  INSN(prfm, 0b11, 0b10); // FIXME: PRFM should not be used with
                          // writeback modes, but the assembler
                          // doesn't enforce that.
1504 1505 #undef INSN 1506 1507 #define INSN(NAME, size, op) \ 1508 void NAME(FloatRegister Rt, const Address &adr) { \ 1509 ld_st2(as_Register(Rt), adr, size, op, 1); \ 1510 } 1511 1512 INSN(strd, 0b11, 0b00); 1513 INSN(strs, 0b10, 0b00); 1514 INSN(ldrd, 0b11, 0b01); 1515 INSN(ldrs, 0b10, 0b01); 1516 INSN(strq, 0b00, 0b10); 1517 INSN(ldrq, 0x00, 0b11); 1518 1519 #undef INSN 1520 1521 /* SIMD extensions 1522 * 1523 * We just use FloatRegister in the following. They are exactly the same 1524 * as SIMD registers. 1525 */ 1526 public: 1527 1528 enum SIMD_Arrangement { 1529 T8B, T16B, T4H, T8H, T2S, T4S, T1D, T2D, T1Q, INVALID_ARRANGEMENT 1530 }; 1531 1532 enum SIMD_RegVariant { 1533 B, H, S, D, Q, INVALID 1534 }; 1535 1536 private: 1537 1538 static SIMD_Arrangement _esize2arrangement_table[9][2]; 1539 static SIMD_RegVariant _esize2regvariant[9]; 1540 1541 public: 1542 1543 static SIMD_Arrangement esize2arrangement(unsigned esize, bool isQ); 1544 static SIMD_RegVariant elemType_to_regVariant(BasicType bt); 1545 static SIMD_RegVariant elemBytes_to_regVariant(unsigned esize); 1546 // Return the corresponding bits for different SIMD_RegVariant value. 
  static unsigned regVariant_to_elemBits(SIMD_RegVariant T);

  enum shift_kind { LSL, LSR, ASR, ROR };

  // Common field layout for the shifted-register data-processing classes;
  // callers place the register fields and the N bit before invoking this.
  void op_shifted_reg(Instruction_aarch64 &current_insn, unsigned decode,
                      enum shift_kind kind, unsigned shift,
                      unsigned size, unsigned op) {
    f(size, 31);
    f(op, 30, 29);
    f(decode, 28, 24);
    f(shift, 15, 10);
    f(kind, 23, 22);
  }

  // Logical (shifted register)
#define INSN(NAME, size, op, N)                                 \
  void NAME(Register Rd, Register Rn, Register Rm,              \
            enum shift_kind kind = LSL, unsigned shift = 0) {   \
    starti;                                                     \
    guarantee(size == 1 || shift < 32, "incorrect shift");      \
    f(N, 21);                                                   \
    zrf(Rm, 16), zrf(Rn, 5), zrf(Rd, 0);                        \
    op_shifted_reg(current_insn, 0b01010, kind, shift, size, op); \
  }

  INSN(andr, 1, 0b00, 0);
  INSN(orr, 1, 0b01, 0);
  INSN(eor, 1, 0b10, 0);
  INSN(ands, 1, 0b11, 0);
  INSN(andw, 0, 0b00, 0);
  INSN(orrw, 0, 0b01, 0);
  INSN(eorw, 0, 0b10, 0);
  INSN(andsw, 0, 0b11, 0);

#undef INSN

  // Inverted-operand logical forms (N=1): bic/orn/eon and friends.
#define INSN(NAME, size, op, N)                                 \
  void NAME(Register Rd, Register Rn, Register Rm,              \
            enum shift_kind kind = LSL, unsigned shift = 0) {   \
    starti;                                                     \
    f(N, 21);                                                   \
    zrf(Rm, 16), zrf(Rn, 5), zrf(Rd, 0);                        \
    op_shifted_reg(current_insn, 0b01010, kind, shift, size, op); \
  }                                                             \
                                                                \
  /* These instructions have no immediate form. Provide an overload so \
     that if anyone does try to use an immediate operand -- this has \
     happened! -- we'll get a compile-time error. */            \
  void NAME(Register Rd, Register Rn, unsigned imm,             \
            enum shift_kind kind = LSL, unsigned shift = 0) {   \
    assert(false, " can't be used with immediate operand");     \
  }

  INSN(bic, 1, 0b00, 1);
  INSN(orn, 1, 0b01, 1);
  INSN(eon, 1, 0b10, 1);
  INSN(bics, 1, 0b11, 1);
  INSN(bicw, 0, 0b00, 1);
  INSN(ornw, 0, 0b01, 1);
  INSN(eonw, 0, 0b10, 1);
  INSN(bicsw, 0, 0b11, 1);

#undef INSN

#ifdef _WIN64
  // In MSVC, `mvn` is defined as a macro and it affects compilation
#undef mvn
#endif

  // Aliases for short forms of orn
  void mvn(Register Rd, Register Rm,
           enum shift_kind kind = LSL, unsigned shift = 0) {
    orn(Rd, zr, Rm, kind, shift);
  }

  void mvnw(Register Rd, Register Rm,
            enum shift_kind kind = LSL, unsigned shift = 0) {
    ornw(Rd, zr, Rm, kind, shift);
  }

  // Add/subtract (shifted register)
  // ROR is architecturally invalid for this class, hence the assert.
#define INSN(NAME, size, op)                                    \
  void NAME(Register Rd, Register Rn, Register Rm,              \
            enum shift_kind kind, unsigned shift = 0) {         \
    starti;                                                     \
    f(0, 21);                                                   \
    assert_cond(kind != ROR);                                   \
    guarantee(size == 1 || shift < 32, "incorrect shift");      \
    zrf(Rd, 0), zrf(Rn, 5), zrf(Rm, 16);                        \
    op_shifted_reg(current_insn, 0b01011, kind, shift, size, op); \
  }

  INSN(add, 1, 0b000);
  INSN(sub, 1, 0b10);
  INSN(addw, 0, 0b000);
  INSN(subw, 0, 0b10);

  INSN(adds, 1, 0b001);
  INSN(subs, 1, 0b11);
  INSN(addsw, 0, 0b001);
  INSN(subsw, 0, 0b11);

#undef INSN

  // Add/subtract (extended register)
  // Non-flag-setting forms: Rd and Rn may be sp (srf).
#define INSN(NAME, op)                                          \
  void NAME(Register Rd, Register Rn, Register Rm,              \
            ext::operation option, int amount = 0) {            \
    starti;                                                     \
    zrf(Rm, 16), srf(Rn, 5), srf(Rd, 0);                        \
    add_sub_extended_reg(current_insn, op, 0b01011, Rd, Rn, Rm, 0b00, option, amount); \
  }

  // Common fields of the extended-register add/sub class; the register
  // fields themselves are placed by the macro bodies above/below.
  void add_sub_extended_reg(Instruction_aarch64 &current_insn, unsigned op, unsigned decode,
                            Register Rd, Register Rn, Register Rm,
                            unsigned opt, ext::operation option, unsigned imm) {
    guarantee(imm <= 4, "shift amount must be <= 4");
    f(op, 31, 29), f(decode, 28, 24), f(opt, 23, 22), f(1, 21);
    f(option, 15, 13), f(imm, 12, 10);
  }

  INSN(addw, 0b000);
  INSN(subw, 0b010);
  INSN(add, 0b100);
  INSN(sub, 0b110);

#undef INSN

  // Flag-setting forms: Rd must not be sp (zrf).
#define INSN(NAME, op)                                          \
  void NAME(Register Rd, Register Rn, Register Rm,              \
            ext::operation option, int amount = 0) {            \
    starti;                                                     \
    zrf(Rm, 16), srf(Rn, 5), zrf(Rd, 0);                        \
    add_sub_extended_reg(current_insn, op, 0b01011, Rd, Rn, Rm, 0b00, option, amount); \
  }

  INSN(addsw, 0b001);
  INSN(subsw, 0b011);
  INSN(adds, 0b101);
  INSN(subs, 0b111);

#undef INSN

  // Aliases for short forms of add and sub
  // The extended form is required whenever sp is involved.
#define INSN(NAME)                                      \
  void NAME(Register Rd, Register Rn, Register Rm) {    \
    if (Rd == sp || Rn == sp)                           \
      NAME(Rd, Rn, Rm, ext::uxtx);                      \
    else                                                \
      NAME(Rd, Rn, Rm, LSL);                            \
  }

  INSN(addw);
  INSN(subw);
  INSN(add);
  INSN(sub);

  INSN(addsw);
  INSN(subsw);
  INSN(adds);
  INSN(subs);

#undef INSN

  // Add/subtract (with carry)
  void add_sub_carry(unsigned op, Register Rd, Register Rn, Register Rm) {
    starti;
    f(op, 31, 29);
    f(0b11010000, 28, 21);
    f(0b000000, 15, 10);
    zrf(Rm, 16), zrf(Rn, 5), zrf(Rd, 0);
  }

#define INSN(NAME, op)                                  \
  void NAME(Register Rd, Register Rn, Register Rm) {    \
    add_sub_carry(op, Rd, Rn, Rm);                      \
  }

  INSN(adcw, 0b000);
  INSN(adcsw, 0b001);
  INSN(sbcw, 0b010);
  INSN(sbcsw, 0b011);
  INSN(adc, 0b100);
  INSN(adcs, 0b101);
  INSN(sbc, 0b110);
  INSN(sbcs, 0b111);

#undef INSN

  // Conditional compare (both kinds)
  // imm5 holds either the second register's number (o1=0) or a 5-bit
  // immediate (o1=1); nzcv is the flag value used when `cond` fails.
  void conditional_compare(unsigned op, int o1, int o2, int o3,
                           Register Rn, unsigned imm5, unsigned nzcv,
                           unsigned cond) {
    starti;
    f(op, 31, 29);
    f(0b11010010, 28, 21);
    f(cond, 15, 12);
    f(o1, 11);
    f(o2, 10);
    f(o3, 4);
    f(nzcv, 3, 0);
    f(imm5, 20, 16), zrf(Rn, 5);
  }

#define INSN(NAME, op)                                                  \
  void NAME(Register Rn, Register Rm, int imm, Condition cond) {        \
    int regNumber = (Rm == zr ? 31 : Rm->encoding());                   \
    conditional_compare(op, 0, 0, 0, Rn, regNumber, imm, cond);         \
  }                                                                     \
                                                                        \
  void NAME(Register Rn, int imm5, int imm, Condition cond) {           \
    conditional_compare(op, 1, 0, 0, Rn, imm5, imm, cond);              \
  }

  INSN(ccmnw, 0b001);
  INSN(ccmpw, 0b011);
  INSN(ccmn, 0b101);
  INSN(ccmp, 0b111);

#undef INSN

  // Conditional select
  void conditional_select(unsigned op, unsigned op2,
                          Register Rd, Register Rn, Register Rm,
                          unsigned cond) {
    starti;
    f(op, 31, 29);
    f(0b11010100, 28, 21);
    f(cond, 15, 12);
    f(op2, 11, 10);
    zrf(Rm, 16), zrf(Rn, 5), rf(Rd, 0);
  }

#define INSN(NAME, op, op2)                                             \
  void NAME(Register Rd, Register Rn, Register Rm, Condition cond) {    \
    conditional_select(op, op2, Rd, Rn, Rm, cond);                      \
  }

  INSN(cselw, 0b000, 0b00);
  INSN(csincw, 0b000, 0b01);
  INSN(csinvw, 0b010, 0b00);
  INSN(csnegw, 0b010, 0b01);
  INSN(csel, 0b100, 0b00);
  INSN(csinc, 0b100, 0b01);
  INSN(csinv, 0b110, 0b00);
  INSN(csneg, 0b110, 0b01);

#undef INSN

  // Data processing
  // Common fields of the 1- and 2-source integer data-processing classes;
  // callers set bits 20:16 (opcode2 or Rm) before invoking this.
  void data_processing(Instruction_aarch64 &current_insn, unsigned op29, unsigned opcode,
                       Register Rd, Register Rn) {
    f(op29, 31, 29), f(0b11010110, 28, 21);
    f(opcode, 15, 10);
    rf(Rn, 5), rf(Rd, 0);
  }

  // (1 source)
#define INSN(NAME, op29, opcode2, opcode)                       \
  void NAME(Register Rd, Register Rn) {                         \
    starti;                                                     \
    f(opcode2, 20, 16);                                         \
    data_processing(current_insn, op29, opcode, Rd, Rn);        \
  }

  INSN(rbitw, 0b010, 0b00000, 0b00000);
  INSN(rev16w, 0b010, 0b00000, 0b00001);
  INSN(revw, 0b010, 0b00000, 0b00010);
  INSN(clzw, 0b010, 0b00000, 0b00100);
  INSN(clsw, 0b010, 0b00000, 0b00101);

  INSN(rbit, 0b110, 0b00000, 0b00000);
  INSN(rev16, 0b110, 0b00000, 0b00001);
  INSN(rev32, 0b110, 0b00000, 0b00010);
  INSN(rev, 0b110, 0b00000, 0b00011);
  INSN(clz, 0b110, 0b00000, 0b00100);
  INSN(cls, 0b110, 0b00000, 0b00101);

  // PAC instructions
  INSN(pacia, 0b110, 0b00001, 0b00000);
  INSN(pacib, 0b110, 0b00001, 0b00001);
  INSN(pacda, 0b110, 0b00001, 0b00010);
  INSN(pacdb, 0b110, 0b00001, 0b00011);
  INSN(autia, 0b110, 0b00001, 0b00100);
  INSN(autib, 0b110, 0b00001, 0b00101);
  INSN(autda, 0b110, 0b00001, 0b00110);
  INSN(autdb, 0b110, 0b00001, 0b00111);

#undef INSN

#define INSN(NAME, op29, opcode2, opcode)                       \
  void NAME(Register Rd) {                                      \
    starti;                                                     \
    f(opcode2, 20, 16);                                         \
    data_processing(current_insn, op29, opcode, Rd, dummy_reg); \
  }

  // PAC instructions (with zero modifier)
  INSN(paciza, 0b110, 0b00001, 0b01000);
  INSN(pacizb, 0b110, 0b00001, 0b01001);
  INSN(pacdza, 0b110, 0b00001, 0b01010);
  INSN(pacdzb, 0b110, 0b00001, 0b01011);
  INSN(autiza, 0b110, 0b00001, 0b01100);
  INSN(autizb, 0b110, 0b00001, 0b01101);
  INSN(autdza, 0b110, 0b00001, 0b01110);
  INSN(autdzb, 0b110, 0b00001, 0b01111);
  INSN(xpaci, 0b110, 0b00001, 0b10000);
  INSN(xpacd, 0b110, 0b00001, 0b10001);

#undef INSN

  // (2 sources)
#define INSN(NAME, op29, opcode)                                \
  void NAME(Register Rd, Register Rn, Register Rm) {            \
    starti;                                                     \
    rf(Rm, 16);                                                 \
    data_processing(current_insn, op29, opcode, Rd, Rn);        \
  }

  INSN(udivw, 0b000, 0b000010);
  INSN(sdivw, 0b000, 0b000011);
  INSN(lslvw, 0b000, 0b001000);
  INSN(lsrvw, 0b000, 0b001001);
  INSN(asrvw, 0b000, 0b001010);
  INSN(rorvw, 0b000, 0b001011);
  INSN(udiv, 0b100, 0b000010);
  INSN(sdiv, 0b100, 0b000011);
  INSN(lslv, 0b100, 0b001000);
  INSN(lsrv, 0b100, 0b001001);
  INSN(asrv, 0b100, 0b001010);
  INSN(rorv, 0b100, 0b001011);

#undef INSN

  // (3 sources)
  void data_processing(unsigned op54, unsigned op31, unsigned o0,
                       Register Rd, Register Rn, Register Rm,
                       Register Ra) {
    starti;
    f(op54, 31, 29), f(0b11011, 28, 24);
    f(op31, 23, 21), f(o0, 15);
    zrf(Rm, 16), zrf(Ra, 10), zrf(Rn, 5), zrf(Rd, 0);
  }

#define INSN(NAME, op54, op31, o0)                                      \
  void NAME(Register Rd, Register Rn, Register Rm, Register Ra) {       \
    data_processing(op54, op31, o0, Rd, Rn, Rm, Ra);                    \
  }

  INSN(maddw, 0b000, 0b000, 0);
  INSN(msubw, 0b000, 0b000, 1);
  INSN(madd, 0b100, 0b000, 0);
  INSN(msub, 0b100, 0b000, 1);
  INSN(smaddl, 0b100, 0b001, 0);
  INSN(smsubl, 0b100, 0b001, 1);
  INSN(umaddl, 0b100, 0b101, 0);
  INSN(umsubl, 0b100, 0b101, 1);

#undef INSN

  // High-half multiplies ignore the Ra field (encoded as register 31).
#define INSN(NAME, op54, op31, o0)                                      \
  void NAME(Register Rd, Register Rn, Register Rm) {                    \
    data_processing(op54, op31, o0, Rd, Rn, Rm, as_Register(31));       \
  }

  INSN(smulh, 0b100, 0b010, 0);
  INSN(umulh, 0b100, 0b110, 0);

#undef INSN

  // Floating-point data-processing (1 source)
  void data_processing(unsigned op31, unsigned type, unsigned opcode,
                       FloatRegister Vd, FloatRegister Vn) {
    starti;
    f(op31, 31, 29);
    f(0b11110, 28, 24);
    f(type, 23, 22), f(1, 21), f(opcode, 20, 15), f(0b10000, 14, 10);
    rf(Vn, 5), rf(Vd, 0);
  }

#define INSN(NAME, op31, type, opcode)                  \
  void NAME(FloatRegister Vd, FloatRegister Vn) {       \
    data_processing(op31, type, opcode, Vd, Vn);        \
  }

private:
  // Raw fmov encoders; kept private so callers go through the public
  // fmovs/fmovd wrappers below, which reject no-op moves.
  INSN(i_fmovs, 0b000, 0b00, 0b000000);
public:
  INSN(fabss, 0b000, 0b00, 0b000001);
  INSN(fnegs, 0b000, 0b00, 0b000010);
  INSN(fsqrts, 0b000, 0b00, 0b000011);
  INSN(fcvts, 0b000, 0b00, 0b000101);   // Single-precision to double-precision

private:
  INSN(i_fmovd, 0b000, 0b01, 0b000000);
public:
  INSN(fabsd, 0b000, 0b01, 0b000001);
  INSN(fnegd, 0b000, 0b01, 0b000010);
  INSN(fsqrtd, 0b000, 0b01, 0b000011);
  INSN(fcvtd, 0b000, 0b01, 0b000100);   // Double-precision to single-precision

  void fmovd(FloatRegister Vd, FloatRegister Vn) {
    assert(Vd != Vn, "should be");
    i_fmovd(Vd, Vn);
  }

  void fmovs(FloatRegister Vd, FloatRegister Vn) {
    assert(Vd != Vn, "should be");
    i_fmovs(Vd, Vn);
  }

private:
  // Vector FP narrowing (FCVTN) or widening (FCVTL) conversion; the
  // arrangements must differ by exactly one element-size step.
  void _fcvt_narrow_extend(FloatRegister Vd, SIMD_Arrangement Ta,
                           FloatRegister Vn, SIMD_Arrangement Tb, bool do_extend) {
    assert((do_extend && (Tb >> 1) + 1 == (Ta >> 1))
           || (!do_extend && (Ta >> 1) + 1 == (Tb >> 1)), "Incompatible arrangement");
    starti;
    int op30 = (do_extend ? Tb : Ta) & 1;
    int op22 = ((do_extend ? Ta : Tb) >> 1) & 1;
    f(0, 31), f(op30, 30), f(0b0011100, 29, 23), f(op22, 22);
    f(0b100001011, 21, 13), f(do_extend ? 1 : 0, 12), f(0b10, 11, 10);
    rf(Vn, 5), rf(Vd, 0);
  }

public:
  void fcvtl(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb) {
    assert(Tb == T4H || Tb == T8H || Tb == T2S || Tb == T4S, "invalid arrangement");
    _fcvt_narrow_extend(Vd, Ta, Vn, Tb, true);
  }

  void fcvtn(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb) {
    assert(Ta == T4H || Ta == T8H || Ta == T2S || Ta == T4S, "invalid arrangement");
    _fcvt_narrow_extend(Vd, Ta, Vn, Tb, false);
  }

#undef INSN

  // Floating-point data-processing (2 source)
  void data_processing(unsigned op31, unsigned type, unsigned opcode,
                       FloatRegister Vd, FloatRegister Vn, FloatRegister Vm) {
    starti;
    f(op31, 31, 29);
    f(0b11110, 28, 24);
    f(type, 23, 22), f(1, 21), f(opcode, 15, 10);
    rf(Vm, 16), rf(Vn, 5), rf(Vd, 0);
  }

#define INSN(NAME, op31, type, opcode)                                  \
  void NAME(FloatRegister Vd, FloatRegister Vn, FloatRegister Vm) {     \
    data_processing(op31, type, opcode, Vd, Vn, Vm);                    \
  }

  INSN(fabds, 0b011, 0b10, 0b110101);
  INSN(fmuls, 0b000, 0b00, 0b000010);
  INSN(fdivs, 0b000, 0b00, 0b000110);
  INSN(fadds, 0b000, 0b00, 0b001010);
  INSN(fsubs, 0b000, 0b00, 0b001110);
  INSN(fmaxs, 0b000, 0b00, 0b010010);
  INSN(fmins, 0b000, 0b00, 0b010110);
  INSN(fnmuls, 0b000, 0b00, 0b100010);

  INSN(fabdd, 0b011, 0b11, 0b110101);
  INSN(fmuld, 0b000, 0b01, 0b000010);
  INSN(fdivd, 0b000, 0b01, 0b000110);
  INSN(faddd, 0b000, 0b01, 0b001010);
  INSN(fsubd, 0b000, 0b01, 0b001110);
  INSN(fmaxd, 0b000, 0b01, 0b010010);
  INSN(fmind, 0b000, 0b01, 0b010110);
  INSN(fnmuld, 0b000, 0b01, 0b100010);

#undef INSN

  // Floating-point data-processing (3 source)
  void data_processing(unsigned op31, unsigned type, unsigned o1, unsigned o0,
                       FloatRegister Vd, FloatRegister Vn, FloatRegister Vm,
                       FloatRegister Va) {
    starti;
    f(op31, 31, 29);
    f(0b11111, 28, 24);
    f(type, 23, 22), f(o1, 21), f(o0, 15);
    rf(Vm, 16), rf(Va, 10), rf(Vn, 5), rf(Vd, 0);
  }

#define INSN(NAME, op31, type, o1, o0)                                  \
  void NAME(FloatRegister Vd, FloatRegister Vn, FloatRegister Vm,       \
            FloatRegister Va) {                                         \
    data_processing(op31, type, o1, o0, Vd, Vn, Vm, Va);                \
  }

  INSN(fmadds, 0b000, 0b00, 0, 0);
  INSN(fmsubs, 0b000, 0b00, 0, 1);
  INSN(fnmadds, 0b000, 0b00, 1, 0);
  INSN(fnmsubs, 0b000, 0b00, 1, 1);

  INSN(fmaddd, 0b000, 0b01, 0, 0);
  INSN(fmsubd, 0b000, 0b01, 0, 1);
  INSN(fnmaddd, 0b000, 0b01, 1, 0);
  // NOTE(review): name lacks the 'd' suffix its siblings use; renaming
  // would break existing callers, so it is kept as-is.
  INSN(fnmsub, 0b000, 0b01, 1, 1);

#undef INSN

  // Floating-point conditional select
  void fp_conditional_select(unsigned op31, unsigned type,
                             unsigned op1, unsigned op2,
                             Condition cond, FloatRegister Vd,
                             FloatRegister Vn, FloatRegister Vm) {
    starti;
    f(op31, 31, 29);
    f(0b11110, 28, 24);
    f(type, 23, 22);
    f(op1, 21, 21);
    f(op2, 11, 10);
    f(cond, 15, 12);
    rf(Vm, 16), rf(Vn, 5), rf(Vd, 0);
  }

#define INSN(NAME, op31, type, op1, op2)                                \
  void NAME(FloatRegister Vd, FloatRegister Vn,                         \
            FloatRegister Vm, Condition cond) {                         \
    fp_conditional_select(op31, type, op1, op2, cond, Vd, Vn, Vm);      \
  }

  INSN(fcsels, 0b000, 0b00, 0b1, 0b11);
  INSN(fcseld, 0b000, 0b01, 0b1, 0b11);

#undef INSN

  // Floating-point<->integer conversions
  // sflag is the sf (64-bit) bit; ftype selects single/double/top-half;
  // rmode+opcode select the conversion and direction.
  void float_int_convert(unsigned sflag, unsigned ftype,
                         unsigned rmode, unsigned opcode,
                         Register Rd, Register Rn) {
    starti;
    f(sflag, 31);
    f(0b00, 30, 29);
    f(0b11110, 28, 24);
    f(ftype, 23, 22), f(1, 21), f(rmode, 20, 19);
    f(opcode, 18, 16), f(0b000000, 15, 10);
    zrf(Rn, 5), zrf(Rd, 0);
  }

  // FP -> integer direction (and fmov FP->GP).
#define INSN(NAME, sflag, ftype, rmode, opcode)                         \
  void NAME(Register Rd, FloatRegister Vn) {                            \
    float_int_convert(sflag, ftype, rmode, opcode, Rd, as_Register(Vn)); \
  }

  INSN(fcvtzsw, 0b0, 0b00, 0b11, 0b000);
  INSN(fcvtzs, 0b1, 0b00, 0b11, 0b000);
  INSN(fcvtzdw, 0b0, 0b01, 0b11, 0b000);
  INSN(fcvtzd, 0b1, 0b01, 0b11, 0b000);

  // RoundToNearestTiesAway
  INSN(fcvtassw, 0b0, 0b00, 0b00, 0b100); // float -> signed word
  INSN(fcvtasd, 0b1, 0b01, 0b00, 0b100);  // double -> signed xword

  // RoundTowardsNegative
  INSN(fcvtmssw, 0b0, 0b00, 0b10, 0b000); // float -> signed word
  INSN(fcvtmsd, 0b1, 0b01, 0b10, 0b000);  // double -> signed xword

  INSN(fmovs, 0b0, 0b00, 0b00, 0b110);
  INSN(fmovd, 0b1, 0b01, 0b00, 0b110);

  INSN(fmovhid, 0b1, 0b10, 0b01, 0b110);

#undef INSN

  // Integer -> FP direction (and fmov GP->FP).
#define INSN(NAME, sflag, type, rmode, opcode)                          \
  void NAME(FloatRegister Vd, Register Rn) {                            \
    float_int_convert(sflag, type, rmode, opcode, as_Register(Vd), Rn); \
  }

  INSN(fmovs, 0b0, 0b00, 0b00, 0b111);
  INSN(fmovd, 0b1, 0b01, 0b00, 0b111);

  INSN(scvtfws, 0b0, 0b00, 0b00, 0b010);
  INSN(scvtfs, 0b1, 0b00, 0b00, 0b010);
  INSN(scvtfwd, 0b0, 0b01, 0b00, 0b010);
  INSN(scvtfd, 0b1, 0b01, 0b00, 0b010);

  // INSN(fmovhid, 0b100, 0b10, 0b01, 0b111);

#undef INSN

  enum sign_kind { SIGNED, UNSIGNED };

private:
  // Scalar integer -> FP convert where the source is already in a SIMD
  // register (SCVTF/UCVTF, scalar form).
  // NOTE(review): f(0b111100, 27, 23) packs a 6-bit value into a 5-bit
  // field; the vector variant below uses bits 28..23 — confirm against
  // the upstream source (likely an extraction artifact; expect 28, 23).
  void _xcvtf_scalar_integer(sign_kind sign, unsigned sz,
                             FloatRegister Rd, FloatRegister Rn) {
    starti;
    f(0b01, 31, 30), f(sign == SIGNED ? 0 : 1, 29);
    f(0b111100, 27, 23), f((sz >> 1) & 1, 22), f(0b100001110110, 21, 10);
    rf(Rn, 5), rf(Rd, 0);
  }

public:
#define INSN(NAME, sign, sz)                            \
  void NAME(FloatRegister Rd, FloatRegister Rn) {       \
    _xcvtf_scalar_integer(sign, sz, Rd, Rn);            \
  }

  INSN(scvtfs, SIGNED, 0);
  INSN(scvtfd, SIGNED, 1);

#undef INSN

private:
  // Vector integer -> FP convert (per-lane SCVTF/UCVTF).
  void _xcvtf_vector_integer(sign_kind sign, SIMD_Arrangement T,
                             FloatRegister Rd, FloatRegister Rn) {
    assert(T == T2S || T == T4S || T == T2D, "invalid arrangement");
    starti;
    f(0, 31), f(T & 1, 30), f(sign == SIGNED ? 0 : 1, 29);
    f(0b011100, 28, 23), f((T >> 1) & 1, 22), f(0b100001110110, 21, 10);
    rf(Rn, 5), rf(Rd, 0);
  }

public:
  void scvtfv(SIMD_Arrangement T, FloatRegister Rd, FloatRegister Rn) {
    _xcvtf_vector_integer(SIGNED, T, Rd, Rn);
  }

  // Floating-point compare
  // op2 bit 3 selects compare-with-zero (Vm defaulted/ignored).
  void float_compare(unsigned op31, unsigned type,
                     unsigned op, unsigned op2,
                     FloatRegister Vn, FloatRegister Vm = as_FloatRegister(0)) {
    starti;
    f(op31, 31, 29);
    f(0b11110, 28, 24);
    f(type, 23, 22), f(1, 21);
    f(op, 15, 14), f(0b1000, 13, 10), f(op2, 4, 0);
    rf(Vn, 5), rf(Vm, 16);
  }


#define INSN(NAME, op31, type, op, op2)                 \
  void NAME(FloatRegister Vn, FloatRegister Vm) {       \
    float_compare(op31, type, op, op2, Vn, Vm);         \
  }

  // Compare-with-zero overload; the literal must be 0.0.
#define INSN1(NAME, op31, type, op, op2)                \
  void NAME(FloatRegister Vn, double d) {               \
    assert_cond(d == 0.0);                              \
    float_compare(op31, type, op, op2, Vn);             \
  }

  INSN(fcmps, 0b000, 0b00, 0b00, 0b00000);
  INSN1(fcmps, 0b000, 0b00, 0b00, 0b01000);
  // INSN(fcmpes, 0b000, 0b00, 0b00, 0b10000);
  // INSN1(fcmpes, 0b000, 0b00, 0b00, 0b11000);

  INSN(fcmpd, 0b000, 0b01, 0b00, 0b00000);
  INSN1(fcmpd, 0b000, 0b01, 0b00, 0b01000);
  // INSN(fcmped, 0b000, 0b01, 0b00, 0b10000);
  // INSN1(fcmped, 0b000, 0b01, 0b00, 0b11000);

#undef INSN
#undef INSN1

  // Floating-point compare. 3-registers versions (scalar).
#define INSN(NAME, sz, e)                                               \
  void NAME(FloatRegister Vd, FloatRegister Vn, FloatRegister Vm) {     \
    starti;                                                             \
    f(0b01111110, 31, 24), f(e, 23), f(sz, 22), f(1, 21), rf(Vm, 16);   \
    f(0b111011, 15, 10), rf(Vn, 5), rf(Vd, 0);                          \
  }                                                                     \

  INSN(facged, 1, 0); // facge-double
  INSN(facges, 0, 0); // facge-single
  INSN(facgtd, 1, 1); // facgt-double
  INSN(facgts, 0, 1); // facgt-single

#undef INSN

  // Floating-point Move (immediate)
private:
  // Packs an fmov-encodable double into the 8-bit imm8 field.
  unsigned pack(double value);

  void fmov_imm(FloatRegister Vn, double value, unsigned size) {
    starti;
    f(0b00011110, 31, 24), f(size, 23, 22), f(1, 21);
    f(pack(value), 20, 13), f(0b10000000, 12, 5);
    rf(Vn, 0);
  }

public:

  // Zero cannot be encoded as an fmov immediate, so it is materialized
  // with a vector MOVI instead.
  void fmovs(FloatRegister Vn, double value) {
    if (value)
      fmov_imm(Vn, value, 0b00);
    else
      movi(Vn, T2S, 0);
  }
  void fmovd(FloatRegister Vn, double value) {
    if (value)
      fmov_imm(Vn, value, 0b01);
    else
      movi(Vn, T1D, 0);
  }

  // Floating-point rounding
  // type: half-precision = 11
  //       single = 00
  //       double = 01
  // rmode: A = Away = 100
  //        I = current = 111
  //        M = MinusInf = 010
  //        N = eveN = 000
  //        P = PlusInf = 001
  //        X = eXact = 110
  //        Z = Zero = 011
  void float_round(unsigned type, unsigned rmode, FloatRegister Rd, FloatRegister Rn) {
    starti;
    f(0b00011110, 31, 24);
    f(type, 23, 22);
    f(0b1001, 21, 18);
    f(rmode, 17, 15);
    f(0b10000, 14, 10);
    rf(Rn, 5), rf(Rd, 0);
  }
#define INSN(NAME, type, rmode)                         \
  void NAME(FloatRegister Vd, FloatRegister Vn) {       \
    float_round(type, rmode, Vd, Vn);                   \
  }

public:
  INSN(frintah, 0b11, 0b100);
  INSN(frintih, 0b11, 0b111);
  INSN(frintmh, 0b11, 0b010);
  INSN(frintnh, 0b11, 0b000);
  INSN(frintph, 0b11, 0b001);
  INSN(frintxh, 0b11, 0b110);
  INSN(frintzh, 0b11, 0b011);

  // Single-precision variants (type == 0b00, see float_round above).
  INSN(frintas, 0b00, 0b100);
  INSN(frintis, 0b00, 0b111);
  INSN(frintms, 0b00, 0b010);
  INSN(frintns, 0b00, 0b000);
  INSN(frintps, 0b00, 0b001);
  INSN(frintxs, 0b00, 0b110);
  INSN(frintzs, 0b00, 0b011);

  // Double-precision variants (type == 0b01).
  INSN(frintad, 0b01, 0b100);
  INSN(frintid, 0b01, 0b111);
  INSN(frintmd, 0b01, 0b010);
  INSN(frintnd, 0b01, 0b000);
  INSN(frintpd, 0b01, 0b001);
  INSN(frintxd, 0b01, 0b110);
  INSN(frintzd, 0b01, 0b011);
#undef INSN

private:
  // Transfer size in bytes for each SIMD_Arrangement value; defined out of
  // line (used below to validate post-index immediates).
  static short SIMD_Size_in_bytes[];

public:
  // Load/store of a single SIMD&FP register, variant B/H/S/D/Q.  The two
  // low bits of the variant select the size field; the Q variant needs the
  // extra opcode bit added here.
#define INSN(NAME, op)                                                    \
  void NAME(FloatRegister Rt, SIMD_RegVariant T, const Address &adr) {    \
    ld_st2(as_Register(Rt), adr, (int)T & 3, op + ((T==Q) ? 0b10:0b00), 1); \
  }

  INSN(ldr, 1);
  INSN(str, 0);

#undef INSN

private:

  // AdvSIMD multi-structure load/store, plain base register (no writeback).
  void ld_st(FloatRegister Vt, SIMD_Arrangement T, Register Xn, int op1, int op2) {
    starti;
    f(0,31), f((int)T & 1, 30);
    f(op1, 29, 21), f(0, 20, 16), f(op2, 15, 12);
    f((int)T >> 1, 11, 10), srf(Xn, 5), rf(Vt, 0);
  }
  // AdvSIMD multi-structure load/store, post-indexed by an immediate.  The
  // immediate is not encoded in the instruction; it must equal the total
  // transfer size, which is checked against the expected value below.
  void ld_st(FloatRegister Vt, SIMD_Arrangement T, Register Xn,
             int imm, int op1, int op2, int regs) {

    // The replicate (ld1r..ld4r) encodings all have op2 == 11xx.
    bool replicate = op2 >> 2 == 3;
    // post-index value (imm) is formed differently for replicate/non-replicate ld* instructions
    int expectedImmediate = replicate ?
regs * (1 << (T >> 1)) : SIMD_Size_in_bytes[T] * regs; 2324 guarantee(T < T1Q , "incorrect arrangement"); 2325 guarantee(imm == expectedImmediate, "bad offset"); 2326 starti; 2327 f(0,31), f((int)T & 1, 30); 2328 f(op1 | 0b100, 29, 21), f(0b11111, 20, 16), f(op2, 15, 12); 2329 f((int)T >> 1, 11, 10), srf(Xn, 5), rf(Vt, 0); 2330 } 2331 void ld_st(FloatRegister Vt, SIMD_Arrangement T, Register Xn, 2332 Register Xm, int op1, int op2) { 2333 starti; 2334 f(0,31), f((int)T & 1, 30); 2335 f(op1 | 0b100, 29, 21), rf(Xm, 16), f(op2, 15, 12); 2336 f((int)T >> 1, 11, 10), srf(Xn, 5), rf(Vt, 0); 2337 } 2338 2339 void ld_st(FloatRegister Vt, SIMD_Arrangement T, Address a, int op1, int op2, int regs) { 2340 switch (a.getMode()) { 2341 case Address::base_plus_offset: 2342 guarantee(a.offset() == 0, "no offset allowed here"); 2343 ld_st(Vt, T, a.base(), op1, op2); 2344 break; 2345 case Address::post: 2346 ld_st(Vt, T, a.base(), a.offset(), op1, op2, regs); 2347 break; 2348 case Address::post_reg: 2349 ld_st(Vt, T, a.base(), a.index(), op1, op2); 2350 break; 2351 default: 2352 ShouldNotReachHere(); 2353 } 2354 } 2355 2356 public: 2357 2358 #define INSN1(NAME, op1, op2) \ 2359 void NAME(FloatRegister Vt, SIMD_Arrangement T, const Address &a) { \ 2360 ld_st(Vt, T, a, op1, op2, 1); \ 2361 } 2362 2363 #define INSN2(NAME, op1, op2) \ 2364 void NAME(FloatRegister Vt, FloatRegister Vt2, SIMD_Arrangement T, const Address &a) { \ 2365 assert(Vt->successor() == Vt2, "Registers must be ordered"); \ 2366 ld_st(Vt, T, a, op1, op2, 2); \ 2367 } 2368 2369 #define INSN3(NAME, op1, op2) \ 2370 void NAME(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3, \ 2371 SIMD_Arrangement T, const Address &a) { \ 2372 assert(Vt->successor() == Vt2 && Vt2->successor() == Vt3, \ 2373 "Registers must be ordered"); \ 2374 ld_st(Vt, T, a, op1, op2, 3); \ 2375 } 2376 2377 #define INSN4(NAME, op1, op2) \ 2378 void NAME(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3, \ 2379 FloatRegister Vt4, 
SIMD_Arrangement T, const Address &a) { \ 2380 assert(Vt->successor() == Vt2 && Vt2->successor() == Vt3 && \ 2381 Vt3->successor() == Vt4, "Registers must be ordered"); \ 2382 ld_st(Vt, T, a, op1, op2, 4); \ 2383 } 2384 2385 INSN1(ld1, 0b001100010, 0b0111); 2386 INSN2(ld1, 0b001100010, 0b1010); 2387 INSN3(ld1, 0b001100010, 0b0110); 2388 INSN4(ld1, 0b001100010, 0b0010); 2389 2390 INSN2(ld2, 0b001100010, 0b1000); 2391 INSN3(ld3, 0b001100010, 0b0100); 2392 INSN4(ld4, 0b001100010, 0b0000); 2393 2394 INSN1(st1, 0b001100000, 0b0111); 2395 INSN2(st1, 0b001100000, 0b1010); 2396 INSN3(st1, 0b001100000, 0b0110); 2397 INSN4(st1, 0b001100000, 0b0010); 2398 2399 INSN2(st2, 0b001100000, 0b1000); 2400 INSN3(st3, 0b001100000, 0b0100); 2401 INSN4(st4, 0b001100000, 0b0000); 2402 2403 INSN1(ld1r, 0b001101010, 0b1100); 2404 INSN2(ld2r, 0b001101011, 0b1100); 2405 INSN3(ld3r, 0b001101010, 0b1110); 2406 INSN4(ld4r, 0b001101011, 0b1110); 2407 2408 #undef INSN1 2409 #undef INSN2 2410 #undef INSN3 2411 #undef INSN4 2412 2413 #define INSN(NAME, opc) \ 2414 void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { \ 2415 starti; \ 2416 assert(T == T8B || T == T16B, "must be T8B or T16B"); \ 2417 f(0, 31), f((int)T & 1, 30), f(opc, 29, 21); \ 2418 rf(Vm, 16), f(0b000111, 15, 10), rf(Vn, 5), rf(Vd, 0); \ 2419 } 2420 2421 INSN(eor, 0b101110001); 2422 INSN(orr, 0b001110101); 2423 INSN(andr, 0b001110001); 2424 INSN(bic, 0b001110011); 2425 INSN(bif, 0b101110111); 2426 INSN(bit, 0b101110101); 2427 INSN(bsl, 0b101110011); 2428 INSN(orn, 0b001110111); 2429 2430 #undef INSN 2431 2432 #define INSN(NAME, opc, opc2, acceptT2D) \ 2433 void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { \ 2434 guarantee(T != T1Q && T != T1D, "incorrect arrangement"); \ 2435 if (!acceptT2D) guarantee(T != T2D, "incorrect arrangement"); \ 2436 starti; \ 2437 f(0, 31), f((int)T & 1, 30), f(opc, 29), f(0b01110, 28, 24); \ 2438 f((int)T >> 1, 23, 22), f(1, 21), rf(Vm, 
16), f(opc2, 15, 10); \ 2439 rf(Vn, 5), rf(Vd, 0); \ 2440 } 2441 2442 INSN(addv, 0, 0b100001, true); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D 2443 INSN(subv, 1, 0b100001, true); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D 2444 INSN(uqsubv, 1, 0b001011, true); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D 2445 INSN(mulv, 0, 0b100111, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S 2446 INSN(mlav, 0, 0b100101, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S 2447 INSN(mlsv, 1, 0b100101, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S 2448 INSN(sshl, 0, 0b010001, true); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D 2449 INSN(ushl, 1, 0b010001, true); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D 2450 INSN(addpv, 0, 0b101111, true); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D 2451 INSN(smullv, 0, 0b110000, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S 2452 INSN(umullv, 1, 0b110000, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S 2453 INSN(umlalv, 1, 0b100000, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S 2454 INSN(maxv, 0, 0b011001, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S 2455 INSN(minv, 0, 0b011011, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S 2456 INSN(smaxp, 0, 0b101001, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S 2457 INSN(sminp, 0, 0b101011, false); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S 2458 INSN(cmeq, 1, 0b100011, true); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D 2459 INSN(cmgt, 0, 0b001101, true); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D 2460 INSN(cmge, 0, 0b001111, true); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D 2461 INSN(cmhi, 1, 0b001101, true); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D 2462 INSN(cmhs, 1, 
0b001111, true); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D 2463 2464 #undef INSN 2465 2466 #define INSN(NAME, opc, opc2, accepted) \ 2467 void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) { \ 2468 guarantee(T != T1Q && T != T1D, "incorrect arrangement"); \ 2469 if (accepted < 3) guarantee(T != T2D, "incorrect arrangement"); \ 2470 if (accepted < 2) guarantee(T != T2S, "incorrect arrangement"); \ 2471 if (accepted < 1) guarantee(T == T8B || T == T16B, "incorrect arrangement"); \ 2472 starti; \ 2473 f(0, 31), f((int)T & 1, 30), f(opc, 29), f(0b01110, 28, 24); \ 2474 f((int)T >> 1, 23, 22), f(opc2, 21, 10); \ 2475 rf(Vn, 5), rf(Vd, 0); \ 2476 } 2477 2478 INSN(absr, 0, 0b100000101110, 3); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D 2479 INSN(negr, 1, 0b100000101110, 3); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D 2480 INSN(notr, 1, 0b100000010110, 0); // accepted arrangements: T8B, T16B 2481 INSN(addv, 0, 0b110001101110, 1); // accepted arrangements: T8B, T16B, T4H, T8H, T4S 2482 INSN(smaxv, 0, 0b110000101010, 1); // accepted arrangements: T8B, T16B, T4H, T8H, T4S 2483 INSN(umaxv, 1, 0b110000101010, 1); // accepted arrangements: T8B, T16B, T4H, T8H, T4S 2484 INSN(sminv, 0, 0b110001101010, 1); // accepted arrangements: T8B, T16B, T4H, T8H, T4S 2485 INSN(uminv, 1, 0b110001101010, 1); // accepted arrangements: T8B, T16B, T4H, T8H, T4S 2486 INSN(cls, 0, 0b100000010010, 2); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S 2487 INSN(clz, 1, 0b100000010010, 2); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S 2488 INSN(cnt, 0, 0b100000010110, 0); // accepted arrangements: T8B, T16B 2489 INSN(uaddlp, 1, 0b100000001010, 2); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S 2490 INSN(uaddlv, 1, 0b110000001110, 1); // accepted arrangements: T8B, T16B, T4H, T8H, T4S 2491 // Zero compare. 
  INSN(cmeq, 0, 0b100000100110, 3); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
  INSN(cmge, 1, 0b100000100010, 3); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
  INSN(cmgt, 0, 0b100000100010, 3); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
  INSN(cmle, 1, 0b100000100110, 3); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D
  INSN(cmlt, 0, 0b100000101010, 3); // accepted arrangements: T8B, T16B, T4H, T8H, T2S, T4S, T2D

#undef INSN

  // Floating-point maximum/minimum across vector lanes; only the
  // single-precision T4S arrangement is accepted (asserted).
#define INSN(NAME, opc)                                                   \
  void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) {     \
    starti;                                                               \
    assert(T == T4S, "arrangement must be T4S");                          \
    f(0, 31), f((int)T & 1, 30), f(0b101110, 29, 24), f(opc, 23),         \
    f(T == T4S ? 0 : 1, 22), f(0b110000111110, 21, 10); rf(Vn, 5), rf(Vd, 0); \
  }

  INSN(fmaxv, 0);
  INSN(fminv, 1);

#undef INSN

  // Advanced SIMD modified immediate
  // imm8 is split into a:b:c (bits 18:16) and d:e:f:g:h (bits 9:5).  The
  // optional left shift (lsl) is folded into the cmode field.  Arrangements
  // other than 4H/8H/2S/4S can only express plain MOVI (asserted below).
#define INSN(NAME, op0, cmode0)                                           \
  void NAME(FloatRegister Vd, SIMD_Arrangement T, unsigned imm8, unsigned lsl = 0) {   \
    unsigned cmode = cmode0;                                              \
    unsigned op = op0;                                                    \
    starti;                                                               \
    assert(lsl == 0 ||                                                    \
           ((T == T4H || T == T8H) && lsl == 8) ||                        \
           ((T == T2S || T == T4S) && ((lsl >> 3) < 4) && ((lsl & 7) == 0)), "invalid shift");\
    cmode |= lsl >> 2;                                                    \
    if (T == T4H || T == T8H) cmode |= 0b1000;                            \
    if (!(T == T4H || T == T8H || T == T2S || T == T4S)) {                \
      assert(op == 0 && cmode0 == 0, "must be MOVI");                     \
      cmode = 0b1110;                                                     \
      if (T == T1D || T == T2D) op = 1;                                   \
    }                                                                     \
    f(0, 31), f((int)T & 1, 30), f(op, 29), f(0b0111100000, 28, 19);      \
    f(imm8 >> 5, 18, 16), f(cmode, 15, 12), f(0x01, 11, 10), f(imm8 & 0b11111, 9, 5); \
    rf(Vd, 0);                                                            \
  }

  INSN(movi, 0, 0);
  INSN(orri, 0, 1);
  INSN(mvni, 1, 0);
  INSN(bici, 1, 1);

#undef INSN

  // Broadcast a packed floating-point immediate (see pack() below) into
  // every lane.
#define INSN(NAME, op, cmode)                                             \
  void NAME(FloatRegister Vd, SIMD_Arrangement
T, double imm) { \ 2543 unsigned imm8 = pack(imm); \ 2544 starti; \ 2545 f(0, 31), f((int)T & 1, 30), f(op, 29), f(0b0111100000, 28, 19); \ 2546 f(imm8 >> 5, 18, 16), f(cmode, 15, 12), f(0x01, 11, 10), f(imm8 & 0b11111, 9, 5); \ 2547 rf(Vd, 0); \ 2548 } 2549 2550 INSN(fmovs, 0, 0b1111); 2551 INSN(fmovd, 1, 0b1111); 2552 2553 #undef INSN 2554 2555 // Advanced SIMD three same 2556 #define INSN(NAME, op1, op2, op3) \ 2557 void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { \ 2558 starti; \ 2559 assert(T == T2S || T == T4S || T == T2D, "invalid arrangement"); \ 2560 f(0, 31), f((int)T & 1, 30), f(op1, 29), f(0b01110, 28, 24), f(op2, 23); \ 2561 f(T==T2D ? 1:0, 22); f(1, 21), rf(Vm, 16), f(op3, 15, 10), rf(Vn, 5), rf(Vd, 0); \ 2562 } 2563 2564 INSN(fabd, 1, 1, 0b110101); 2565 INSN(fadd, 0, 0, 0b110101); 2566 INSN(fdiv, 1, 0, 0b111111); 2567 INSN(fmul, 1, 0, 0b110111); 2568 INSN(fsub, 0, 1, 0b110101); 2569 INSN(fmla, 0, 0, 0b110011); 2570 INSN(fmls, 0, 1, 0b110011); 2571 INSN(fmax, 0, 0, 0b111101); 2572 INSN(fmin, 0, 1, 0b111101); 2573 INSN(fcmeq, 0, 0, 0b111001); 2574 INSN(fcmgt, 1, 1, 0b111001); 2575 INSN(fcmge, 1, 0, 0b111001); 2576 2577 #undef INSN 2578 2579 #define INSN(NAME, opc) \ 2580 void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { \ 2581 starti; \ 2582 assert(T == T4S, "arrangement must be T4S"); \ 2583 f(0b01011110000, 31, 21), rf(Vm, 16), f(opc, 15, 10), rf(Vn, 5), rf(Vd, 0); \ 2584 } 2585 2586 INSN(sha1c, 0b000000); 2587 INSN(sha1m, 0b001000); 2588 INSN(sha1p, 0b000100); 2589 INSN(sha1su0, 0b001100); 2590 INSN(sha256h2, 0b010100); 2591 INSN(sha256h, 0b010000); 2592 INSN(sha256su1, 0b011000); 2593 2594 #undef INSN 2595 2596 #define INSN(NAME, opc) \ 2597 void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) { \ 2598 starti; \ 2599 assert(T == T4S, "arrangement must be T4S"); \ 2600 f(0b0101111000101000, 31, 16), f(opc, 15, 10), rf(Vn, 5), rf(Vd, 0); \ 2601 } 2602 2603 
INSN(sha1h, 0b000010); 2604 INSN(sha1su1, 0b000110); 2605 INSN(sha256su0, 0b001010); 2606 2607 #undef INSN 2608 2609 #define INSN(NAME, opc) \ 2610 void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { \ 2611 starti; \ 2612 assert(T == T2D, "arrangement must be T2D"); \ 2613 f(0b11001110011, 31, 21), rf(Vm, 16), f(opc, 15, 10), rf(Vn, 5), rf(Vd, 0); \ 2614 } 2615 2616 INSN(sha512h, 0b100000); 2617 INSN(sha512h2, 0b100001); 2618 INSN(sha512su1, 0b100010); 2619 2620 #undef INSN 2621 2622 #define INSN(NAME, opc) \ 2623 void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) { \ 2624 starti; \ 2625 assert(T == T2D, "arrangement must be T2D"); \ 2626 f(opc, 31, 10), rf(Vn, 5), rf(Vd, 0); \ 2627 } 2628 2629 INSN(sha512su0, 0b1100111011000000100000); 2630 2631 #undef INSN 2632 2633 #define INSN(NAME, opc) \ 2634 void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm, FloatRegister Va) { \ 2635 starti; \ 2636 assert(T == T16B, "arrangement must be T16B"); \ 2637 f(0b11001110, 31, 24), f(opc, 23, 21), rf(Vm, 16), f(0b0, 15, 15), rf(Va, 10), rf(Vn, 5), rf(Vd, 0); \ 2638 } 2639 2640 INSN(eor3, 0b000); 2641 INSN(bcax, 0b001); 2642 2643 #undef INSN 2644 2645 #define INSN(NAME, opc) \ 2646 void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm, unsigned imm) { \ 2647 starti; \ 2648 assert(T == T2D, "arrangement must be T2D"); \ 2649 f(0b11001110, 31, 24), f(opc, 23, 21), rf(Vm, 16), f(imm, 15, 10), rf(Vn, 5), rf(Vd, 0); \ 2650 } 2651 2652 INSN(xar, 0b100); 2653 2654 #undef INSN 2655 2656 #define INSN(NAME, opc) \ 2657 void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { \ 2658 starti; \ 2659 assert(T == T2D, "arrangement must be T2D"); \ 2660 f(0b11001110, 31, 24), f(opc, 23, 21), rf(Vm, 16), f(0b100011, 15, 10), rf(Vn, 5), rf(Vd, 0); \ 2661 } 2662 2663 INSN(rax1, 0b011); 2664 2665 #undef INSN 2666 2667 #define INSN(NAME, opc) \ 2668 void 
NAME(FloatRegister Vd, FloatRegister Vn) { \ 2669 starti; \ 2670 f(opc, 31, 10), rf(Vn, 5), rf(Vd, 0); \ 2671 } 2672 2673 INSN(aese, 0b0100111000101000010010); 2674 INSN(aesd, 0b0100111000101000010110); 2675 INSN(aesmc, 0b0100111000101000011010); 2676 INSN(aesimc, 0b0100111000101000011110); 2677 2678 #undef INSN 2679 2680 #define INSN(NAME, op1, op2) \ 2681 void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm, int index = 0) { \ 2682 starti; \ 2683 assert(T == T2S || T == T4S || T == T2D, "invalid arrangement"); \ 2684 assert(index >= 0 && ((T == T2D && index <= 1) || (T != T2D && index <= 3)), "invalid index"); \ 2685 f(0, 31), f((int)T & 1, 30), f(op1, 29); f(0b011111, 28, 23); \ 2686 f(T == T2D ? 1 : 0, 22), f(T == T2D ? 0 : index & 1, 21), rf(Vm, 16); \ 2687 f(op2, 15, 12), f(T == T2D ? index : (index >> 1), 11), f(0, 10); \ 2688 rf(Vn, 5), rf(Vd, 0); \ 2689 } 2690 2691 // FMLA/FMLS - Vector - Scalar 2692 INSN(fmlavs, 0, 0b0001); 2693 INSN(fmlsvs, 0, 0b0101); 2694 // FMULX - Vector - Scalar 2695 INSN(fmulxvs, 1, 0b1001); 2696 2697 #undef INSN 2698 2699 // Floating-point Reciprocal Estimate 2700 void frecpe(FloatRegister Vd, FloatRegister Vn, SIMD_RegVariant type) { 2701 assert(type == D || type == S, "Wrong type for frecpe"); 2702 starti; 2703 f(0b010111101, 31, 23); 2704 f(type == D ? 1 : 0, 22); 2705 f(0b100001110110, 21, 10); 2706 rf(Vn, 5), rf(Vd, 0); 2707 } 2708 2709 // (long) {a, b} -> (a + b) 2710 void addpd(FloatRegister Vd, FloatRegister Vn) { 2711 starti; 2712 f(0b0101111011110001101110, 31, 10); 2713 rf(Vn, 5), rf(Vd, 0); 2714 } 2715 2716 // Floating-point AdvSIMD scalar pairwise 2717 #define INSN(NAME, op1, op2) \ 2718 void NAME(FloatRegister Vd, FloatRegister Vn, SIMD_RegVariant type) { \ 2719 starti; \ 2720 assert(type == D || type == S, "Wrong type for faddp/fmaxp/fminp"); \ 2721 f(0b0111111, 31, 25), f(op1, 24, 23), \ 2722 f(type == S ? 
0 : 1, 22), f(0b11000, 21, 17), f(op2, 16, 10), rf(Vn, 5), rf(Vd, 0); \ 2723 } 2724 2725 INSN(faddp, 0b00, 0b0110110); 2726 INSN(fmaxp, 0b00, 0b0111110); 2727 INSN(fminp, 0b01, 0b0111110); 2728 2729 #undef INSN 2730 2731 void ins(FloatRegister Vd, SIMD_RegVariant T, FloatRegister Vn, int didx, int sidx) { 2732 starti; 2733 assert(T != Q, "invalid register variant"); 2734 f(0b01101110000, 31, 21), f(((didx<<1)|1)<<(int)T, 20, 16), f(0, 15); 2735 f(sidx<<(int)T, 14, 11), f(1, 10), rf(Vn, 5), rf(Vd, 0); 2736 } 2737 2738 #define INSN(NAME, cond, op1, op2) \ 2739 void NAME(Register Rd, FloatRegister Vn, SIMD_RegVariant T, int idx) { \ 2740 starti; \ 2741 assert(cond, "invalid register variant"); \ 2742 f(0, 31), f(op1, 30), f(0b001110000, 29, 21); \ 2743 f(((idx << 1) | 1) << (int)T, 20, 16), f(op2, 15, 10); \ 2744 rf(Vn, 5), rf(Rd, 0); \ 2745 } 2746 2747 INSN(umov, (T != Q), (T == D ? 1 : 0), 0b001111); 2748 INSN(smov, (T < D), 1, 0b001011); 2749 2750 #undef INSN 2751 2752 #define INSN(NAME, opc, opc2, isSHR) \ 2753 void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, int shift){ \ 2754 starti; \ 2755 /* The encodings for the immh:immb fields (bits 22:16) in *SHR are \ 2756 * 0001 xxx 8B/16B, shift = 16 - UInt(immh:immb) \ 2757 * 001x xxx 4H/8H, shift = 32 - UInt(immh:immb) \ 2758 * 01xx xxx 2S/4S, shift = 64 - UInt(immh:immb) \ 2759 * 1xxx xxx 1D/2D, shift = 128 - UInt(immh:immb) \ 2760 * (1D is RESERVED) \ 2761 * for SHL shift is calculated as: \ 2762 * 0001 xxx 8B/16B, shift = UInt(immh:immb) - 8 \ 2763 * 001x xxx 4H/8H, shift = UInt(immh:immb) - 16 \ 2764 * 01xx xxx 2S/4S, shift = UInt(immh:immb) - 32 \ 2765 * 1xxx xxx 1D/2D, shift = UInt(immh:immb) - 64 \ 2766 * (1D is RESERVED) \ 2767 */ \ 2768 guarantee(!isSHR || (isSHR && (shift != 0)), "impossible encoding");\ 2769 assert((1 << ((T>>1)+3)) > shift, "Invalid Shift value"); \ 2770 int cVal = (1 << (((T >> 1) + 3) + (isSHR ? 1 : 0))); \ 2771 int encodedShift = isSHR ? 
cVal - shift : cVal + shift; \ 2772 f(0, 31), f(T & 1, 30), f(opc, 29), f(0b011110, 28, 23), \ 2773 f(encodedShift, 22, 16); f(opc2, 15, 10), rf(Vn, 5), rf(Vd, 0); \ 2774 } 2775 2776 INSN(shl, 0, 0b010101, /* isSHR = */ false); 2777 INSN(sshr, 0, 0b000001, /* isSHR = */ true); 2778 INSN(ushr, 1, 0b000001, /* isSHR = */ true); 2779 INSN(usra, 1, 0b000101, /* isSHR = */ true); 2780 INSN(ssra, 0, 0b000101, /* isSHR = */ true); 2781 2782 #undef INSN 2783 2784 #define INSN(NAME, opc, opc2, isSHR) \ 2785 void NAME(FloatRegister Vd, FloatRegister Vn, int shift){ \ 2786 starti; \ 2787 int encodedShift = isSHR ? 128 - shift : 64 + shift; \ 2788 f(0b01, 31, 30), f(opc, 29), f(0b111110, 28, 23), \ 2789 f(encodedShift, 22, 16); f(opc2, 15, 10), rf(Vn, 5), rf(Vd, 0); \ 2790 } 2791 2792 INSN(shld, 0, 0b010101, /* isSHR = */ false); 2793 INSN(sshrd, 0, 0b000001, /* isSHR = */ true); 2794 INSN(ushrd, 1, 0b000001, /* isSHR = */ true); 2795 2796 #undef INSN 2797 2798 private: 2799 void _xshll(sign_kind sign, FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) { 2800 starti; 2801 /* The encodings for the immh:immb fields (bits 22:16) are 2802 * 0001 xxx 8H, 8B/16B shift = xxx 2803 * 001x xxx 4S, 4H/8H shift = xxxx 2804 * 01xx xxx 2D, 2S/4S shift = xxxxx 2805 * 1xxx xxx RESERVED 2806 */ 2807 assert((Tb >> 1) + 1 == (Ta >> 1), "Incompatible arrangement"); 2808 assert((1 << ((Tb>>1)+3)) > shift, "Invalid shift value"); 2809 f(0, 31), f(Tb & 1, 30), f(sign == SIGNED ? 
0 : 1, 29), f(0b011110, 28, 23); 2810 f((1 << ((Tb>>1)+3))|shift, 22, 16); 2811 f(0b101001, 15, 10), rf(Vn, 5), rf(Vd, 0); 2812 } 2813 2814 public: 2815 void ushll(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) { 2816 assert(Tb == T8B || Tb == T4H || Tb == T2S, "invalid arrangement"); 2817 _xshll(UNSIGNED, Vd, Ta, Vn, Tb, shift); 2818 } 2819 2820 void ushll2(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) { 2821 assert(Tb == T16B || Tb == T8H || Tb == T4S, "invalid arrangement"); 2822 _xshll(UNSIGNED, Vd, Ta, Vn, Tb, shift); 2823 } 2824 2825 void uxtl(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb) { 2826 ushll(Vd, Ta, Vn, Tb, 0); 2827 } 2828 2829 void sshll(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) { 2830 assert(Tb == T8B || Tb == T4H || Tb == T2S, "invalid arrangement"); 2831 _xshll(SIGNED, Vd, Ta, Vn, Tb, shift); 2832 } 2833 2834 void sshll2(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) { 2835 assert(Tb == T16B || Tb == T8H || Tb == T4S, "invalid arrangement"); 2836 _xshll(SIGNED, Vd, Ta, Vn, Tb, shift); 2837 } 2838 2839 void sxtl(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb) { 2840 sshll(Vd, Ta, Vn, Tb, 0); 2841 } 2842 2843 // Move from general purpose register 2844 // mov Vd.T[index], Rn 2845 void mov(FloatRegister Vd, SIMD_RegVariant T, int index, Register Xn) { 2846 guarantee(T != Q, "invalid register variant"); 2847 starti; 2848 f(0b01001110000, 31, 21), f(((1 << T) | (index << (T + 1))), 20, 16); 2849 f(0b000111, 15, 10), zrf(Xn, 5), rf(Vd, 0); 2850 } 2851 2852 // Move to general purpose register 2853 // mov Rd, Vn.T[index] 2854 void mov(Register Xd, FloatRegister Vn, SIMD_RegVariant T, int index) { 2855 guarantee(T == S || T == D, "invalid register variant"); 2856 umov(Xd, Vn, T, index); 2857 } 2858 2859 private: 2860 void 
_pmull(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, FloatRegister Vm, SIMD_Arrangement Tb) {
    starti;
    assert((Ta == T1Q && (Tb == T1D || Tb == T2D)) ||
           (Ta == T8H && (Tb == T8B || Tb == T16B)), "Invalid Size specifier");
    int size = (Ta == T1Q) ? 0b11 : 0b00;
    f(0, 31), f(Tb & 1, 30), f(0b001110, 29, 24), f(size, 23, 22);
    f(1, 21), rf(Vm, 16), f(0b111000, 15, 10), rf(Vn, 5), rf(Vd, 0);
  }

public:
  // Polynomial multiply long, low-half sources: 8B -> 8H or 1D -> 1Q.
  void pmull(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, FloatRegister Vm, SIMD_Arrangement Tb) {
    assert(Tb == T1D || Tb == T8B, "pmull assumes T1D or T8B as the second size specifier");
    _pmull(Vd, Ta, Vn, Vm, Tb);
  }

  // Polynomial multiply long, upper-half sources: 16B -> 8H or 2D -> 1Q.
  void pmull2(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, FloatRegister Vm, SIMD_Arrangement Tb) {
    assert(Tb == T2D || Tb == T16B, "pmull2 assumes T2D or T16B as the second size specifier");
    _pmull(Vd, Ta, Vn, Vm, Tb);
  }

  // Unsigned saturating extract narrow.  The source arrangement Ta must be
  // exactly one size class wider than the destination arrangement Tb.
  void uqxtn(FloatRegister Vd, SIMD_Arrangement Tb, FloatRegister Vn, SIMD_Arrangement Ta) {
    starti;
    int size_b = (int)Tb >> 1;
    int size_a = (int)Ta >> 1;
    assert(size_b < 3 && size_b == size_a - 1, "Invalid size specifier");
    f(0, 31), f(Tb & 1, 30), f(0b101110, 29, 24), f(size_b, 23, 22);
    f(0b100001010010, 21, 10), rf(Vn, 5), rf(Vd, 0);
  }

  // Extract narrow (truncating); same arrangement constraint as uqxtn.
  void xtn(FloatRegister Vd, SIMD_Arrangement Tb, FloatRegister Vn, SIMD_Arrangement Ta) {
    starti;
    int size_b = (int)Tb >> 1;
    int size_a = (int)Ta >> 1;
    assert(size_b < 3 && size_b == size_a - 1, "Invalid size specifier");
    f(0, 31), f(Tb & 1, 30), f(0b001110, 29, 24), f(size_b, 23, 22);
    f(0b100001001010, 21, 10), rf(Vn, 5), rf(Vd, 0);
  }

  // Duplicate general-purpose register Xs into every lane of Vd (DUP,
  // general form).  T1D is a reserved encoding.
  void dup(FloatRegister Vd, SIMD_Arrangement T, Register Xs)
  {
    starti;
    assert(T != T1D, "reserved encoding");
    f(0,31), f((int)T & 1, 30), f(0b001110000, 29, 21);
    f((1 << (T >> 1)), 20, 16), f(0b000011, 15, 10), zrf(Xs, 5), rf(Vd, 0);
  }
  // Duplicate vector element Vn[index] into every lane of Vd (DUP, element
  // form).  T1D is a reserved encoding.
  void dup(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, int index = 0)
  {
    starti;
    assert(T != T1D, "reserved encoding");
    f(0, 31), f((int)T & 1, 30), f(0b001110000, 29, 21);
    f(((1 << (T >> 1)) | (index << ((T >> 1) + 1))), 20, 16);
    f(0b000001, 15, 10), rf(Vn, 5), rf(Vd, 0);
  }

  // Advanced SIMD scalar copy
  // Vd (scalar, element size given by T) <- Vn[index].
  void dup(FloatRegister Vd, SIMD_RegVariant T, FloatRegister Vn, int index = 0)
  {
    starti;
    assert(T != Q, "invalid size");
    f(0b01011110000, 31, 21);
    f((1 << T) | (index << (T + 1)), 20, 16);
    f(0b000001, 15, 10), rf(Vn, 5), rf(Vd, 0);
  }

  // AdvSIMD ZIP/UZP/TRN
  // Note: the fields below are not emitted in bit order; each f()/rf() call
  // fills its own bit range independently, so the ordering is immaterial.
#define INSN(NAME, opcode)                                                \
  void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { \
    guarantee(T != T1D && T != T1Q, "invalid arrangement");               \
    starti;                                                               \
    f(0, 31), f(0b001110, 29, 24), f(0, 21), f(0, 15);                    \
    f(opcode, 14, 12), f(0b10, 11, 10);                                   \
    rf(Vm, 16), rf(Vn, 5), rf(Vd, 0);                                     \
    f(T & 1, 30), f(T >> 1, 23, 22);                                      \
  }

  INSN(uzp1, 0b001);
  INSN(trn1, 0b010);
  INSN(zip1, 0b011);
  INSN(uzp2, 0b101);
  INSN(trn2, 0b110);
  INSN(zip2, 0b111);

#undef INSN

  // CRC32 instructions
  // c == 1 selects the CRC32C (crc32c*) variants; sf == 1 together with
  // sz == 0b11 gives the 64-bit source forms (crc32x/crc32cx).
#define INSN(NAME, c, sf, sz)                                             \
  void NAME(Register Rd, Register Rn, Register Rm) {                      \
    starti;                                                               \
    f(sf, 31), f(0b0011010110, 30, 21), f(0b010, 15, 13), f(c, 12);       \
    f(sz, 11, 10), rf(Rm, 16), rf(Rn, 5), rf(Rd, 0);                      \
  }

  INSN(crc32b, 0, 0, 0b00);
  INSN(crc32h, 0, 0, 0b01);
  INSN(crc32w, 0, 0, 0b10);
  INSN(crc32x, 0, 1, 0b11);
  INSN(crc32cb, 1, 0, 0b00);
  INSN(crc32ch, 1, 0, 0b01);
  INSN(crc32cw, 1, 0, 0b10);
  INSN(crc32cx, 1, 1, 0b11);

#undef INSN

  // Table vector lookup
#define INSN(NAME, op)                                                    \
  void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, unsigned registers, FloatRegister Vm) { \
    starti;                                                               \
    assert(T == T8B || T == T16B, "invalid arrangement");                 \
    assert(0 < registers && registers <= 4, "invalid number of registers"); \
    f(0, 31), f((int)T & 1, 30), f(0b001110000, 29, 21), rf(Vm, 16), f(0, 15); \
    f(registers - 1, 14, 13), f(op, 12),f(0b00, 11, 10), rf(Vn, 5), rf(Vd, 0); \
  }

  // 'registers' (1..4) consecutive table registers starting at Vn; the op
  // bit distinguishes tbl (0) from tbx (1).
  INSN(tbl, 0);
  INSN(tbx, 1);

#undef INSN

  // AdvSIMD two-reg misc
  // In this instruction group, the 2 bits in the size field ([23:22]) may be
  // fixed or determined by the "SIMD_Arrangement T", or both. The additional
  // parameter "tmask" is a 2-bit mask used to indicate which bits in the size
  // field are determined by the SIMD_Arrangement. The bit of "tmask" should be
  // set to 1 if corresponding bit marked as "x" in the ArmARM.
#define INSN(NAME, U, size, tmask, opcode)                                \
  void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) {     \
    starti;                                                               \
    assert((ASSERTION), MSG);                                             \
    f(0, 31), f((int)T & 1, 30), f(U, 29), f(0b01110, 28, 24);            \
    f(size | ((int)(T >> 1) & tmask), 23, 22), f(0b10000, 21, 17);        \
    f(opcode, 16, 12), f(0b10, 11, 10), rf(Vn, 5), rf(Vd, 0);             \
  }

#define MSG "invalid arrangement"

#define ASSERTION (T == T2S || T == T4S || T == T2D)
  INSN(fsqrt, 1, 0b10, 0b01, 0b11111);
  INSN(fabs, 0, 0b10, 0b01, 0b01111);
  INSN(fneg, 1, 0b10, 0b01, 0b01111);
  INSN(frintn, 0, 0b00, 0b01, 0b11000);
  INSN(frintm, 0, 0b00, 0b01, 0b11001);
  INSN(frintp, 0, 0b10, 0b01, 0b11000);
  INSN(fcvtas, 0, 0b00, 0b01, 0b11100);
  INSN(fcvtzs, 0, 0b10, 0b01, 0b11011);
  INSN(fcvtms, 0, 0b00, 0b01, 0b11011);
#undef ASSERTION

#define ASSERTION (T == T8B || T == T16B || T == T4H || T == T8H || T == T2S || T == T4S)
  INSN(rev64, 0, 0b00, 0b11, 0b00000);
#undef ASSERTION

#define ASSERTION (T == T8B || T == T16B || T == T4H || T == T8H)
  INSN(rev32, 1, 0b00, 0b11, 0b00000);
#undef ASSERTION

#define ASSERTION (T == T8B || T == T16B)
  INSN(rev16, 0, 0b00, 0b11, 0b00001);
  INSN(rbit, 1, 0b01, 0b00, 0b00101);
#undef ASSERTION

#undef MSG

#undef INSN

  // Extract (EXT): byte-wise extract from the register pair Vn:Vm starting
  // at byte 'index'; the index must fit the arrangement (asserted below).
  void ext(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm, int index)
  {
    starti;
    assert(T == T8B || T == T16B, "invalid arrangement");
    assert((T == T8B && index <= 0b0111) || (T == T16B && index <= 0b1111), "Invalid index value");
    f(0, 31), f((int)T & 1, 30), f(0b101110000, 29, 21);
    rf(Vm, 16), f(0, 15), f(index, 14, 11);
    f(0, 10), rf(Vn, 5), rf(Vd, 0);
  }

  // SVE arithmetic - unpredicated
  // Zd = Zn <op> Zm, element size selected by T (Q is rejected).
#define INSN(NAME, opcode)                                                \
  void NAME(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn, FloatRegister Zm) { \
    starti;                                                               \
    assert(T != Q, "invalid register variant");                           \
    f(0b00000100, 31, 24), f(T, 23, 22), f(1, 21),                        \
    rf(Zm, 16), f(0, 15, 13), f(opcode, 12, 10), rf(Zn, 5), rf(Zd, 0);    \
  }
  INSN(sve_add, 0b000);
  INSN(sve_sub, 0b001);
#undef INSN

  // SVE integer add/subtract immediate (unpredicated)
#define INSN(NAME, op)                                                    \
  void NAME(FloatRegister Zd, SIMD_RegVariant T, unsigned imm8) {         \
    starti;                                                               \
    /* The immediate is an unsigned value in the range 0 to 255, and      \
     * for element width of 16 bits or higher it may also be a            \
     * positive multiple of 256 in the range 256 to 65280.
\ 3054 */ \ 3055 assert(T != Q, "invalid size"); \ 3056 int sh = 0; \ 3057 if (imm8 <= 0xff) { \ 3058 sh = 0; \ 3059 } else if (T != B && imm8 <= 0xff00 && (imm8 & 0xff) == 0) { \ 3060 sh = 1; \ 3061 imm8 = (imm8 >> 8); \ 3062 } else { \ 3063 guarantee(false, "invalid immediate"); \ 3064 } \ 3065 f(0b00100101, 31, 24), f(T, 23, 22), f(0b10000, 21, 17); \ 3066 f(op, 16, 14), f(sh, 13), f(imm8, 12, 5), rf(Zd, 0); \ 3067 } 3068 3069 INSN(sve_add, 0b011); 3070 INSN(sve_sub, 0b111); 3071 #undef INSN 3072 3073 // SVE floating-point arithmetic - unpredicated 3074 #define INSN(NAME, opcode) \ 3075 void NAME(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn, FloatRegister Zm) { \ 3076 starti; \ 3077 assert(T == S || T == D, "invalid register variant"); \ 3078 f(0b01100101, 31, 24), f(T, 23, 22), f(0, 21), \ 3079 rf(Zm, 16), f(0, 15, 13), f(opcode, 12, 10), rf(Zn, 5), rf(Zd, 0); \ 3080 } 3081 3082 INSN(sve_fadd, 0b000); 3083 INSN(sve_fmul, 0b010); 3084 INSN(sve_fsub, 0b001); 3085 #undef INSN 3086 3087 private: 3088 void sve_predicate_reg_insn(unsigned op24, unsigned op13, 3089 FloatRegister Zd_or_Vd, SIMD_RegVariant T, 3090 PRegister Pg, FloatRegister Zn_or_Vn) { 3091 starti; 3092 f(op24, 31, 24), f(T, 23, 22), f(op13, 21, 13); 3093 pgrf(Pg, 10), rf(Zn_or_Vn, 5), rf(Zd_or_Vd, 0); 3094 } 3095 3096 void sve_shift_imm_encoding(SIMD_RegVariant T, int shift, bool isSHR, 3097 int& tszh, int& tszl_imm) { 3098 /* The encodings for the tszh:tszl:imm3 fields 3099 * for shift right is calculated as: 3100 * 0001 xxx B, shift = 16 - UInt(tszh:tszl:imm3) 3101 * 001x xxx H, shift = 32 - UInt(tszh:tszl:imm3) 3102 * 01xx xxx S, shift = 64 - UInt(tszh:tszl:imm3) 3103 * 1xxx xxx D, shift = 128 - UInt(tszh:tszl:imm3) 3104 * for shift left is calculated as: 3105 * 0001 xxx B, shift = UInt(tszh:tszl:imm3) - 8 3106 * 001x xxx H, shift = UInt(tszh:tszl:imm3) - 16 3107 * 01xx xxx S, shift = UInt(tszh:tszl:imm3) - 32 3108 * 1xxx xxx D, shift = UInt(tszh:tszl:imm3) - 64 3109 */ 3110 assert(T != Q, 
"Invalid register variant"); 3111 if (isSHR) { 3112 assert(((1 << (T + 3)) >= shift) && (shift > 0) , "Invalid shift value"); 3113 } else { 3114 assert(((1 << (T + 3)) > shift) && (shift >= 0) , "Invalid shift value"); 3115 } 3116 int cVal = (1 << ((T + 3) + (isSHR ? 1 : 0))); 3117 int encodedShift = isSHR ? cVal - shift : cVal + shift; 3118 tszh = encodedShift >> 5; 3119 tszl_imm = encodedShift & 0x1f; 3120 } 3121 3122 public: 3123 3124 // SVE integer arithmetic - predicate 3125 #define INSN(NAME, op1, op2) \ 3126 void NAME(FloatRegister Zdn_or_Zd_or_Vd, SIMD_RegVariant T, PRegister Pg, FloatRegister Znm_or_Vn) { \ 3127 assert(T != Q, "invalid register variant"); \ 3128 sve_predicate_reg_insn(op1, op2, Zdn_or_Zd_or_Vd, T, Pg, Znm_or_Vn); \ 3129 } 3130 3131 INSN(sve_abs, 0b00000100, 0b010110101); // vector abs, unary 3132 INSN(sve_add, 0b00000100, 0b000000000); // vector add 3133 INSN(sve_and, 0b00000100, 0b011010000); // vector and 3134 INSN(sve_andv, 0b00000100, 0b011010001); // bitwise and reduction to scalar 3135 INSN(sve_asr, 0b00000100, 0b010000100); // vector arithmetic shift right 3136 INSN(sve_bic, 0b00000100, 0b011011000); // vector bitwise clear 3137 INSN(sve_clz, 0b00000100, 0b011001101); // vector count leading zero bits 3138 INSN(sve_cnt, 0b00000100, 0b011010101); // count non-zero bits 3139 INSN(sve_cpy, 0b00000101, 0b100000100); // copy scalar to each active vector element 3140 INSN(sve_eor, 0b00000100, 0b011001000); // vector eor 3141 INSN(sve_eorv, 0b00000100, 0b011001001); // bitwise xor reduction to scalar 3142 INSN(sve_lsl, 0b00000100, 0b010011100); // vector logical shift left 3143 INSN(sve_lsr, 0b00000100, 0b010001100); // vector logical shift right 3144 INSN(sve_mul, 0b00000100, 0b010000000); // vector mul 3145 INSN(sve_neg, 0b00000100, 0b010111101); // vector neg, unary 3146 INSN(sve_not, 0b00000100, 0b011110101); // bitwise invert vector, unary 3147 INSN(sve_orr, 0b00000100, 0b011000000); // vector or 3148 INSN(sve_orv, 0b00000100, 
0b011000001); // bitwise or reduction to scalar 3149 INSN(sve_smax, 0b00000100, 0b001000000); // signed maximum vectors 3150 INSN(sve_smaxv, 0b00000100, 0b001000001); // signed maximum reduction to scalar 3151 INSN(sve_smin, 0b00000100, 0b001010000); // signed minimum vectors 3152 INSN(sve_sminv, 0b00000100, 0b001010001); // signed minimum reduction to scalar 3153 INSN(sve_sub, 0b00000100, 0b000001000); // vector sub 3154 INSN(sve_uaddv, 0b00000100, 0b000001001); // unsigned add reduction to scalar 3155 #undef INSN 3156 3157 // SVE floating-point arithmetic - predicate 3158 #define INSN(NAME, op1, op2) \ 3159 void NAME(FloatRegister Zd_or_Zdn_or_Vd, SIMD_RegVariant T, PRegister Pg, FloatRegister Zn_or_Zm) { \ 3160 assert(T == S || T == D, "invalid register variant"); \ 3161 sve_predicate_reg_insn(op1, op2, Zd_or_Zdn_or_Vd, T, Pg, Zn_or_Zm); \ 3162 } 3163 3164 INSN(sve_fabs, 0b00000100, 0b011100101); 3165 INSN(sve_fadd, 0b01100101, 0b000000100); 3166 INSN(sve_fadda, 0b01100101, 0b011000001); // add strictly-ordered reduction to scalar Vd 3167 INSN(sve_fdiv, 0b01100101, 0b001101100); 3168 INSN(sve_fmax, 0b01100101, 0b000110100); // floating-point maximum 3169 INSN(sve_fmaxv, 0b01100101, 0b000110001); // floating-point maximum recursive reduction to scalar 3170 INSN(sve_fmin, 0b01100101, 0b000111100); // floating-point minimum 3171 INSN(sve_fminv, 0b01100101, 0b000111001); // floating-point minimum recursive reduction to scalar 3172 INSN(sve_fmul, 0b01100101, 0b000010100); 3173 INSN(sve_fneg, 0b00000100, 0b011101101); 3174 INSN(sve_frintm, 0b01100101, 0b000010101); // floating-point round to integral value, toward minus infinity 3175 INSN(sve_frintn, 0b01100101, 0b000000101); // floating-point round to integral value, nearest with ties to even 3176 INSN(sve_frinta, 0b01100101, 0b000100101); // floating-point round to integral value, nearest with ties to away 3177 INSN(sve_frintp, 0b01100101, 0b000001101); // floating-point round to integral value, toward plus infinity 
  INSN(sve_fsqrt, 0b01100101, 0b001101101);
  INSN(sve_fsub,  0b01100101, 0b000001100);
#undef INSN

// SVE multiple-add/sub - predicated
#define INSN(NAME, op0, op1, op2)                                                                     \
  void NAME(FloatRegister Zda, SIMD_RegVariant T, PRegister Pg, FloatRegister Zn, FloatRegister Zm) { \
    starti;                                                                                           \
    assert(T != Q, "invalid size");                                                                   \
    f(op0, 31, 24), f(T, 23, 22), f(op1, 21), rf(Zm, 16);                                             \
    f(op2, 15, 13), pgrf(Pg, 10), rf(Zn, 5), rf(Zda, 0);                                              \
  }

  INSN(sve_fmla,  0b01100101, 1, 0b000); // floating-point fused multiply-add, writing addend: Zda = Zda + Zn * Zm
  INSN(sve_fmls,  0b01100101, 1, 0b001); // floating-point fused multiply-subtract: Zda = Zda + -Zn * Zm
  INSN(sve_fnmla, 0b01100101, 1, 0b010); // floating-point negated fused multiply-add: Zda = -Zda + -Zn * Zm
  INSN(sve_fnmls, 0b01100101, 1, 0b011); // floating-point negated fused multiply-subtract: Zda = -Zda + Zn * Zm
  INSN(sve_fmad,  0b01100101, 1, 0b100); // floating-point fused multiply-add, writing multiplicand: Zda = Zm + Zda * Zn
  INSN(sve_fmsb,  0b01100101, 1, 0b101); // floating-point fused multiply-subtract, writing multiplicand: Zda = Zm + -Zda * Zn
  INSN(sve_fnmad, 0b01100101, 1, 0b110); // floating-point negated fused multiply-add, writing multiplicand: Zda = -Zm + -Zda * Zn
  INSN(sve_fnmsb, 0b01100101, 1, 0b111); // floating-point negated fused multiply-subtract, writing multiplicand: Zda = -Zm + Zda * Zn
  INSN(sve_mla,   0b00000100, 0, 0b010); // multiply-add, writing addend: Zda = Zda + Zn*Zm
  INSN(sve_mls,   0b00000100, 0, 0b011); // multiply-subtract, writing addend: Zda = Zda + -Zn*Zm
#undef INSN

// SVE bitwise logical - unpredicated
// Note: these operate on whole registers, so no SIMD_RegVariant parameter.
#define INSN(NAME, opc)                                             \
  void NAME(FloatRegister Zd, FloatRegister Zn, FloatRegister Zm) { \
    starti;                                                         \
    f(0b00000100, 31, 24), f(opc, 23, 22), f(1, 21),                \
    rf(Zm, 16), f(0b001100, 15, 10), rf(Zn, 5), rf(Zd, 0);          \
  }
  INSN(sve_and, 0b00);
  INSN(sve_eor, 0b10);
  INSN(sve_orr, 0b01);
  INSN(sve_bic, 0b11);
#undef INSN

// SVE bitwise logical with immediate (unpredicated)
// The immediate must be expressible as an SVE logical immediate;
// encode_sve_logical_immediate() is expected to reject anything else.
#define INSN(NAME, opc)                                          \
  void NAME(FloatRegister Zd, SIMD_RegVariant T, uint64_t imm) { \
    starti;                                                      \
    unsigned elembits = regVariant_to_elemBits(T);               \
    uint32_t val = encode_sve_logical_immediate(elembits, imm);  \
    f(0b00000101, 31, 24), f(opc, 23, 22), f(0b0000, 21, 18);    \
    f(val, 17, 5), rf(Zd, 0);                                    \
  }
  INSN(sve_and, 0b10);
  INSN(sve_eor, 0b01);
  INSN(sve_orr, 0b00);
#undef INSN

// SVE shift immediate - unpredicated
#define INSN(NAME, opc, isSHR)                                                  \
  void NAME(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn, int shift) { \
    starti;                                                                     \
    int tszh, tszl_imm;                                                         \
    sve_shift_imm_encoding(T, shift, isSHR, tszh, tszl_imm);                    \
    f(0b00000100, 31, 24);                                                      \
    f(tszh, 23, 22), f(1,21), f(tszl_imm, 20, 16);                              \
    f(0b100, 15, 13), f(opc, 12, 10), rf(Zn, 5), rf(Zd, 0);                     \
  }

  INSN(sve_asr, 0b100, /* isSHR = */ true);
  INSN(sve_lsl, 0b111, /* isSHR = */ false);
  INSN(sve_lsr, 0b101, /* isSHR = */ true);
#undef INSN

// SVE bitwise shift by immediate (predicated)
#define INSN(NAME, opc, isSHR)                                               \
  void NAME(FloatRegister Zdn, SIMD_RegVariant T, PRegister Pg, int shift) { \
    starti;                                                                  \
    int tszh, tszl_imm;                                                      \
    sve_shift_imm_encoding(T, shift, isSHR, tszh, tszl_imm);                 \
    f(0b00000100, 31, 24), f(tszh, 23, 22), f(0b00, 21, 20), f(opc, 19, 16); \
    f(0b100, 15, 13), pgrf(Pg, 10), f(tszl_imm, 9, 5), rf(Zdn, 0);           \
  }

  INSN(sve_asr, 0b0000, /* isSHR = */ true);
  INSN(sve_lsl, 0b0011, /* isSHR = */ false);
  INSN(sve_lsr, 0b0001, /* isSHR = */ true);
#undef INSN

private:

  // Scalar base + immediate index
  void sve_ld_st1(FloatRegister Zt, Register Xn, int imm, PRegister Pg,
              SIMD_RegVariant T, int op1, int type, int op2) {
    starti;
    // The element size must be at least as wide as the memory access size.
    assert_cond(T >= type);
    f(op1, 31, 25), f(type, 24, 23), f(T, 22, 21);
    f(0, 20), sf(imm, 19, 16), f(op2, 15, 13);
    pgrf(Pg, 10), srf(Xn, 5), rf(Zt, 0);
  }

  // Scalar base + scalar index
  void sve_ld_st1(FloatRegister Zt, Register Xn, Register Xm, PRegister Pg,
              SIMD_RegVariant T, int op1, int type, int op2) {
    starti;
    assert_cond(T >= type);
    f(op1, 31, 25), f(type, 24, 23), f(T, 22, 21);
    rf(Xm, 16), f(op2, 15, 13);
    pgrf(Pg, 10), srf(Xn, 5), rf(Zt, 0);
  }

  // Dispatch on addressing mode to one of the two encoders above.
  void sve_ld_st1(FloatRegister Zt, PRegister Pg,
              SIMD_RegVariant T, const Address &a,
              int op1, int type, int imm_op2, int scalar_op2) {
    switch (a.getMode()) {
    case Address::base_plus_offset:
      sve_ld_st1(Zt, a.base(), a.offset(), Pg, T, op1, type, imm_op2);
      break;
    case Address::base_plus_offset_reg:
      sve_ld_st1(Zt, a.base(), a.index(), Pg, T, op1, type, scalar_op2);
      break;
    default:
      ShouldNotReachHere();
    }
  }

public:

// SVE contiguous load/store
#define INSN(NAME, op1, type, imm_op2, scalar_op2)                                 \
  void NAME(FloatRegister Zt, SIMD_RegVariant T, PRegister Pg, const Address &a) { \
    assert(T != Q, "invalid register variant");                                    \
    sve_ld_st1(Zt, Pg, T, a, op1, type, imm_op2, scalar_op2);                      \
  }

  INSN(sve_ld1b, 0b1010010, 0b00, 0b101, 0b010);
  INSN(sve_st1b, 0b1110010, 0b00, 0b111, 0b010);
  INSN(sve_ld1h, 0b1010010, 0b01, 0b101, 0b010);
  INSN(sve_st1h, 0b1110010, 0b01, 0b111, 0b010);
  INSN(sve_ld1w, 0b1010010, 0b10, 0b101, 0b010);
  INSN(sve_st1w, 0b1110010, 0b10, 0b111, 0b010);
  INSN(sve_ld1d, 0b1010010, 0b11, 0b101, 0b010);
  INSN(sve_st1d, 0b1110010, 0b11, 0b111, 0b010);
#undef INSN

// Gather/scatter load/store (SVE) - scalar plus vector
#define INSN(NAME, op1, type, op2, op3)                                      \
  void NAME(FloatRegister Zt, PRegister Pg, Register Xn, FloatRegister Zm) { \
    starti;                                                                  \
    f(op1, 31, 25), f(type, 24, 23), f(op2, 22, 21), rf(Zm, 16);             \
    f(op3, 15, 13), pgrf(Pg, 10), srf(Xn, 5), rf(Zt, 0);                     \
  }
  // SVE 32-bit gather load words (scalar plus 32-bit scaled offsets)
  INSN(sve_ld1w_gather,  0b1000010, 0b10, 0b01, 0b010);
  // SVE 64-bit gather load (scalar plus 32-bit unpacked scaled offsets)
  INSN(sve_ld1d_gather,  0b1100010, 0b11, 0b01, 0b010);
  // SVE 32-bit scatter store (scalar plus 32-bit scaled offsets)
  INSN(sve_st1w_scatter, 0b1110010, 0b10, 0b11, 0b100);
  // SVE 64-bit scatter store (scalar plus unpacked 32-bit scaled offsets)
  INSN(sve_st1d_scatter, 0b1110010, 0b11, 0b01, 0b100);
#undef INSN

// SVE load/store - unpredicated
// The offset is split into a signed multiple-of-8 part (bits [21:16]) and
// a low 3-bit part (bits [12:10]).
#define INSN(NAME, op1)                                                         \
  void NAME(FloatRegister Zt, const Address &a)  {                              \
    starti;                                                                     \
    assert(a.index() == noreg, "invalid address variant");                      \
    f(op1, 31, 29), f(0b0010110, 28, 22), sf(a.offset() >> 3, 21, 16),          \
    f(0b010, 15, 13), f(a.offset() & 0x7, 12, 10), srf(a.base(), 5), rf(Zt, 0); \
  }

  INSN(sve_ldr, 0b100); // LDR (vector)
  INSN(sve_str, 0b111); // STR (vector)
#undef INSN

// SVE stack frame adjustment
#define INSN(NAME, op)                                             \
  void NAME(Register Xd, Register Xn, int imm6) {                  \
    starti;                                                        \
    f(0b000001000, 31, 23), f(op, 22, 21);                         \
    srf(Xn, 16), f(0b01010, 15, 11), sf(imm6, 10, 5), srf(Xd, 0);  \
  }

  INSN(sve_addvl, 0b01); // Add multiple of vector register size to scalar register
  INSN(sve_addpl, 0b11); // Add multiple of predicate register size to scalar register
#undef INSN

// SVE inc/dec register by element count
// imm4 is the multiplier (1..16), encoded as imm4 - 1.
#define INSN(NAME, op)                                                                   \
  void NAME(Register Xdn, SIMD_RegVariant T, unsigned imm4 = 1, int pattern = 0b11111) { \
    starti;                                                                              \
    assert(T != Q, "invalid size");                                                      \
    f(0b00000100,31, 24), f(T, 23, 22), f(0b11, 21, 20);                                 \
    f(imm4 - 1, 19, 16), f(0b11100, 15, 11), f(op, 10), f(pattern, 9, 5), rf(Xdn, 0);    \
  }
  INSN(sve_inc, 0);
  INSN(sve_dec, 1);
#undef INSN

// SVE predicate logical operations
#define INSN(NAME, op1, op2, op3)                                    \
  void NAME(PRegister Pd, PRegister Pg, PRegister Pn, PRegister Pm) { \
    starti;                                                          \
    f(0b00100101, 31, 24), f(op1, 23, 22), f(0b00, 21, 20);          \
    prf(Pm, 16), f(0b01, 15, 14), prf(Pg, 10), f(op2, 9);            \
    prf(Pn, 5), f(op3, 4), prf(Pd, 0);                               \
  }

  INSN(sve_and,  0b00, 0b0, 0b0);
  INSN(sve_ands, 0b01, 0b0, 0b0);
  INSN(sve_eor,  0b00, 0b1, 0b0);
  INSN(sve_eors, 0b01, 0b1, 0b0);
  INSN(sve_orr,  0b10, 0b0, 0b0);
  INSN(sve_orrs, 0b11, 0b0, 0b0);
  INSN(sve_bic,  0b00, 0b0, 0b1);
#undef INSN

  // SVE increment register by predicate count
  void sve_incp(const Register rd, SIMD_RegVariant T, PRegister pg) {
    starti;
    assert(T != Q, "invalid size");
    f(0b00100101, 31, 24), f(T, 23, 22), f(0b1011001000100, 21, 9),
    prf(pg, 5), rf(rd, 0);
  }

  // SVE broadcast general-purpose register to vector elements (unpredicated)
  void sve_dup(FloatRegister Zd, SIMD_RegVariant T, Register Rn) {
    starti;
    assert(T != Q, "invalid size");
    f(0b00000101, 31, 24), f(T, 23, 22), f(0b100000001110, 21, 10);
    srf(Rn, 5), rf(Zd, 0);
  }

  // SVE broadcast signed immediate to vector elements (unpredicated)
  // The immediate is either in [-128, 127], or (for elements wider than a
  // byte) a multiple of 256 in [-32768, 32512], encoded shifted with sh = 1.
  void sve_dup(FloatRegister Zd, SIMD_RegVariant T, int imm8) {
    starti;
    assert(T != Q, "invalid size");
    int sh = 0;
    if (imm8 <= 127 && imm8 >= -128) {
      sh = 0;
    } else if (T != B && imm8 <= 32512 && imm8 >= -32768 && (imm8 & 0xff) == 0) {
      sh = 1;
      imm8 = (imm8 >> 8);
    } else {
      guarantee(false, "invalid immediate");
    }
    f(0b00100101, 31, 24), f(T, 23, 22), f(0b11100011, 21, 14);
    f(sh, 13), sf(imm8, 12, 5), rf(Zd, 0);
  }

  // SVE predicate test
  void sve_ptest(PRegister Pg, PRegister Pn) {
    starti;
    f(0b001001010101000011, 31, 14), prf(Pg, 10), f(0, 9),
    prf(Pn, 5), f(0, 4, 0);
  }

  // SVE predicate initialize
  void sve_ptrue(PRegister pd, SIMD_RegVariant esize, int pattern = 0b11111) {
    starti;
    f(0b00100101, 31, 24), f(esize, 23, 22), f(0b011000111000, 21, 10);
    f(pattern, 9, 5), f(0b0, 4), prf(pd, 0);
  }

  // SVE predicate zero
  void sve_pfalse(PRegister pd) {
    starti;
    f(0b00100101, 31, 24), f(0b00, 23, 22), f(0b011000111001, 21, 10);
    f(0b000000, 9, 4), prf(pd, 0);
  }

// SVE load/store predicate register
#define INSN(NAME, op1)                                                \
  void NAME(PRegister Pt, const Address &a)  {                         \
    starti;                                                            \
    assert(a.index() == noreg, "invalid address variant");             \
    f(op1, 31, 29), f(0b0010110, 28, 22), sf(a.offset() >> 3, 21, 16), \
    f(0b000, 15, 13), f(a.offset() & 0x7, 12, 10), srf(a.base(), 5),   \
    f(0, 4), prf(Pt, 0);                                               \
  }

  INSN(sve_ldr, 0b100); // LDR (predicate)
  INSN(sve_str, 0b111); // STR (predicate)
#undef INSN

  // SVE move predicate register
  // Note: Pn is placed in all three source predicate fields.
  void sve_mov(PRegister Pd, PRegister Pn) {
    starti;
    f(0b001001011000, 31, 20), prf(Pn, 16), f(0b01, 15, 14), prf(Pn, 10);
    f(0, 9), prf(Pn, 5), f(0, 4), prf(Pd, 0);
  }

  // SVE copy general-purpose register to vector elements (predicated)
  void sve_cpy(FloatRegister Zd, SIMD_RegVariant T, PRegister Pg, Register Rn) {
    starti;
    assert(T != Q, "invalid size");
    f(0b00000101, 31, 24), f(T, 23, 22), f(0b101000101, 21, 13);
    pgrf(Pg, 10), srf(Rn, 5), rf(Zd, 0);
  }

private:
  // Common encoder for the two public immediate-form sve_cpy overloads below.
  // Immediate rules match sve_dup: [-128, 127], or a multiple of 256 in
  // [-32768, 32512] for elements wider than a byte (encoded with sh = 1).
  void sve_cpy(FloatRegister Zd, SIMD_RegVariant T, PRegister Pg, int imm8,
               bool isMerge, bool isFloat) {
    starti;
    assert(T != Q, "invalid size");
    int sh = 0;
    if (imm8 <= 127 && imm8 >= -128) {
      sh = 0;
    } else if (T != B && imm8 <= 32512 && imm8 >= -32768 && (imm8 & 0xff) == 0) {
      sh = 1;
      imm8 = (imm8 >> 8);
    } else {
      guarantee(false, "invalid immediate");
    }
    int m = isMerge ? 1 : 0;
    f(0b00000101, 31, 24), f(T, 23, 22), f(0b01, 21, 20);
    prf(Pg, 16), f(isFloat ? 1 : 0, 15), f(m, 14), f(sh, 13), sf(imm8, 12, 5), rf(Zd, 0);
  }

public:
  // SVE copy signed integer immediate to vector elements (predicated)
  void sve_cpy(FloatRegister Zd, SIMD_RegVariant T, PRegister Pg, int imm8, bool isMerge) {
    sve_cpy(Zd, T, Pg, imm8, isMerge, /*isFloat*/false);
  }
  // SVE copy floating-point immediate to vector elements (predicated)
  void sve_cpy(FloatRegister Zd, SIMD_RegVariant T, PRegister Pg, double d) {
    sve_cpy(Zd, T, Pg, checked_cast<int8_t>(pack(d)), /*isMerge*/true, /*isFloat*/true);
  }

  // SVE conditionally select elements from two vectors
  void sve_sel(FloatRegister Zd, SIMD_RegVariant T, PRegister Pg,
               FloatRegister Zn, FloatRegister Zm) {
    starti;
    assert(T != Q, "invalid size");
    f(0b00000101, 31, 24), f(T, 23, 22), f(0b1, 21), rf(Zm, 16);
    f(0b11, 15, 14), prf(Pg, 10), rf(Zn, 5), rf(Zd, 0);
  }

  // SVE Permute Vector - Extract
  // imm8 is split across the instruction: high 5 bits in [20:16], low 3 in [12:10].
  void sve_ext(FloatRegister Zdn, FloatRegister Zm, int imm8) {
    starti;
    f(0b00000101001, 31, 21), f(imm8 >> 3, 20, 16), f(0b000, 15, 13);
    f(imm8 & 0b111, 12, 10), rf(Zm, 5), rf(Zdn, 0);
  }

// SVE Integer/Floating-Point Compare - Vectors
// HI/HS (unsigned compares) are only valid for the integer form (fp == 0).
#define INSN(NAME, op1, op2, fp)                                                           \
  void NAME(Condition cond, PRegister Pd, SIMD_RegVariant T, PRegister Pg,                 \
            FloatRegister Zn, FloatRegister Zm) {                                          \
    starti;                                                                                \
    if (fp == 0) {                                                                         \
      assert(T != Q, "invalid size");                                                      \
    } else {                                                                               \
      assert(T != B && T != Q, "invalid size");                                            \
      assert(cond != HI && cond != HS, "invalid condition for fcm");                       \
    }                                                                                      \
    int cond_op;                                                                           \
    switch(cond) {                                                                         \
      case EQ: cond_op = (op2 << 2) | 0b10; break;                                         \
      case NE: cond_op = (op2 << 2) | 0b11; break;                                         \
      case GE: cond_op = (op2 << 2) | 0b00; break;                                         \
      case GT: cond_op = (op2 << 2) | 0b01; break;                                         \
      case HI: cond_op = 0b0001; break;                                                    \
      case HS: cond_op = 0b0000; break;                                                    \
      default:                                                                             \
        ShouldNotReachHere();                                                              \
    }                                                                                      \
    f(op1, 31, 24), f(T, 23, 22), f(0, 21), rf(Zm, 16), f((cond_op >> 1) & 7, 15, 13);     \
    pgrf(Pg, 10), rf(Zn, 5), f(cond_op & 1, 4), prf(Pd, 0);                                \
  }

  INSN(sve_cmp, 0b00100100, 0b10, 0);
  INSN(sve_fcm, 0b01100101, 0b01, 1);
#undef INSN

  // SVE Integer Compare - Signed Immediate
  void sve_cmp(Condition cond, PRegister Pd, SIMD_RegVariant T,
               PRegister Pg, FloatRegister Zn, int imm5) {
    starti;
    assert(T != Q, "invalid size");
    guarantee(-16 <= imm5 && imm5 <= 15, "invalid immediate");
    int cond_op;
    switch(cond) {
      case EQ: cond_op = 0b1000; break;
      case NE: cond_op = 0b1001; break;
      case GE: cond_op = 0b0000; break;
      case GT: cond_op = 0b0001; break;
      case LE: cond_op = 0b0011; break;
      case LT: cond_op = 0b0010; break;
      default:
        ShouldNotReachHere();
    }
    f(0b00100101, 31, 24), f(T, 23, 22), f(0b0, 21), sf(imm5, 20, 16),
    f((cond_op >> 1) & 0x7, 15, 13), pgrf(Pg, 10), rf(Zn, 5);
    f(cond_op & 0x1, 4), prf(Pd, 0);
  }

  // SVE Floating-point compare vector with zero
  // Only comparison against 0.0 is encodable; d documents that restriction.
  void sve_fcm(Condition cond, PRegister Pd, SIMD_RegVariant T,
               PRegister Pg, FloatRegister Zn, double d) {
    starti;
    assert(T != Q, "invalid size");
    guarantee(d == 0.0, "invalid immediate");
    int cond_op;
    switch(cond) {
    case EQ: cond_op = 0b100; break;
    case GT: cond_op = 0b001; break;
    case GE: cond_op = 0b000; break;
    case LT: cond_op = 0b010; break;
    case LE: cond_op = 0b011; break;
    case NE: cond_op = 0b110; break;
    default:
      ShouldNotReachHere();
    }
    f(0b01100101, 31, 24), f(T, 23, 22), f(0b0100, 21, 18),
    f((cond_op >> 1) & 0x3, 17, 16), f(0b001, 15, 13),
    pgrf(Pg, 10), rf(Zn, 5);
    f(cond_op & 0x1, 4), prf(Pd, 0);
  }

// SVE unpack vector elements
#define INSN(NAME, op)                                                 \
  void NAME(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn) {   \
    starti;                                                            \
    assert(T != B && T != Q, "invalid size");                          \
    f(0b00000101, 31, 24), f(T, 23, 22), f(0b1100, 21, 18);            \
    f(op, 17, 16), f(0b001110, 15, 10), rf(Zn, 5), rf(Zd, 0);          \
  }

  INSN(sve_uunpkhi, 0b11); // Unsigned unpack and extend half of vector - high half
  INSN(sve_uunpklo, 0b10); // Unsigned unpack and extend half of vector - low half
  INSN(sve_sunpkhi, 0b01); // Signed unpack and extend half of vector - high half
  INSN(sve_sunpklo, 0b00); // Signed unpack and extend half of vector - low half
#undef INSN

// SVE unpack predicate elements
#define INSN(NAME, op)                                               \
  void NAME(PRegister Pd, PRegister Pn) {                            \
    starti;                                                          \
    f(0b000001010011000, 31, 17), f(op, 16), f(0b0100000, 15, 9);    \
    prf(Pn, 5), f(0b0, 4), prf(Pd, 0);                               \
  }

  INSN(sve_punpkhi, 0b1); // Unpack and widen high half of predicate
  INSN(sve_punpklo, 0b0); // Unpack and widen low half of predicate
#undef INSN

// SVE permute vector elements
#define INSN(NAME, op)                                                                 \
  void NAME(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn, FloatRegister Zm) { \
    starti;                                                                            \
    assert(T != Q, "invalid size");                                                    \
    f(0b00000101, 31, 24), f(T, 23, 22), f(0b1, 21), rf(Zm, 16);                       \
    f(0b01101, 15, 11), f(op, 10), rf(Zn, 5), rf(Zd, 0);                               \
  }

  INSN(sve_uzp1, 0b0); // Concatenate even elements from two vectors
  INSN(sve_uzp2, 0b1); // Concatenate odd elements from two vectors
#undef INSN

// SVE permute predicate elements
#define INSN(NAME, op)                                                               \
  void NAME(PRegister Pd, SIMD_RegVariant T, PRegister Pn, PRegister Pm) {           \
    starti;                                                                          \
    assert(T != Q, "invalid size");                                                  \
    f(0b00000101, 31, 24), f(T, 23, 22), f(0b10, 21, 20), prf(Pm, 16);               \
    f(0b01001, 15, 11), f(op, 10), f(0b0, 9), prf(Pn, 5), f(0b0, 4), prf(Pd, 0);     \
  }

  INSN(sve_uzp1, 0b0); // Concatenate even elements from two predicates
  INSN(sve_uzp2, 0b1); // Concatenate odd elements from two predicates
#undef INSN

// Predicate counted loop (SVE) (32-bit variants are not included)
#define INSN(NAME, decode)                                              \
  void NAME(PRegister Pd, SIMD_RegVariant T, Register Rn, Register Rm) { \
    starti;                                                             \
    assert(T != Q, "invalid register variant");                         \
    f(0b00100101, 31, 24), f(T, 23, 22), f(1, 21),                      \
    zrf(Rm, 16), f(0, 15, 13), f(1, 12), f(decode >> 1, 11, 10),        \
    zrf(Rn, 5), f(decode & 1, 4), prf(Pd, 0);                           \
  }

  INSN(sve_whilelt, 0b010); // While incrementing signed scalar less than scalar
  INSN(sve_whilele, 0b011); // While incrementing signed scalar less than or equal to scalar
  INSN(sve_whilelo, 0b110); // While incrementing unsigned scalar lower than scalar
  INSN(sve_whilels, 0b111); // While incrementing unsigned scalar lower than or the same as scalar
#undef INSN

  // SVE predicate reverse
  void sve_rev(PRegister Pd, SIMD_RegVariant T, PRegister Pn) {
    starti;
    assert(T != Q, "invalid size");
    f(0b00000101, 31, 24), f(T, 23, 22), f(0b1101000100000, 21, 9);
    prf(Pn, 5), f(0, 4), prf(Pd, 0);
  }

// SVE partition break condition
#define INSN(NAME, op)                                                \
  void NAME(PRegister Pd, PRegister Pg, PRegister Pn, bool isMerge) { \
    starti;                                                           \
    f(0b00100101, 31, 24), f(op, 23, 22), f(0b01000001, 21, 14);      \
    prf(Pg, 10), f(0b0, 9), prf(Pn, 5), f(isMerge ? 1 : 0, 4), prf(Pd, 0); \
  }

  INSN(sve_brka, 0b00); // Break after first true condition
  INSN(sve_brkb, 0b10); // Break before first true condition
#undef INSN

// Element count and increment scalar (SVE)
// imm4 is the multiplier (1..16), encoded as imm4 - 1.
#define INSN(NAME, TYPE)                                                              \
  void NAME(Register Xdn, unsigned imm4 = 1, int pattern = 0b11111) {                 \
    starti;                                                                           \
    f(0b00000100, 31, 24), f(TYPE, 23, 22), f(0b10, 21, 20);                          \
    f(imm4 - 1, 19, 16), f(0b11100, 15, 11), f(0, 10), f(pattern, 9, 5), rf(Xdn, 0);  \
  }

  INSN(sve_cntb, B);  // Set scalar to multiple of 8-bit predicate constraint element count
  INSN(sve_cnth, H);  // Set scalar to multiple of 16-bit predicate constraint element count
  INSN(sve_cntw, S);  // Set scalar to multiple of 32-bit predicate constraint element count
  INSN(sve_cntd, D);  // Set scalar to multiple of 64-bit predicate constraint element count
#undef INSN

  // Set scalar to active predicate element count
  void sve_cntp(Register Xd, SIMD_RegVariant T, PRegister Pg, PRegister Pn) {
    starti;
    assert(T != Q, "invalid size");
    f(0b00100101, 31, 24), f(T, 23, 22), f(0b10000010, 21, 14);
    prf(Pg, 10), f(0, 9), prf(Pn, 5), rf(Xd, 0);
  }

  // SVE convert signed integer to floating-point (predicated)
  void sve_scvtf(FloatRegister Zd, SIMD_RegVariant T_dst, PRegister Pg,
                 FloatRegister Zn, SIMD_RegVariant T_src) {
    starti;
    assert(T_src != B && T_dst != B && T_src != Q && T_dst != Q &&
           (T_src != H || T_dst == T_src), "invalid register variant");
    int opc = T_dst;
    int opc2 = T_src;
    // In most cases we can treat T_dst, T_src as opc, opc2,
    // except for the following two combinations.
    // +-----+------+---+------------------------------------+
    // | opc | opc2 | U |        Instruction Details         |
    // +-----+------+---+------------------------------------+
    // | 11  |  00  | 0 | SCVTF - 32-bit to double-precision |
    // | 11  |  10  | 0 | SCVTF - 64-bit to single-precision |
    // +-----+------+---+------------------------------------+
    if (T_src == S && T_dst == D) {
      opc = 0b11;
      opc2 = 0b00;
    } else if (T_src == D && T_dst == S) {
      opc = 0b11;
      opc2 = 0b10;
    }
    f(0b01100101, 31, 24), f(opc, 23, 22), f(0b010, 21, 19);
    f(opc2, 18, 17), f(0b0101, 16, 13);
    pgrf(Pg, 10), rf(Zn, 5), rf(Zd, 0);
  }

  // SVE floating-point convert to signed integer, rounding toward zero (predicated)
  void sve_fcvtzs(FloatRegister Zd, SIMD_RegVariant T_dst, PRegister Pg,
                  FloatRegister Zn, SIMD_RegVariant T_src) {
    starti;
    assert(T_src != B && T_dst != B && T_src != Q && T_dst != Q &&
           (T_dst != H || T_src == H), "invalid register variant");
    int opc = T_src;
    int opc2 = T_dst;
    // In most cases we can treat T_src, T_dst as opc, opc2,
    // except for the following two combinations.
    // +-----+------+---+-------------------------------------+
    // | opc | opc2 | U |        Instruction Details          |
    // +-----+------+---+-------------------------------------+
    // | 11  |  10  | 0 | FCVTZS - single-precision to 64-bit |
    // | 11  |  00  | 0 | FCVTZS - double-precision to 32-bit |
    // +-----+------+---+-------------------------------------+
    if (T_src == S && T_dst == D) {
      opc = 0b11;
      opc2 = 0b10;
    } else if (T_src == D && T_dst == S) {
      opc = 0b11;
      opc2 = 0b00;
    }
    f(0b01100101, 31, 24), f(opc, 23, 22), f(0b011, 21, 19);
    f(opc2, 18, 17), f(0b0101, 16, 13);
    pgrf(Pg, 10), rf(Zn, 5), rf(Zd, 0);
  }

  // SVE floating-point convert precision (predicated)
  void sve_fcvt(FloatRegister Zd, SIMD_RegVariant T_dst, PRegister Pg,
                FloatRegister Zn, SIMD_RegVariant T_src) {
    starti;
    assert(T_src != B && T_dst != B && T_src != Q && T_dst != Q &&
           T_src != T_dst, "invalid register variant");
    guarantee(T_src != H && T_dst != H, "half-precision unsupported");
    f(0b01100101, 31, 24), f(0b11, 23, 22), f(0b0010, 21, 18);
    f(T_dst, 17, 16), f(0b101, 15, 13);
    pgrf(Pg, 10), rf(Zn, 5), rf(Zd, 0);
  }

// SVE extract element to general-purpose register
#define INSN(NAME, before)                                                      \
  void NAME(Register Rd, SIMD_RegVariant T, PRegister Pg,  FloatRegister Zn) {  \
    starti;                                                                     \
    f(0b00000101, 31, 24), f(T, 23, 22), f(0b10000, 21, 17);                    \
    f(before, 16), f(0b101, 15, 13);                                            \
    pgrf(Pg, 10), rf(Zn, 5), rf(Rd, 0);                                         \
  }

  INSN(sve_lasta, 0b0);
  INSN(sve_lastb, 0b1);
#undef INSN

// SVE extract element to SIMD&FP scalar register
#define INSN(NAME, before)                                                           \
  void NAME(FloatRegister Vd, SIMD_RegVariant T, PRegister Pg, FloatRegister Zn) {   \
    starti;                                                                          \
    f(0b00000101, 31, 24), f(T, 23, 22), f(0b10001, 21, 17);                         \
    f(before, 16), f(0b100, 15, 13);                                                 \
    pgrf(Pg, 10), rf(Zn, 5), rf(Vd, 0);                                              \
  }
  INSN(sve_lasta, 0b0);
  INSN(sve_lastb, 0b1);
#undef INSN

// SVE reverse within elements
// "cond" is the per-instruction legality check on T, evaluated in the assert.
#define INSN(NAME, opc, cond)                                                        \
  void NAME(FloatRegister Zd, SIMD_RegVariant T, PRegister Pg, FloatRegister Zn) {   \
    starti;                                                                          \
    assert(cond, "invalid size");                                                    \
    f(0b00000101, 31, 24), f(T, 23, 22), f(0b1001, 21, 18), f(opc, 17, 16);          \
    f(0b100, 15, 13), pgrf(Pg, 10), rf(Zn, 5), rf(Zd, 0);                            \
  }

  INSN(sve_revb, 0b00, T == H || T == S || T == D);
  INSN(sve_rbit, 0b11, T != Q);
#undef INSN

  // SVE Index Generation:
  // Create index starting from and incremented by immediate
  void sve_index(FloatRegister Zd, SIMD_RegVariant T, int imm1, int imm2) {
    starti;
    assert(T != Q, "invalid size");
    f(0b00000100, 31, 24), f(T, 23, 22), f(0b1, 21);
    sf(imm2, 20, 16), f(0b010000, 15, 10);
    sf(imm1, 9, 5), rf(Zd, 0);
  }

  // SVE Index Generation:
  // Create index starting from general-purpose register and incremented by immediate
  void sve_index(FloatRegister Zd, SIMD_RegVariant T, Register Rn, int imm) {
    starti;
    assert(T != Q, "invalid size");
    f(0b00000100, 31, 24), f(T, 23, 22), f(0b1, 21);
    sf(imm, 20, 16), f(0b010001, 15, 10);
    zrf(Rn, 5), rf(Zd, 0);
  }

  // SVE programmable table lookup/permute using vector of element indices
  void sve_tbl(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn, FloatRegister Zm) {
    starti;
    assert(T != Q, "invalid size");
    f(0b00000101, 31, 24), f(T, 23, 22), f(0b1, 21), rf(Zm, 16);
    f(0b001100, 15, 10), rf(Zn, 5), rf(Zd, 0);
  }

  // Shuffle active elements of vector to the right and fill with zero
  void sve_compact(FloatRegister Zd, SIMD_RegVariant T, FloatRegister Zn, PRegister Pg) {
    starti;
    assert(T == S || T == D, "invalid size");
    f(0b00000101, 31, 24), f(T, 23, 22), f(0b100001100, 21, 13);
    pgrf(Pg, 10), rf(Zn, 5), rf(Zd, 0);
  }
  // SVE2 Count matching elements in vector
  void sve_histcnt(FloatRegister Zd, SIMD_RegVariant T, PRegister Pg,
                   FloatRegister Zn, FloatRegister Zm) {
    starti;
    assert(T == S || T == D, "invalid size");
    f(0b01000101, 31, 24), f(T, 23, 22), f(0b1, 21), rf(Zm, 16);
    f(0b110, 15, 13), pgrf(Pg, 10), rf(Zn, 5), rf(Zd, 0);
  }

  Assembler(CodeBuffer* code) : AbstractAssembler(code) {
  }

  // Stack overflow checking
  virtual void bang_stack_with_offset(int offset);

  // Immediate-validity queries used by callers before selecting an encoding.
  static bool operand_valid_for_logical_immediate(bool is32, uint64_t imm);
  static bool operand_valid_for_sve_logical_immediate(unsigned elembits, uint64_t imm);
  static bool operand_valid_for_add_sub_immediate(int64_t imm);
  static bool operand_valid_for_sve_add_sub_immediate(int64_t imm);
  static bool operand_valid_for_float_immediate(double imm);
  static int operand_valid_for_movi_immediate(uint64_t imm64, SIMD_Arrangement T);

  void emit_data64(jlong data, relocInfo::relocType rtype, int format = 0);
  void emit_data64(jlong data, RelocationHolder const& rspec, int format = 0);
};

inline Assembler::Membar_mask_bits operator|(Assembler::Membar_mask_bits a,
                                             Assembler::Membar_mask_bits b) {
  return Assembler::Membar_mask_bits(unsigned(a)|unsigned(b));
}

// Emit the accumulated instruction word when the encoding helper goes out of
// scope; the assert checks that every bit of the word has been assigned.
Instruction_aarch64::~Instruction_aarch64() {
  assem->emit_int32(insn);
  assert_cond(get_bits() == 0xffffffff);
}

#undef f
#undef sf
#undef rf
#undef srf
#undef zrf
#undef prf
#undef pgrf
#undef fixed

#undef starti

// Invert a condition
// (flipping the low bit of an AArch64 condition code yields its negation)
inline const Assembler::Condition operator~(const Assembler::Condition cond) {
  return Assembler::Condition(int(cond) ^ 1);
}

extern "C" void das(uint64_t start, int len);

#endif // CPU_AARCH64_ASSEMBLER_AARCH64_HPP