1 /*
2 * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #ifndef CPU_X86_ASSEMBLER_X86_HPP
26 #define CPU_X86_ASSEMBLER_X86_HPP
27
28 #include "asm/register.hpp"
29 #include "utilities/checkedCast.hpp"
30 #include "utilities/powerOfTwo.hpp"
31
32 // Contains all the definitions needed for x86 assembly code generation.
33
34 // Calling convention
// Register counts of the calling conventions used by generated code.
class Argument {
 public:
  enum {
    // Java calling convention (declared outside the platform split).
    n_int_register_parameters_j   = 6, // j_rarg0, j_rarg1, ...
    n_float_register_parameters_j = 8, // j_farg0, j_farg1, ...

    // Native (C) calling convention, which depends on the platform ABI.
#ifdef _WIN64
    n_int_register_parameters_c   = 4, // rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...)
    n_float_register_parameters_c = 4, // xmm0 - xmm3 (c_farg0, c_farg1, ... )
    n_int_register_returns_c      = 1, // rax
    n_float_register_returns_c    = 1  // xmm0
#else
    n_int_register_parameters_c   = 6, // rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...)
    n_float_register_parameters_c = 8, // xmm0 - xmm7 (c_farg0, c_farg1, ... )
    n_int_register_returns_c      = 2, // rax, rdx
    n_float_register_returns_c    = 2  // xmm0, xmm1
#endif // _WIN64
  };
};
53
54
55 // Symbolically name the register arguments used by the c calling convention.
56 // Windows is different from linux/solaris. So much for standards...
57
58 #ifdef _WIN64
59
60 constexpr Register c_rarg0 = rcx;
61 constexpr Register c_rarg1 = rdx;
62 constexpr Register c_rarg2 = r8;
63 constexpr Register c_rarg3 = r9;
64
65 constexpr XMMRegister c_farg0 = xmm0;
66 constexpr XMMRegister c_farg1 = xmm1;
67 constexpr XMMRegister c_farg2 = xmm2;
68 constexpr XMMRegister c_farg3 = xmm3;
69
70 #else
71
72 constexpr Register c_rarg0 = rdi;
73 constexpr Register c_rarg1 = rsi;
74 constexpr Register c_rarg2 = rdx;
75 constexpr Register c_rarg3 = rcx;
76 constexpr Register c_rarg4 = r8;
77 constexpr Register c_rarg5 = r9;
78
79 constexpr XMMRegister c_farg0 = xmm0;
80 constexpr XMMRegister c_farg1 = xmm1;
81 constexpr XMMRegister c_farg2 = xmm2;
82 constexpr XMMRegister c_farg3 = xmm3;
83 constexpr XMMRegister c_farg4 = xmm4;
84 constexpr XMMRegister c_farg5 = xmm5;
85 constexpr XMMRegister c_farg6 = xmm6;
86 constexpr XMMRegister c_farg7 = xmm7;
87
88 #endif // _WIN64
89
90 // Symbolically name the register arguments used by the Java calling convention.
91 // We have control over the convention for java so we can do what we please.
92 // What pleases us is to offset the java calling convention so that when
93 // we call a suitable jni method the arguments are lined up and we don't
94 // have to do little shuffling. A suitable jni method is non-static and a
95 // small number of arguments (two fewer args on windows)
96 //
97 // |-------------------------------------------------------|
98 // | c_rarg0 c_rarg1 c_rarg2 c_rarg3 c_rarg4 c_rarg5 |
99 // |-------------------------------------------------------|
100 // | rcx rdx r8 r9 rdi* rsi* | windows (* not a c_rarg)
101 // | rdi rsi rdx rcx r8 r9 | solaris/linux
102 // |-------------------------------------------------------|
103 // | j_rarg5 j_rarg0 j_rarg1 j_rarg2 j_rarg3 j_rarg4 |
104 // |-------------------------------------------------------|
105
106 constexpr Register j_rarg0 = c_rarg1;
107 constexpr Register j_rarg1 = c_rarg2;
108 constexpr Register j_rarg2 = c_rarg3;
109 // Windows runs out of register args here
110 #ifdef _WIN64
111 constexpr Register j_rarg3 = rdi;
112 constexpr Register j_rarg4 = rsi;
113 #else
114 constexpr Register j_rarg3 = c_rarg4;
115 constexpr Register j_rarg4 = c_rarg5;
116 #endif /* _WIN64 */
117 constexpr Register j_rarg5 = c_rarg0;
118
119 constexpr XMMRegister j_farg0 = xmm0;
120 constexpr XMMRegister j_farg1 = xmm1;
121 constexpr XMMRegister j_farg2 = xmm2;
122 constexpr XMMRegister j_farg3 = xmm3;
123 constexpr XMMRegister j_farg4 = xmm4;
124 constexpr XMMRegister j_farg5 = xmm5;
125 constexpr XMMRegister j_farg6 = xmm6;
126 constexpr XMMRegister j_farg7 = xmm7;
127
128 constexpr Register rscratch1 = r10; // volatile
129 constexpr Register rscratch2 = r11; // volatile
130
131 constexpr Register r12_heapbase = r12; // callee-saved
132 constexpr Register r15_thread = r15; // callee-saved
133
134 // JSR 292
135 // On x86, the SP does not have to be saved when invoking method handle intrinsics
136 // or compiled lambda forms. We indicate that by setting rbp_mh_SP_save to noreg.
137 constexpr Register rbp_mh_SP_save = noreg;
138
139 // Address is an abstraction used to represent a memory location
140 // using any of the amd64 addressing modes with one object.
141 //
142 // Note: A register location is represented via a Register, not
143 // via an address for efficiency & simplicity reasons.
144
145 class ArrayAddress;
146
147 class Address {
148 public:
149 enum ScaleFactor {
150 no_scale = -1,
151 times_1 = 0,
152 times_2 = 1,
153 times_4 = 2,
154 times_8 = 3,
155 times_ptr = times_8
156 };
157 static ScaleFactor times(int size) {
158 assert(size >= 1 && size <= 8 && is_power_of_2(size), "bad scale size");
159 if (size == 8) return times_8;
160 if (size == 4) return times_4;
161 if (size == 2) return times_2;
162 return times_1;
163 }
164 static int scale_size(ScaleFactor scale) {
165 assert(scale != no_scale, "");
166 assert(((1 << (int)times_1) == 1 &&
167 (1 << (int)times_2) == 2 &&
168 (1 << (int)times_4) == 4 &&
169 (1 << (int)times_8) == 8), "");
170 return (1 << (int)scale);
171 }
172
173 private:
174 Register _base;
175 Register _index;
176 XMMRegister _xmmindex;
177 ScaleFactor _scale;
178 int _disp;
179 bool _isxmmindex;
180 RelocationHolder _rspec;
181
182 // Easily misused constructors make them private
183 // %%% can we make these go away?
184 Address(int disp, address loc, relocInfo::relocType rtype);
185 Address(int disp, address loc, RelocationHolder spec);
186
187 public:
188
189 int disp() { return _disp; }
190 // creation
191 Address()
192 : _base(noreg),
193 _index(noreg),
194 _xmmindex(xnoreg),
195 _scale(no_scale),
196 _disp(0),
197 _isxmmindex(false){
198 }
199
200 explicit Address(Register base, int disp = 0)
201 : _base(base),
202 _index(noreg),
203 _xmmindex(xnoreg),
204 _scale(no_scale),
205 _disp(disp),
206 _isxmmindex(false){
207 }
208
209 Address(Register base, Register index, ScaleFactor scale, int disp = 0)
210 : _base (base),
211 _index(index),
212 _xmmindex(xnoreg),
213 _scale(scale),
214 _disp (disp),
215 _isxmmindex(false) {
216 assert(!index->is_valid() == (scale == Address::no_scale),
217 "inconsistent address");
218 }
219
220 Address(Register base, RegisterOrConstant index, ScaleFactor scale = times_1, int disp = 0)
221 : _base (base),
222 _index(index.register_or_noreg()),
223 _xmmindex(xnoreg),
224 _scale(scale),
225 _disp (disp + checked_cast<int>(index.constant_or_zero() * scale_size(scale))),
226 _isxmmindex(false){
227 if (!index.is_register()) scale = Address::no_scale;
228 assert(!_index->is_valid() == (scale == Address::no_scale),
229 "inconsistent address");
230 }
231
232 Address(Register base, XMMRegister index, ScaleFactor scale, int disp = 0)
233 : _base (base),
234 _index(noreg),
235 _xmmindex(index),
236 _scale(scale),
237 _disp(disp),
238 _isxmmindex(true) {
239 assert(!index->is_valid() == (scale == Address::no_scale),
240 "inconsistent address");
241 }
242
243 // The following overloads are used in connection with the
244 // ByteSize type (see sizes.hpp). They simplify the use of
245 // ByteSize'd arguments in assembly code.
246
247 Address(Register base, ByteSize disp)
248 : Address(base, in_bytes(disp)) {}
249
250 Address(Register base, Register index, ScaleFactor scale, ByteSize disp)
251 : Address(base, index, scale, in_bytes(disp)) {}
252
253 Address(Register base, RegisterOrConstant index, ScaleFactor scale, ByteSize disp)
254 : Address(base, index, scale, in_bytes(disp)) {}
255
256 Address plus_disp(int disp) const {
257 Address a = (*this);
258 a._disp += disp;
259 return a;
260 }
261 Address plus_disp(RegisterOrConstant disp, ScaleFactor scale = times_1) const {
262 Address a = (*this);
263 a._disp += checked_cast<int>(disp.constant_or_zero() * scale_size(scale));
264 if (disp.is_register()) {
265 assert(!a.index()->is_valid(), "competing indexes");
266 a._index = disp.as_register();
267 a._scale = scale;
268 }
269 return a;
270 }
271 bool is_same_address(Address a) const {
272 // disregard _rspec
273 return _base == a._base && _disp == a._disp && _index == a._index && _scale == a._scale;
274 }
275
276 // accessors
277 bool uses(Register reg) const { return _base == reg || _index == reg; }
278 Register base() const { return _base; }
279 Register index() const { return _index; }
280 XMMRegister xmmindex() const { return _xmmindex; }
281 ScaleFactor scale() const { return _scale; }
282 int disp() const { return _disp; }
283 bool isxmmindex() const { return _isxmmindex; }
284
285 // Convert the raw encoding form into the form expected by the constructor for
286 // Address. An index of 4 (rsp) corresponds to having no index, so convert
287 // that to noreg for the Address constructor.
288 static Address make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc);
289
290 static Address make_array(ArrayAddress);
291
292 private:
293 bool base_needs_rex() const {
294 return _base->is_valid() && ((_base->encoding() & 8) == 8);
295 }
296
297 bool base_needs_rex2() const {
298 return _base->is_valid() && _base->encoding() >= 16;
299 }
300
301 bool index_needs_rex() const {
302 return _index->is_valid() && ((_index->encoding() & 8) == 8);
303 }
304
305 bool index_needs_rex2() const {
306 return _index->is_valid() &&_index->encoding() >= 16;
307 }
308
309 bool xmmindex_needs_rex() const {
310 return _xmmindex->is_valid() && ((_xmmindex->encoding() & 8) == 8);
311 }
312
313 bool xmmindex_needs_rex2() const {
314 return _xmmindex->is_valid() && _xmmindex->encoding() >= 16;
315 }
316
317 relocInfo::relocType reloc() const { return _rspec.type(); }
318
319 friend class Assembler;
320 friend class MacroAssembler;
321 friend class LIR_Assembler; // base/index/scale/disp
322 };
323
324 //
325 // AddressLiteral has been split out from Address because operands of this type
326 // need to be treated specially on 32bit vs. 64bit platforms. By splitting it out
327 // the few instructions that need to deal with address literals are unique and the
328 // MacroAssembler does not have to implement every instruction in the Assembler
329 // in order to search for address literals that may need special handling depending
// on the instruction and the platform. This is a small step on the way to merging
// the i486/amd64 directories.
332 //
333 class AddressLiteral {
334 friend class ArrayAddress;
335 RelocationHolder _rspec;
336 // Typically we use AddressLiterals we want to use their rval
337 // However in some situations we want the lval (effect address) of the item.
338 // We provide a special factory for making those lvals.
339 bool _is_lval;
340
341 // If the target is far we'll need to load the ea of this to
342 // a register to reach it. Otherwise if near we can do rip
343 // relative addressing.
344
345 address _target;
346
347 protected:
348 // creation
349 AddressLiteral()
350 : _is_lval(false),
351 _target(nullptr)
352 {}
353
354 public:
355
356
357 AddressLiteral(address target, relocInfo::relocType rtype);
358
359 AddressLiteral(address target, RelocationHolder const& rspec)
360 : _rspec(rspec),
361 _is_lval(false),
362 _target(target)
363 {}
364
365 AddressLiteral addr() {
366 AddressLiteral ret = *this;
367 ret._is_lval = true;
368 return ret;
369 }
370
371
372 private:
373
374 address target() { return _target; }
375 bool is_lval() const { return _is_lval; }
376
377 relocInfo::relocType reloc() const { return _rspec.type(); }
378 const RelocationHolder& rspec() const { return _rspec; }
379
380 friend class Assembler;
381 friend class MacroAssembler;
382 friend class Address;
383 friend class LIR_Assembler;
384 };
385
386 // Convenience classes
387 class RuntimeAddress: public AddressLiteral {
388
389 public:
390
391 RuntimeAddress(address target) : AddressLiteral(target, relocInfo::runtime_call_type) {}
392
393 };
394
395 class ExternalAddress: public AddressLiteral {
396 private:
397 static relocInfo::relocType reloc_for_target(address target) {
398 // Sometimes ExternalAddress is used for values which aren't
399 // exactly addresses, like the card table base.
400 // external_word_type can't be used for values in the first page
401 // so just skip the reloc in that case.
402 return external_word_Relocation::can_be_relocated(target) ? relocInfo::external_word_type : relocInfo::none;
403 }
404
405 public:
406
407 ExternalAddress(address target) : AddressLiteral(target, reloc_for_target(target)) {}
408
409 };
410
411 class InternalAddress: public AddressLiteral {
412
413 public:
414
415 InternalAddress(address target) : AddressLiteral(target, relocInfo::internal_word_type) {}
416
417 };
418
419 // x86 can do array addressing as a single operation since disp can be an absolute
420 // address amd64 can't. We create a class that expresses the concept but does extra
421 // magic on amd64 to get the final result
422
423 class ArrayAddress {
424 private:
425
426 AddressLiteral _base;
427 Address _index;
428
429 public:
430
431 ArrayAddress() {};
432 ArrayAddress(AddressLiteral base, Address index): _base(base), _index(index) {};
433 AddressLiteral base() { return _base; }
434 Address index() { return _index; }
435
436 };
437
438 class InstructionAttr;
439
440 // 64-bit reflect the fxsave size which is 512 bytes and the new xsave area on EVEX which is another 2176 bytes
441 // See fxsave and xsave(EVEX enabled) documentation for layout
442 const int FPUStateSizeInWords = 2688 / wordSize;
443
444
// Control mask (imm8) encodings for the new AVX10 minmax instructions.
//
// imm8[4]   = 0  (please refer to Table 11.1 of section 11.2 of AVX10 manual[1] for details)
// imm8[3:2] = 01 (sign control: select sign, please refer to Table 11.5 of section 11.2 of AVX10 manual[1] for details)
// imm8[1:0] = 00 (min) / 01 (max)
//
// [1] https://www.intel.com/content/www/us/en/content-details/856721/intel-advanced-vector-extensions-10-2-intel-avx10-2-architecture-specification.html?wapkw=AVX10
const int AVX10_MINMAX_MIN_COMPARE_SIGN = (0x1 << 2) | 0x0;
const int AVX10_MINMAX_MAX_COMPARE_SIGN = (0x1 << 2) | 0x1;
454
455 // The Intel x86/Amd64 Assembler: Pure assembler doing NO optimizations on the instruction
456 // level (e.g. mov rax, 0 is not translated into xor rax, rax!); i.e., what you write
457 // is what you get. The Assembler is generating code into a CodeBuffer.
458
459 class Assembler : public AbstractAssembler {
460 friend class AbstractAssembler; // for the non-virtual hack
461 friend class LIR_Assembler; // as_Address()
462 friend class StubGenerator;
463
464 public:
// The x86 condition codes used for conditional jumps/moves, listed in order of
// their encoding. Several names share one encoding.
enum Condition {
  overflow     = 0x0,
  noOverflow   = 0x1,
  below        = 0x2,
  carrySet     = below,
  aboveEqual   = 0x3,
  carryClear   = aboveEqual,
  zero         = 0x4,
  equal        = zero,
  notZero      = 0x5,
  notEqual     = notZero,
  belowEqual   = 0x6,
  above        = 0x7,
  negative     = 0x8,
  positive     = 0x9,
  parity       = 0xa,
  noParity     = 0xb,
  less         = 0xc,
  greaterEqual = 0xd,
  lessEqual    = 0xe,
  greater      = 0xf
};
487
// Instruction prefix bytes.
enum Prefix {
  Prefix_EMPTY = 0x0,

  // segment overrides
  ES_segment = 0x26,
  CS_segment = 0x2e,
  SS_segment = 0x36,
  DS_segment = 0x3e,
  FS_segment = 0x64,
  GS_segment = 0x65,

  // REX prefixes: 0x40 with the bits named by the suffix set in the low
  // nibble (W = 0x8, R = 0x4, X = 0x2, B = 0x1).
  REX      = 0x40,

  REX_B    = REX | 0x1,
  REX_X    = REX | 0x2,
  REX_XB   = REX | 0x3,
  REX_R    = REX | 0x4,
  REX_RB   = REX | 0x5,
  REX_RX   = REX | 0x6,
  REX_RXB  = REX | 0x7,

  REX_W    = REX | 0x8,

  REX_WB   = REX | 0x9,
  REX_WX   = REX | 0xA,
  REX_WXB  = REX | 0xB,
  REX_WR   = REX | 0xC,
  REX_WRB  = REX | 0xD,
  REX_WRX  = REX | 0xE,
  REX_WRXB = REX | 0xF,

  // REX2 prefix byte, and the same byte moved into bits 15:8.
  REX2     = 0xd5,
  WREX2    = REX2 << 8,

  VEX_3bytes  = 0xC4,
  VEX_2bytes  = 0xC5,
  EVEX_4bytes = 0x62
};
525
// Individual bits of the REX2 payload plus two combined masks.
enum PrefixBits {
  REX2BIT_B   = 0x01,
  REX2BIT_X   = 0x02,
  REX2BIT_R   = 0x04,
  REX2BIT_W   = 0x08,
  REX2BIT_B4  = 0x10,
  REX2BIT_X4  = 0x20,
  REX2BIT_R4  = 0x40,
  REX2BIT_M0  = 0x80,
  // combinations
  REX2BIT_WB  = REX2BIT_W | REX2BIT_B,
  REX2BIT_WB4 = REX2BIT_W | REX2BIT_B4
};
538
// Bit masks used when building a VEX prefix. VEX_R and VEX_W share a value.
enum VexPrefix {
  VEX_W = 0x80,
  VEX_R = 0x80,
  VEX_X = 0x40,
  VEX_B = 0x20
};
545
// Bit masks used when building an EVEX prefix.
enum ExexPrefix {
  EVEX_Z  = 0x80,
  EVEX_X  = 0x40,
  EVEX_B  = 0x20,
  EVEX_Rb = 0x10,
  EVEX_V  = 0x08,
  EVEX_F  = 0x04
};
554
// Bit masks for the extended EVEX prefix. EEVEX_B and EEVEX_V share a value.
enum ExtEvexPrefix {
  EEVEX_R = 0x10,
  EEVEX_V = 0x08,
  EEVEX_B = 0x08,
  EEVEX_X = 0x04
};
561
// EVEX rounding-control selectors, numbered consecutively from 0.
enum EvexRoundPrefix {
  EVEX_RNE = 0,
  EVEX_RD  = 1,
  EVEX_RU  = 2,
  EVEX_RZ  = 3
};
568
// SIMD prefix selector carried in a VEX prefix; names give the legacy prefix byte.
enum VexSimdPrefix {
  VEX_SIMD_NONE = 0,
  VEX_SIMD_66   = 1,
  VEX_SIMD_F3   = 2,
  VEX_SIMD_F2   = 3
};
575
// Opcode map selector carried in a VEX prefix, plus the mask covering the field.
enum VexOpcode {
  VEX_OPCODE_NONE  = 0x00,
  VEX_OPCODE_0F    = 0x01,
  VEX_OPCODE_0F_38 = 0x02,
  VEX_OPCODE_0F_3A = 0x03,
  VEX_OPCODE_0F_3C = 0x04,
  VEX_OPCODE_MAP5  = 0x05,
  VEX_OPCODE_MAP6  = 0x06,

  VEX_OPCODE_MASK  = 0x1F
};
586
// Vector length selector; AVX_NoVec is the "no vector length" marker.
enum AvxVectorLen {
  AVX_128bit = 0,
  AVX_256bit = 1,
  AVX_512bit = 2,
  AVX_NoVec  = 4
};
593
// EVEX tuple types. The numbering has gaps; the values are kept exactly as given.
enum EvexTupleType {
  EVEX_FV      =  0,
  EVEX_HV      =  4,
  EVEX_FVM     =  6,
  EVEX_T1S     =  7,
  EVEX_T1F     = 11,
  EVEX_T2      = 13,
  EVEX_T4      = 15,
  EVEX_T8      = 17,
  EVEX_HVM     = 18,
  EVEX_QVM     = 19,
  EVEX_OVM     = 20,
  EVEX_M128    = 21,
  EVEX_DUP     = 22,
  EVEX_NOSCALE = 23,
  EVEX_ETUP    = 24
};
611
// EVEX input element size; for the sized entries the value is log2(bytes).
enum EvexInputSizeInBits {
  EVEX_8bit  = 0x0,
  EVEX_16bit = 0x1,
  EVEX_32bit = 0x2,
  EVEX_64bit = 0x3,
  EVEX_NObit = 0x4
};
619
// Operand selector: the input to locate_operand, and the format code for relocations.
enum WhichOperand {
  imm_operand         = 0,  // embedded 32-bit|64-bit immediate operand
  disp32_operand      = 1,  // embedded 32-bit displacement or address
  call32_operand      = 2,  // embedded 32-bit self-relative displacement
  narrow_oop_operand  = 3,  // embedded 32-bit immediate narrow oop
  _WhichOperand_limit = 4   // one past the last selector
};
628
// Comparison predicates for integral types & FP types when using SSE.
// Entries 4..7 are the "n"-prefixed / opposite counterparts of entries 0..3.
enum ComparisonPredicate {
  eq     = 0,
  lt     = 1,
  le     = 2,
  _false = 3,

  neq    = 4,
  nlt    = 5,
  nle    = 6,
  _true  = 7
};
640
// Comparison predicates for FP types when using AVX.
// O = ordered, U = unordered: with an ordered predicate any comparison that
// involves a NaN is false, otherwise it is true.
// S = signaling, Q = non-signaling: a signaling predicate makes the
// instruction signal #IA on NaN.
enum ComparisonPredicateFP {
  EQ_OQ    = 0x00,
  LT_OS    = 0x01,
  LE_OS    = 0x02,
  UNORD_Q  = 0x03,
  NEQ_UQ   = 0x04,
  NLT_US   = 0x05,
  NLE_US   = 0x06,
  ORD_Q    = 0x07,
  EQ_UQ    = 0x08,
  NGE_US   = 0x09,
  NGT_US   = 0x0A,
  FALSE_OQ = 0x0B,
  NEQ_OQ   = 0x0C,
  GE_OS    = 0x0D,
  GT_OS    = 0x0E,
  TRUE_UQ  = 0x0F,
  EQ_OS    = 0x10,
  LT_OQ    = 0x11,
  LE_OQ    = 0x12,
  UNORD_S  = 0x13,
  NEQ_US   = 0x14,
  NLT_UQ   = 0x15,
  NLE_UQ   = 0x16,
  ORD_S    = 0x17,
  EQ_US    = 0x18,
  NGE_UQ   = 0x19,
  NGT_UQ   = 0x1A,
  FALSE_OS = 0x1B,
  NEQ_OS   = 0x1C,
  GE_OQ    = 0x1D,
  GT_OQ    = 0x1E,
  TRUE_US  = 0x1F
};
678
// Operand width selector, numbered consecutively from 0.
enum Width {
  B = 0x0,
  W = 0x1,
  D = 0x2,
  Q = 0x3
};
685
//---<  calculate length of instruction  >---
// The instruction size cannot be found out easily on x86/x64, so a fixed
// value of '4' is reported for both len and maxlen.
// The instruction must start at the passed address (which is not inspected).
static unsigned int instr_len(unsigned char *instr) {
  const unsigned int assumed_len = 4;
  return assumed_len;
}
691
//---<  longest instructions  >---
// The max instruction length is not specified in the architecture
// documentation. A "safe enough" estimate (15) could be used, but this simply
// defaults to the same guess that instr_len reports.
static unsigned int instr_maxlen() {
  const unsigned int assumed_maxlen = 4;
  return assumed_maxlen;
}
697
// NOTE: The general philosophy of the declarations here is that 64bit versions
// of instructions are freely declared without the need for wrapping them in an ifdef.
// (Some dangerous instructions are ifdef'd out of inappropriate jvm's.)
// In the .cpp file the implementations are wrapped so that they are dropped out
// of the resulting jvm. This is done mostly to keep the footprint of MINIMAL
// to the size it was prior to merging up the 32bit and 64bit assemblers.
//
// This does mean you'll get a linker/runtime error if you use a 64bit only instruction
// in a 32bit vm. This is somewhat unfortunate but keeps the ifdef noise down.
707
708 private:
709
710 bool _legacy_mode_bw;
711 bool _legacy_mode_dq;
712 bool _legacy_mode_vl;
713 bool _legacy_mode_vlbw;
714
715 InstructionAttr *_attributes;
716 void set_attributes(InstructionAttr* attributes);
717
718 int get_base_prefix_bits(int enc);
719 int get_index_prefix_bits(int enc);
720 int get_base_prefix_bits(Register base);
721 int get_index_prefix_bits(Register index);
722 int get_reg_prefix_bits(int enc);
723
724 // 64bit prefixes
725 void prefix(Register reg);
726 void prefix(Register dst, Register src, Prefix p);
727 void prefix_rex2(Register dst, Register src);
728 void prefix(Register dst, Address adr, Prefix p);
729 void prefix_rex2(Register dst, Address adr);
730
731 // The is_map1 bool indicates an x86 map1 instruction which, when
732 // legacy encoded, uses a 0x0F opcode prefix. By specification, the
733 // opcode prefix is omitted when using rex2 encoding in support
734 // of APX extended GPRs.
735 void prefix(Address adr, bool is_map1 = false);
736 void prefix_rex2(Address adr, bool is_map1 = false);
737 void prefix(Address adr, Register reg, bool byteinst = false, bool is_map1 = false);
738 void prefix_rex2(Address adr, Register reg, bool byteinst = false, bool is_map1 = false);
739 void prefix(Address adr, XMMRegister reg);
740 void prefix_rex2(Address adr, XMMRegister reg);
741
// Single-register forms.
int prefix_and_encode(int reg_enc, bool byteinst = false, bool is_map1 = false);
int prefix_and_encode_rex2(int reg_enc, bool is_map1 = false);

// Two-register forms. The general form carries a byte flag per operand.
int prefix_and_encode(int dst_enc, bool dst_is_byte, int src_enc, bool src_is_byte, bool is_map1 = false);

// Convenience form: forwards to the general form with both byte flags false.
int prefix_and_encode(int dst_enc, int src_enc, bool is_map1 = false) {
  const bool dst_is_byte = false;
  const bool src_is_byte = false;
  return prefix_and_encode(dst_enc, dst_is_byte, src_enc, src_is_byte, is_map1);
}

int prefix_and_encode_rex2(int dst_enc, int src_enc, int init_bits = 0);
750 // Some prefixq variants always emit exactly one prefix byte, so besides a
751 // prefix-emitting method we provide a method to get the prefix byte to emit,
752 // which can then be folded into a byte stream.
753 int get_prefixq(Address adr, bool is_map1 = false);
754 int get_prefixq_rex2(Address adr, bool is_map1 = false);
755 int get_prefixq(Address adr, Register reg, bool is_map1 = false);
756 int get_prefixq_rex2(Address adr, Register reg, bool ismap1 = false);
757
758 void prefixq(Address adr);
759 void prefixq(Address adr, Register reg, bool is_map1 = false);
760 void prefixq(Address adr, XMMRegister reg);
761 void prefixq_rex2(Address adr, XMMRegister src);
762
763 bool prefix_is_rex2(int prefix);
764
765 int prefixq_and_encode(int reg_enc, bool is_map1 = false);
766 int prefixq_and_encode_rex2(int reg_enc, bool is_map1 = false);
767 int prefixq_and_encode(int dst_enc, int src_enc, bool is_map1 = false);
768 int prefixq_and_encode_rex2(int dst_enc, int src_enc, bool is_map1 = false);
769
770 bool needs_rex2(Register reg1, Register reg2 = noreg, Register reg3 = noreg);
771
772 bool needs_eevex(Register reg1, Register reg2 = noreg, Register reg3 = noreg);
773 bool needs_eevex(int enc1, int enc2 = -1, int enc3 = -1);
774 NOT_PRODUCT(bool needs_evex(XMMRegister reg1, XMMRegister reg2 = xnoreg, XMMRegister reg3 = xnoreg);)
775
776 void rex_prefix(Address adr, XMMRegister xreg,
777 VexSimdPrefix pre, VexOpcode opc, bool rex_w);
778 int rex_prefix_and_encode(int dst_enc, int src_enc,
779 VexSimdPrefix pre, VexOpcode opc, bool rex_w);
780
781 void vex_prefix(bool vex_r, bool vex_b, bool vex_x, int nds_enc, VexSimdPrefix pre, VexOpcode opc);
782
783 void evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool evex_v, bool evex_r, bool evex_b,
784 bool eevex_x, int nds_enc, VexSimdPrefix pre, VexOpcode opc, bool no_flags = false);
785
786 void eevex_prefix_ndd(Address adr, int ndd_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc,
787 InstructionAttr *attributes, bool no_flags = false);
788
789 void eevex_prefix_nf(Address adr, int ndd_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc,
790 InstructionAttr *attributes, bool no_flags = false);
791
792 void vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc,
793 InstructionAttr *attributes, bool nds_is_ndd = false, bool no_flags = false);
794
795 int vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc,
796 VexSimdPrefix pre, VexOpcode opc,
797 InstructionAttr *attributes, bool src_is_gpr = false, bool nds_is_ndd = false, bool no_flags = false);
798
799 int eevex_prefix_and_encode_nf(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc,
800 InstructionAttr *attributes, bool no_flags = false);
801
802 int emit_eevex_prefix_ndd(int dst_enc, VexSimdPrefix pre, VexOpcode opc, InstructionAttr *attributes, bool no_flags = false);
803
804 int emit_eevex_prefix_or_demote_ndd(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc,
805 InstructionAttr *attributes, bool no_flags = false, bool use_prefixq = false);
806
807 int emit_eevex_prefix_or_demote_ndd(int dst_enc, int nds_enc, VexSimdPrefix pre, VexOpcode opc,
808 InstructionAttr *attributes, bool no_flags = false, bool use_prefixq = false);
809
810 void emit_eevex_prefix_or_demote_arith_ndd(Register dst, Register src1, Register src2, VexSimdPrefix pre, VexOpcode opc,
811 int size, int op1, int op2, bool no_flags = false, bool is_commutative = false);
812
813 void emit_eevex_prefix_or_demote_arith_ndd(Register dst, Register nds, int32_t imm32, VexSimdPrefix pre, VexOpcode opc,
814 int size, int op1, int op2, bool no_flags);
815
816 void emit_eevex_or_demote(Register dst, Register src1, Address src2, VexSimdPrefix pre, VexOpcode opc,
817 int size, int opcode_byte, bool no_flags = false, bool is_map1 = false);
818
819 void emit_eevex_or_demote(Register dst, Address src1, Register src2, VexSimdPrefix pre, VexOpcode opc,
820 int size, int opcode_byte, bool no_flags = false, bool is_map1 = false, bool is_commutative = false);
821
822 void emit_eevex_or_demote(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc,
823 int size, int opcode_byte, bool no_flags, bool is_map1 = false, bool swap = false, bool is_commutative = false);
824
825 void emit_eevex_or_demote(int dst_enc, int nds_enc, int src_enc, int8_t imm8, VexSimdPrefix pre, VexOpcode opc,
826 int size, int opcode_byte, bool no_flags, bool is_map1 = false);
827
828 void simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre,
829 VexOpcode opc, InstructionAttr *attributes);
830
831 int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre,
832 VexOpcode opc, InstructionAttr *attributes, bool src_is_gpr = false);
833
834 // Helper functions for groups of instructions
835 bool is_demotable(bool no_flags, int dst_enc, int nds_enc);
836 void emit_arith_b(int op1, int op2, Register dst, int imm8);
837
838 void emit_arith(int op1, int op2, Register dst, int32_t imm32, bool optimize_rax_dst = true);
839 // Force generation of a 4 byte immediate value even if it fits into 8bit
840 void emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32);
841 void emit_arith(int op1, int op2, Register dst, Register src);
842
843 bool emit_compressed_disp_byte(int &disp);
844
845 void emit_modrm(int mod, int dst_enc, int src_enc);
846 void emit_modrm_disp8(int mod, int dst_enc, int src_enc,
847 int disp);
848 void emit_modrm_sib(int mod, int dst_enc, int src_enc,
849 Address::ScaleFactor scale, int index_enc, int base_enc);
850 void emit_modrm_sib_disp8(int mod, int dst_enc, int src_enc,
851 Address::ScaleFactor scale, int index_enc, int base_enc,
852 int disp);
853
854 void emit_operand_helper(int reg_enc,
855 int base_enc, int index_enc, Address::ScaleFactor scale,
856 int disp,
857 RelocationHolder const& rspec,
858 int post_addr_length);
859
860 void emit_operand(Register reg,
861 Register base, Register index, Address::ScaleFactor scale,
862 int disp,
863 RelocationHolder const& rspec,
864 int post_addr_length);
865
866 void emit_operand(Register reg,
867 Register base, XMMRegister index, Address::ScaleFactor scale,
868 int disp,
869 RelocationHolder const& rspec,
870 int post_addr_length);
871
872 void emit_operand(XMMRegister xreg,
873 Register base, XMMRegister xindex, Address::ScaleFactor scale,
874 int disp,
875 RelocationHolder const& rspec,
876 int post_addr_length);
877
878 void emit_operand(Register reg, Address adr,
879 int post_addr_length);
880
881 void emit_operand(XMMRegister reg,
882 Register base, Register index, Address::ScaleFactor scale,
883 int disp,
884 RelocationHolder const& rspec,
885 int post_addr_length);
886
887 void emit_operand_helper(KRegister kreg,
888 int base_enc, int index_enc, Address::ScaleFactor scale,
889 int disp,
890 RelocationHolder const& rspec,
891 int post_addr_length);
892
893 void emit_operand(KRegister kreg, Address adr,
894 int post_addr_length);
895
896 void emit_operand(KRegister kreg,
897 Register base, Register index, Address::ScaleFactor scale,
898 int disp,
899 RelocationHolder const& rspec,
900 int post_addr_length);
901
902 void emit_operand(XMMRegister reg, Address adr, int post_addr_length);
903
904 // Immediate-to-memory forms
905 void emit_arith_operand(int op1, Register rm, Address adr, int32_t imm32);
906 void emit_arith_operand_imm32(int op1, Register rm, Address adr, int32_t imm32);
907
908 protected:
909 #ifdef ASSERT
910 void check_relocation(RelocationHolder const& rspec, int format);
911 #endif
912
913 void emit_data(jint data, relocInfo::relocType rtype, int format = 0);
914 void emit_data(jint data, RelocationHolder const& rspec, int format = 0);
915 void emit_data64(jlong data, relocInfo::relocType rtype, int format = 0);
916 void emit_data64(jlong data, RelocationHolder const& rspec, int format = 0);
917
918 void emit_prefix_and_int8(int prefix, int b1);
919 void emit_opcode_prefix_and_encoding(int byte1, int ocp_and_encoding);
920 void emit_opcode_prefix_and_encoding(int byte1, int byte2, int ocp_and_encoding);
921 void emit_opcode_prefix_and_encoding(int byte1, int byte2, int ocp_and_encoding, int byte3);
922 bool always_reachable(AddressLiteral adr);
923 bool reachable(AddressLiteral adr);
924
925
926 // These are all easily abused and hence protected
927
928 public:
929 void mov_literal64(Register dst, intptr_t imm64, RelocationHolder const& rspec); // 64BIT ONLY
930
931 void cmp_narrow_oop(Register src1, int32_t imm32, RelocationHolder const& rspec);
932 void cmp_narrow_oop(Address src1, int32_t imm32, RelocationHolder const& rspec);
933
934 void mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec);
935 void mov_narrow_oop(Address dst, int32_t imm32, RelocationHolder const& rspec);
936
937 protected:
938 // These are unique in that we are ensured by the caller that the 32bit
939 // relative in these instructions will always be able to reach the potentially
940 // 64bit address described by entry. Since they can take a 64bit address they
941 // don't have the 32 suffix like the other instructions in this class.
942
943 void call_literal(address entry, RelocationHolder const& rspec);
944 void jmp_literal(address entry, RelocationHolder const& rspec);
945
946 // Avoid using directly section
947 // Instructions in this section are actually usable by anyone without danger
948 // of failure but have performance issues that are addressed by enhanced
949 // instructions which will do the proper thing based on the particular cpu.
950 // We protect them because we don't trust you...
951
952 // Don't use the following inc() and dec() methods directly. INC & DEC instructions
953 // could cause a partial flag stall since they don't set the CF flag.
954 // Use MacroAssembler::decrement() & MacroAssembler::increment() methods
955 // which call inc() & dec() or add() & sub() in accordance with
956 // the product flag UseIncDec value.
957
958 void decl(Register dst);
959 void decl(Address dst);
960 void decq(Address dst);
961
962 void incl(Register dst);
963 void incl(Address dst);
964 void incq(Register dst);
965 void incq(Address dst);
966
967 // New cpus require use of movsd and movss to avoid partial register stall
968 // when loading from memory. But for old Opteron use movlpd instead of movsd.
969 // The selection is done in MacroAssembler::movdbl() and movflt().
970
971 // Move Scalar Single-Precision Floating-Point Values
972 void movss(XMMRegister dst, Address src);
973 void movss(XMMRegister dst, XMMRegister src);
974 void movss(Address dst, XMMRegister src);
975
976 // Move Scalar Double-Precision Floating-Point Values
977 void movsd(XMMRegister dst, Address src);
978 void movsd(XMMRegister dst, XMMRegister src);
979 void movsd(Address dst, XMMRegister src);
980 void movlpd(XMMRegister dst, Address src);
981
982 void vmovsd(XMMRegister dst, XMMRegister src, XMMRegister src2);
983
984 // New cpus require use of movaps and movapd to avoid partial register stall
985 // when moving between registers.
986 void movaps(XMMRegister dst, XMMRegister src);
987 void movapd(XMMRegister dst, Address src);
988 void movapd(XMMRegister dst, XMMRegister src);
989
990 // End avoid using directly
991
992
993 // Instruction prefixes
994 void prefix(Prefix p);
995
996 void prefix16(int p);
997
998 public:
999
1000 // Creation
// Constructor: forwards `code` to the AbstractAssembler base constructor and
// then calls init_attributes() on the newly constructed assembler.
1001 Assembler(CodeBuffer* code) : AbstractAssembler(code) {
1002 init_attributes();
1003 }
1004
1005 // Decoding
1006 static address locate_operand(address inst, WhichOperand which);
1007 static address locate_next_instruction(address inst);
1008
1009 // Utilities
1010 static bool query_compressed_disp_byte(int disp, bool is_evex_inst, int vector_len,
1011 int cur_tuple_type, int in_size_in_bits, int cur_encoding);
1012
1013 // Generic instructions
1014 // Does 32bit or 64bit as needed for the platform. In some sense these
1015 // belong in macro assembler but there is no need for both varieties to exist
1016
1017 void init_attributes(void);
// Resets _attributes to nullptr, so no attribute object is referenced afterwards.
1018 void clear_attributes(void) { _attributes = nullptr; }
1019
1020 void lea(Register dst, Address src);
1021
1022 void mov(Register dst, Register src);
1023
1024 // support caching the result of some routines
1025
1026 // must be called before pusha(), popa(), vzeroupper() - checked with asserts
1027 static void precompute_instructions();
1028
1029 void pusha_uncached();
1030 void popa_uncached();
1031
1032 // APX ISA Extensions for register save/restore optimizations.
1033 void push2(Register src1, Register src2, bool with_ppx = false);
1034 void pop2(Register src1, Register src2, bool with_ppx = false);
1035 void push2p(Register src1, Register src2);
1036 void pop2p(Register src1, Register src2);
1037 void pushp(Register src);
1038 void popp(Register src);
1039
1040 // New Zero Upper setcc instruction.
1041 void esetzucc(Condition cc, Register dst);
1042
1043 void vzeroupper_uncached();
1044 void decq(Register dst);
1045 void edecq(Register dst, Register src, bool no_flags);
1046
1047 void pusha();
1048 void popa();
1049
1050 void pushf();
1051 void popf();
1052
1053 void push(int32_t imm32);
1054
1055 void push(Register src);
1056
1057 void pop(Register dst);
1058
1059 // These do register sized moves/scans
1060 void rep_mov();
1061 void rep_stos();
1062 void rep_stosb();
1063 void repne_scan();
1064 void repne_scanl();
1065
1066 // Vanilla instructions in lexical order
1067
1068 void adcl(Address dst, int32_t imm32);
1069 void adcl(Address dst, Register src);
1070 void adcl(Register dst, int32_t imm32);
1071 void adcl(Register dst, Address src);
1072 void adcl(Register dst, Register src);
1073
1074 void adcq(Register dst, int32_t imm32);
1075 void adcq(Register dst, Address src);
1076 void adcq(Register dst, Register src);
1077
1078 void addb(Address dst, int imm8);
1079 void addb(Address dst, Register src);
1080 void addb(Register dst, int imm8);
1081 void addw(Address dst, int imm16);
1082 void addw(Address dst, Register src);
1083
1084 void addl(Address dst, int32_t imm32);
1085 void eaddl(Register dst, Address src, int32_t imm32, bool no_flags);
1086 void addl(Address dst, Register src);
1087 void eaddl(Register dst, Address src1, Register src2, bool no_flags);
1088 void addl(Register dst, int32_t imm32);
1089 void eaddl(Register dst, Register src, int32_t imm32, bool no_flags);
1090 void addl(Register dst, Address src);
1091 void eaddl(Register dst, Register src1, Address src2, bool no_flags);
1092 void addl(Register dst, Register src);
1093 void eaddl(Register dst, Register src1, Register src2, bool no_flags);
1094
1095 void addq(Address dst, int32_t imm32);
1096 void eaddq(Register dst, Address src, int32_t imm32, bool no_flags);
1097 void addq(Address dst, Register src);
1098 void eaddq(Register dst, Address src1, Register src2, bool no_flags);
1099 void addq(Register dst, int32_t imm32);
1100 void eaddq(Register dst, Register src, int32_t imm32, bool no_flags);
1101 void addq(Register dst, Address src);
1102 void eaddq(Register dst, Register src1, Address src2, bool no_flags);
1103 void addq(Register dst, Register src);
1104 void eaddq(Register dst, Register src1, Register src2, bool no_flags);
1105
1106 void edecl(Register dst, Register src, bool no_flags);
1107 void edecl(Register dst, Address src, bool no_flags);
1108 void edecq(Register dst, Address src, bool no_flags);
1109 void eincl(Register dst, Register src, bool no_flags);
1110 void eincl(Register dst, Address src, bool no_flags);
1111 void eincq(Register dst, Register src, bool no_flags);
1112 void eincq(Register dst, Address src, bool no_flags);
1113
1114 // Add Unsigned Integers with Carry Flag
1115 void adcxq(Register dst, Register src);
1116 void eadcxq(Register dst, Register src1, Register src2);
1117
1118 // Add Unsigned Integers with Overflow Flag
1119 void adoxq(Register dst, Register src);
1120 void eadoxq(Register dst, Register src1, Register src2);
1121
1122 void addr_nop_4();
1123 void addr_nop_5();
1124 void addr_nop_7();
1125 void addr_nop_8();
1126
1127 // Add Scalar Double-Precision Floating-Point Values
1128 void addsd(XMMRegister dst, Address src);
1129 void addsd(XMMRegister dst, XMMRegister src);
1130
1131 // Add Scalar Single-Precision Floating-Point Values
1132 void addss(XMMRegister dst, Address src);
1133 void addss(XMMRegister dst, XMMRegister src);
1134
1135 // AES instructions
1136 void aesdec(XMMRegister dst, Address src);
1137 void aesdec(XMMRegister dst, XMMRegister src);
1138 void aesdeclast(XMMRegister dst, Address src);
1139 void aesdeclast(XMMRegister dst, XMMRegister src);
1140 void aesenc(XMMRegister dst, Address src);
1141 void aesenc(XMMRegister dst, XMMRegister src);
1142 void aesenclast(XMMRegister dst, Address src);
1143 void aesenclast(XMMRegister dst, XMMRegister src);
1144 // Vector AES instructions
1145 void vaesenc(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1146 void vaesenclast(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1147 void vaesdec(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1148 void vaesdeclast(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1149
1150 void andb(Address dst, Register src);
1151
1152 void andl(Address dst, int32_t imm32);
1153 void eandl(Register dst, Address src, int32_t imm32, bool no_flags);
1154 void andl(Register dst, int32_t imm32);
1155 void eandl(Register dst, Register src, int32_t imm32, bool no_flags);
1156 void andl(Register dst, Address src);
1157 void eandl(Register dst, Register src1, Address src2, bool no_flags);
1158 void eandl(Register dst, Address src1, Register src2, bool no_flags);
1159 void andl(Register dst, Register src);
1160 void eandl(Register dst, Register src1, Register src2, bool no_flags);
1161 void andl(Address dst, Register src);
1162
1163 void andq(Address dst, int32_t imm32);
1164 void eandq(Register dst, Address src, int32_t imm32, bool no_flags);
1165 void andq(Register dst, int32_t imm32);
1166 void eandq(Register dst, Register src, int32_t imm32, bool no_flags);
1167 void andq(Register dst, Address src);
1168 void eandq(Register dst, Register src1, Address src2, bool no_flags);
1169 void andq(Register dst, Register src);
1170 void eandq(Register dst, Register src1, Register src2, bool no_flags);
1171 void andq(Address dst, Register src);
1172 void eandq(Register dst, Address src1, Register src2, bool no_flags);
1173
1174 // BMI instructions
1175 void andnl(Register dst, Register src1, Register src2);
1176 void andnl(Register dst, Register src1, Address src2);
1177 void andnq(Register dst, Register src1, Register src2);
1178 void andnq(Register dst, Register src1, Address src2);
1179
1180 void blsil(Register dst, Register src);
1181 void blsil(Register dst, Address src);
1182 void blsiq(Register dst, Register src);
1183 void blsiq(Register dst, Address src);
1184
1185 void blsmskl(Register dst, Register src);
1186 void blsmskl(Register dst, Address src);
1187 void blsmskq(Register dst, Register src);
1188 void blsmskq(Register dst, Address src);
1189
1190 void blsrl(Register dst, Register src);
1191 void blsrl(Register dst, Address src);
1192 void blsrq(Register dst, Register src);
1193 void blsrq(Register dst, Address src);
1194
1195 void bsfl(Register dst, Register src);
1196 void bsrl(Register dst, Register src);
1197
1198 void bsfq(Register dst, Register src);
1199 void bsrq(Register dst, Register src);
1200
1201 void bswapl(Register reg);
1202
1203 void bswapq(Register reg);
1204
1205 void call(Label& L, relocInfo::relocType rtype);
1206 void call(Register reg); // push pc; pc <- reg
1207 void call(Address adr); // push pc; pc <- adr
1208
1209 void cdql();
1210
1211 void cdqq();
1212 void cdqe();
1213
1214 void cld();
1215
1216 void clflush(Address adr);
1217 void clflushopt(Address adr);
1218 void clwb(Address adr);
1219
1220 void cmovl(Condition cc, Register dst, Register src);
1221 void ecmovl(Condition cc, Register dst, Register src1, Register src2);
1222 void cmovl(Condition cc, Register dst, Address src);
1223 void ecmovl(Condition cc, Register dst, Register src1, Address src2);
1224
1225 void cmovq(Condition cc, Register dst, Register src);
1226 void ecmovq(Condition cc, Register dst, Register src1, Register src2);
1227 void cmovq(Condition cc, Register dst, Address src);
1228 void ecmovq(Condition cc, Register dst, Register src1, Address src2);
1229
1230
1231 void cmpb(Address dst, int imm8);
1232 void cmpb(Address dst, Register reg);
1233 void cmpb(Register reg, Address dst);
1234 void cmpb(Register reg, int imm8);
1235
1236 void cmpl(Address dst, int32_t imm32);
1237 void cmpl(Register dst, int32_t imm32);
1238 void cmpl(Register dst, Register src);
1239 void cmpl(Register dst, Address src);
1240 void cmpl_imm32(Address dst, int32_t imm32);
1241 void cmpl(Address dst, Register reg);
1242
1243 void cmpq(Address dst, int32_t imm32);
1244 void cmpq(Address dst, Register src);
1245 void cmpq(Register dst, int32_t imm32);
1246 void cmpq(Register dst, Register src);
1247 void cmpq(Register dst, Address src);
1248
1249 void cmpw(Address dst, int imm16);
1250 void cmpw(Address dst, Register reg);
1251
1252 void cmpxchg8 (Address adr);
1253
1254 void cmpxchgb(Register reg, Address adr);
1255 void cmpxchgl(Register reg, Address adr);
1256
1257 void cmpxchgq(Register reg, Address adr);
1258 void cmpxchgw(Register reg, Address adr);
1259
1260 // Ordered Compare Scalar Double-Precision Floating-Point Values and set EFLAGS
1261 void comisd(XMMRegister dst, Address src);
1262 void comisd(XMMRegister dst, XMMRegister src);
1263
1264 // Ordered Compare Scalar Single-Precision Floating-Point Values and set EFLAGS
1265 void comiss(XMMRegister dst, Address src);
1266 void comiss(XMMRegister dst, XMMRegister src);
1267
1268 // Identify processor type and features
1269 void cpuid();
1270
1271 // Serialize instruction stream
1272 void serialize();
1273
1274 // CRC32C
1275 void crc32(Register crc, Register v, int8_t sizeInBytes);
1276 void crc32(Register crc, Address adr, int8_t sizeInBytes);
1277
1278 // Convert Scalar Double-Precision Floating-Point Value to Scalar Single-Precision Floating-Point Value
1279 void cvtsd2ss(XMMRegister dst, XMMRegister src);
1280 void cvtsd2ss(XMMRegister dst, Address src);
1281
1282 // Convert Doubleword Integer to Scalar Double-Precision Floating-Point Value
1283 void cvtsi2sdl(XMMRegister dst, Register src);
1284 void cvtsi2sdl(XMMRegister dst, Address src);
1285 void cvtsi2sdq(XMMRegister dst, Register src);
1286 void cvtsi2sdq(XMMRegister dst, Address src);
1287
1288 // Convert Doubleword Integer to Scalar Single-Precision Floating-Point Value
1289 void cvtsi2ssl(XMMRegister dst, Register src);
1290 void cvtsi2ssl(XMMRegister dst, Address src);
1291 void cvtsi2ssq(XMMRegister dst, Register src);
1292 void cvtsi2ssq(XMMRegister dst, Address src);
1293
1294 // Convert Packed Signed Doubleword Integers to Packed Double-Precision Floating-Point Value
1295 void cvtdq2pd(XMMRegister dst, XMMRegister src);
1296 void vcvtdq2pd(XMMRegister dst, XMMRegister src, int vector_len);
1297
1298 // Convert between Half-Precision and Single-Precision Floating-Point Values
1299 void vcvtps2ph(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
1300 void vcvtph2ps(XMMRegister dst, XMMRegister src, int vector_len);
1301 void evcvtps2ph(Address dst, KRegister mask, XMMRegister src, int imm8, int vector_len);
1302 void vcvtps2ph(Address dst, XMMRegister src, int imm8, int vector_len);
1303 void vcvtph2ps(XMMRegister dst, Address src, int vector_len);
1304
1305 // Convert Packed Signed Doubleword Integers to Packed Single-Precision Floating-Point Value
1306 void cvtdq2ps(XMMRegister dst, XMMRegister src);
1307 void vcvtdq2ps(XMMRegister dst, XMMRegister src, int vector_len);
1308
1309 // Convert Scalar Single-Precision Floating-Point Value to Scalar Double-Precision Floating-Point Value
1310 void cvtss2sd(XMMRegister dst, XMMRegister src);
1311 void cvtss2sd(XMMRegister dst, Address src);
1312
1313 // Convert Scalar Double-Precision Floating-Point Value to Doubleword/Quadword Integer (cvtt* forms convert with truncation)
1314 void cvtsd2siq(Register dst, XMMRegister src);
1315 void cvttsd2sil(Register dst, Address src);
1316 void cvttsd2sil(Register dst, XMMRegister src);
1317 void cvttsd2siq(Register dst, Address src);
1318 void cvttsd2siq(Register dst, XMMRegister src);
1319 void evcvttsd2sisl(Register dst, XMMRegister src);
1320 void evcvttsd2sisl(Register dst, Address src);
1321 void evcvttsd2sisq(Register dst, XMMRegister src);
1322 void evcvttsd2sisq(Register dst, Address src);
1323
1324 // Convert Scalar Single-Precision Floating-Point Value to Doubleword/Quadword Integer (cvtt* forms convert with truncation)
1325 void cvttss2sil(Register dst, XMMRegister src);
1326 void cvttss2siq(Register dst, XMMRegister src);
1327 void cvtss2sil(Register dst, XMMRegister src);
1328 void evcvttss2sisl(Register dst, XMMRegister src);
1329 void evcvttss2sisl(Register dst, Address src);
1330 void evcvttss2sisq(Register dst, XMMRegister src);
1331 void evcvttss2sisq(Register dst, Address src);
1332
1333 // Convert vector double to int
1334 void cvttpd2dq(XMMRegister dst, XMMRegister src);
1335
1336 // Convert vector float and double
1337 void vcvtps2pd(XMMRegister dst, XMMRegister src, int vector_len);
1338 void vcvtpd2ps(XMMRegister dst, XMMRegister src, int vector_len);
1339
1340 // Convert vector float to int/long
1341 void vcvtps2dq(XMMRegister dst, XMMRegister src, int vector_len);
1342 void vcvttps2dq(XMMRegister dst, XMMRegister src, int vector_len);
1343 void evcvttps2dqs(XMMRegister dst, XMMRegister src, int vector_len);
1344 void evcvttps2dqs(XMMRegister dst, Address src, int vector_len);
1345 void evcvttps2qq(XMMRegister dst, XMMRegister src, int vector_len);
1346 void evcvttps2qqs(XMMRegister dst, XMMRegister src, int vector_len);
1347 void evcvttps2qqs(XMMRegister dst, Address src, int vector_len);
1348
1349 // Convert vector long to vector FP
1350 void evcvtqq2ps(XMMRegister dst, XMMRegister src, int vector_len);
1351 void evcvtqq2pd(XMMRegister dst, XMMRegister src, int vector_len);
1352
1353 // Convert vector double to long
1354 void evcvtpd2qq(XMMRegister dst, XMMRegister src, int vector_len);
1355 void evcvttpd2qq(XMMRegister dst, XMMRegister src, int vector_len);
1356 void evcvttpd2qqs(XMMRegister dst, XMMRegister src, int vector_len);
1357 void evcvttpd2qqs(XMMRegister dst, Address src, int vector_len);
1358
1359 // Convert vector double to int
1360 void vcvttpd2dq(XMMRegister dst, XMMRegister src, int vector_len);
1361 void evcvttpd2dqs(XMMRegister dst, XMMRegister src, int vector_len);
1362 void evcvttpd2dqs(XMMRegister dst, Address src, int vector_len);
1363
1364 // Evex casts with truncation
1365 void evpmovwb(XMMRegister dst, XMMRegister src, int vector_len);
1366 void evpmovdw(XMMRegister dst, XMMRegister src, int vector_len);
1367 void evpmovdb(XMMRegister dst, XMMRegister src, int vector_len);
1368 void evpmovqd(XMMRegister dst, XMMRegister src, int vector_len);
1369 void evpmovqb(XMMRegister dst, XMMRegister src, int vector_len);
1370 void evpmovqw(XMMRegister dst, XMMRegister src, int vector_len);
1371
1372 // Evex casts with signed saturation
1373 void evpmovsqd(XMMRegister dst, XMMRegister src, int vector_len);
1374
1375 // Absolute value of packed integer values
1376 void pabsb(XMMRegister dst, XMMRegister src);
1377 void pabsw(XMMRegister dst, XMMRegister src);
1378 void pabsd(XMMRegister dst, XMMRegister src);
1379 void vpabsb(XMMRegister dst, XMMRegister src, int vector_len);
1380 void vpabsw(XMMRegister dst, XMMRegister src, int vector_len);
1381 void vpabsd(XMMRegister dst, XMMRegister src, int vector_len);
1382 void evpabsq(XMMRegister dst, XMMRegister src, int vector_len);
1383
1384 // Divide Scalar Double-Precision Floating-Point Values
1385 void divsd(XMMRegister dst, Address src);
1386 void divsd(XMMRegister dst, XMMRegister src);
1387
1388 // Divide Scalar Single-Precision Floating-Point Values
1389 void divss(XMMRegister dst, Address src);
1390 void divss(XMMRegister dst, XMMRegister src);
1391
1392
1393 void fnstsw_ax();
1394 void fprem();
1395 void fld_d(Address adr);
1396 void fstp_d(Address adr);
1397 void fstp_d(int index);
1398
1399 private:
1400
1401 void emit_farith(int b1, int b2, int i);
1402
1403 public:
1404 // operands that only take the original 32bit registers
1405 void emit_operand32(Register reg, Address adr, int post_addr_length);
1406
1407 void fld_x(Address adr); // extended-precision (80-bit) format
1408 void fstp_x(Address adr); // extended-precision (80-bit) format
1409 void fxrstor(Address src);
1410 void xrstor(Address src);
1411
1412 void fxsave(Address dst);
1413 void xsave(Address dst);
1414
1415 void hlt();
1416
1417 void idivl(Register src);
1418 void eidivl(Register src, bool no_flags);
1419 void divl(Register src); // Unsigned division
1420 void edivl(Register src, bool no_flags); // Unsigned division
1421
1422 void idivq(Register src);
1423 void eidivq(Register src, bool no_flags);
1424 void divq(Register src); // Unsigned division
1425 void edivq(Register src, bool no_flags); // Unsigned division
1426
1427 void imull(Register src);
1428 void eimull(Register src, bool no_flags);
1429 void imull(Register dst, Register src);
1430 void eimull(Register dst, Register src1, Register src2, bool no_flags);
1431 void imull(Register dst, Register src, int value);
1432 void eimull(Register dst, Register src, int value, bool no_flags);
1433 void imull(Register dst, Address src, int value);
1434 void eimull(Register dst, Address src, int value, bool no_flags);
1435 void imull(Register dst, Address src);
1436 void eimull(Register dst, Register src1, Address src2, bool no_flags);
1437
1438 void imulq(Register dst, Register src);
1439 void eimulq(Register dst, Register src, bool no_flags);
1440 void eimulq(Register dst, Register src1, Register src2, bool no_flags);
1441 void imulq(Register dst, Register src, int value);
1442 void eimulq(Register dst, Register src, int value, bool no_flags);
1443 void imulq(Register dst, Address src, int value);
1444 void eimulq(Register dst, Address src, int value, bool no_flags);
1445 void imulq(Register dst, Address src);
1446 void eimulq(Register dst, Address src, bool no_flags);
1447 void eimulq(Register dst, Register src1, Address src2, bool no_flags);
1448 void imulq(Register dst);
1449 void eimulq(Register dst, bool no_flags);
1450
1451 // jcc is the generic conditional branch generator to run-
1452 // time routines, jcc is used for branches to labels. jcc
1453 // takes a branch opcode (cc) and a label (L) and generates
1454 // either a backward branch or a forward branch and links it
1455 // to the label fixup chain. Usage:
1456 //
1457 // Label L; // unbound label
1458 // jcc(cc, L); // forward branch to unbound label
1459 // bind(L); // bind label to the current pc
1460 // jcc(cc, L); // backward branch to bound label
1461 // bind(L); // illegal: a label may be bound only once
1462 //
1463 // Note: The same Label can be used for forward and backward branches
1464 // but it may be bound only once.
1465
1466 void jcc(Condition cc, Label& L, bool maybe_short = true);
1467
1468 // Conditional jump to an 8-bit offset to L.
1469 // WARNING: be very careful using this for forward jumps. If the label is
1470 // not bound within an 8-bit offset of this instruction, a run-time error
1471 // will occur.
1472
1473 // Use macro to record file and line number.
// jccb(cc, L) expands to jccb_0(cc, L, __FILE__, __LINE__), so the source file
// and line of the call site are passed along with the condition and the label.
1474 #define jccb(cc, L) jccb_0(cc, L, __FILE__, __LINE__)
1475
// Target of the jccb macro: receives the condition, the label, and the
// file/line pair supplied by the macro expansion.
1476 void jccb_0(Condition cc, Label& L, const char* file, int line);
1477
1478 void jmp(Address entry); // pc <- entry
1479
1480 // Label operations & relative jumps (PPUM Appendix D)
1481 void jmp(Label& L, bool maybe_short = true); // unconditional jump to L
1482
1483 void jmp(Register entry); // pc <- entry
1484
1485 // Unconditional 8-bit offset jump to L.
1486 // WARNING: be very careful using this for forward jumps. If the label is
1487 // not bound within an 8-bit offset of this instruction, a run-time error
1488 // will occur.
1489
1490 // Use macro to record file and line number.
// jmpb(L) expands to jmpb_0(L, __FILE__, __LINE__), so the source file and
// line of the call site are passed along with the label.
1491 #define jmpb(L) jmpb_0(L, __FILE__, __LINE__)
1492
// Target of the jmpb macro: receives the label and the file/line pair
// supplied by the macro expansion.
1493 void jmpb_0(Label& L, const char* file, int line);
1494
1495 void ldmxcsr( Address src );
1496
1497 void leal(Register dst, Address src);
1498
1499 void leaq(Register dst, Address src);
1500
1501 void lea(Register dst, Label& L);
1502
1503 void lfence();
1504
1505 void lock();
1506 void size_prefix();
1507
1508 void lzcntl(Register dst, Register src);
1509 void elzcntl(Register dst, Register src, bool no_flags);
1510 void lzcntl(Register dst, Address src);
1511 void elzcntl(Register dst, Address src, bool no_flags);
1512
1513 void lzcntq(Register dst, Register src);
1514 void elzcntq(Register dst, Register src, bool no_flags);
1515 void lzcntq(Register dst, Address src);
1516 void elzcntq(Register dst, Address src, bool no_flags);
1517
// Bit mask of memory-ordering constraints; membar() takes a value of this
// type as its order_constraint argument. Each enumerator occupies its own
// bit (bits 0..3), so the values are distinct powers of two.
// NOTE(review): presumably callers OR several enumerators together - confirm.
1518 enum Membar_mask_bits {
1519 StoreStore = 1 << 3,
1520 LoadStore = 1 << 2,
1521 StoreLoad = 1 << 1,
1522 LoadLoad = 1 << 0
1523 };
1524
1525 // Serializes memory and blows flags
1526 void membar(Membar_mask_bits order_constraint);
1527
1528 void mfence();
1529 void sfence();
1530
1531 // Moves
1532
1533 void mov64(Register dst, int64_t imm64);
1534 void mov64(Register dst, int64_t imm64, relocInfo::relocType rtype, int format);
1535
1536 void movb(Address dst, Register src);
1537 void movb(Address dst, int imm8);
1538 void movb(Register dst, Address src);
1539
1540 void movddup(XMMRegister dst, XMMRegister src);
1541 void movddup(XMMRegister dst, Address src);
1542 void vmovddup(XMMRegister dst, Address src, int vector_len);
1543
1544 void kandbl(KRegister dst, KRegister src1, KRegister src2);
1545 void kandwl(KRegister dst, KRegister src1, KRegister src2);
1546 void kanddl(KRegister dst, KRegister src1, KRegister src2);
1547 void kandql(KRegister dst, KRegister src1, KRegister src2);
1548
1549 void korbl(KRegister dst, KRegister src1, KRegister src2);
1550 void korwl(KRegister dst, KRegister src1, KRegister src2);
1551 void kordl(KRegister dst, KRegister src1, KRegister src2);
1552 void korql(KRegister dst, KRegister src1, KRegister src2);
1553
1554 void kxnorwl(KRegister dst, KRegister src1, KRegister src2);
1555
1556 void kxorbl(KRegister dst, KRegister src1, KRegister src2);
1557 void kxorwl(KRegister dst, KRegister src1, KRegister src2);
1558 void kxordl(KRegister dst, KRegister src1, KRegister src2);
1559 void kxorql(KRegister dst, KRegister src1, KRegister src2);
1560 void kmovbl(KRegister dst, Register src);
1561 void kmovbl(Register dst, KRegister src);
1562 void kmovbl(KRegister dst, KRegister src);
1563 void kmovwl(KRegister dst, Register src);
1564 void kmovwl(KRegister dst, Address src);
1565 void kmovwl(Register dst, KRegister src);
1566 void kmovwl(Address dst, KRegister src);
1567 void kmovwl(KRegister dst, KRegister src);
1568 void kmovdl(KRegister dst, Register src);
1569 void kmovdl(Register dst, KRegister src);
1570 void kmovql(KRegister dst, KRegister src);
1571 void kmovql(Address dst, KRegister src);
1572 void kmovql(KRegister dst, Address src);
1573 void kmovql(KRegister dst, Register src);
1574 void kmovql(Register dst, KRegister src);
1575
1576 void knotbl(KRegister dst, KRegister src);
1577 void knotwl(KRegister dst, KRegister src);
1578 void knotdl(KRegister dst, KRegister src);
1579 void knotql(KRegister dst, KRegister src);
1580
1581 void kortestbl(KRegister dst, KRegister src);
1582 void kortestwl(KRegister dst, KRegister src);
1583 void kortestdl(KRegister dst, KRegister src);
1584 void kortestql(KRegister dst, KRegister src);
1585
1586 void kxnorbl(KRegister dst, KRegister src1, KRegister src2);
1587 void kshiftlbl(KRegister dst, KRegister src, int imm8);
1588 void kshiftlql(KRegister dst, KRegister src, int imm8);
1589 void kshiftrbl(KRegister dst, KRegister src, int imm8);
1590 void kshiftrwl(KRegister dst, KRegister src, int imm8);
1591 void kshiftrdl(KRegister dst, KRegister src, int imm8);
1592 void kshiftrql(KRegister dst, KRegister src, int imm8);
1593 void ktestq(KRegister src1, KRegister src2);
1594 void ktestd(KRegister src1, KRegister src2);
1595 void kunpckdql(KRegister dst, KRegister src1, KRegister src2);
1596
1597
1598 void ktestql(KRegister dst, KRegister src);
1599 void ktestdl(KRegister dst, KRegister src);
1600 void ktestwl(KRegister dst, KRegister src);
1601 void ktestbl(KRegister dst, KRegister src);
1602
1603 void movdl(XMMRegister dst, Register src);
1604 void movdl(Register dst, XMMRegister src);
1605 void movdl(XMMRegister dst, Address src);
1606 void movdl(Address dst, XMMRegister src);
1607
1608 // Move Double Quadword
1609 void movdq(XMMRegister dst, Register src);
1610 void movdq(Register dst, XMMRegister src);
1611
1612 // Move Aligned Double Quadword
1613 void movdqa(XMMRegister dst, XMMRegister src);
1614 void movdqa(XMMRegister dst, Address src);
1615
1616 // Move Unaligned Double Quadword
1617 void movdqu(Address dst, XMMRegister src);
1618 void movdqu(XMMRegister dst, Address src);
1619 void movdqu(XMMRegister dst, XMMRegister src);
1620
1621 // Move Unaligned 256bit Vector
1622 void vmovdqu(Address dst, XMMRegister src);
1623 void vmovdqu(XMMRegister dst, Address src);
1624 void vmovdqu(XMMRegister dst, XMMRegister src);
1625
1626 // Move Aligned 256bit Vector
1627 void vmovdqa(XMMRegister dst, Address src);
1628 void vmovdqa(Address dst, XMMRegister src);
1629
1630 // Move Unaligned 512bit Vector
1631 void evmovdqub(XMMRegister dst, XMMRegister src, int vector_len);
1632 void evmovdqub(XMMRegister dst, Address src, int vector_len);
1633 void evmovdqub(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
1634 void evmovdqub(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
1635 void evmovdqub(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
1636
1637 void evmovdquw(XMMRegister dst, XMMRegister src, int vector_len);
1638 void evmovdquw(XMMRegister dst, Address src, int vector_len);
1639 void evmovdquw(Address dst, XMMRegister src, int vector_len);
1640 void evmovdquw(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
1641 void evmovdquw(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
1642 void evmovdquw(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
1643
1644 void evmovdqul(XMMRegister dst, XMMRegister src, int vector_len);
1645 void evmovdqul(XMMRegister dst, Address src, int vector_len);
1646 void evmovdqul(Address dst, XMMRegister src, int vector_len);
1647
1648 void evmovdqul(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
1649 void evmovdqul(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
1650 void evmovdqul(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
1651
1652 void evmovntdquq(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
1653 void evmovntdquq(Address dst, XMMRegister src, int vector_len);
1654
1655 void evmovdquq(Address dst, XMMRegister src, int vector_len);
1656 void evmovdquq(XMMRegister dst, Address src, int vector_len);
1657 void evmovdquq(XMMRegister dst, XMMRegister src, int vector_len);
1658
1659 void evmovdquq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
1660 void evmovdquq(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
1661 void evmovdquq(Address dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
1662
1663 // Move Aligned 512bit Vector
1664 void evmovdqaq(XMMRegister dst, Address src, int vector_len);
1665 void evmovdqaq(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
1666
1667 // Move lower 64bit to high 64bit in 128bit register
1668 void movlhps(XMMRegister dst, XMMRegister src);
1669
1670 void movl(Register dst, int32_t imm32);
1671 void movl(Address dst, int32_t imm32);
1672 void movl(Register dst, Register src);
1673 void movl(Register dst, Address src);
1674 void movl(Address dst, Register src);
1675
1676 void movq(Register dst, Register src);
1677 void movq(Register dst, Address src);
1678 void movq(Address dst, Register src);
1679 void movq(Address dst, int32_t imm32);
1680 void movq(Register dst, int32_t imm32);
1681
1682 // Move Quadword
1683 void movq(Address dst, XMMRegister src);
1684 void movq(XMMRegister dst, Address src);
1685 void movq(XMMRegister dst, XMMRegister src);
1686 void movq(Register dst, XMMRegister src);
1687 void movq(XMMRegister dst, Register src);
1688
// movsbl: destination is a general-purpose register; source is memory or a register.
// NOTE(review): presumably sign-extends a byte to 32 bits — confirm in the definition.
1689 void movsbl(Register dst, Address src);
1690 void movsbl(Register dst, Register src);

// vmovw: transfers between an XMM register and a general-purpose register, in both directions.
1692 void vmovw(XMMRegister dst, Register src);
1693 void vmovw(Register dst, XMMRegister src);

// movsbq: same operand forms as movsbl.
// NOTE(review): presumably sign-extends a byte to 64 bits — confirm in the definition.
1695 void movsbq(Register dst, Address src);
1696 void movsbq(Register dst, Register src);
1697
1698 // Move signed 32bit immediate to 64bit extending sign
// The immediate is an int32_t, so the parameter is named imm32. It was
// previously declared as `imm64`, which contradicted both its type and the
// comment above. Parameter names in a declaration do not affect callers.
1699 void movslq(Address dst, int32_t imm32);
1700
// movslq / movswl / movswq: each has a memory-source and a register-source
// form, and the destination is always a general-purpose register.
// NOTE(review): by mnemonic these look like sign-extending moves
// (32->64, 16->32, 16->64) — confirm in the definitions.
1701 void movslq(Register dst, Address src);
1702 void movslq(Register dst, Register src);

1704 void movswl(Register dst, Address src);
1705 void movswl(Register dst, Register src);

1707 void movswq(Register dst, Address src);
1708 void movswq(Register dst, Register src);
1709
// movups and vmovups: load (memory -> XMM) and store (XMM -> memory) forms.
// The v-prefixed variants additionally take a vector_len.
1710 void movups(XMMRegister dst, Address src);
1711 void vmovups(XMMRegister dst, Address src, int vector_len);
1712 void movups(Address dst, XMMRegister src);
1713 void vmovups(Address dst, XMMRegister src, int vector_len);
1714
// movw overloads: immediate -> memory, memory -> register, register -> memory.
// The immediate is passed as a plain int named imm16.
1715 void movw(Address dst, int imm16);
1716 void movw(Register dst, Address src);
1717 void movw(Address dst, Register src);
1718
// movzbl / movzbq / movzwl / movzwq: each has a memory-source and a
// register-source form, and the destination is always a general-purpose register.
// NOTE(review): by mnemonic these look like zero-extending moves
// (byte/word to 32/64 bits) — confirm in the definitions.
1719 void movzbl(Register dst, Address src);
1720 void movzbl(Register dst, Register src);

1722 void movzbq(Register dst, Address src);
1723 void movzbq(Register dst, Register src);

1725 void movzwl(Register dst, Address src);
1726 void movzwl(Register dst, Register src);

1728 void movzwq(Register dst, Address src);
1729 void movzwq(Register dst, Register src);
1730
1731 // Unsigned multiply with RAX destination register
// Only the source operand (memory or register) is passed. The e-prefixed
// variants add a `no_flags` argument.
// NOTE(review): the e-prefixed forms look like APX no-flags encodings — confirm.
1732 void mull(Address src);
1733 void emull(Address src, bool no_flags);
1734 void mull(Register src);
1735 void emull(Register src, bool no_flags);

1737 void mulq(Address src);
1738 void emulq(Address src, bool no_flags);
1739 void mulq(Register src);
1740 void emulq(Register src, bool no_flags);
// mulxq takes two destination registers and one explicit source register.
1741 void mulxq(Register dst1, Register dst2, Register src);
1742
1743 // Multiply Scalar Double-Precision Floating-Point Values
// Source is memory or an XMM register; the destination is an XMM register.
1744 void mulsd(XMMRegister dst, Address src);
1745 void mulsd(XMMRegister dst, XMMRegister src);

1747 // Multiply Scalar Single-Precision Floating-Point Values
// Same operand forms as mulsd.
1748 void mulss(XMMRegister dst, Address src);
1749 void mulss(XMMRegister dst, XMMRegister src);
1750
// negl / negq: single-operand forms on a register or a memory location.
// The e-prefixed variants take a separate destination register, a source
// (register or memory) and a `no_flags` flag.
// NOTE(review): presumably the APX new-data-destination / no-flags forms — confirm.
1751 void negl(Register dst);
1752 void enegl(Register dst, Register src, bool no_flags);
1753 void negl(Address dst);
1754 void enegl(Register dst, Address src, bool no_flags);

1756 void negq(Register dst);
1757 void enegq(Register dst, Register src, bool no_flags);
1758 void negq(Address dst);
1759 void enegq(Register dst, Address src, bool no_flags);
1760
// nop: `i` defaults to 1.
// NOTE(review): presumably the number of bytes of padding to emit — confirm in the definition.
1761 void nop(uint i = 1);
1762
// notl / notq: single-register forms. The e-prefixed variants take a separate
// destination and source register. Unlike most e-prefixed declarations in this
// header, they have no `no_flags` parameter.
1763 void notl(Register dst);
1764 void enotl(Register dst, Register src);

1766 void notq(Register dst);
1767 void enotq(Register dst, Register src);
1768
// Bit-test family. btsq and btrq take a memory operand and an immediate;
// btq takes a register with either an immediate or a second register.
// NOTE(review): by mnemonic bts/btr set/reset the tested bit and bt only tests — confirm.
1769 void btsq(Address dst, int imm8);
1770 void btrq(Address dst, int imm8);
1771 void btq(Register src, int imm8);
1772 void btq(Register dst, Register src);
1773
// eorw: three-register form with a `no_flags` flag. No two-operand orw is
// declared in this part of the header.
1774 void eorw(Register dst, Register src1, Register src2, bool no_flags);

// orl overloads are interleaved with their e-prefixed counterparts. Each
// e-variant takes an explicit destination register, the same source operands,
// and a trailing `no_flags` flag.
// NOTE(review): presumably the APX new-data-destination / no-flags forms — confirm.
1776 void orl(Address dst, int32_t imm32);
1777 void eorl(Register dst, Address src, int32_t imm32, bool no_flags);
1778 void orl(Register dst, int32_t imm32);
1779 void eorl(Register dst, Register src, int32_t imm32, bool no_flags);
1780 void orl(Register dst, Address src);
1781 void eorl(Register dst, Register src1, Address src2, bool no_flags);
1782 void orl(Register dst, Register src);
1783 void eorl(Register dst, Register src1, Register src2, bool no_flags);
1784 void orl(Address dst, Register src);
1785 void eorl(Register dst, Address src1, Register src2, bool no_flags);

// orb: only memory-destination forms are declared (immediate or register source).
1787 void orb(Address dst, int imm8);
1788 void eorb(Register dst, Address src, int imm8, bool no_flags);
1789 void orb(Address dst, Register src);
1790 void eorb(Register dst, Address src1, Register src2, bool no_flags);
1791
// orq overloads, interleaved with their e-prefixed counterparts, which take an
// explicit destination register plus a `no_flags` flag. Immediates are int32_t.
// NOTE(review): orq_imm32 / eorq_imm32 presumably force a 4-byte immediate
// encoding even when the value fits in 8 bits — confirm in the definition.
1792 void orq(Address dst, int32_t imm32);
1793 void eorq(Register dst, Address src, int32_t imm32, bool no_flags);
1794 void orq(Address dst, Register src);
1795 void eorq(Register dst, Address src1, Register src2, bool no_flags);
1796 void orq(Register dst, int32_t imm32);
1797 void eorq(Register dst, Register src, int32_t imm32, bool no_flags);
1798 void orq_imm32(Register dst, int32_t imm32);
1799 void eorq_imm32(Register dst, Register src, int32_t imm32, bool no_flags);
1800 void orq(Register dst, Address src);
1801 void eorq(Register dst, Register src1, Address src2, bool no_flags);
1802 void orq(Register dst, Register src);
1803 void eorq(Register dst, Register src1, Register src2, bool no_flags);
1804
1805 // Pack with signed saturation
// Two-operand forms (dst, src) and v-prefixed three-operand forms
// (dst, nds, src) that also take a vector_len.
1806 void packsswb(XMMRegister dst, XMMRegister src);
1807 void vpacksswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1808 void packssdw(XMMRegister dst, XMMRegister src);
1809 void vpackssdw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);

1811 // Pack with unsigned saturation
// packuswb is the only one here that also has a memory-source overload.
1812 void packuswb(XMMRegister dst, XMMRegister src);
1813 void packuswb(XMMRegister dst, Address src);
1814 void packusdw(XMMRegister dst, XMMRegister src);
1815 void vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1816 void vpackusdw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1817
1818 // Permutations
// There are two families of signatures. Forms taking an `imm8` carry the
// control as an immediate; forms taking (dst, nds, src) pass a second vector
// register instead.
// NOTE(review): which of nds/src holds the indices versus the data is not
// visible from the declarations — check each definition.
// vpermq(dst, src, imm8) without vector_len, vperm2i128 and vperm2f128 take no
// vector length argument.
1819 void vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
1820 void vpermq(XMMRegister dst, XMMRegister src, int imm8);
1821 void vpermq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1822 void vpermb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1823 void vpermb(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1824 void vpermw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1825 void vpermd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1826 void vpermd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1827 void vpermps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1828 void vperm2i128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
1829 void vperm2f128(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
1830 void vpermilps(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
1831 void vpermilps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1832 void vpermilpd(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
1833 void vpermpd(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
// evpmultishiftqb names its middle operand `ctl` rather than `nds`.
1834 void evpmultishiftqb(XMMRegister dst, XMMRegister ctl, XMMRegister src, int vector_len);
// evpermi2* / evpermt2*: register-only (dst, nds, src, vector_len) forms, one per element type.
1835 void evpermi2b(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1836 void evpermi2w(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1837 void evpermi2d(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1838 void evpermi2q(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1839 void evpermi2ps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1840 void evpermi2pd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1841 void evpermt2b(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1842 void evpermt2w(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1843 void evpermt2d(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1844 void evpermt2q(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1845
// pause: takes no operands.
1846 void pause();

1848 // Undefined Instruction
1849 void ud2();
1850
1851 // SSE4.2 string instructions
// The second operand is an XMM register or memory; imm8 is the control byte
// passed through by the caller.
1852 void pcmpestri(XMMRegister xmm1, XMMRegister xmm2, int imm8);
1853 void pcmpestri(XMMRegister xmm1, Address src, int imm8);
1854
// Byte compares. Naming pattern visible in the signatures:
//   pcmp*   - two-operand XMM form
//   vpcmp*  - three-operand XMM form with vector_len; result in an XMM register
//   evpcmp* - result written to a KRegister (kdst), optionally under a KRegister mask
1855 void pcmpeqb(XMMRegister dst, XMMRegister src);
// vpcmpCCbwd takes the condition as a raw `cond_encoding` integer.
1856 void vpcmpCCbwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, int vector_len);

1858 void vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1859 void vpcmpeqb(XMMRegister dst, XMMRegister src1, Address src2, int vector_len);
1860 void evpcmpeqb(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
1861 void evpcmpeqb(KRegister kdst, XMMRegister nds, Address src, int vector_len);
1862 void evpcmpeqb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);

1864 void vpcmpgtb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1865 void evpcmpgtb(KRegister kdst, XMMRegister nds, Address src, int vector_len);
1866 void evpcmpgtb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);

// evpcmpu{b,w,d,q}: the comparison is selected by a ComparisonPredicate
// argument; the result goes to a KRegister.
1868 void evpcmpub(KRegister kdst, XMMRegister nds, XMMRegister src, ComparisonPredicate vcc, int vector_len);

1870 void evpcmpuw(KRegister kdst, XMMRegister nds, XMMRegister src, ComparisonPredicate vcc, int vector_len);
1871 void evpcmpuw(KRegister kdst, XMMRegister nds, Address src, ComparisonPredicate vcc, int vector_len);

1873 void evpcmpud(KRegister kdst, XMMRegister nds, XMMRegister src, ComparisonPredicate vcc, int vector_len);
1874 void evpcmpuq(KRegister kdst, XMMRegister nds, XMMRegister src, ComparisonPredicate vcc, int vector_len);
1875
// Word compares: two-operand, three-operand (XMM result) and KRegister-result forms.
1876 void pcmpeqw(XMMRegister dst, XMMRegister src);
1877 void vpcmpeqw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1878 void vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1879 void evpcmpeqw(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
1880 void evpcmpeqw(KRegister kdst, XMMRegister nds, Address src, int vector_len);

1882 void vpcmpgtw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);

// Doubleword compares. The evpcmpeqd overloads declared here always take a KRegister mask.
1884 void pcmpeqd(XMMRegister dst, XMMRegister src);
1885 void vpcmpeqd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1886 void evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, int vector_len);
1887 void evpcmpeqd(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);

// Quadword compares. evpcmpeqq is declared both with and without a KRegister mask.
1889 void pcmpeqq(XMMRegister dst, XMMRegister src);
1890 void evpcmpeqq(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, int vector_len);
// vpcmpCCq takes the condition as a raw `cond_encoding` integer.
1891 void vpcmpCCq(XMMRegister dst, XMMRegister nds, XMMRegister src, int cond_encoding, int vector_len);
1892 void vpcmpeqq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1893 void evpcmpeqq(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
1894 void evpcmpeqq(KRegister kdst, XMMRegister nds, Address src, int vector_len);

1896 void pcmpgtq(XMMRegister dst, XMMRegister src);
1897 void vpcmpgtq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1898
// *movmsk*: XMM source, general-purpose register destination. The v-prefixed
// forms take a `vec_enc` argument, named differently from the `vector_len`
// used elsewhere in this header.
1899 void pmovmskb(Register dst, XMMRegister src);
1900 void vpmovmskb(Register dst, XMMRegister src, int vec_enc);
1901 void vmovmskps(Register dst, XMMRegister src, int vec_enc);
1902 void vmovmskpd(Register dst, XMMRegister src, int vec_enc);
// vpmaskmovd / vpmaskmovq: load forms only. The middle XMM operand is named
// `nds` in one declaration and `mask` in the other.
// NOTE(review): presumably both are the mask operand — confirm and consider unifying the name.
1903 void vpmaskmovd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
1904 void vpmaskmovq(XMMRegister dst, XMMRegister mask, Address src, int vector_len);


// vmaskmovps / vmaskmovpd: load (dst register, memory src) and store
// (memory dst, register src) forms. The XMM mask is the third parameter in
// both directions.
1907 void vmaskmovps(XMMRegister dst, Address src, XMMRegister mask, int vector_len);
1908 void vmaskmovpd(XMMRegister dst, Address src, XMMRegister mask, int vector_len);
1909 void vmaskmovps(Address dst, XMMRegister src, XMMRegister mask, int vector_len);
1910 void vmaskmovpd(Address dst, XMMRegister src, XMMRegister mask, int vector_len);
1911
1912 // SSE 4.1 extract
// Each mnemonic has a register-destination and a memory-destination overload.
// NOTE(review): imm8 presumably selects the element to extract — confirm.
1913 void pextrd(Register dst, XMMRegister src, int imm8);
1914 void pextrq(Register dst, XMMRegister src, int imm8);
1915 void pextrd(Address dst, XMMRegister src, int imm8);
1916 void pextrq(Address dst, XMMRegister src, int imm8);
1917 void pextrb(Register dst, XMMRegister src, int imm8);
1918 void pextrb(Address dst, XMMRegister src, int imm8);
1919 // SSE 2 extract
1920 void pextrw(Register dst, XMMRegister src, int imm8);
1921 void pextrw(Address dst, XMMRegister src, int imm8);
1922
1923 // SSE 4.1 insert
// Each pinsr* has a register-source and a memory-source overload; insertps is
// XMM to XMM only.
// NOTE(review): imm8 presumably selects the destination element — confirm.
1924 void pinsrd(XMMRegister dst, Register src, int imm8);
1925 void pinsrq(XMMRegister dst, Register src, int imm8);
1926 void pinsrb(XMMRegister dst, Register src, int imm8);
1927 void pinsrd(XMMRegister dst, Address src, int imm8);
1928 void pinsrq(XMMRegister dst, Address src, int imm8);
1929 void pinsrb(XMMRegister dst, Address src, int imm8);
1930 void insertps(XMMRegister dst, XMMRegister src, int imm8);
1931 // SSE 2 insert
1932 void pinsrw(XMMRegister dst, Register src, int imm8);
1933 void pinsrw(XMMRegister dst, Address src, int imm8);

1935 // AVX insert
// Three-operand forms with an extra `nds` XMM operand. Only register-source
// overloads are declared here, and none take a vector_len.
1936 void vpinsrd(XMMRegister dst, XMMRegister nds, Register src, int imm8);
1937 void vpinsrb(XMMRegister dst, XMMRegister nds, Register src, int imm8);
1938 void vpinsrq(XMMRegister dst, XMMRegister nds, Register src, int imm8);
1939 void vpinsrw(XMMRegister dst, XMMRegister nds, Register src, int imm8);
1940 void vinsertps(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
1941
1942 // Zero extend moves
// The suffix names the source and destination element widths (bw, bd, bq, wd,
// wq, dq). v-prefixed forms take a vector_len; the ev-prefixed forms here are
// memory-source loads, with or without a KRegister mask.
1943 void pmovzxbw(XMMRegister dst, XMMRegister src);
1944 void pmovzxbw(XMMRegister dst, Address src);
1945 void pmovzxbd(XMMRegister dst, XMMRegister src);
1946 void vpmovzxbw(XMMRegister dst, Address src, int vector_len);
1947 void vpmovzxbw(XMMRegister dst, XMMRegister src, int vector_len);
1948 void vpmovzxbd(XMMRegister dst, XMMRegister src, int vector_len);
1949 void vpmovzxbq(XMMRegister dst, XMMRegister src, int vector_len);
1950 void vpmovzxwd(XMMRegister dst, XMMRegister src, int vector_len);
1951 void vpmovzxwq(XMMRegister dst, XMMRegister src, int vector_len);
1952 void pmovzxdq(XMMRegister dst, XMMRegister src);
1953 void vpmovzxdq(XMMRegister dst, XMMRegister src, int vector_len);
1954 void evpmovzxbw(XMMRegister dst, KRegister mask, Address src, int vector_len);
1955 void evpmovzxbd(XMMRegister dst, KRegister mask, Address src, int vector_len);
1956 void evpmovzxbd(XMMRegister dst, Address src, int vector_len);
1957
1958 // Sign extend moves
// Register-to-register forms only; the v-prefixed variants take a vector_len.
1959 void pmovsxbd(XMMRegister dst, XMMRegister src);
1960 void pmovsxbq(XMMRegister dst, XMMRegister src);
1961 void pmovsxbw(XMMRegister dst, XMMRegister src);
1962 void pmovsxwd(XMMRegister dst, XMMRegister src);
1963 void vpmovsxbd(XMMRegister dst, XMMRegister src, int vector_len);
1964 void vpmovsxbq(XMMRegister dst, XMMRegister src, int vector_len);
1965 void vpmovsxbw(XMMRegister dst, XMMRegister src, int vector_len);
1966 void vpmovsxwd(XMMRegister dst, XMMRegister src, int vector_len);
1967 void vpmovsxwq(XMMRegister dst, XMMRegister src, int vector_len);
1968 void vpmovsxdq(XMMRegister dst, XMMRegister src, int vector_len);

// evpmovwb / evpmovdb: store forms (memory destination, XMM source).
// NOTE(review): by mnemonic these look like narrowing (down-converting) stores,
// word->byte and dword->byte — confirm in the definitions.
1970 void evpmovwb(Address dst, XMMRegister src, int vector_len);
1971 void evpmovwb(Address dst, KRegister mask, XMMRegister src, int vector_len);
1972 void evpmovdb(Address dst, XMMRegister src, int vector_len);
1973
1974 // Multiply add
1975 void pmaddwd(XMMRegister dst, XMMRegister src);
1976 void vpmaddwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1977 void vpmaddubsw(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
// vpmadd52{l,h}uq and evpmadd52{l,h}uq: the v-forms accept a register or memory
// second source; the ev-forms are register-only and have an extra overload
// taking a KRegister mask and a `merge` flag.
// NOTE(review): the v-/ev- split presumably selects VEX vs. EVEX encoding — confirm.
1978 void vpmadd52luq(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
1979 void vpmadd52luq(XMMRegister dst, XMMRegister src1, Address src2, int vector_len);
1980 void evpmadd52luq(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
1981 void evpmadd52luq(XMMRegister dst, KRegister mask, XMMRegister src1, XMMRegister src2, bool merge, int vector_len);
1982 void vpmadd52huq(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
1983 void vpmadd52huq(XMMRegister dst, XMMRegister src1, Address src2, int vector_len);
1984 void evpmadd52huq(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
1985 void evpmadd52huq(XMMRegister dst, KRegister mask, XMMRegister src1, XMMRegister src2, bool merge, int vector_len);

1987 // Multiply add accumulate
1988 void evpdpwssd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1989
// popq: destination is memory or a register.
1990 void popq(Address dst);
1991 void popq(Register dst);

// popcntl: source is memory or a register. The e-prefixed variants keep the
// same operands and add a `no_flags` flag.
1993 void popcntl(Register dst, Address src);
1994 void epopcntl(Register dst, Address src, bool no_flags);
1995 void popcntl(Register dst, Register src);
1996 void epopcntl(Register dst, Register src, bool no_flags);

// evpopcnt{b,w,d,q}: vector forms that always take a KRegister mask and a `merge` flag.
1998 void evpopcntb(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
1999 void evpopcntw(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
2000 void evpopcntd(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
2001 void evpopcntq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);

// popcntq: same operand forms as popcntl.
2003 void popcntq(Register dst, Address src);
2004 void epopcntq(Register dst, Address src, bool no_flags);
2005 void popcntq(Register dst, Register src);
2006 void epopcntq(Register dst, Register src, bool no_flags);
2007
2008 // Prefetches (SSE, SSE2, 3DNOW only)
// Every variant takes a single memory operand and returns nothing.

2010 void prefetchnta(Address src);
2011 void prefetchr(Address src);
2012 void prefetcht0(Address src);
2013 void prefetcht1(Address src);
2014 void prefetcht2(Address src);
2015 void prefetchw(Address src);
2016
2017 // Shuffle Bytes
// The second source is an XMM register or memory. The ev-form adds a KRegister
// mask and a `merge` flag.
2018 void pshufb(XMMRegister dst, XMMRegister src);
2019 void pshufb(XMMRegister dst, Address src);
2020 void vpshufb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2021 void vpshufb(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2022 void evpshufb(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);


2025 // Shuffle Packed Doublewords
// `mode` is an int supplied by the caller.
// NOTE(review): presumably the immediate shuffle control byte — confirm.
2026 void pshufd(XMMRegister dst, XMMRegister src, int mode);
2027 void pshufd(XMMRegister dst, Address src, int mode);
2028 void vpshufd(XMMRegister dst, XMMRegister src, int mode, int vector_len);

2030 // Shuffle Packed High/Low Words
// Only pshuflw has a memory-source overload in this group.
2031 void pshufhw(XMMRegister dst, XMMRegister src, int mode);
2032 void pshuflw(XMMRegister dst, XMMRegister src, int mode);
2033 void pshuflw(XMMRegister dst, Address src, int mode);
2034 void vpshufhw(XMMRegister dst, XMMRegister src, int mode, int vector_len);
2035 void vpshuflw(XMMRegister dst, XMMRegister src, int mode, int vector_len);
2036
2037 // Shuffle floats and doubles
// Parameters are named to match the convention used by the surrounding
// declarations: destination first, then source(s), then the immediate control
// byte, then (for the v-prefixed forms) the vector length. The declarations
// were previously unnamed, so the header did not say which int was which.
2038 void shufps(XMMRegister dst, XMMRegister src, int imm8);
2039 void shufpd(XMMRegister dst, XMMRegister src, int imm8);
2040 void vshufps(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len);
2041 void vshufpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len);
2042
2043 // Shuffle packed values at 128 bit granularity
// Takes both an immediate (imm8) and a vector_len.
2044 void evshufi64x2(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len);

2046 // Shift Right by bytes Logical DoubleQuadword Immediate
// In-place on `dst`; `shift` is the immediate count.
2047 void psrldq(XMMRegister dst, int shift);
2048 // Shift Left by bytes Logical DoubleQuadword Immediate
2049 void pslldq(XMMRegister dst, int shift);
2050
2051 // Logical Compare 128bit
// The second operand is an XMM register or memory. No result register is
// passed. NOTE(review): presumably only flags are set — confirm.
2052 void ptest(XMMRegister dst, XMMRegister src);
2053 void ptest(XMMRegister dst, Address src);
2054 // Logical Compare 256bit
2055 void vptest(XMMRegister dst, XMMRegister src);
2056 void vptest(XMMRegister dst, Address src);

// evptest*: the result is written to a KRegister.
2058 void evptestmb(KRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2059 void evptestmd(KRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2060 void evptestnmd(KRegister dst, XMMRegister nds, XMMRegister src, int vector_len);

2062 // Vector compare
// This vptest overload takes an explicit vector_len, unlike the
// "Logical Compare 256bit" overloads above.
2063 void vptest(XMMRegister dst, XMMRegister src, int vector_len);
2064 void vtestps(XMMRegister dst, XMMRegister src, int vector_len);
2065
2066 // Interleave Low Bytes
2067 void punpcklbw(XMMRegister dst, XMMRegister src);
2068 void punpcklbw(XMMRegister dst, Address src);

2070 // Interleave Low Doublewords
// This group also contains vpunpcklqdq, whose suffix indicates quadword elements.
2071 void punpckldq(XMMRegister dst, XMMRegister src);
2072 void punpckldq(XMMRegister dst, Address src);
2073 void vpunpckldq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2074 void vpunpcklqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);


2077 // Interleave High Word
2078 void vpunpckhwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);

2080 // Interleave Low Word
2081 void vpunpcklwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);

2083 // Interleave High Doublewords
// This group also contains vpunpckhqdq, whose suffix indicates quadword elements.
2084 void vpunpckhdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2085 void vpunpckhqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);

2087 // Interleave Low Quadwords
2088 void punpcklqdq(XMMRegister dst, XMMRegister src);

// ev-prefixed quadword interleaves: each has an unmasked overload and one
// taking a KRegister mask plus a `merge` flag.
2090 void evpunpcklqdq(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
2091 void evpunpcklqdq(XMMRegister dst, KRegister mask, XMMRegister src1, XMMRegister src2, bool merge, int vector_len);
2092 void evpunpckhqdq(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
2093 void evpunpckhqdq(XMMRegister dst, KRegister mask, XMMRegister src1, XMMRegister src2, bool merge, int vector_len);
2094
2095 // Vector sum of absolute difference.
2096 void vpsadbw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);

// pushq: only the memory-operand overload is declared in this part of the header.
2098 void pushq(Address src);
2099
// rcll / rclq / rcrq: register with an immediate count. The e-prefixed
// variants take a separate destination and source register. Unlike most
// e-prefixed declarations in this header, they have no `no_flags` parameter.
// NOTE(review): by mnemonic these are rotate-through-carry operations — confirm.
2100 void rcll(Register dst, int imm8);
2101 void ercll(Register dst, Register src, int imm8);

2103 void rclq(Register dst, int imm8);
2104 void erclq(Register dst, Register src, int imm8);

2106 void rcrq(Register dst, int imm8);
2107 void ercrq(Register dst, Register src, int imm8);

// rcpps / rcpss: XMM register-to-register forms only.
2109 void rcpps(XMMRegister dst, XMMRegister src);

2111 void rcpss(XMMRegister dst, XMMRegister src);

// rdtsc: takes no operands.
2113 void rdtsc();

// ret: takes an int named imm16.
// NOTE(review): presumably the number of bytes to pop on return — confirm.
2115 void ret(int imm16);
2116
// Rotates. Each of roll / rorl / rolq / rorq has a form without a count
// argument and a form with an imm8 count.
// NOTE(review): the count-less form presumably rotates by CL — confirm.
// The e-prefixed variants take a separate destination register and a `no_flags` flag.
2117 void roll(Register dst);
2118 void eroll(Register dst, Register src, bool no_flags);

2120 void roll(Register dst, int imm8);
2121 void eroll(Register dst, Register src, int imm8, bool no_flags);

2123 void rorl(Register dst);
2124 void erorl(Register dst, Register src, bool no_flags);

2126 void rorl(Register dst, int imm8);
2127 void erorl(Register dst, Register src, int imm8, bool no_flags);

2129 void rolq(Register dst);
2130 void erolq(Register dst, Register src, bool no_flags);
2131 void rolq(Register dst, int imm8);
2132 void erolq(Register dst, Register src, int imm8, bool no_flags);
2133 void rorq(Register dst);
2134 void erorq(Register dst, Register src, bool no_flags);
2135 void rorq(Register dst, int imm8);
2136 void erorq(Register dst, Register src, int imm8, bool no_flags);
// rorxl / rorxq: separate destination, a register or memory source, and an
// imm8 count. They have no `no_flags` parameter.
2137 void rorxl(Register dst, Register src, int imm8);
2138 void rorxl(Register dst, Address src, int imm8);
2139 void rorxq(Register dst, Register src, int imm8);
2140 void rorxq(Register dst, Address src, int imm8);
2141
// sall / sarl / salq / sarq. Each has four basic forms: register or memory
// operand, with an imm8 count or without a count argument.
// NOTE(review): the count-less forms presumably shift by CL — confirm.
// Each basic form is followed by its e-prefixed counterpart, which takes a
// separate destination register and a `no_flags` flag.
2142 void sall(Register dst, int imm8);
2143 void esall(Register dst, Register src, int imm8, bool no_flags);
2144 void sall(Register dst);
2145 void esall(Register dst, Register src, bool no_flags);
2146 void sall(Address dst, int imm8);
2147 void esall(Register dst, Address src, int imm8, bool no_flags);
2148 void sall(Address dst);
2149 void esall(Register dst, Address src, bool no_flags);

2151 void sarl(Address dst, int imm8);
2152 void esarl(Register dst, Address src, int imm8, bool no_flags);
2153 void sarl(Address dst);
2154 void esarl(Register dst, Address src, bool no_flags);
2155 void sarl(Register dst, int imm8);
2156 void esarl(Register dst, Register src, int imm8, bool no_flags);
2157 void sarl(Register dst);
2158 void esarl(Register dst, Register src, bool no_flags);

2160 void salq(Register dst, int imm8);
2161 void esalq(Register dst, Register src, int imm8, bool no_flags);
2162 void salq(Register dst);
2163 void esalq(Register dst, Register src, bool no_flags);
2164 void salq(Address dst, int imm8);
2165 void esalq(Register dst, Address src, int imm8, bool no_flags);
2166 void salq(Address dst);
2167 void esalq(Register dst, Address src, bool no_flags);

2169 void sarq(Address dst, int imm8);
2170 void esarq(Register dst, Address src, int imm8, bool no_flags);
2171 void sarq(Address dst);
2172 void esarq(Register dst, Address src, bool no_flags);
2173 void sarq(Register dst, int imm8);
2174 void esarq(Register dst, Register src, int imm8, bool no_flags);
2175 void sarq(Register dst);
2176 void esarq(Register dst, Register src, bool no_flags);
2177
// sbbl / sbbq overloads: imm32 -> memory, imm32 -> register,
// memory -> register, register -> register. No e-prefixed counterparts are
// declared for this family in this part of the header.
// NOTE(review): by mnemonic, subtract with borrow — confirm.
2178 void sbbl(Address dst, int32_t imm32);
2179 void sbbl(Register dst, int32_t imm32);
2180 void sbbl(Register dst, Address src);
2181 void sbbl(Register dst, Register src);

2183 void sbbq(Address dst, int32_t imm32);
2184 void sbbq(Register dst, int32_t imm32);
2185 void sbbq(Register dst, Address src);
2186 void sbbq(Register dst, Register src);

// setb: takes a Condition and a destination register. The condition comes
// first, unlike the dst-first order of most declarations here.
2188 void setb(Condition cc, Register dst);
2189
// palignr / vpalignr / evalignq: immediate-controlled align operations on XMM
// registers. evalignq takes its immediate as uint8_t and has no vector_len
// parameter, unlike vpalignr.
2190 void palignr(XMMRegister dst, XMMRegister src, int imm8);
2191 void vpalignr(XMMRegister dst, XMMRegister src1, XMMRegister src2, int imm8, int vector_len);
2192 void evalignq(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);

// pblendw / vblendps: immediate-controlled blends.
2194 void pblendw(XMMRegister dst, XMMRegister src, int imm8);
2195 void vblendps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int imm8, int vector_len);
2196
// SHA-1 helpers: all XMM register-to-register; sha1rnds4 also takes an immediate.
2197 void sha1rnds4(XMMRegister dst, XMMRegister src, int imm8);
2198 void sha1nexte(XMMRegister dst, XMMRegister src);
2199 void sha1msg1(XMMRegister dst, XMMRegister src);
2200 void sha1msg2(XMMRegister dst, XMMRegister src);
2201 // xmm0 is implicit additional source to the following instruction.
2202 void sha256rnds2(XMMRegister dst, XMMRegister src);
2203 void sha256msg1(XMMRegister dst, XMMRegister src);
2204 void sha256msg2(XMMRegister dst, XMMRegister src);
// sha512rnds2 takes its additional source explicitly as `nds`.
2205 void sha512rnds2(XMMRegister dst, XMMRegister nds, XMMRegister src);
2206 void sha512msg1(XMMRegister dst, XMMRegister src);
2207 void sha512msg2(XMMRegister dst, XMMRegister src);
2208
// Double-register shifts. The 32-bit forms (shldl / shrdl) come with and
// without an immediate count; the 64-bit forms (shldq / shrdq) are declared
// only with an immediate. The count is typed int8_t here, not int.
// NOTE(review): the count-less forms presumably shift by CL — confirm.
// The e-prefixed variants take (dst, src1, src2) plus a `no_flags` flag.
2209 void shldl(Register dst, Register src);
2210 void eshldl(Register dst, Register src1, Register src2, bool no_flags);
2211 void shldl(Register dst, Register src, int8_t imm8);
2212 void eshldl(Register dst, Register src1, Register src2, int8_t imm8, bool no_flags);
2213 void shrdl(Register dst, Register src);
2214 void eshrdl(Register dst, Register src1, Register src2, bool no_flags);
2215 void shrdl(Register dst, Register src, int8_t imm8);
2216 void eshrdl(Register dst, Register src1, Register src2, int8_t imm8, bool no_flags);
2217 void shldq(Register dst, Register src, int8_t imm8);
2218 void eshldq(Register dst, Register src1, Register src2, int8_t imm8, bool no_flags);
2219 void shrdq(Register dst, Register src, int8_t imm8);
2220 void eshrdq(Register dst, Register src1, Register src2, int8_t imm8, bool no_flags);
2221
// shll / shlq: register-operand forms only, with an imm8 count or without a
// count argument. NOTE(review): the count-less form presumably shifts by CL — confirm.
// Each is followed by its e-prefixed counterpart, which takes a separate
// destination register and a `no_flags` flag.
2222 void shll(Register dst, int imm8);
2223 void eshll(Register dst, Register src, int imm8, bool no_flags);
2224 void shll(Register dst);
2225 void eshll(Register dst, Register src, bool no_flags);

2227 void shlq(Register dst, int imm8);
2228 void eshlq(Register dst, Register src, int imm8, bool no_flags);
2229 void shlq(Register dst);
2230 void eshlq(Register dst, Register src, bool no_flags);

// shrl / shrq: as above, plus memory-operand forms.
2232 void shrl(Register dst, int imm8);
2233 void eshrl(Register dst, Register src, int imm8, bool no_flags);
2234 void shrl(Register dst);
2235 void eshrl(Register dst, Register src, bool no_flags);
2236 void shrl(Address dst);
2237 void eshrl(Register dst, Address src, bool no_flags);
2238 void shrl(Address dst, int imm8);
2239 void eshrl(Register dst, Address src, int imm8, bool no_flags);

2241 void shrq(Register dst, int imm8);
2242 void eshrq(Register dst, Register src, int imm8, bool no_flags);
2243 void shrq(Register dst);
2244 void eshrq(Register dst, Register src, bool no_flags);
2245 void shrq(Address dst);
2246 void eshrq(Register dst, Address src, bool no_flags);
2247 void shrq(Address dst, int imm8);
2248 void eshrq(Register dst, Address src, int imm8, bool no_flags);
2249
// smovl: takes no operands. The trailing "QQQ generic?" is an open question
// left by the original author.
2250 void smovl(); // QQQ generic?

2252 // Compute Square Root of Scalar Double-Precision Floating-Point Value
2253 void sqrtsd(XMMRegister dst, Address src);
2254 void sqrtsd(XMMRegister dst, XMMRegister src);

// roundsd: source is memory or an XMM register; `rmode` is an int32_t.
// NOTE(review): presumably the rounding-mode immediate — confirm.
2256 void roundsd(XMMRegister dst, Address src, int32_t rmode);
2257 void roundsd(XMMRegister dst, XMMRegister src, int32_t rmode);

2259 // Compute Square Root of Scalar Single-Precision Floating-Point Value
2260 void sqrtss(XMMRegister dst, Address src);
2261 void sqrtss(XMMRegister dst, XMMRegister src);

// std: takes no operands.
2263 void std();

// stmxcsr: single memory destination operand.
2265 void stmxcsr( Address dst );
2266
// subl / subq overloads: imm32 -> memory, register -> memory,
// imm32 -> register, memory -> register, register -> register. Each is
// followed by its e-prefixed counterpart, which takes an explicit destination
// register, the same sources, and a `no_flags` flag.
// NOTE(review): presumably the APX new-data-destination / no-flags forms — confirm.
2267 void subl(Address dst, int32_t imm32);
2268 void esubl(Register dst, Address src, int32_t imm32, bool no_flags);
2269 void subl(Address dst, Register src);
2270 void esubl(Register dst, Address src1, Register src2, bool no_flags);
2271 void subl(Register dst, int32_t imm32);
2272 void esubl(Register dst, Register src, int32_t imm32, bool no_flags);
2273 void subl(Register dst, Address src);
2274 void esubl(Register dst, Register src1, Address src2, bool no_flags);
2275 void subl(Register dst, Register src);
2276 void esubl(Register dst, Register src1, Register src2, bool no_flags);

2278 void subq(Address dst, int32_t imm32);
2279 void esubq(Register dst, Address src, int32_t imm32, bool no_flags);
2280 void subq(Address dst, Register src);
2281 void esubq(Register dst, Address src1, Register src2, bool no_flags);
2282 void subq(Register dst, int32_t imm32);
2283 void esubq(Register dst, Register src, int32_t imm32, bool no_flags);
2284 void subq(Register dst, Address src);
2285 void esubq(Register dst, Register src1, Address src2, bool no_flags);
2286 void subq(Register dst, Register src);
2287 void esubq(Register dst, Register src1, Register src2, bool no_flags);

2289 // Force generation of a 4 byte immediate value even if it fits into 8bit
// Register-destination forms only.
2290 void subl_imm32(Register dst, int32_t imm32);
2291 void esubl_imm32(Register dst, Register src, int32_t imm32, bool no_flags);
2292 void subq_imm32(Register dst, int32_t imm32);
2293 void esubq_imm32(Register dst, Register src, int32_t imm32, bool no_flags);
2294
2295 // Subtract Scalar Double-Precision Floating-Point Values
// Source is memory or an XMM register; the destination is an XMM register.
2296 void subsd(XMMRegister dst, Address src);
2297 void subsd(XMMRegister dst, XMMRegister src);

2299 // Subtract Scalar Single-Precision Floating-Point Values
// Same operand forms as subsd.
2300 void subss(XMMRegister dst, Address src);
2301 void subss(XMMRegister dst, XMMRegister src);
2302
// testb: memory or register operand against an imm8. The register form has a
// `use_ral` flag that defaults to true.
// NOTE(review): presumably permits the shorter AL-specific encoding when dst
// is rax — confirm in the definition.
2303 void testb(Address dst, int imm8);
2304 void testb(Register dst, int imm8, bool use_ral = true);

// testl / testq overloads: memory vs imm32, register vs imm32,
// register vs register, register vs memory.
2306 void testl(Address dst, int32_t imm32);
2307 void testl(Register dst, int32_t imm32);
2308 void testl(Register dst, Register src);
2309 void testl(Register dst, Address src);

2311 void testq(Address dst, int32_t imm32);
2312 void testq(Register dst, int32_t imm32);
2313 void testq(Register dst, Register src);
2314 void testq(Register dst, Address src);
2315
2316 // BMI - count trailing zeros
// Source is a register or memory. The e-prefixed variants keep the same
// operands and add a `no_flags` flag.
2317 void tzcntl(Register dst, Register src);
2318 void etzcntl(Register dst, Register src, bool no_flags);
2319 void tzcntl(Register dst, Address src);
2320 void etzcntl(Register dst, Address src, bool no_flags);
2321 void tzcntq(Register dst, Register src);
2322 void etzcntq(Register dst, Register src, bool no_flags);
2323 void tzcntq(Register dst, Address src);
2324 void etzcntq(Register dst, Address src, bool no_flags);
2325
2326 // Unordered Compare Scalar Double-Precision Floating-Point Values and set EFLAGS
// The second operand is memory or an XMM register.
2327 void ucomisd(XMMRegister dst, Address src);
2328 void ucomisd(XMMRegister dst, XMMRegister src);

2330 // Unordered Compare Scalar Single-Precision Floating-Point Values and set EFLAGS
// Same operand forms as ucomisd.
2331 void ucomiss(XMMRegister dst, Address src);
2332 void ucomiss(XMMRegister dst, XMMRegister src);
2333
2334 void xabort(int8_t imm8);
2335
2336 void xaddb(Address dst, Register src);
2337 void xaddw(Address dst, Register src);
2338 void xaddl(Address dst, Register src);
2339 void xaddq(Address dst, Register src);
2340
2341 void xbegin(Label& abort, relocInfo::relocType rtype = relocInfo::none);
2342
2343 void xchgb(Register reg, Address adr);
2344 void xchgw(Register reg, Address adr);
2345 void xchgl(Register reg, Address adr);
2346 void xchgl(Register dst, Register src);
2347
// q-suffixed XCHG: register<->memory and register<->register forms.
2348 void xchgq(Register reg, Address adr);
2349 void xchgq(Register dst, Register src);
2350
// XEND; takes no operands.
// NOTE(review): by mnemonic this closes the region opened by xbegin -- confirm
// in assembler_x86.cpp.
2351 void xend();
2352
2353 // Get Value of Extended Control Register
// Takes no explicit operands; any registers involved are implicit in the
// instruction and not expressed in this signature.
2354 void xgetbv();
2355
// l-suffixed XOR. Each two-operand xorl overload is immediately followed by
// the matching e-prefixed overload, which names a separate Register dst in
// addition to the source operand(s) and takes a no_flags argument.
// Note that exorl always writes a Register dst, even where the paired xorl
// form has a memory destination.
// NOTE(review): the e-prefixed forms are presumably the Intel APX extended
// encodings -- confirm in assembler_x86.cpp.
2356 void xorl(Register dst, int32_t imm32);
2357 void exorl(Register dst, Register src, int32_t imm32, bool no_flags);
2358 void xorl(Address dst, int32_t imm32);
2359 void exorl(Register dst, Address src, int32_t imm32, bool no_flags);
2360 void xorl(Register dst, Address src);
2361 void exorl(Register dst, Register src1, Address src2, bool no_flags);
2362 void xorl(Register dst, Register src);
2363 void exorl(Register dst, Register src1, Register src2, bool no_flags);
2364 void xorl(Address dst, Register src);
2365 void exorl(Register dst, Address src1, Register src2, bool no_flags);
2366
// b- and w-suffixed XOR, each two-operand form paired with an e-prefixed
// three-operand form taking no_flags. Only memory-involving forms are declared
// here: xorb has (memory, register) and (register, memory); xorw has only
// (register, memory).
2367 void xorb(Address dst, Register src);
2368 void exorb(Register dst, Address src1, Register src2, bool no_flags);
2369 void xorb(Register dst, Address src);
2370 void exorb(Register dst, Register src1, Address src2, bool no_flags);
2371 void xorw(Register dst, Address src);
2372 void exorw(Register dst, Register src1, Address src2, bool no_flags);
2373
// q-suffixed XOR, mirroring the xorl/exorl set: every two-operand xorq
// overload is followed by its e-prefixed counterpart with a separate Register
// dst and a no_flags argument. Immediates remain int32_t.
2374 void xorq(Register dst, Address src);
2375 void exorq(Register dst, Register src1, Address src2, bool no_flags);
2376 void xorq(Address dst, int32_t imm32);
2377 void exorq(Register dst, Address src, int32_t imm32, bool no_flags);
2378 void xorq(Register dst, Register src);
2379 void exorq(Register dst, Register src1, Register src2, bool no_flags);
2380 void xorq(Register dst, int32_t imm32);
2381 void exorq(Register dst, Register src, int32_t imm32, bool no_flags);
2382 void xorq(Address dst, Register src);
2383 void exorq(Register dst, Address src1, Register src2, bool no_flags);
2384
2385 // AVX 3-operands scalar instructions (encoded with VEX prefix)
2386
// Scalar add/div/fused-multiply-add/mul/sub in three-operand form: a
// destination, a separate first source (named nds, or src1 for vfnmadd231sd)
// and a second source that is either an XMM register or, for add/div/mul/sub,
// a memory Address. The fused multiply-add variants are declared in
// register-only form.
// evdivsd and evfnmadd213sd additionally take an EvexRoundPrefix rmode.
// NOTE(review): 'nds' presumably abbreviates "non-destructive source" --
// confirm against the VEX encoding helpers.
2387 void vaddsd(XMMRegister dst, XMMRegister nds, Address src);
2388 void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
2389 void vaddss(XMMRegister dst, XMMRegister nds, Address src);
2390 void vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src);
2391 void vdivsd(XMMRegister dst, XMMRegister nds, Address src);
2392 void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
2393 void evdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src, EvexRoundPrefix rmode);
2394 void vdivss(XMMRegister dst, XMMRegister nds, Address src);
2395 void vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src);
2396 void vfmadd231sd(XMMRegister dst, XMMRegister nds, XMMRegister src);
2397 void vfnmadd213sd(XMMRegister dst, XMMRegister nds, XMMRegister src);
2398 void evfnmadd213sd(XMMRegister dst, XMMRegister nds, XMMRegister src, EvexRoundPrefix rmode);
2399 void vfnmadd231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2);
2400 void vfmadd231ss(XMMRegister dst, XMMRegister nds, XMMRegister src);
2401 void vmulsd(XMMRegister dst, XMMRegister nds, Address src);
2402 void vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
2403 void vmulss(XMMRegister dst, XMMRegister nds, Address src);
2404 void vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src);
2405 void vsubsd(XMMRegister dst, XMMRegister nds, Address src);
2406 void vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
2407 void vsubss(XMMRegister dst, XMMRegister nds, Address src);
2408 void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src);
2409
// Scalar maximum / minimum for single (ss) and double (sd) precision,
// three-operand register-only forms; no memory-source overloads are declared.
2410 void vmaxss(XMMRegister dst, XMMRegister nds, XMMRegister src);
2411 void vmaxsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
2412 void vminss(XMMRegister dst, XMMRegister nds, XMMRegister src);
2413 void vminsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
2414
// SARX / SHLX / SHRX three-operand shifts for l and q operand sizes.
// src1 may be a general-purpose register or a memory Address; src2 is always
// a register.
// NOTE(review): per the BMI2 definition of these mnemonics, src1 should be the
// value being shifted and src2 the shift count -- confirm operand mapping in
// assembler_x86.cpp.
2415 void sarxl(Register dst, Register src1, Register src2);
2416 void sarxl(Register dst, Address src1, Register src2);
2417 void sarxq(Register dst, Register src1, Register src2);
2418 void sarxq(Register dst, Address src1, Register src2);
2419 void shlxl(Register dst, Register src1, Register src2);
2420 void shlxl(Register dst, Address src1, Register src2);
2421 void shlxq(Register dst, Register src1, Register src2);
2422 void shlxq(Register dst, Address src1, Register src2);
2423 void shrxl(Register dst, Register src1, Register src2);
2424 void shrxl(Register dst, Address src1, Register src2);
2425 void shrxq(Register dst, Register src1, Register src2);
2426 void shrxq(Register dst, Address src1, Register src2);
2427
// BZHI (zero high bits, by mnemonic) for q and l operand sizes; only
// register-register-register forms are declared.
2428 void bzhiq(Register dst, Register src1, Register src2);
2429 void bzhil(Register dst, Register src1, Register src2);
2430
// PEXT / PDEP (parallel bit extract / deposit, by mnemonic) for l and q
// operand sizes. src1 is always a register; src2 is a register in the first
// four overloads and a memory Address in the last four.
2431 void pextl(Register dst, Register src1, Register src2);
2432 void pdepl(Register dst, Register src1, Register src2);
2433 void pextq(Register dst, Register src1, Register src2);
2434 void pdepq(Register dst, Register src1, Register src2);
2435 void pextl(Register dst, Register src1, Address src2);
2436 void pdepl(Register dst, Register src1, Address src2);
2437 void pextq(Register dst, Register src1, Address src2);
2438 void pdepq(Register dst, Register src1, Address src2);
2439
2440
2441 //====================VECTOR ARITHMETIC=====================================
2442 // Add Packed Floating-Point Values
// Two-operand forms (addpd/addps) accumulate into dst; addpd also has a
// memory-source overload, addps does not. The v-prefixed forms take a separate
// nds source plus a vector_len argument, with register and memory sources.
2443 void addpd(XMMRegister dst, XMMRegister src);
2444 void addpd(XMMRegister dst, Address src);
2445 void addps(XMMRegister dst, XMMRegister src);
2446 void vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2447 void vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2448 void vaddpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2449 void vaddps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2450
2451 // Subtract Packed Floating-Point Values
// Two-operand forms are register-only; the v-prefixed three-operand forms
// accept a register or memory source and a vector_len argument.
2452 void subpd(XMMRegister dst, XMMRegister src);
2453 void subps(XMMRegister dst, XMMRegister src);
2454 void vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2455 void vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2456 void vsubpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2457 void vsubps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2458
2459 // Multiply Packed Floating-Point Values
// mulpd has register and memory sources; mulps is register-only. The
// v-prefixed three-operand forms accept a register or memory source and a
// vector_len argument.
2460 void mulpd(XMMRegister dst, XMMRegister src);
2461 void mulpd(XMMRegister dst, Address src);
2462 void mulps(XMMRegister dst, XMMRegister src);
2463 void vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2464 void vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2465 void vmulpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2466 void vmulps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2467
// Packed fused multiply-add, "231" operand-order variant, for double (pd) and
// single (ps) precision. The last source is a register or a memory Address;
// vector_len selects the vector width.
2468 void vfmadd231pd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2469 void vfmadd231ps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2470 void vfmadd231pd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2471 void vfmadd231ps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2472
2473 // Divide Packed Floating-Point Values
// Two-operand forms are register-only; the v-prefixed three-operand forms
// accept a register or memory source and a vector_len argument.
2474 void divpd(XMMRegister dst, XMMRegister src);
2475 void divps(XMMRegister dst, XMMRegister src);
2476 void vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2477 void vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2478 void vdivpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2479 void vdivps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2480
2481 // Sqrt Packed Floating-Point Values
// Unary: dst plus a single source (register or memory) and a vector_len
// argument. Only v-prefixed forms are declared in this group.
2482 void vsqrtpd(XMMRegister dst, XMMRegister src, int vector_len);
2483 void vsqrtpd(XMMRegister dst, Address src, int vector_len);
2484 void vsqrtps(XMMRegister dst, XMMRegister src, int vector_len);
2485 void vsqrtps(XMMRegister dst, Address src, int vector_len);
2486
2487 // Round Packed Double precision value.
// This group also declares the scalar double-precision forms (vrndscalesd,
// vroundsd), which take two sources and no vector_len. In every overload the
// rounding control is passed as an int32_t rmode.
2488 void vroundpd(XMMRegister dst, XMMRegister src, int32_t rmode, int vector_len);
2489 void vroundpd(XMMRegister dst, Address src, int32_t rmode, int vector_len);
2490 void vrndscalesd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int32_t rmode);
2491 void vrndscalepd(XMMRegister dst, XMMRegister src, int32_t rmode, int vector_len);
2492 void vrndscalepd(XMMRegister dst, Address src, int32_t rmode, int vector_len);
2493 void vroundsd(XMMRegister dst, XMMRegister src, XMMRegister src2, int32_t rmode);
2494 void vroundsd(XMMRegister dst, XMMRegister src, Address src2, int32_t rmode);
2495
2496 // Bitwise Logical AND of Packed Floating-Point Values
// Also declares andnpd (the AND-NOT variant, by mnemonic). Two-operand forms
// are register-only; the v-prefixed forms accept a register or memory source
// and a vector_len argument.
2497 void andpd(XMMRegister dst, XMMRegister src);
2498 void andnpd(XMMRegister dst, XMMRegister src);
2499 void andps(XMMRegister dst, XMMRegister src);
2500 void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2501 void vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2502 void vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2503 void vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2504
2505 // Bitwise Logical OR of Packed Floating-Point Values
// Only the two-operand, register-only, double-precision form is declared here.
2506 void orpd(XMMRegister dst, XMMRegister src);
2507
// Unpack packed double-precision values: 'h' = high, 'l' = low variant (by
// mnemonic). Two-operand, register-only forms.
2508 void unpckhpd(XMMRegister dst, XMMRegister src);
2509 void unpcklpd(XMMRegister dst, XMMRegister src);
2510
2511 // Bitwise Logical XOR of Packed Floating-Point Values
// Two-operand forms are register-only; the v-prefixed three-operand forms
// accept a register or memory source and a vector_len argument.
2512 void xorpd(XMMRegister dst, XMMRegister src);
2513 void xorps(XMMRegister dst, XMMRegister src);
2514 void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2515 void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2516 void vxorpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2517 void vxorps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2518
2519 // Add horizontal packed integers
// Word (w) and doubleword (d) element sizes. All overloads are register-only;
// the v-prefixed forms add an nds source and a vector_len argument.
2520 void vphaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2521 void vphaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2522 void phaddw(XMMRegister dst, XMMRegister src);
2523 void phaddd(XMMRegister dst, XMMRegister src);
2524
2525 // Add packed integers
// Element sizes b/w/d/q. Of the two-operand forms only paddd has a
// memory-source overload. The v-prefixed three-operand forms are declared for
// every element size with both register and memory sources.
2526 void paddb(XMMRegister dst, XMMRegister src);
2527 void paddw(XMMRegister dst, XMMRegister src);
2528 void paddd(XMMRegister dst, XMMRegister src);
2529 void paddd(XMMRegister dst, Address src);
2530 void paddq(XMMRegister dst, XMMRegister src);
2531 void vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2532 void vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2533 void vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2534 void vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2535 void vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2536 void vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2537 void vpaddd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2538 void vpaddq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2539
2540 // FP16 instructions
// Scalar half-precision (sh-suffixed) add/sub/mul/div/max/min, square root and
// fused multiply-add. All overloads are register-only; vsqrtsh is unary
// (dst, src) while the others take two sources.
2541 void vaddsh(XMMRegister dst, XMMRegister nds, XMMRegister src);
2542 void vsubsh(XMMRegister dst, XMMRegister nds, XMMRegister src);
2543 void vmulsh(XMMRegister dst, XMMRegister nds, XMMRegister src);
2544 void vdivsh(XMMRegister dst, XMMRegister nds, XMMRegister src);
2545 void vmaxsh(XMMRegister dst, XMMRegister nds, XMMRegister src);
2546 void vminsh(XMMRegister dst, XMMRegister nds, XMMRegister src);
2547 void vsqrtsh(XMMRegister dst, XMMRegister src);
2548 void vfmadd132sh(XMMRegister dst, XMMRegister src1, XMMRegister src2);
2549
2550 // Saturating packed instructions.
// Saturating add and subtract on byte (b) and word (w) elements; by mnemonic,
// the 's' forms are signed-saturating and the 'us' forms unsigned-saturating.
// The first half of the list takes a register source, the second half repeats
// every entry with a memory Address source. The ev-prefixed overloads
// additionally take a KRegister mask and a 'merge' flag.
// NOTE(review): 'merge' presumably chooses merge-masking over zero-masking --
// confirm in assembler_x86.cpp.
2551 void vpaddsb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2552 void vpaddsw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2553 void vpaddusb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2554 void vpaddusw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2555 void evpaddsb(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2556 void evpaddsw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2557 void evpaddusb(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2558 void evpaddusw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2559 void vpsubsb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2560 void vpsubsw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2561 void vpsubusb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2562 void vpsubusw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2563 void evpsubsb(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2564 void evpsubsw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2565 void evpsubusb(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2566 void evpsubusw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2567 void vpaddsb(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2568 void vpaddsw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2569 void vpaddusb(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2570 void vpaddusw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2571 void evpaddsb(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2572 void evpaddsw(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2573 void evpaddusb(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2574 void evpaddusw(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2575 void vpsubsb(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2576 void vpsubsw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2577 void vpsubusb(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2578 void vpsubusw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2579 void evpsubsb(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2580 void evpsubsw(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2581 void evpsubusb(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2582 void evpsubusw(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2583
2584 // Leaf level assembler routines for masked operations.
// Common signature shape: (dst, KRegister mask, nds, src, bool merge,
// int vector_len), where src is an XMM register or a memory Address.
// NOTE(review): 'merge' presumably chooses merge-masking over zero-masking --
// confirm in assembler_x86.cpp.
// Masked add and subtract: packed integers (b/w/d/q) and packed
// floating point (ps/pd).
2585 void evpaddb(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2586 void evpaddb(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2587 void evpaddw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2588 void evpaddw(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2589 void evpaddd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2590 void evpaddd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2591 void evpaddq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2592 void evpaddq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2593 void evaddps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2594 void evaddps(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2595 void evaddpd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2596 void evaddpd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2597 void evpsubb(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2598 void evpsubb(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2599 void evpsubw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2600 void evpsubw(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2601 void evpsubd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2602 void evpsubd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2603 void evpsubq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2604 void evpsubq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2605 void evsubps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2606 void evsubps(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2607 void evsubpd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2608 void evsubpd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
// Masked multiply and divide. Packed-integer multiplies (evpmulhw, evpmullw,
// evpmulld, evpmullq) and packed floating-point multiply/divide (ps/pd).
// evpmulhw is declared with a register source only; every other entry has
// both register and memory-source overloads.
2609 void evpmulhw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2610 void evpmullw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2611 void evpmullw(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2612 void evpmulld(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2613 void evpmulld(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2614 void evpmullq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2615 void evpmullq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2616 void evmulps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2617 void evmulps(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2618 void evmulpd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2619 void evmulpd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2620 void evdivps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2621 void evdivps(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2622 void evdivpd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2623 void evdivpd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
// Masked packed absolute value (evpabs{b,w,d,q}): unary, so there is no nds
// operand -- just dst, mask, a register or memory src, merge and vector_len.
// Masked packed fused multiply-add (evpfma213ps/pd) follows, using the usual
// (dst, mask, nds, src, merge, vector_len) shape.
// NOTE(review): the "213" in evpfma213* presumably refers to the FMA operand
// ordering -- confirm which instruction is emitted in assembler_x86.cpp.
2624 void evpabsb(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
2625 void evpabsb(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
2626 void evpabsw(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
2627 void evpabsw(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
2628 void evpabsd(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
2629 void evpabsd(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
2630 void evpabsq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
2631 void evpabsq(XMMRegister dst, KRegister mask, Address src, bool merge, int vector_len);
2632 void evpfma213ps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2633 void evpfma213ps(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2634 void evpfma213pd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2635 void evpfma213pd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
// Masked permute for b/w/d/q element sizes, each with a register and a
// memory-source overload.
// NOTE(review): which of nds/src carries the permutation indices and which
// the data is not visible from the signatures -- confirm in assembler_x86.cpp.
2636 void evpermb(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2637 void evpermb(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2638 void evpermw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2639 void evpermw(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2640 void evpermd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2641 void evpermd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2642 void evpermq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2643 void evpermq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
// Masked shifts whose last operand is an XMM register: logical left (sll),
// logical right (srl) and arithmetic right (sra) for w/d/q elements. These
// overloads are distinguished from the immediate-count overloads of the same
// names by taking (nds, src) instead of (src, int shift).
// Masked packed square root (evsqrtps/pd) follows.
// NOTE(review): evsqrtps/evsqrtpd take an nds operand although square root is
// unary -- check in assembler_x86.cpp whether nds is actually encoded.
2644 void evpsllw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2645 void evpslld(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2646 void evpsllq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2647 void evpsrlw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2648 void evpsrld(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2649 void evpsrlq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2650 void evpsraw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2651 void evpsrad(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2652 void evpsraq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2653 void evsqrtps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2654 void evsqrtps(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2655 void evsqrtpd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2656 void evsqrtpd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2657
// Masked shifts by an integer count: same mnemonics as the register-count
// overloads, but taking a single XMM src and an 'int shift' argument.
// NOTE(review): the valid range of 'shift' is not expressed in the type;
// presumably it is encoded as an 8-bit immediate -- confirm in the .cpp.
2658 void evpsllw(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
2659 void evpslld(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
2660 void evpsllq(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
2661 void evpsrlw(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
2662 void evpsrld(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
2663 void evpsrlq(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
2664 void evpsraw(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
2665 void evpsrad(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
2666 void evpsraq(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
2667
// Masked variable shifts ('v' infix: per-element shift counts, by mnemonic):
// logical left (sllv), logical right (srlv) and arithmetic right (srav) for
// w/d/q elements. Register-only forms.
2668 void evpsllvw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2669 void evpsllvd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2670 void evpsllvq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2671 void evpsrlvw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2672 void evpsrlvd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2673 void evpsrlvq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2674 void evpsravw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2675 void evpsravd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2676 void evpsravq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
// Masked packed signed-integer maximum / minimum for b/w/d/q elements. The
// first eight overloads take a register source; the last eight repeat the
// same mnemonics with a memory Address source.
2677 void evpmaxsb(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2678 void evpmaxsw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2679 void evpmaxsd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2680 void evpmaxsq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2681 void evpminsb(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2682 void evpminsw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2683 void evpminsd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2684 void evpminsq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2685 void evpmaxsb(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2686 void evpmaxsw(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2687 void evpmaxsd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2688 void evpmaxsq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2689 void evpminsb(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2690 void evpminsw(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2691 void evpminsd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2692 void evpminsq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
// Masked packed-integer bitwise OR / AND / XOR. Declared only for d and q
// granularity, each with a register and a memory-source overload.
2693 void evpord(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2694 void evpord(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2695 void evporq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2696 void evporq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2697 void evpandd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2698 void evpandd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2699 void evpandq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2700 void evpandq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2701 void evpxord(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2702 void evpxord(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2703 void evpxorq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2704 void evpxorq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2705
// Masked packed rotates for d and q elements: rotate left (prol*) and rotate
// right (pror*). The plain forms take an 'int shift' count; the 'v' forms
// take the count operand as an XMM register. Register-only.
2706 void evprold(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
2707 void evprolq(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
2708 void evprolvd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2709 void evprolvq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2710 void evprord(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
2711 void evprorq(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
2712 void evprorvd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2713 void evprorvq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2714
// Masked ternary logic for d and q granularity. Unlike the other masked
// routines, imm8 is the second parameter (before the mask). src3 is an XMM
// register or a memory Address.
// NOTE(review): per the VPTERNLOG definition, dst is presumably also the first
// of the three logic inputs and imm8 the truth table -- confirm in the .cpp.
2715 void evpternlogd(XMMRegister dst, int imm8, KRegister mask, XMMRegister src2, XMMRegister src3, bool merge, int vector_len);
2716 void evpternlogd(XMMRegister dst, int imm8, KRegister mask, XMMRegister src2, Address src3, bool merge, int vector_len);
2717 void evpternlogq(XMMRegister dst, int imm8, KRegister mask, XMMRegister src2, XMMRegister src3, bool merge, int vector_len);
2718 void evpternlogq(XMMRegister dst, int imm8, KRegister mask, XMMRegister src2, Address src3, bool merge, int vector_len);
2719
// Masked packed leading-zero count for d and q elements; unary, register-only.
2720 void evplzcntd(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
2721 void evplzcntq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
2722
2723 // Float16 Vector instructions.
// Packed half-precision (ph-suffixed) add/sub/div/mul/min/max, fused
// multiply-add and square root. These overloads take no KRegister mask or
// merge flag. Each has a register and a memory-source form; evsqrtph is unary.
2724 void evaddph(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2725 void evaddph(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2726 void evsubph(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2727 void evsubph(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2728 void evdivph(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2729 void evdivph(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2730 void evmulph(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2731 void evmulph(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2732 void evminph(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2733 void evminph(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2734 void evmaxph(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2735 void evmaxph(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2736 void evfmadd132ph(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2737 void evfmadd132ph(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2738 void evsqrtph(XMMRegister dst, XMMRegister src1, int vector_len);
2739 void evsqrtph(XMMRegister dst, Address src1, int vector_len);
2740
2741 // Sub packed integers
// Element sizes b/w/d/q. The two-operand forms are register-only; the
// v-prefixed three-operand forms are declared with both register and memory
// sources and take a vector_len argument.
2742 void psubb(XMMRegister dst, XMMRegister src);
2743 void psubw(XMMRegister dst, XMMRegister src);
2744 void psubd(XMMRegister dst, XMMRegister src);
2745 void psubq(XMMRegister dst, XMMRegister src);
2746 void vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2747 void vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2748 void vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2749 void vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2750 void vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2751 void vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2752 void vpsubd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2753 void vpsubq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2754
2755 // Multiply packed integers (only shorts and ints)
// Despite the heading, this group also declares the unmasked quadword
// multiply (evpmullq), the doubleword-to-quadword multiplies (pmuludq,
// vpmuludq, vpmuldq) and the unsigned high-word multiply (vpmulhuw).
// Memory-source overloads exist only for vpmullw, vpmulld and evpmullq.
2756 void pmullw(XMMRegister dst, XMMRegister src);
2757 void pmulld(XMMRegister dst, XMMRegister src);
2758 void pmuludq(XMMRegister dst, XMMRegister src);
2759 void vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2760 void vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2761 void evpmullq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2762 void vpmuludq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2763 void vpmuldq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2764 void vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2765 void vpmulld(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2766 void evpmullq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2767 void vpmulhuw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2768
2769 // Minimum of packed integers
2770 void pminsb(XMMRegister dst, XMMRegister src);
2771 void vpminsb(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
2772 void pminsw(XMMRegister dst, XMMRegister src);
2773 void vpminsw(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
2774 void pminsd(XMMRegister dst, XMMRegister src);
2775 void vpminsd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
2776 void vpminsq(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
2777 void minps(XMMRegister dst, XMMRegister src);
2778 void vminps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
2779 void minpd(XMMRegister dst, XMMRegister src);
2780 void vminpd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
2781
2782 // AVX10.2 floating point minmax instructions
2783 void eminmaxsh(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
2784 void eminmaxss(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
2785 void eminmaxsd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8);
2786 void evminmaxph(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int imm8, int vector_len);
2787 void evminmaxph(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int imm8, int vector_len);
2788 void evminmaxps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int imm8, int vector_len);
2789 void evminmaxps(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int imm8, int vector_len);
2790 void evminmaxpd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int imm8, int vector_len);
2791 void evminmaxpd(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int imm8, int vector_len);
2792
2793 // Maximum of packed integers
2794 void pmaxsb(XMMRegister dst, XMMRegister src);
2795 void vpmaxsb(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
2796 void pmaxsw(XMMRegister dst, XMMRegister src);
2797 void vpmaxsw(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
2798 void pmaxsd(XMMRegister dst, XMMRegister src);
2799 void vpmaxsd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
2800 void vpmaxsq(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
2801 void maxps(XMMRegister dst, XMMRegister src);
2802 void vmaxps(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
2803 void maxpd(XMMRegister dst, XMMRegister src);
2804 void vmaxpd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
2805
2806 // Unsigned maximum packed integers.
2807 void vpmaxub(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
2808 void vpmaxuw(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
2809 void vpmaxud(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
2810 void vpmaxub(XMMRegister dst, XMMRegister src1, Address src2, int vector_len);
2811 void vpmaxuw(XMMRegister dst, XMMRegister src1, Address src2, int vector_len);
2812 void vpmaxud(XMMRegister dst, XMMRegister src1, Address src2, int vector_len);
2813 void evpmaxub(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2814 void evpmaxuw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2815 void evpmaxud(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2816 void evpmaxuq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2817 void evpmaxub(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2818 void evpmaxuw(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2819 void evpmaxud(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2820 void evpmaxuq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2821
2822 // Unsigned minimum packed integers.
2823 void vpminub(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
2824 void vpminuw(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
2825 void vpminud(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
2826 void vpminub(XMMRegister dst, XMMRegister src1, Address src2, int vector_len);
2827 void vpminuw(XMMRegister dst, XMMRegister src1, Address src2, int vector_len);
2828 void vpminud(XMMRegister dst, XMMRegister src1, Address src2, int vector_len);
2829 void evpminub(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2830 void evpminuw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2831 void evpminud(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2832 void evpminuq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2833 void evpminub(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2834 void evpminuw(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2835 void evpminud(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2836 void evpminuq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2837
2838 // Shift left packed integers
2839 void psllw(XMMRegister dst, int shift);
2840 void pslld(XMMRegister dst, int shift);
2841 void psllq(XMMRegister dst, int shift);
2842 void psllw(XMMRegister dst, XMMRegister shift);
2843 void pslld(XMMRegister dst, XMMRegister shift);
2844 void psllq(XMMRegister dst, XMMRegister shift);
2845 void vpsllw(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2846 void vpslld(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2847 void vpsllq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2848 void vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2849 void vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2850 void vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2851 void vpslldq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2852
2853 // Logical shift right packed integers
2854 void psrlw(XMMRegister dst, int shift);
2855 void psrld(XMMRegister dst, int shift);
2856 void psrlq(XMMRegister dst, int shift);
2857 void psrlw(XMMRegister dst, XMMRegister shift);
2858 void psrld(XMMRegister dst, XMMRegister shift);
2859 void psrlq(XMMRegister dst, XMMRegister shift);
2860 void vpsrlw(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2861 void vpsrld(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2862 void vpsrlq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2863 void vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2864 void vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2865 void vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2866 void vpsrldq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2867 void evpsrlvw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2868 void evpsllvw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2869
2870 // Arithmetic shift right packed integers (shorts and ints; longs only via the evpsraq forms below)
2871 void psraw(XMMRegister dst, int shift);
2872 void psrad(XMMRegister dst, int shift);
2873 void psraw(XMMRegister dst, XMMRegister shift);
2874 void psrad(XMMRegister dst, XMMRegister shift);
2875 void vpsraw(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2876 void vpsrad(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2877 void vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2878 void vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2879 void evpsravw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2880 void evpsraq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2881 void evpsraq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2882
2883 // Variable shift left packed integers
2884 void vpsllvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2885 void vpsllvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2886
2887 // Variable shift right packed integers
2888 void vpsrlvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2889 void vpsrlvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2890
2891 // Variable shift right arithmetic packed integers
2892 void vpsravd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2893 void evpsravq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2894
2895 void vpshldvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2896 void vpshrdvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2897
2898 // And packed integers
2899 void pand(XMMRegister dst, XMMRegister src);
2900 void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2901 void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2902 void evpandq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2903 void evpandq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2904
2905 // Andn packed integers
2906 void pandn(XMMRegister dst, XMMRegister src);
2907 void vpandn(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2908
2909 // Or packed integers
2910 void por(XMMRegister dst, XMMRegister src);
2911 void vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2912 void vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2913 void evporq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2914 void evporq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2915
2916 // Xor packed integers
2917 void pxor(XMMRegister dst, XMMRegister src);
2918 void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2919 void vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2920 void vpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2921 void evpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2922 void evpxorq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2923
2924 // Ternary logic instruction.
2925 void vpternlogd(XMMRegister dst, int imm8, XMMRegister src2, XMMRegister src3, int vector_len);
2926 void vpternlogd(XMMRegister dst, int imm8, XMMRegister src2, Address src3, int vector_len);
2927 void vpternlogq(XMMRegister dst, int imm8, XMMRegister src2, XMMRegister src3, int vector_len);
2928 void vpternlogq(XMMRegister dst, int imm8, XMMRegister src2, Address src3, int vector_len);
2929
2930 // Vector compress/expand instructions.
2931 void evpcompressb(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
2932 void evpcompressw(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
2933 void evpcompressd(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
2934 void evpcompressq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
2935 void evcompressps(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
2936 void evcompresspd(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
2937
2938 void evpexpandb(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
2939 void evpexpandw(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
2940 void evpexpandd(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
2941 void evpexpandq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
2942 void evexpandps(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
2943 void evexpandpd(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
2944
2945 // Vector Rotate Left/Right instruction.
2946 void evprolvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2947 void evprolvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2948 void evprorvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2949 void evprorvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2950 void evprold(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2951 void evprolq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2952 void evprord(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2953 void evprorq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2954
2955 // vinserti forms
2956 void vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
2957 void vinserti128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
2958 void vinserti32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
2959 void vinserti32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
2960 void vinserti64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
2961 void evinserti64x2(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8, int vector_len);
2962
2963 // vinsertf forms
2964 void vinsertf128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
2965 void vinsertf128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
2966 void vinsertf32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
2967 void vinsertf32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
2968 void vinsertf64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
2969 void vinsertf64x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
2970
2971 // vextracti forms
2972 void vextracti128(XMMRegister dst, XMMRegister src, uint8_t imm8);
2973 void vextracti128(Address dst, XMMRegister src, uint8_t imm8);
2974 void vextracti32x4(XMMRegister dst, XMMRegister src, uint8_t imm8);
2975 void vextracti32x4(Address dst, XMMRegister src, uint8_t imm8);
2976 void vextracti64x2(XMMRegister dst, XMMRegister src, uint8_t imm8);
2977 void vextracti64x4(XMMRegister dst, XMMRegister src, uint8_t imm8);
2978 void vextracti64x4(Address dst, XMMRegister src, uint8_t imm8);
2979
2980 // vextractf forms
2981 void vextractf128(XMMRegister dst, XMMRegister src, uint8_t imm8);
2982 void vextractf128(Address dst, XMMRegister src, uint8_t imm8);
2983 void vextractf32x4(XMMRegister dst, XMMRegister src, uint8_t imm8);
2984 void vextractf32x4(Address dst, XMMRegister src, uint8_t imm8);
2985 void vextractf64x2(XMMRegister dst, XMMRegister src, uint8_t imm8);
2986 void vextractf64x4(XMMRegister dst, XMMRegister src, uint8_t imm8);
2987 void vextractf64x4(Address dst, XMMRegister src, uint8_t imm8);
2988
2989 void extractps(Register dst, XMMRegister src, uint8_t imm8);
2990
2991 // xmm/mem sourced byte/word/dword/qword replicate
2992 void vpbroadcastb(XMMRegister dst, XMMRegister src, int vector_len);
2993 void vpbroadcastb(XMMRegister dst, Address src, int vector_len);
2994 void vpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len);
2995 void vpbroadcastw(XMMRegister dst, Address src, int vector_len);
2996 void vpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len);
2997 void vpbroadcastd(XMMRegister dst, Address src, int vector_len);
2998 void vpbroadcastq(XMMRegister dst, XMMRegister src, int vector_len);
2999 void vpbroadcastq(XMMRegister dst, Address src, int vector_len);
3000
3001 void evbroadcasti32x4(XMMRegister dst, Address src, int vector_len);
3002 void evbroadcasti64x2(XMMRegister dst, XMMRegister src, int vector_len);
3003 void evbroadcasti64x2(XMMRegister dst, Address src, int vector_len);
3004 void vbroadcasti128(XMMRegister dst, Address src, int vector_len);
3005
3006 // scalar single/double/128bit precision replicate
3007 void vbroadcastss(XMMRegister dst, XMMRegister src, int vector_len);
3008 void vbroadcastss(XMMRegister dst, Address src, int vector_len);
3009 void vbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len);
3010 void vbroadcastsd(XMMRegister dst, Address src, int vector_len);
3011 void vbroadcastf128(XMMRegister dst, Address src, int vector_len);
3012 void evbroadcastf64x2(XMMRegister dst, Address src, int vector_len);
3013
3014 // gpr sourced byte/word/dword/qword replicate
3015 void evpbroadcastb(XMMRegister dst, Register src, int vector_len);
3016 void evpbroadcastw(XMMRegister dst, Register src, int vector_len);
3017 void evpbroadcastd(XMMRegister dst, Register src, int vector_len);
3018 void evpbroadcastq(XMMRegister dst, Register src, int vector_len);
3019
3020 // Gather AVX2 and AVX3
3021 void vpgatherdd(XMMRegister dst, Address src, XMMRegister mask, int vector_len);
3022 void vpgatherdq(XMMRegister dst, Address src, XMMRegister mask, int vector_len);
3023 void vgatherdpd(XMMRegister dst, Address src, XMMRegister mask, int vector_len);
3024 void vgatherdps(XMMRegister dst, Address src, XMMRegister mask, int vector_len);
3025 void evpgatherdd(XMMRegister dst, KRegister mask, Address src, int vector_len);
3026 void evpgatherdq(XMMRegister dst, KRegister mask, Address src, int vector_len);
3027 void evgatherdpd(XMMRegister dst, KRegister mask, Address src, int vector_len);
3028 void evgatherdps(XMMRegister dst, KRegister mask, Address src, int vector_len);
3029
3030 // Scatter AVX3 only
3031 void evpscatterdd(Address dst, KRegister mask, XMMRegister src, int vector_len);
3032 void evpscatterdq(Address dst, KRegister mask, XMMRegister src, int vector_len);
3033 void evscatterdps(Address dst, KRegister mask, XMMRegister src, int vector_len);
3034 void evscatterdpd(Address dst, KRegister mask, XMMRegister src, int vector_len);
3035
3036 // Carry-Less Multiplication Quadword
3037 void pclmulqdq(XMMRegister dst, XMMRegister src, int mask);
3038 void vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask);
3039 void evpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask, int vector_len);
3040 // AVX instruction which is used to clear upper 128 bits of YMM registers and
3041 // to avoid transition penalty between AVX and SSE states. There is no
3042 // penalty if legacy SSE instructions are encoded using VEX prefix because
3043 // they always clear upper 128 bits. It should be used before calling
3044 // runtime code and native libraries.
3045 void vzeroupper();
3046
3047 void vzeroall();
3048
3049 // Vector double compares
3050 void vcmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len);
3051 void evcmppd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
3052 ComparisonPredicateFP comparison, int vector_len);
3053
3054 // Vector float compares
3055 void vcmpps(XMMRegister dst, XMMRegister nds, XMMRegister src, int comparison, int vector_len);
3056 void evcmpps(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
3057 ComparisonPredicateFP comparison, int vector_len);
3058
3059 void evcmpph(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
3060 ComparisonPredicateFP comparison, int vector_len);
3061
3062 void evcmpsh(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
3063 ComparisonPredicateFP comparison);
3064
3065 // Vector integer compares
3066 void vpcmpgtd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
3067 void evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
3068 int comparison, bool is_signed, int vector_len);
3069 void evpcmpd(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
3070 int comparison, bool is_signed, int vector_len);
3071
3072 // Vector long compares
3073 void evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
3074 int comparison, bool is_signed, int vector_len);
3075 void evpcmpq(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
3076 int comparison, bool is_signed, int vector_len);
3077
3078 // Vector byte compares
3079 void evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
3080 int comparison, bool is_signed, int vector_len);
3081 void evpcmpb(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
3082 int comparison, bool is_signed, int vector_len);
3083
3084 // Vector short compares
3085 void evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src,
3086 int comparison, bool is_signed, int vector_len);
3087 void evpcmpw(KRegister kdst, KRegister mask, XMMRegister nds, Address src,
3088 int comparison, bool is_signed, int vector_len);
3089
3090 void evpmovb2m(KRegister dst, XMMRegister src, int vector_len);
3091 void evpmovw2m(KRegister dst, XMMRegister src, int vector_len);
3092 void evpmovd2m(KRegister dst, XMMRegister src, int vector_len);
3093 void evpmovq2m(KRegister dst, XMMRegister src, int vector_len);
3094 void evpmovm2b(XMMRegister dst, KRegister src, int vector_len);
3095 void evpmovm2w(XMMRegister dst, KRegister src, int vector_len);
3096 void evpmovm2d(XMMRegister dst, KRegister src, int vector_len);
3097 void evpmovm2q(XMMRegister dst, KRegister src, int vector_len);
3098
3099 // floating point class tests
3100 void vfpclassss(KRegister kdst, XMMRegister src, uint8_t imm8);
3101 void vfpclasssd(KRegister kdst, XMMRegister src, uint8_t imm8);
3102
3103 // Vector blends
3104 void blendvps(XMMRegister dst, XMMRegister src);
3105 void blendvpd(XMMRegister dst, XMMRegister src);
3106 void pblendvb(XMMRegister dst, XMMRegister src);
3107 void blendvpb(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);
3108 void vblendvps(XMMRegister dst, XMMRegister nds, XMMRegister src, XMMRegister mask, int vector_len);
3109 void vblendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);
3110 void vpblendvb(XMMRegister dst, XMMRegister nds, XMMRegister src, XMMRegister mask, int vector_len);
3111 void vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len);
3112 void evblendmpd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
3113 void evblendmps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
3114 void evpblendmb(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
3115 void evpblendmw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
3116 void evpblendmd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
3117 void evpblendmq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
3118
3119 // Galois field affine transformation instructions.
3120 void gf2p8affineqb(XMMRegister dst, XMMRegister src, int imm8);
3121 void vgf2p8affineqb(XMMRegister dst, XMMRegister src2, XMMRegister src3, int imm8, int vector_len);
3122
3123 protected:
3124 // Next instructions require address alignment 16 bytes SSE mode.
3125 // They should be called only from corresponding MacroAssembler instructions.
3126 void andpd(XMMRegister dst, Address src);
3127 void andps(XMMRegister dst, Address src);
3128 void xorpd(XMMRegister dst, Address src);
3129 void xorps(XMMRegister dst, Address src);
3130
3131 };
3132
3133 // The Intel x86/Amd64 Assembler attributes: All fields enclosed here are to guide encoding level decisions.
3134 // Specific set functions are for specialized use, else defaults or whatever was supplied to object construction
3135 // are applied.
3136 class InstructionAttr {
3137 public:
3138 InstructionAttr(
3139 int vector_len, // The length of vector to be applied in encoding - for both AVX and EVEX
3140 bool rex_vex_w, // Width of data: if 32-bits or less, false, else if 64-bit or specially defined, true
3141 bool legacy_mode, // Details if either this instruction is conditionally encoded to AVX or earlier if true else possibly EVEX
3142 bool no_reg_mask, // when true, k0 is used when EVEX encoding is chosen, else embedded_opmask_register_specifier is used
3143 bool uses_vl) // This instruction may have legacy constraints based on vector length for EVEX
3144 :
3145 _rex_vex_w(rex_vex_w),
3146 _legacy_mode(legacy_mode || UseAVX < 3),
3147 _no_reg_mask(no_reg_mask),
3148 _uses_vl(uses_vl),
3149 _rex_vex_w_reverted(false),
3150 _is_evex_instruction(false),
3151 _is_clear_context(true),
3152 _is_extended_context(false),
3153 _avx_vector_len(vector_len),
3154 _tuple_type(Assembler::EVEX_ETUP),
3155 _input_size_in_bits(Assembler::EVEX_NObit),
3156 _evex_encoding(0),
3157 _embedded_opmask_register_specifier(0), // hard code k0
3158 _current_assembler(nullptr) { }
3159
3160 ~InstructionAttr() {
3161 if (_current_assembler != nullptr) {
3162 _current_assembler->clear_attributes();
3163 }
3164 }
3165
3166 private:
3167 bool _rex_vex_w;
3168 bool _legacy_mode;
3169 bool _no_reg_mask;
3170 bool _uses_vl;
3171 bool _rex_vex_w_reverted;
3172 bool _is_evex_instruction;
3173 bool _is_clear_context;
3174 bool _is_extended_context;
3175 int _avx_vector_len;
3176 int _tuple_type;
3177 int _input_size_in_bits;
3178 int _evex_encoding;
3179 int _embedded_opmask_register_specifier;
3180
3181 Assembler *_current_assembler;
3182
3183 public:
3184 // query functions for field accessors
3185 bool is_rex_vex_w(void) const { return _rex_vex_w; }
3186 bool is_legacy_mode(void) const { return _legacy_mode; }
3187 bool is_no_reg_mask(void) const { return _no_reg_mask; }
3188 bool uses_vl(void) const { return _uses_vl; }
3189 bool is_rex_vex_w_reverted(void) { return _rex_vex_w_reverted; }
3190 bool is_evex_instruction(void) const { return _is_evex_instruction; }
3191 bool is_clear_context(void) const { return _is_clear_context; }
3192 bool is_extended_context(void) const { return _is_extended_context; }
3193 int get_vector_len(void) const { return _avx_vector_len; }
3194 int get_tuple_type(void) const { return _tuple_type; }
3195 int get_input_size(void) const { return _input_size_in_bits; }
3196 int get_evex_encoding(void) const { return _evex_encoding; }
3197 int get_embedded_opmask_register_specifier(void) const { return _embedded_opmask_register_specifier; }
3198
3199 // Set the vector len manually
3200 void set_vector_len(int vector_len) { _avx_vector_len = vector_len; }
3201
3202 // Set revert rex_vex_w for avx encoding
3203 void set_rex_vex_w_reverted(void) { _rex_vex_w_reverted = true; }
3204
3205 // Set rex_vex_w based on state
3206 void set_rex_vex_w(bool state) { _rex_vex_w = state; }
3207
3208 // Set the instruction to be encoded in AVX mode
3209 void set_is_legacy_mode(void) { _legacy_mode = true; }
3210
3211 // Set the current instruction to be encoded as an EVEX instruction
3212 void set_is_evex_instruction(void) { _is_evex_instruction = true; }
3213
3214 // Internal encoding data used in compressed immediate offset programming
3215 void set_evex_encoding(int value) { _evex_encoding = value; }
3216
3217 // When the Evex.Z field is set (true), it is used to clear all non directed XMM/YMM/ZMM components.
3218 // This method unsets it so that merge semantics are used instead.
3219 void reset_is_clear_context(void) { _is_clear_context = false; }
3220
3221 // Map back to current assembler so that we can manage object level association
3222 void set_current_assembler(Assembler *current_assembler) { _current_assembler = current_assembler; }
3223
3224 // Address modifiers used for compressed displacement calculation
3225 void set_address_attributes(int tuple_type, int input_size_in_bits);
3226
3227 // Set embedded opmask register specifier.
3228 void set_embedded_opmask_register_specifier(KRegister mask) {
3229 _embedded_opmask_register_specifier = mask->encoding() & 0x7;
3230 }
3231
3232 void set_extended_context(void) { _is_extended_context = true; }
3233 };
3234
3235 #endif // CPU_X86_ASSEMBLER_X86_HPP