  // Multiply add
  void pmaddwd(XMMRegister dst, XMMRegister src);
  void vpmaddwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vpmaddubsw(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);

  // Multiply add accumulate
  void evpdpwssd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
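  // A rough sketch of the semantics: pmaddwd multiplies packed signed words and
  // adds adjacent 32-bit products, dst[i] = a[2i]*b[2i] + a[2i+1]*b[2i+1];
  // vpmaddubsw is the byte-wise analogue (unsigned * signed) with signed
  // saturation into words; evpdpwssd (AVX512_VNNI) computes the same word dot
  // product but accumulates it into the existing destination element.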

#ifndef _LP64 // no 32bit push/pop on amd64
  void popl(Address dst);
#endif

#ifdef _LP64
  void popq(Address dst);
  void popq(Register dst);
#endif

  void popcntl(Register dst, Address src);
  void popcntl(Register dst, Register src);

  void evpopcntb(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
  void evpopcntw(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
  void evpopcntd(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
  void evpopcntq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
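  // Per-element population counts (AVX512_BITALG / AVX512_VPOPCNTDQ). As with
  // the other masked ev-forms here, merge == true requests merge-masking
  // (elements with a clear mask bit keep their old dst value), while
  // merge == false requests zero-masking.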

#ifdef _LP64
  void popcntq(Register dst, Address src);
  void popcntq(Register dst, Register src);
#endif

  // Prefetches (SSE, SSE2, 3DNOW only)

  void prefetchnta(Address src);
  void prefetchr(Address src);
  void prefetcht0(Address src);
  void prefetcht1(Address src);
  void prefetcht2(Address src);
  void prefetchw(Address src);
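  // Hint levels: prefetchnta fetches with minimal cache pollution
  // (non-temporal); prefetcht0/t1/t2 fetch into progressively more distant
  // cache levels; prefetchr and prefetchw are the 3DNow!/PREFETCHW forms
  // hinting read and write intent respectively.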

  // Shuffle Bytes
  void pshufb(XMMRegister dst, XMMRegister src);
  void pshufb(XMMRegister dst, Address src);
  void vpshufb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
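  // pshufb selects each destination byte by the low four bits of the matching
  // control byte in src (within each 128-bit lane); a control byte with its
  // high bit set zeroes that destination byte instead.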

  void ptest(XMMRegister dst, XMMRegister src);
  void ptest(XMMRegister dst, Address src);
  // Logical Compare 256-bit
  void vptest(XMMRegister dst, XMMRegister src);
  void vptest(XMMRegister dst, Address src);

  void evptestmb(KRegister dst, XMMRegister nds, XMMRegister src, int vector_len);

  // Vector compare
  void vptest(XMMRegister dst, XMMRegister src, int vector_len);
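  // ptest writes no register: it sets ZF when (dst AND src) is all zero and
  // CF when (NOT dst AND src) is all zero, so a single instruction can test a
  // vector against a mask.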

  // Interleave Low Bytes
  void punpcklbw(XMMRegister dst, XMMRegister src);
  void punpcklbw(XMMRegister dst, Address src);

  // Interleave Low Doublewords
  void punpckldq(XMMRegister dst, XMMRegister src);
  void punpckldq(XMMRegister dst, Address src);
  void vpunpckldq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);

  // Interleave High Words
  void vpunpckhwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);

  // Interleave Low Words
  void vpunpcklwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);

  // Interleave High Doublewords
  void vpunpckhdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);

  // Interleave Low Quadwords
  void punpcklqdq(XMMRegister dst, XMMRegister src);
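  // Example: punpcklbw interleaves the low eight bytes of the two operands,
  // giving dst = { dst[0], src[0], dst[1], src[1], ... }; the word, dword and
  // qword forms follow the same pattern at wider element sizes.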

  // Vector sum of absolute differences.
  void vpsadbw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
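  // vpsadbw sums |a[i] - b[i]| over the unsigned bytes of each 64-bit lane
  // and stores the 16-bit total zero-extended into that lane's quadword.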

#ifndef _LP64 // no 32bit push/pop on amd64
  void pushl(Address src);
#endif

  void pushq(Address src);

  void rcll(Register dst, int imm8);

  void rclq(Register dst, int imm8);

  void rcrq(Register dst, int imm8);

  void vmulsd(XMMRegister dst, XMMRegister nds, Address src);
  void vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
  void vmulss(XMMRegister dst, XMMRegister nds, Address src);
  void vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src);
  void vsubsd(XMMRegister dst, XMMRegister nds, Address src);
  void vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
  void vsubss(XMMRegister dst, XMMRegister nds, Address src);
  void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src);

  void vmaxss(XMMRegister dst, XMMRegister nds, XMMRegister src);
  void vmaxsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
  void vminss(XMMRegister dst, XMMRegister nds, XMMRegister src);
  void vminsd(XMMRegister dst, XMMRegister nds, XMMRegister src);

  void shlxl(Register dst, Register src1, Register src2);
  void shlxq(Register dst, Register src1, Register src2);
  void shrxl(Register dst, Register src1, Register src2);
  void shrxq(Register dst, Register src1, Register src2);

  void bzhiq(Register dst, Register src1, Register src2);
  void pext(Register dst, Register src1, Register src2);
  void pdep(Register dst, Register src1, Register src2);
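  // BMI2 bit-field helpers, illustrated with 8-bit values for brevity:
  // bzhiq copies src1 and clears all bits at positions >= the low byte of
  // src2; pext packs the bits of src1 selected by the mask in src2 into the
  // low bits of dst (src1 = 0b10110010, mask = 0b11110000 -> dst = 0b1011);
  // pdep is the inverse scatter (src1 = 0b1011, mask = 0b11110000 ->
  // dst = 0b10110000).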

  //====================VECTOR ARITHMETIC=====================================
  // Add Packed Floating-Point Values
  void addpd(XMMRegister dst, XMMRegister src);
  void addpd(XMMRegister dst, Address src);
  void addps(XMMRegister dst, XMMRegister src);
  void vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vaddpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
  void vaddps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);

  // Subtract Packed Floating-Point Values
  void subpd(XMMRegister dst, XMMRegister src);
  void subps(XMMRegister dst, XMMRegister src);
  void vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vsubpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
  void vsubps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
  void evpandq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
  void evpxord(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
  void evpxord(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
  void evpxorq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
  void evpxorq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);

  void evprold(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
  void evprolq(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
  void evprolvd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
  void evprolvq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
  void evprord(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
  void evprorq(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
  void evprorvd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
  void evprorvq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);

  void evpternlogd(XMMRegister dst, int imm8, KRegister mask, XMMRegister src2, XMMRegister src3, bool merge, int vector_len);
  void evpternlogd(XMMRegister dst, int imm8, KRegister mask, XMMRegister src2, Address src3, bool merge, int vector_len);
  void evpternlogq(XMMRegister dst, int imm8, KRegister mask, XMMRegister src2, XMMRegister src3, bool merge, int vector_len);
  void evpternlogq(XMMRegister dst, int imm8, KRegister mask, XMMRegister src2, Address src3, bool merge, int vector_len);

  void evplzcntd(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
  void evplzcntq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
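  // evplzcnt{d,q} (AVX512CD) counts leading zero bits per element; an
  // all-zero element yields the full element width (32 or 64).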

  // Sub packed integers
  void psubb(XMMRegister dst, XMMRegister src);
  void psubw(XMMRegister dst, XMMRegister src);
  void psubd(XMMRegister dst, XMMRegister src);
  void psubq(XMMRegister dst, XMMRegister src);
  void vpsubusb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
  void vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
  void vpsubd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
  void vpsubq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);

  // Multiply packed integers (only shorts and ints)
  void pmullw(XMMRegister dst, XMMRegister src);
  void pmulld(XMMRegister dst, XMMRegister src);
  void pmuludq(XMMRegister dst, XMMRegister src);
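  // pmullw and pmulld keep only the low 16/32 bits of each product, while
  // pmuludq multiplies the unsigned low dword of each qword pair and stores
  // the full 64-bit product.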

  // Or packed integers
  void por(XMMRegister dst, XMMRegister src);
  void vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
  void vporq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);

  // Xor packed integers
  void pxor(XMMRegister dst, XMMRegister src);
  void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
  void vpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void evpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
  void evpxorq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);

  // Ternary logic instructions.
  void vpternlogd(XMMRegister dst, int imm8, XMMRegister src2, XMMRegister src3, int vector_len);
  void vpternlogd(XMMRegister dst, int imm8, XMMRegister src2, Address src3, int vector_len);
  void vpternlogq(XMMRegister dst, int imm8, XMMRegister src2, XMMRegister src3, int vector_len);
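  // Each result bit of vpternlogd/q is looked up in imm8 using the three
  // corresponding source bits as a 3-bit index, so imm8 can encode any
  // three-input boolean function; e.g. imm8 == 0x96 is the truth table of
  // dst ^ src2 ^ src3.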

  // Vector compress/expand instructions.
  void evpcompressb(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
  void evpcompressw(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
  void evpcompressd(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
  void evpcompressq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
  void evcompressps(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
  void evcompresspd(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);

  void evpexpandb(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
  void evpexpandw(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
  void evpexpandd(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
  void evpexpandq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
  void evexpandps(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
  void evexpandpd(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
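  // Compress packs the elements selected by the opmask contiguously into the
  // low end of dst; expand is the inverse, distributing consecutive low
  // source elements to the positions whose mask bits are set.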

  // Vector Rotate Left/Right instructions.
  void evprolvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
  void evprolvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
  void evprorvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
  void evprorvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
  void evprold(XMMRegister dst, XMMRegister src, int shift, int vector_len);
  void evprolq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
  void evprord(XMMRegister dst, XMMRegister src, int shift, int vector_len);
  void evprorq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
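  // The immediate forms (evprold etc.) rotate every element by the same
  // count, while the v-suffixed forms (evprolvd etc.) take a per-element
  // rotate count from the shift vector.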

  // vinserti forms
  void vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
  void vinserti128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
  void vinserti32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
  void vinserti32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
  void vinserti64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
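  // imm8 selects the destination lane: one bit for the 128-bit forms on a
  // 256-bit target (two bits on a 512-bit target), and one bit choosing the
  // low or high 256-bit half for vinserti64x4.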

  // vinsertf forms
  void vinsertf128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
  void vinsertf128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);

  void evpmovm2b(XMMRegister dst, KRegister src, int vector_len);
  void evpmovm2w(XMMRegister dst, KRegister src, int vector_len);
  void evpmovm2d(XMMRegister dst, KRegister src, int vector_len);
  void evpmovm2q(XMMRegister dst, KRegister src, int vector_len);
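  // evpmovm2{b,w,d,q} broadcast an opmask into vector elements: element i
  // becomes all ones when mask bit i is set and all zeros otherwise.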

  // Vector blends
  void blendvps(XMMRegister dst, XMMRegister src);
  void blendvpd(XMMRegister dst, XMMRegister src);
  void pblendvb(XMMRegister dst, XMMRegister src);
  void blendvpb(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);
  void vblendvps(XMMRegister dst, XMMRegister nds, XMMRegister src, XMMRegister mask, int vector_len);
  void vblendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);
  void vpblendvb(XMMRegister dst, XMMRegister nds, XMMRegister src, XMMRegister mask, int vector_len);
  void vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len);
  void evblendmpd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
  void evblendmps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
  void evpblendmb(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
  void evpblendmw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
  void evpblendmd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
  void evpblendmq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
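  // Selection conventions differ by encoding: the legacy blendv forms select
  // per element by the sign bit of an implicit xmm0 mask, the v-prefixed
  // forms take the mask (or an immediate, for vpblendd) as an explicit
  // operand, and the ev-prefixed forms choose between nds and src under an
  // opmask register.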

  // Galois field affine transformation instructions.
  void vgf2p8affineqb(XMMRegister dst, XMMRegister src2, XMMRegister src3, int imm8, int vector_len);
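  // Per the GFNI definition, each byte of src2 is multiplied, as a bit vector
  // over GF(2), by the 8x8 bit matrix held in the matching quadword of src3,
  // and the result is XORed with imm8.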

 protected:
  // The following instructions require 16-byte address alignment in SSE mode.
  // They should be called only from the corresponding MacroAssembler instructions.
  void andpd(XMMRegister dst, Address src);
  void andps(XMMRegister dst, Address src);
  void xorpd(XMMRegister dst, Address src);
  void xorps(XMMRegister dst, Address src);
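  // Legacy (non-VEX) SSE instructions fault on a 128-bit memory operand that
  // is not 16-byte aligned, hence the alignment requirement above.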

};

// The Intel x86/AMD64 assembler attributes: the fields enclosed here guide
// encoding-level decisions. The specific setter functions are for specialized
// use; otherwise the defaults, or whatever was supplied at object
// construction, are applied.
class InstructionAttr {
 public:
  InstructionAttr(
    int vector_len,     // The vector length to apply in encoding, for both AVX and EVEX
    bool rex_vex_w,     // Data width: false for 32 bits or less, true for 64 bits or where specially defined
    bool legacy_mode,   // When true, the instruction is encoded as AVX or earlier; when false, EVEX may be chosen
    bool no_reg_mask,   // When true, k0 is used if EVEX encoding is chosen; otherwise embedded_opmask_register_specifier is used