src/hotspot/cpu/x86/assembler_x86.hpp

Old version (before the change):

1853   // Multiply add
1854   void pmaddwd(XMMRegister dst, XMMRegister src);
1855   void vpmaddwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1856   void vpmaddubsw(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
1857 
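As a usage sketch of the multiply-add forms (the `__` masm shorthand follows HotSpot's usual stub-generator convention; register choices are illustrative, not from this change):

    // xmm0.s32[i] = xmm0.s16[2i]*xmm1.s16[2i] + xmm0.s16[2i+1]*xmm1.s16[2i+1]
    __ pmaddwd(xmm0, xmm1);
    // non-destructive AVX form over a 256-bit vector
    __ vpmaddwd(xmm0, xmm1, xmm2, Assembler::AVX_256bit);
    // unsigned bytes of src1 times signed bytes of src2, adjacent pairs
    // summed into saturated signed words
    __ vpmaddubsw(xmm0, xmm1, xmm2, Assembler::AVX_256bit);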
1858   // Multiply add accumulate
1859   void evpdpwssd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1860 
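evpdpwssd emits the AVX512_VNNI word dot-product; unlike pmaddwd it also accumulates into the destination. A sketch with illustrative registers:

    // xmm0.s32[i] += xmm1.s16[2i]*xmm2.s16[2i] + xmm1.s16[2i+1]*xmm2.s16[2i+1]
    __ evpdpwssd(xmm0, xmm1, xmm2, Assembler::AVX_512bit);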
1861 #ifndef _LP64 // no 32bit push/pop on amd64
1862   void popl(Address dst);
1863 #endif
1864 
1865 #ifdef _LP64
1866   void popq(Address dst);
1867   void popq(Register dst);
1868 #endif
1869 
1870   void popcntl(Register dst, Address src);
1871   void popcntl(Register dst, Register src);
1872 
1873   void vpopcntd(XMMRegister dst, XMMRegister src, int vector_len);
1874   void vpopcntq(XMMRegister dst, XMMRegister src, int vector_len);
1875 
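The scalar forms emit POPCNT; the vector forms require AVX512_VPOPCNTDQ and count set bits per element. A sketch (registers illustrative):

    __ popcntl(rax, rbx);                            // rax = number of set bits in the low 32 bits of rbx
    __ vpopcntd(xmm0, xmm1, Assembler::AVX_512bit);  // per-dword population count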
1876 #ifdef _LP64
1877   void popcntq(Register dst, Address src);
1878   void popcntq(Register dst, Register src);
1879 #endif
1880 
1881   // Prefetches (SSE, SSE2, 3DNOW only)
1882 
1883   void prefetchnta(Address src);
1884   void prefetchr(Address src);
1885   void prefetcht0(Address src);
1886   void prefetcht1(Address src);
1887   void prefetcht2(Address src);
1888   void prefetchw(Address src);
1889 
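The prefetch hints differ only in which cache levels the line is pulled into. For instance (address operands illustrative):

    __ prefetcht0(Address(rsi, 64));    // fetch into all cache levels
    __ prefetchnta(Address(rsi, 128));  // non-temporal hint, minimizes cache pollution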
1890   // Shuffle Bytes
1891   void pshufb(XMMRegister dst, XMMRegister src);
1892   void pshufb(XMMRegister dst, Address src);
1893   void vpshufb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1894 
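pshufb treats the second operand as a per-byte selector table. A sketch of its semantics within a 16-byte lane:

    // xmm0.b[i] = (xmm1.b[i] & 0x80) ? 0 : xmm0.b[xmm1.b[i] & 0x0f]
    __ pshufb(xmm0, xmm1);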

1920   void ptest(XMMRegister dst, XMMRegister src);
1921   void ptest(XMMRegister dst, Address src);
1922   // Logical Compare 256bit
1923   void vptest(XMMRegister dst, XMMRegister src);
1924   void vptest(XMMRegister dst, Address src);
1925 
1926   void evptestmb(KRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1927 
1928   // Vector compare
1929   void vptest(XMMRegister dst, XMMRegister src, int vector_len);
1930 
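ptest performs a logical AND without writing a register, setting only flags. A sketch of the common is-zero check (the label name is illustrative):

    Label L_disjoint;
    __ ptest(xmm0, xmm1);                 // ZF = ((xmm0 & xmm1) == 0), CF = ((xmm1 & ~xmm0) == 0)
    __ jcc(Assembler::zero, L_disjoint);  // taken when the two vectors share no set bits
    ...
    __ bind(L_disjoint);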
1931   // Interleave Low Bytes
1932   void punpcklbw(XMMRegister dst, XMMRegister src);
1933   void punpcklbw(XMMRegister dst, Address src);
1934 
1935   // Interleave Low Doublewords
1936   void punpckldq(XMMRegister dst, XMMRegister src);
1937   void punpckldq(XMMRegister dst, Address src);
1938   void vpunpckldq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1939 
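The unpack-low forms interleave elements from the low halves of the two sources, e.g.:

    // bytes a0..a7 of xmm0 and b0..b7 of xmm1 become a0,b0,a1,b1,...,a7,b7
    __ punpcklbw(xmm0, xmm1);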
1940   // Interleave High Doublewords
1941   void vpunpckhdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1942 
1943   // Interleave Low Quadwords
1944   void punpcklqdq(XMMRegister dst, XMMRegister src);
1945 
1946   // Vector sum of absolute differences.
1947   void vpsadbw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1948 
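vpsadbw sums absolute differences of unsigned bytes in groups of eight, leaving one 64-bit total per group:

    // xmm0.u64[j] = sum over k of |xmm1.u8[8j+k] - xmm2.u8[8j+k]|
    __ vpsadbw(xmm0, xmm1, xmm2, Assembler::AVX_256bit);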
1949 #ifndef _LP64 // no 32bit push/pop on amd64
1950   void pushl(Address src);
1951 #endif
1952 
1953   void pushq(Address src);
1954 
1955   void rcll(Register dst, int imm8);
1956 
1957   void rclq(Register dst, int imm8);
1958 
1959   void rcrq(Register dst, int imm8);

2181   void vmulsd(XMMRegister dst, XMMRegister nds, Address src);
2182   void vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
2183   void vmulss(XMMRegister dst, XMMRegister nds, Address src);
2184   void vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src);
2185   void vsubsd(XMMRegister dst, XMMRegister nds, Address src);
2186   void vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
2187   void vsubss(XMMRegister dst, XMMRegister nds, Address src);
2188   void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src);
2189 
2190   void vmaxss(XMMRegister dst, XMMRegister nds, XMMRegister src);
2191   void vmaxsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
2192   void vminss(XMMRegister dst, XMMRegister nds, XMMRegister src);
2193   void vminsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
2194 
2195   void shlxl(Register dst, Register src1, Register src2);
2196   void shlxq(Register dst, Register src1, Register src2);
2197   void shrxl(Register dst, Register src1, Register src2);
2198   void shrxq(Register dst, Register src1, Register src2);
2199 
2200   void bzhiq(Register dst, Register src1, Register src2);
2201   void pdep(Register dst, Register src1, Register src2);
2202   void pext(Register dst, Register src1, Register src2);
2203 
2204 
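These are BMI2 scalar bit-manipulation forms; the shifts take their count from a register and leave flags untouched. Illustrative uses:

    __ shlxq(rax, rbx, rcx);   // rax = rbx << (rcx & 63), no flag update
    __ bzhiq(rax, rbx, rcx);   // rax = rbx with bits at and above index (rcx & 0xff) cleared
    __ pext(rax, rbx, rcx);    // gather the bits of rbx selected by mask rcx into rax's low bits
    __ pdep(rax, rbx, rcx);    // scatter rbx's low bits into the positions set in mask rcx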
2205   //====================VECTOR ARITHMETIC=====================================
2206   // Add Packed Floating-Point Values
2207   void addpd(XMMRegister dst, XMMRegister src);
2208   void addpd(XMMRegister dst, Address src);
2209   void addps(XMMRegister dst, XMMRegister src);
2210   void vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2211   void vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2212   void vaddpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2213   void vaddps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2214 
2215   // Subtract Packed Floating-Point Values
2216   void subpd(XMMRegister dst, XMMRegister src);
2217   void subps(XMMRegister dst, XMMRegister src);
2218   void vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2219   void vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2220   void vsubpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2221   void vsubps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2222 
2223   // Multiply Packed Floating-Point Values

2412   void evpandq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2413   void evpxord(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2414   void evpxord(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2415   void evpxorq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2416   void evpxorq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2417 
2418   void evprold(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
2419   void evprolq(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
2420   void evprolvd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2421   void evprolvq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2422   void evprord(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
2423   void evprorq(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
2424   void evprorvd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2425   void evprorvq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2426 
2427   void evpternlogd(XMMRegister dst, int imm8, KRegister mask, XMMRegister src2, XMMRegister src3, bool merge, int vector_len);
2428   void evpternlogd(XMMRegister dst, int imm8, KRegister mask, XMMRegister src2, Address src3, bool merge, int vector_len);
2429   void evpternlogq(XMMRegister dst, int imm8, KRegister mask, XMMRegister src2, XMMRegister src3, bool merge, int vector_len);
2430   void evpternlogq(XMMRegister dst, int imm8, KRegister mask, XMMRegister src2, Address src3, bool merge, int vector_len);
2431 
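For the ternary-logic forms, imm8 is the 8-entry truth table of an arbitrary three-input boolean function of (dst, src2, src3); 0x96 selects three-way XOR. A masked sketch (k1 and the registers are illustrative):

    // under mask k1: xmm0 = xmm0 ^ xmm1 ^ xmm2; merge=true keeps unselected lanes
    __ evpternlogd(xmm0, 0x96, k1, xmm1, xmm2, /*merge*/ true, Assembler::AVX_512bit);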
2432 
2433   // Sub packed integers
2434   void psubb(XMMRegister dst, XMMRegister src);
2435   void psubw(XMMRegister dst, XMMRegister src);
2436   void psubd(XMMRegister dst, XMMRegister src);
2437   void psubq(XMMRegister dst, XMMRegister src);
2438   void vpsubusb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2439   void vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2440   void vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2441   void vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2442   void vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2443   void vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2444   void vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2445   void vpsubd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2446   void vpsubq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2447 
2448   // Multiply packed integers (only shorts and ints)
2449   void pmullw(XMMRegister dst, XMMRegister src);
2450   void pmulld(XMMRegister dst, XMMRegister src);
2451   void pmuludq(XMMRegister dst, XMMRegister src);

2556 
2557   // Or packed integers
2558   void por(XMMRegister dst, XMMRegister src);
2559   void vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2560   void vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2561   void vporq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2562 
2563   // Xor packed integers
2564   void pxor(XMMRegister dst, XMMRegister src);
2565   void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2566   void vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2567   void vpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2568   void evpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2569   void evpxorq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2570 
2571   // Ternary logic instruction.
2572   void vpternlogd(XMMRegister dst, int imm8, XMMRegister src2, XMMRegister src3, int vector_len);
2573   void vpternlogd(XMMRegister dst, int imm8, XMMRegister src2, Address     src3, int vector_len);
2574   void vpternlogq(XMMRegister dst, int imm8, XMMRegister src2, XMMRegister src3, int vector_len);
2575 
2576   // Vector Rotate Left/Right instruction.
2577   void evprolvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2578   void evprolvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2579   void evprorvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2580   void evprorvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2581   void evprold(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2582   void evprolq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2583   void evprord(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2584   void evprorq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2585 
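The rotate forms come in immediate-count and per-element variable-count flavors, e.g.:

    __ evprold(xmm0, xmm1, 7, Assembler::AVX_512bit);      // rotate each dword left by 7
    __ evprorvq(xmm0, xmm1, xmm2, Assembler::AVX_512bit);  // rotate each qword right by the count in xmm2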
2586   // vinserti forms
2587   void vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
2588   void vinserti128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
2589   void vinserti32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
2590   void vinserti32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
2591   void vinserti64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
2592 
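imm8 selects the destination lane; for the 128-bit forms, 0 is the low lane and 1 the high one. A sketch:

    // build a 256-bit value: low 128 bits kept from xmm0, high 128 bits from xmm1
    __ vinserti128(xmm0, xmm0, xmm1, 1);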
2593   // vinsertf forms
2594   void vinsertf128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
2595   void vinsertf128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);

2712   void evpmovm2b(XMMRegister dst, KRegister src, int vector_len);
2713   void evpmovm2w(XMMRegister dst, KRegister src, int vector_len);
2714   void evpmovm2d(XMMRegister dst, KRegister src, int vector_len);
2715   void evpmovm2q(XMMRegister dst, KRegister src, int vector_len);
2716 
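The evpmovm2* forms materialize an opmask as a vector: each selected element becomes all-ones, the rest zero. For instance:

    __ evpmovm2b(xmm0, k1, Assembler::AVX_512bit);  // xmm0.b[i] = k1[i] ? 0xff : 0x00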
2717   // Vector blends
2718   void blendvps(XMMRegister dst, XMMRegister src);
2719   void blendvpd(XMMRegister dst, XMMRegister src);
2720   void pblendvb(XMMRegister dst, XMMRegister src);
2721   void blendvpb(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);
2722   void vblendvps(XMMRegister dst, XMMRegister nds, XMMRegister src, XMMRegister mask, int vector_len);
2723   void vblendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);
2724   void vpblendvb(XMMRegister dst, XMMRegister nds, XMMRegister src, XMMRegister mask, int vector_len);
2725   void vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len);
2726   void evblendmpd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2727   void evblendmps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2728   void evpblendmb(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2729   void evpblendmw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2730   void evpblendmd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2731   void evpblendmq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
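The legacy/AVX blends select by the sign bits of a vector mask, while the ev* forms select by an opmask register. A sketch of the opmask flavor (registers illustrative):

    // per-dword select: lanes with k1 set take xmm2, the rest keep xmm1
    __ evpblendmd(xmm0, k1, xmm1, xmm2, /*merge*/ true, Assembler::AVX_512bit);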
2732  protected:
2733   // The following instructions require the address operand to be 16-byte aligned in SSE mode.
2734   // They should be called only from the corresponding MacroAssembler instructions.
2735   void andpd(XMMRegister dst, Address src);
2736   void andps(XMMRegister dst, Address src);
2737   void xorpd(XMMRegister dst, Address src);
2738   void xorps(XMMRegister dst, Address src);
2739 
2740 };
2741 
2742 // The Intel x86/AMD64 assembler attributes: all fields enclosed here guide encoding-level decisions.
2743 // The specific set functions are for specialized use; otherwise the defaults, or whatever was supplied
2744 // at object construction, are applied.
2745 class InstructionAttr {
2746 public:
2747   InstructionAttr(
2748     int vector_len,     // The length of vector to be applied in encoding - for both AVX and EVEX
2749     bool rex_vex_w,     // Width of data: false for 32 bits or less, true for 64-bit or specially defined widths
2750     bool legacy_mode,   // If true, the instruction is encoded as AVX or earlier; otherwise EVEX encoding may be chosen
2751     bool no_reg_mask,   // When true, k0 is used if EVEX encoding is chosen; otherwise embedded_opmask_register_specifier is used

New version (after the change):

1853   // Multiply add
1854   void pmaddwd(XMMRegister dst, XMMRegister src);
1855   void vpmaddwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1856   void vpmaddubsw(XMMRegister dst, XMMRegister src1, XMMRegister src2, int vector_len);
1857 
1858   // Multiply add accumulate
1859   void evpdpwssd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1860 
1861 #ifndef _LP64 // no 32bit push/pop on amd64
1862   void popl(Address dst);
1863 #endif
1864 
1865 #ifdef _LP64
1866   void popq(Address dst);
1867   void popq(Register dst);
1868 #endif
1869 
1870   void popcntl(Register dst, Address src);
1871   void popcntl(Register dst, Register src);
1872 
1873   void evpopcntb(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
1874   void evpopcntw(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
1875   void evpopcntd(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
1876   void evpopcntq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
1877 
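The new masked popcount forms replace the unmasked vpopcntd/vpopcntq pair; the byte/word variants need AVX512_BITALG and the dword/qword variants AVX512_VPOPCNTDQ. A sketch (k0 conventionally means no masking; registers illustrative):

    __ evpopcntb(xmm0, k0, xmm1, /*merge*/ false, Assembler::AVX_512bit);  // per-byte bit count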
1878 #ifdef _LP64
1879   void popcntq(Register dst, Address src);
1880   void popcntq(Register dst, Register src);
1881 #endif
1882 
1883   // Prefetches (SSE, SSE2, 3DNOW only)
1884 
1885   void prefetchnta(Address src);
1886   void prefetchr(Address src);
1887   void prefetcht0(Address src);
1888   void prefetcht1(Address src);
1889   void prefetcht2(Address src);
1890   void prefetchw(Address src);
1891 
1892   // Shuffle Bytes
1893   void pshufb(XMMRegister dst, XMMRegister src);
1894   void pshufb(XMMRegister dst, Address src);
1895   void vpshufb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1896 

1922   void ptest(XMMRegister dst, XMMRegister src);
1923   void ptest(XMMRegister dst, Address src);
1924   // Logical Compare 256bit
1925   void vptest(XMMRegister dst, XMMRegister src);
1926   void vptest(XMMRegister dst, Address src);
1927 
1928   void evptestmb(KRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1929 
1930   // Vector compare
1931   void vptest(XMMRegister dst, XMMRegister src, int vector_len);
1932 
1933   // Interleave Low Bytes
1934   void punpcklbw(XMMRegister dst, XMMRegister src);
1935   void punpcklbw(XMMRegister dst, Address src);
1936 
1937   // Interleave Low Doublewords
1938   void punpckldq(XMMRegister dst, XMMRegister src);
1939   void punpckldq(XMMRegister dst, Address src);
1940   void vpunpckldq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1941 
1942   // Interleave High Word
1943   void vpunpckhwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1944 
1945   // Interleave Low Word
1946   void vpunpcklwd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1947 
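The newly added word interleaves mirror the byte/dword forms, operating within each 128-bit lane:

    __ vpunpcklwd(xmm0, xmm1, xmm2, Assembler::AVX_256bit);  // interleave words from the low halves
    __ vpunpckhwd(xmm0, xmm1, xmm2, Assembler::AVX_256bit);  // interleave words from the high halves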
1948   // Interleave High Doublewords
1949   void vpunpckhdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1950 
1951   // Interleave Low Quadwords
1952   void punpcklqdq(XMMRegister dst, XMMRegister src);
1953 
1954   // Vector sum of absolute differences.
1955   void vpsadbw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
1956 
1957 #ifndef _LP64 // no 32bit push/pop on amd64
1958   void pushl(Address src);
1959 #endif
1960 
1961   void pushq(Address src);
1962 
1963   void rcll(Register dst, int imm8);
1964 
1965   void rclq(Register dst, int imm8);
1966 
1967   void rcrq(Register dst, int imm8);

2189   void vmulsd(XMMRegister dst, XMMRegister nds, Address src);
2190   void vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
2191   void vmulss(XMMRegister dst, XMMRegister nds, Address src);
2192   void vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src);
2193   void vsubsd(XMMRegister dst, XMMRegister nds, Address src);
2194   void vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
2195   void vsubss(XMMRegister dst, XMMRegister nds, Address src);
2196   void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src);
2197 
2198   void vmaxss(XMMRegister dst, XMMRegister nds, XMMRegister src);
2199   void vmaxsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
2200   void vminss(XMMRegister dst, XMMRegister nds, XMMRegister src);
2201   void vminsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
2202 
2203   void shlxl(Register dst, Register src1, Register src2);
2204   void shlxq(Register dst, Register src1, Register src2);
2205   void shrxl(Register dst, Register src1, Register src2);
2206   void shrxq(Register dst, Register src1, Register src2);
2207 
2208   void bzhiq(Register dst, Register src1, Register src2);
2209   void pext(Register dst, Register src1, Register src2);
2210   void pdep(Register dst, Register src1, Register src2);
2211 
2212   //====================VECTOR ARITHMETIC=====================================
2213   // Add Packed Floating-Point Values
2214   void addpd(XMMRegister dst, XMMRegister src);
2215   void addpd(XMMRegister dst, Address src);
2216   void addps(XMMRegister dst, XMMRegister src);
2217   void vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2218   void vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2219   void vaddpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2220   void vaddps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2221 
2222   // Subtract Packed Floating-Point Values
2223   void subpd(XMMRegister dst, XMMRegister src);
2224   void subps(XMMRegister dst, XMMRegister src);
2225   void vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2226   void vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2227   void vsubpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2228   void vsubps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2229 
2230   // Multiply Packed Floating-Point Values

2419   void evpandq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2420   void evpxord(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2421   void evpxord(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2422   void evpxorq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2423   void evpxorq(XMMRegister dst, KRegister mask, XMMRegister nds, Address src, bool merge, int vector_len);
2424 
2425   void evprold(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
2426   void evprolq(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
2427   void evprolvd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2428   void evprolvq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2429   void evprord(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
2430   void evprorq(XMMRegister dst, KRegister mask, XMMRegister src, int shift, bool merge, int vector_len);
2431   void evprorvd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2432   void evprorvq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2433 
2434   void evpternlogd(XMMRegister dst, int imm8, KRegister mask, XMMRegister src2, XMMRegister src3, bool merge, int vector_len);
2435   void evpternlogd(XMMRegister dst, int imm8, KRegister mask, XMMRegister src2, Address src3, bool merge, int vector_len);
2436   void evpternlogq(XMMRegister dst, int imm8, KRegister mask, XMMRegister src2, XMMRegister src3, bool merge, int vector_len);
2437   void evpternlogq(XMMRegister dst, int imm8, KRegister mask, XMMRegister src2, Address src3, bool merge, int vector_len);
2438 
2439   void evplzcntd(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
2440   void evplzcntq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
2441 
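The new evplzcnt* forms emit the AVX512CD per-element leading-zero count. A sketch (k0 means no masking):

    __ evplzcntd(xmm0, k0, xmm1, /*merge*/ false, Assembler::AVX_512bit);  // xmm0.u32[i] = lzcnt(xmm1.u32[i])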
2442   // Sub packed integers
2443   void psubb(XMMRegister dst, XMMRegister src);
2444   void psubw(XMMRegister dst, XMMRegister src);
2445   void psubd(XMMRegister dst, XMMRegister src);
2446   void psubq(XMMRegister dst, XMMRegister src);
2447   void vpsubusb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2448   void vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2449   void vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2450   void vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2451   void vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2452   void vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2453   void vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2454   void vpsubd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2455   void vpsubq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2456 
2457   // Multiply packed integers (only shorts and ints)
2458   void pmullw(XMMRegister dst, XMMRegister src);
2459   void pmulld(XMMRegister dst, XMMRegister src);
2460   void pmuludq(XMMRegister dst, XMMRegister src);

2565 
2566   // Or packed integers
2567   void por(XMMRegister dst, XMMRegister src);
2568   void vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2569   void vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2570   void vporq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2571 
2572   // Xor packed integers
2573   void pxor(XMMRegister dst, XMMRegister src);
2574   void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2575   void vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2576   void vpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2577   void evpxorq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
2578   void evpxorq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
2579 
2580   // Ternary logic instruction.
2581   void vpternlogd(XMMRegister dst, int imm8, XMMRegister src2, XMMRegister src3, int vector_len);
2582   void vpternlogd(XMMRegister dst, int imm8, XMMRegister src2, Address     src3, int vector_len);
2583   void vpternlogq(XMMRegister dst, int imm8, XMMRegister src2, XMMRegister src3, int vector_len);
2584 
2585   // Vector compress/expand instructions.
2586   void evpcompressb(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
2587   void evpcompressw(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
2588   void evpcompressd(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
2589   void evpcompressq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
2590   void evcompressps(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
2591   void evcompresspd(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
2592 
2593   void evpexpandb(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
2594   void evpexpandw(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
2595   void evpexpandd(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
2596   void evpexpandq(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
2597   void evexpandps(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
2598   void evexpandpd(XMMRegister dst, KRegister mask, XMMRegister src, bool merge, int vector_len);
2599 
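Compress packs the mask-selected elements contiguously into the low end of the destination; expand is the inverse scatter. The byte/word forms require AVX512_VBMI2. A sketch with illustrative registers:

    __ evpcompressd(xmm0, k1, xmm1, /*merge*/ false, Assembler::AVX_512bit);  // gather selected dwords to the front
    __ evpexpandd(xmm2, k1, xmm0, /*merge*/ false, Assembler::AVX_512bit);    // spread them back to the masked lanes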
2600   // Vector Rotate Left/Right instruction.
2601   void evprolvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2602   void evprolvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2603   void evprorvd(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2604   void evprorvq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
2605   void evprold(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2606   void evprolq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2607   void evprord(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2608   void evprorq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
2609 
2610   // vinserti forms
2611   void vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
2612   void vinserti128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
2613   void vinserti32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
2614   void vinserti32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
2615   void vinserti64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
2616 
2617   // vinsertf forms
2618   void vinsertf128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
2619   void vinsertf128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);

2736   void evpmovm2b(XMMRegister dst, KRegister src, int vector_len);
2737   void evpmovm2w(XMMRegister dst, KRegister src, int vector_len);
2738   void evpmovm2d(XMMRegister dst, KRegister src, int vector_len);
2739   void evpmovm2q(XMMRegister dst, KRegister src, int vector_len);
2740 
2741   // Vector blends
2742   void blendvps(XMMRegister dst, XMMRegister src);
2743   void blendvpd(XMMRegister dst, XMMRegister src);
2744   void pblendvb(XMMRegister dst, XMMRegister src);
2745   void blendvpb(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);
2746   void vblendvps(XMMRegister dst, XMMRegister nds, XMMRegister src, XMMRegister mask, int vector_len);
2747   void vblendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);
2748   void vpblendvb(XMMRegister dst, XMMRegister nds, XMMRegister src, XMMRegister mask, int vector_len);
2749   void vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len);
2750   void evblendmpd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2751   void evblendmps(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2752   void evpblendmb(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2753   void evpblendmw(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2754   void evpblendmd(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2755   void evpblendmq(XMMRegister dst, KRegister mask, XMMRegister nds, XMMRegister src, bool merge, int vector_len);
2756 
2757   // Galois field affine transformation instructions.
2758   void vgf2p8affineqb(XMMRegister dst, XMMRegister src2, XMMRegister src3, int imm8, int vector_len);
2759 
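vgf2p8affineqb emits the GFNI affine transform: per the SDM, each byte of the second source, viewed as a bit vector over GF(2), is multiplied by the 8x8 bit matrix held in the matching qword of the first source, then XORed with imm8. A sketch (registers illustrative):

    __ vgf2p8affineqb(xmm0, xmm1, xmm2, 0, Assembler::AVX_256bit);  // imm8 = 0: pure matrix-vector product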
2760  protected:
2761   // The following instructions require the address operand to be 16-byte aligned in SSE mode.
2762   // They should be called only from the corresponding MacroAssembler instructions.
2763   void andpd(XMMRegister dst, Address src);
2764   void andps(XMMRegister dst, Address src);
2765   void xorpd(XMMRegister dst, Address src);
2766   void xorps(XMMRegister dst, Address src);
2767 
2768 };
2769 
2770 // The Intel x86/AMD64 assembler attributes: all fields enclosed here guide encoding-level decisions.
2771 // The specific set functions are for specialized use; otherwise the defaults, or whatever was supplied
2772 // at object construction, are applied.
2773 class InstructionAttr {
2774 public:
2775   InstructionAttr(
2776     int vector_len,     // The length of vector to be applied in encoding - for both AVX and EVEX
2777     bool rex_vex_w,     // Width of data: false for 32 bits or less, true for 64-bit or specially defined widths
2778     bool legacy_mode,   // If true, the instruction is encoded as AVX or earlier; otherwise EVEX encoding may be chosen
2779     bool no_reg_mask,   // When true, k0 is used if EVEX encoding is chosen; otherwise embedded_opmask_register_specifier is used