    case 32: return Assembler::AVX_256bit;
    case 64: return Assembler::AVX_512bit;

    default: {
      ShouldNotReachHere();
      return Assembler::AVX_NoVec;
    }
  }
}

static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
  return vector_length_encoding(Matcher::vector_length_in_bytes(n));
}

static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
  uint def_idx = use->operand_index(opnd);
  Node* def = use->in(def_idx);
  return vector_length_encoding(def);
}
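
// Usage sketch (illustrative only, not additional matcher code): inside an
// ins_encode block further down, these helpers are typically called as
//   int vlen_enc = vector_length_encoding(this);        // from the node's own vector type
//   int vlen_enc = vector_length_encoding(this, $src);  // from a specific operand's def
// so a node whose vector spans 32 bytes yields Assembler::AVX_256bit.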

static inline bool is_vector_popcount_predicate(BasicType bt) {
  return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
         (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
}

static inline bool is_unsigned_booltest_pred(int bt) {
  return ((bt & BoolTest::unsigned_compare) == BoolTest::unsigned_compare);
}
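// For example (relying on BoolTest's encoding in subnode.hpp, where the
// unsigned relations are formed by or-ing in the unsigned_compare bit):
// is_unsigned_booltest_pred(BoolTest::ult) is true, while
// is_unsigned_booltest_pred(BoolTest::lt) is false.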

static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
  return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
         (VM_Version::supports_avx512vl() || vlen_bytes == 64);
}

class Node::PD {
public:
  enum NodeFlags {
    Flag_intel_jcc_erratum = Node::_last_flag << 1,
    _last_flag = Flag_intel_jcc_erratum
  };
};

%} // end source_hpp

source %{

#include "opto/addnode.hpp"
#include "c2_intelJccErratum_x86.hpp"

void PhaseOutput::pd_perform_mach_node_analysis() {
  if (VM_Version::has_intel_jcc_erratum()) {
    int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
    _buf_sizes._code += extra_padding;
  }
//=============================================================================
const bool Matcher::match_rule_supported(int opcode) {
  if (!has_match_rule(opcode)) {
    return false; // no match rule present
  }
  const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false);
  switch (opcode) {
    case Op_AbsVL:
    case Op_StoreVectorScatter:
      if (UseAVX < 3) {
        return false;
      }
      break;
    case Op_PopCountI:
    case Op_PopCountL:
      if (!UsePopCountInstruction) {
        return false;
      }
      break;
    case Op_PopCountVI:
    case Op_PopCountVL:
      if (UseAVX < 2) {
        return false;
      }
      break;
    case Op_MulVI:
      if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX
        return false;
      }
      break;
    case Op_MulVL:
      if (UseSSE < 4) { // only with SSE4_1 or AVX
        return false;
      }
      break;
    case Op_MulReductionVL:
      if (!VM_Version::supports_avx512dq()) {
        return false;
      }
      break;
    case Op_AddReductionVL:
      if (UseSSE < 2) { // requires at least SSE2
      break;
    case Op_MulAddVS2VI:
    case Op_RShiftVL:
    case Op_AbsVD:
    case Op_NegVD:
      if (UseSSE < 2) {
        return false;
      }
      break;
#endif // !LP64
    case Op_SignumF:
      if (UseSSE < 1) {
        return false;
      }
      break;
    case Op_SignumD:
      if (UseSSE < 2) {
        return false;
      }
      break;
    case Op_CompressM:
      if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) {
        return false;
      }
      break;
    case Op_CompressV:
    case Op_ExpandV:
      if (!VM_Version::supports_avx512vl()) {
        return false;
      }
      break;
    case Op_SqrtF:
      if (UseSSE < 1) {
        return false;
      }
      break;
    case Op_SqrtD:
#ifdef _LP64
      if (UseSSE < 2) {
        return false;
      }
#else
      // x86_32.ad has a special match rule for SqrtD.
      // Together with common x86 rules, this handles all UseSSE cases.
#endif
      break;
  }
  return true; // Match rules are supported by default.
}

//------------------------------------------------------------------------

static inline bool is_pop_count_instr_target(BasicType bt) {
  return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
         (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
}
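// For instance: PopCountVI over T_INT lanes has a direct instruction
// (vpopcntd) once AVX512_VPOPCNTDQ is present, while T_BYTE/T_SHORT lanes
// additionally need AVX512_BITALG (vpopcntb/vpopcntw); anything else falls
// back to the AVX lookup-table rule (vpopcount_avx_reg) further down.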

// Identify extra cases that we might want to provide match rules for vector nodes and
// other intrinsics guarded with vector length (vlen) and element type (bt).
const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
  const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false);
  if (!match_rule_supported(opcode)) {
    return false;
  }
  // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes):
  //   * SSE2 supports 128bit vectors for all types;
  //   * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types;
  //   * AVX2 supports 256bit vectors for all types;
  //   * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types;
  //   * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types.
  // There's also a limit on minimum vector size supported: 2 elements (or 4 bytes for BYTE).
  // And MaxVectorSize is taken into account as well.
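  // Example (illustrative): vlen == 32 with bt == T_BYTE describes a 256-bit
  // byte vector, which vector_size_supported() accepts only with AVX2 or
  // later, since AVX1 reaches 256 bits only for FLOAT/DOUBLE.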
  if (!vector_size_supported(bt, vlen)) {
    return false;
  }
  // Special cases which require vector length follow:
  //   * implementation limitations
      }
      break;
    case Op_RoundVD:
      if (!VM_Version::supports_avx512dq()) {
        return false;
      }
      break;
    case Op_VectorCastF2X:
      if (is_subword_type(bt) || bt == T_LONG) {
        return false;
      }
      break;
    case Op_MulReductionVI:
      if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
        return false;
      }
      break;
    case Op_LoadVectorGatherMasked:
    case Op_StoreVectorScatterMasked:
    case Op_StoreVectorScatter:
      if (is_subword_type(bt)) {
        return false;
      } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
        return false;
      }
      // fallthrough
    case Op_LoadVectorGather:
      if (size_in_bits == 64) {
        return false;
      }
      break;
    case Op_MaskAll:
      if (!VM_Version::supports_evex()) {
        return false;
      }
      if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) {
        return false;
      }
      if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
        return false;
      }
      break;
    case Op_VectorMaskCmp:
      if (vlen < 2 || size_in_bits < 32) {
        return false;
      }
      break;
    case Op_CompressM:
      if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
        return false;
      }
      break;
    case Op_CompressV:
    case Op_ExpandV:
      if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) {
        return false;
      }
      if (size_in_bits < 128) {
        return false;
      }
      if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
        return false;
      }
      break;
    case Op_VectorLongToMask:
      if (UseAVX < 1 || !is_LP64) {
        return false;
      }
      if (UseAVX < 3 && !VM_Version::supports_bmi2()) {
        return false;
      }
      break;
    case Op_PopCountVI:
    case Op_PopCountVL: {
      if (!is_pop_count_instr_target(bt) &&
          (size_in_bits == 512) && !VM_Version::supports_avx512bw()) {
        return false;
      }
      break;
    }
    case Op_ReverseV:
    case Op_ReverseBytesV:
      if (UseAVX < 2) {
        return false;
      }
      break;
    case Op_CountTrailingZerosV:
    case Op_CountLeadingZerosV:
      if (UseAVX < 2) {
        return false;
      }
      break;
  }
  return true; // By default, match rules are supported.
}

const bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
  // The ADLC-based match_rule_supported routine checks for the existence of a
  // pattern based on the IR opcode alone. Most unary/binary/ternary masked
  // operations share the IR node of their non-masked counterpart, with the
  // mask edge being the differentiator. This routine therefore does a strict
  // check for the existence of masked operation patterns, returning false by
  // default for every opcode except the ones whose masked instruction
  // patterns are defined in this file.
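  // Example (illustrative): a masked vector add is still an AddVI node that
  // merely carries an extra mask input; it is matched below by rules such as
  // vadd_reg_masked rather than by a distinct masked IR opcode.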
  if (!match_rule_supported_vector(opcode, vlen, bt)) {
    return false;
  }

  const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false);
  int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
    case Op_VectorRearrange:
      if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
        return false; // Implementation limitation
      }
      if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
        return false; // Implementation limitation
      } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) {
        return false; // Implementation limitation
      }
      return true;

    // Binary Logical operations
    case Op_AndVMask:
    case Op_OrVMask:
    case Op_XorVMask:
      if (vlen > 16 && !VM_Version::supports_avx512bw()) {
        return false; // Implementation limitation
      }
      return true;

    case Op_PopCountVI:
    case Op_PopCountVL:
      if (!is_pop_count_instr_target(bt)) {
        return false;
      }
      return true;

    case Op_MaskAll:
      return true;

    case Op_CountLeadingZerosV:
      if ((bt == T_INT || bt == T_LONG) && VM_Version::supports_avx512cd()) {
        return true;
      }
      // fallthrough
    default:
      return false;
  }
}

MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
  assert(Matcher::is_generic_vector(generic_opnd), "not generic");
  bool legacy = (generic_opnd->opcode() == LEGVEC);
  if (!VM_Version::supports_avx512vlbwdq() && // KNL
      is_temp && !legacy && (ideal_reg == Op_VecZ)) {
    // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL.
    return new legVecZOper();
  }
  if (legacy) {
    switch (ideal_reg) {
      case Op_VecS: return new legVecSOper();
      case Op_VecD: return new legVecDOper();
      case Op_VecX: return new legVecXOper();
      case Op_VecY: return new legVecYOper();
      case Op_VecZ: return new legVecZOper();
  ins_pipe( pipe_slow );
%}

// --------------------------------- Vector Multiply Add Add ----------------------------------

instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
  predicate(VM_Version::supports_avx512_vnni());
  match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
  format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
  ins_encode %{
    assert(UseAVX > 2, "required");
    int vlen_enc = vector_length_encoding(this);
    __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
  ins_cost(10);
%}
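
// VPDPWSSD multiplies adjacent pairs of signed 16-bit elements, sums each
// pair, and accumulates the 32-bit results into dst, so the whole
// (AddVI (MulAddVS2VI src1 src2) dst) tree collapses into one VNNI
// instruction; the low ins_cost makes the matcher prefer this rule over the
// separate multiply-add plus add sequence.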

// --------------------------------- PopCount --------------------------------------

instruct vpopcount_integral_reg_evex(vec dst, vec src) %{
  predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
  match(Set dst (PopCountVI src));
  match(Set dst (PopCountVL src));
  ins_cost(400);
  format %{ "vector_popcount_integral $dst, $src" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc);
    // TODO: Once the auto-vectorizer supports the ConvL2I operation, PopCountVL
    // should be succeeded by its corresponding vector IR and the following
    // special handling should be removed.
    if (opcode == Op_PopCountVL && Matcher::vector_element_basic_type(this) == T_INT) {
      __ evpmovqd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{
  predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
  match(Set dst (PopCountVI src mask));
  match(Set dst (PopCountVL src mask));
  format %{ "vector_popcount_integral_masked $dst, $src, $mask" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{
  predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
  match(Set dst (PopCountVI src));
  match(Set dst (PopCountVL src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc);
    // TODO: Once the auto-vectorizer supports the ConvL2I operation, PopCountVL
    // should be succeeded by its corresponding vector IR and the following
    // special handling should be removed.
    if (opcode == Op_PopCountVL && Matcher::vector_element_basic_type(this) == T_INT) {
      if (VM_Version::supports_avx512vl()) {
        __ evpmovqd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
      } else {
        assert(VM_Version::supports_avx2(), "");
        __ vpshufd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
        __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}
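
// The non-EVEX fallback above reduces to byte-wise population counts, for
// which the usual AVX technique is a nibble lookup table applied per lane
// with vpshufb. A scalar sketch of that idea (illustrative only; the vector
// code applies it to all lanes at once):
//   static const uint8_t LUT[16] = {0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4};
//   int popcount8(uint8_t b) { return LUT[b & 0xF] + LUT[b >> 4]; }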

// --------------------------------- Vector Trailing Zeros Count --------------------------------------

instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{
  predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
                                              Matcher::vector_length_in_bytes(n->in(1))));
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp, TEMP rtmp);
  ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    BasicType rbt = Matcher::vector_element_basic_type(this);
    __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
                                        xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
    // TODO: Once the auto-vectorizer supports the ConvL2I operation, CountTrailingZerosV
    // should be succeeded by its corresponding vector IR and the following
    // special handling should be removed.
    if (bt == T_LONG && rbt == T_INT) {
      __ evpmovqd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
            VM_Version::supports_avx512cd() &&
            (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
  ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                        $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp);
  ins_cost(400);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
                                        $ktmp$$KRegister, $rtmp$$Register, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
  predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
  match(Set dst (CountTrailingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
  format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    BasicType rbt = Matcher::vector_element_basic_type(this);
    __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
    // TODO: Once the auto-vectorizer supports the ConvL2I operation, CountTrailingZerosV
    // should be succeeded by its corresponding vector IR and the following
    // special handling should be removed.
    if (bt == T_LONG && rbt == T_INT) {
      if (VM_Version::supports_avx512vl()) {
        __ evpmovqd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
      } else {
        assert(VM_Version::supports_avx2(), "");
        __ vpshufd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
        __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
      }
    }
  %}
  ins_pipe( pipe_slow );
%}
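
// Both lowering strategies reduce a trailing-zero count to primitives the
// target has (a sketch of the standard identities, not a description of the
// exact helper code): with a population count, tzcnt(x) == popcnt(~x & (x - 1)),
// which also yields the lane width for x == 0; with a leading-zero count,
// tzcnt(x) == W - 1 - lzcnt(x & -x) for x != 0, where W is the lane width in bits.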
// --------------------------------- Bitwise Ternary Logic ----------------------------------

instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
  match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
  effect(TEMP dst);
  format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
    __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
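
// The 8-bit func immediate is the full truth table of a three-input boolean
// function, indexed by the bit triple (dst, src2, src3). For example
// (illustrative): func == 0x96 computes dst ^ src2 ^ src3, and func == 0xE8
// computes the majority function; C2's MacroLogicV optimization folds
// chains of logic nodes into a single vpternlog like this.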

instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8);
  match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
  effect(TEMP dst);
  format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
  ins_encode %{
    int vector_len = vector_length_encoding(this);
  %}
  ins_pipe( pipe_slow );
%}

instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == NULL);
  match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size)));
  match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size)));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_mask_first_or_last_true_avx $dst, $mask\t! using $tmp and $xtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

// --------------------------------- Compress/Expand Operations ---------------------------

instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{
  match(Set dst (CompressV src mask));
  match(Set dst (ExpandV src mask));
  format %{ "vector_compress_expand $dst, $src, $mask" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    int vector_len = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len);
  %}
  ins_pipe( pipe_slow );
%}
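
// Semantics sketch (illustrative): compress packs the lanes selected by mask
// contiguously into the low end of dst, zeroing the rest, while expand is
// the inverse scatter. With mask 0b0101 over {a, b, c, d}, compress yields
// {a, c, 0, 0}, and expanding {x, y, ...} with the same mask yields
// {x, 0, y, 0}.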

instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
  match(Set dst (CompressM mask));
  effect(TEMP rtmp1, TEMP rtmp2, KILL cr);
  format %{ "mask_compress_evex $dst, $mask\t! using $rtmp1 and $rtmp2 as TEMP" %}
  ins_encode %{
    assert(this->in(1)->bottom_type()->isa_vectmask(), "");
    int mask_len = Matcher::vector_length(this);
    __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len);
  %}
  ins_pipe( pipe_slow );
%}
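
// CompressM is the mask counterpart (a sketch of the intended semantics,
// assuming the Vector API contract): the result has its lowest
// popcount(mask) bits set, marking exactly the lanes that a CompressV with
// the same mask fills; e.g. mask {0,1,0,1} compresses to {1,1,0,0}. The
// BMI2 requirement in match_rule_supported() reflects the scalar bit tricks
// applied to the mask's GPR image in the temporaries.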

#endif // _LP64

// -------------------------------- Bit and Byte Reversal Vector Operations ------------------------

instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
  predicate(!VM_Version::supports_gfni());
  match(Set dst (ReverseV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_bit_evex $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
  ins_encode %{
    int vec_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                          $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp, rRegI rtmp) %{
  predicate(VM_Version::supports_gfni());
  match(Set dst (ReverseV src));
  effect(TEMP dst, TEMP xtmp, TEMP rtmp);
  format %{ "vector_reverse_bit_gfni $dst, $src\t! using $rtmp and $xtmp as TEMP" %}
  ins_encode %{
    int vec_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    InternalAddress addr = $constantaddress(T_LONG, vreplicate_imm(T_LONG, 0x8040201008040201L, 1));
    __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
                               addr, $rtmp$$Register, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
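
// The constant 0x8040201008040201 is the 8x8 bit matrix with ones on the
// anti-diagonal; a GF(2) affine transform (gf2p8affineqb) with this matrix
// reverses the bit order within every byte in a single instruction, after
// which only a byte-level shuffle remains for element types wider than a byte.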

instruct vreverse_byte_reg(vec dst, vec src, rRegI rtmp) %{
  predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64);
  match(Set dst (ReverseBytesV src));
  effect(TEMP dst, TEMP rtmp);
  format %{ "vector_reverse_byte $dst, $src\t! using $rtmp as TEMP" %}
  ins_encode %{
    int vec_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, $rtmp$$Register, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
  predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64);
  match(Set dst (ReverseBytesV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
  format %{ "vector_reverse_byte $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
  ins_encode %{
    int vec_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                             $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}

// ---------------------------------- Vector Count Leading Zeros -----------------------------------

instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{
  predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
                                              Matcher::vector_length_in_bytes(n->in(1))));
  match(Set dst (CountLeadingZerosV src));
  format %{ "vector_count_leading_zeros $dst, $src" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    BasicType rbt = Matcher::vector_element_basic_type(this);
    __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
                                       xnoreg, xnoreg, k0, noreg, true, vlen_enc);
    // TODO: Once the auto-vectorizer supports the ConvL2I operation, CountLeadingZerosV
    // should be succeeded by its corresponding vector IR and the following
    // special handling should be removed.
    if (rbt == T_INT && bt == T_LONG) {
      __ evpmovqd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
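
// This direct lowering exists because AVX512CD provides native per-lane
// leading-zero counts (vplzcntd/vplzcntq) for INT and LONG lanes, which is
// exactly what is_clz_non_subword_predicate_evex() checks; the subword and
// pre-EVEX rules below have to synthesize the count instead.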

instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{
  predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
                                              Matcher::vector_length_in_bytes(n->in(1))));
  match(Set dst (CountLeadingZerosV src mask));
  format %{ "vector_count_leading_zeros $dst, $src, $mask" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
    __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg,
                                       xnoreg, $mask$$KRegister, noreg, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
            VM_Version::supports_avx512cd() &&
            (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1 and $xtmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                       $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister,
                                       $rtmp$$Register, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT &&
            !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                      $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}

instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
  predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT &&
            !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
  match(Set dst (CountLeadingZerosV src));
  effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
  format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src);
    BasicType bt = Matcher::vector_element_basic_type(this, $src);
    BasicType rbt = Matcher::vector_element_basic_type(this);
    __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
                                      $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
    // TODO: Once the auto-vectorizer supports the ConvL2I operation, CountLeadingZerosV
    // should be succeeded by its corresponding vector IR and the following
    // special handling should be removed.
    if (rbt == T_INT && bt == T_LONG) {
      __ evpmovqd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}

// ---------------------------------- Vector Masked Operations ------------------------------------

instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{
  match(Set dst (AddVB (Binary dst src2) mask));
  match(Set dst (AddVS (Binary dst src2) mask));
  match(Set dst (AddVI (Binary dst src2) mask));
  match(Set dst (AddVL (Binary dst src2) mask));
  match(Set dst (AddVF (Binary dst src2) mask));
  match(Set dst (AddVD (Binary dst src2) mask));
  format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    BasicType bt = Matcher::vector_element_basic_type(this);
    int opc = this->ideal_Opcode();
    __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
                   $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
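
// evmasked_op dispatches on the ideal opcode and element type; for T_INT,
// for instance, the call above should emit a merge-masked
//   evpaddd dst {mask}, dst, src2
// (the trailing 'true' selects merge rather than zero masking), so lanes
// whose mask bit is clear keep their previous dst value.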