src/hotspot/cpu/x86/x86.ad

 1224     case 32: return Assembler::AVX_256bit;
 1225     case 64: return Assembler::AVX_512bit;
 1226 
 1227     default: {
 1228       ShouldNotReachHere();
 1229       return Assembler::AVX_NoVec;
 1230     }
 1231   }
 1232 }
 1233 
 1234 static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
 1235   return vector_length_encoding(Matcher::vector_length_in_bytes(n));
 1236 }
 1237 
 1238 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
 1239   uint def_idx = use->operand_index(opnd);
 1240   Node* def = use->in(def_idx);
 1241   return vector_length_encoding(def);
 1242 }
 1243 
 1244 static inline bool is_unsigned_booltest_pred(int bt) {
 1245   return  ((bt & BoolTest::unsigned_compare) == BoolTest::unsigned_compare);
 1246 }
 1247 
 1248 class Node::PD {
 1249 public:
 1250   enum NodeFlags {
 1251     Flag_intel_jcc_erratum = Node::_last_flag << 1,
 1252     _last_flag             = Flag_intel_jcc_erratum
 1253   };
 1254 };
 1255 
 1256 %} // end source_hpp
 1257 
 1258 source %{
 1259 
 1260 #include "opto/addnode.hpp"
 1261 #include "c2_intelJccErratum_x86.hpp"
 1262 
 1263 void PhaseOutput::pd_perform_mach_node_analysis() {
 1264   if (VM_Version::has_intel_jcc_erratum()) {
 1265     int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
 1266     _buf_sizes._code += extra_padding;
 1267   }

 1388 //=============================================================================
 1389 const bool Matcher::match_rule_supported(int opcode) {
 1390   if (!has_match_rule(opcode)) {
 1391     return false; // no match rule present
 1392   }
 1393   const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false);
 1394   switch (opcode) {
 1395     case Op_AbsVL:
 1396     case Op_StoreVectorScatter:
 1397       if (UseAVX < 3) {
 1398         return false;
 1399       }
 1400       break;
 1401     case Op_PopCountI:
 1402     case Op_PopCountL:
 1403       if (!UsePopCountInstruction) {
 1404         return false;
 1405       }
 1406       break;
 1407     case Op_PopCountVI:
 1408       if (!UsePopCountInstruction || (UseAVX < 2)) {
 1409         return false;
 1410       }
 1411       break;
 1412     case Op_PopCountVL:
 1413       if (!UsePopCountInstruction || (UseAVX <= 2)) {
 1414         return false;
 1415       }
 1416       break;
 1417     case Op_MulVI:
 1418       if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX
 1419         return false;
 1420       }
 1421       break;
 1422     case Op_MulVL:
 1423       if (UseSSE < 4) { // only with SSE4_1 or AVX
 1424         return false;
 1425       }
 1426       break;
 1427     case Op_MulReductionVL:
 1428       if (!VM_Version::supports_avx512dq()) {
 1429         return false;
 1430       }
 1431       break;
 1432     case Op_AddReductionVL:
 1433       if (UseSSE < 2) { // requires at least SSE2

 1608       break;
 1609     case Op_MulAddVS2VI:
 1610     case Op_RShiftVL:
 1611     case Op_AbsVD:
 1612     case Op_NegVD:
 1613       if (UseSSE < 2) {
 1614         return false;
 1615       }
 1616       break;
 1617 #endif // !LP64
 1618     case Op_SignumF:
 1619       if (UseSSE < 1) {
 1620         return false;
 1621       }
 1622       break;
 1623     case Op_SignumD:
 1624       if (UseSSE < 2) {
 1625         return false;
 1626       }
 1627       break;
 1628     case Op_SqrtF:
 1629       if (UseSSE < 1) {
 1630         return false;
 1631       }
 1632       break;
 1633     case Op_SqrtD:
 1634 #ifdef _LP64
 1635       if (UseSSE < 2) {
 1636         return false;
 1637       }
 1638 #else
 1639       // x86_32.ad has a special match rule for SqrtD.
 1640       // Together with common x86 rules, this handles all UseSSE cases.
 1641 #endif
 1642       break;
 1643   }
 1644   return true;  // Match rules are supported by default.
 1645 }
 1646 
 1647 //------------------------------------------------------------------------
 1648 
 1649 // Identify extra cases in which we might want to provide match rules for vector nodes and
 1650 // other intrinsics, guarded by vector length (vlen) and element type (bt).
 1651 const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
 1652   const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false);
 1653   if (!match_rule_supported(opcode)) {
 1654     return false;
 1655   }
 1656   // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes):
 1657   //   * SSE2 supports 128bit vectors for all types;
 1658   //   * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types;
 1659   //   * AVX2 supports 256bit vectors for all types;
 1660   //   * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types;
 1661   //   * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types.
 1662 // There's also a lower limit on the supported vector size: 2 elements (or 4 bytes for BYTE).
 1663   // And MaxVectorSize is taken into account as well.
 1664   if (!vector_size_supported(bt, vlen)) {
 1665     return false;
 1666   }
 1667   // Special cases which require vector length follow:
 1668   //   * implementation limitations

 1834       }
 1835       break;
 1836     case Op_RoundVD:
 1837       if (!VM_Version::supports_avx512dq()) {
 1838         return false;
 1839       }
 1840       break;
 1841     case Op_VectorCastF2X:
 1842       if (is_subword_type(bt) || bt == T_LONG) {
 1843         return false;
 1844       }
 1845       break;
 1846     case Op_MulReductionVI:
 1847       if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
 1848         return false;
 1849       }
 1850       break;
 1851     case Op_LoadVectorGatherMasked:
 1852     case Op_StoreVectorScatterMasked:
 1853     case Op_StoreVectorScatter:
 1854       if(is_subword_type(bt)) {
 1855         return false;
 1856       } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 1857         return false;
 1858       }
 1859       // fallthrough
 1860     case Op_LoadVectorGather:
 1861       if (size_in_bits == 64) {
 1862         return false;
 1863       }
 1864       break;
 1865     case Op_MaskAll:
 1866       if (!VM_Version::supports_evex()) {
 1867         return false;
 1868       }
 1869       if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) {
 1870         return false;
 1871       }
 1872       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 1873         return false;
 1874       }
 1875       break;
 1876     case Op_VectorMaskCmp:
 1877       if (vlen < 2 || size_in_bits < 32) {
 1878         return false;
 1879       }
 1880       break;
 1881     case Op_VectorLongToMask:
 1882       if (UseAVX < 1 || !is_LP64) {
 1883         return false;
 1884       }
 1885       if (UseAVX < 3 && !VM_Version::supports_bmi2()) {
 1886         return false;
 1887       }
 1888       break;
 1889     case Op_PopCountVI:
 1890       if (!VM_Version::supports_avx512_vpopcntdq() &&
 1891           (vlen == 16) && !VM_Version::supports_avx512bw()) {
 1892         return false;
 1893       }
 1894       break;
 1895     case Op_PopCountVL:
 1896       if (!VM_Version::supports_avx512_vpopcntdq() &&
 1897           ((vlen <= 4) || ((vlen == 8) && !VM_Version::supports_avx512bw()))) {
 1898         return false;
 1899       }
 1900       break;
 1901   }
 1902   return true;  // Match rules are supported by default.
 1903 }
 1904 
 1905 const bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
 1906   // The ADLC-based match_rule_supported routine checks for the existence of a pattern
 1907   // based on the IR opcode. Most unary/binary/ternary masked operations share the IR
 1908   // nodes of their non-masked counterparts, with the mask edge being the differentiator.
 1909   // This routine therefore checks strictly for the existence of masked operation
 1910   // patterns: it returns false for every opcode apart from the ones whose masked
 1911   // instruction patterns are defined in this file.
 1912   if (!match_rule_supported_vector(opcode, vlen, bt)) {
 1913     return false;
 1914   }
 1915 
 1916   const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false);
 1917   int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;

 2025     case Op_VectorRearrange:
 2026       if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
 2027         return false; // Implementation limitation
 2028       }
 2029       if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
 2030         return false; // Implementation limitation
 2031       } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) {
 2032         return false; // Implementation limitation
 2033       }
 2034       return true;
 2035 
 2036     // Binary Logical operations
 2037     case Op_AndVMask:
 2038     case Op_OrVMask:
 2039     case Op_XorVMask:
 2040       if (vlen > 16 && !VM_Version::supports_avx512bw()) {
 2041         return false; // Implementation limitation
 2042       }
 2043       return true;
 2044 
 2045     case Op_MaskAll:
 2046       return true;
 2047 
 2048     default:
 2049       return false;
 2050   }
 2051 }
 2052 
 2053 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
 2054   assert(Matcher::is_generic_vector(generic_opnd), "not generic");
 2055   bool legacy = (generic_opnd->opcode() == LEGVEC);
 2056   if (!VM_Version::supports_avx512vlbwdq() && // KNL
 2057       is_temp && !legacy && (ideal_reg == Op_VecZ)) {
 2058     // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL.
 2059     return new legVecZOper();
 2060   }
 2061   if (legacy) {
 2062     switch (ideal_reg) {
 2063       case Op_VecS: return new legVecSOper();
 2064       case Op_VecD: return new legVecDOper();
 2065       case Op_VecX: return new legVecXOper();
 2066       case Op_VecY: return new legVecYOper();
 2067       case Op_VecZ: return new legVecZOper();

 8625   ins_pipe( pipe_slow );
 8626 %}
 8627 
 8628 // --------------------------------- Vector Multiply Add Add ----------------------------------
 8629 
 8630 instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
 8631   predicate(VM_Version::supports_avx512_vnni());
 8632   match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
 8633   format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
 8634   ins_encode %{
 8635     assert(UseAVX > 2, "required");
 8636     int vlen_enc = vector_length_encoding(this);
 8637     __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
 8638   %}
 8639   ins_pipe( pipe_slow );
 8640   ins_cost(10);
 8641 %}
 8642 
 8643 // --------------------------------- PopCount --------------------------------------
 8644 
 8645 instruct vpopcountI_popcntd(vec dst, vec src) %{
 8646   predicate(VM_Version::supports_avx512_vpopcntdq());
 8647   match(Set dst (PopCountVI src));
 8648   format %{ "vector_popcount_int $dst, $src\t! vector popcount packedI" %}
 8649   ins_encode %{
 8650     assert(UsePopCountInstruction, "not enabled");
 8651     int vlen_enc = vector_length_encoding(this);
 8652     __ vector_popcount_int($dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg, noreg, vlen_enc);
 8653   %}
 8654   ins_pipe( pipe_slow );
 8655 %}
 8656 
 8657 instruct vpopcountI(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp, rFlagsReg cc) %{
 8658   predicate(!VM_Version::supports_avx512_vpopcntdq());
 8659   match(Set dst (PopCountVI src));
 8660   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, KILL cc);
 8661   format %{ "vector_popcount_int  $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
 8662   ins_encode %{
 8663     assert(UsePopCountInstruction, "not enabled");
 8664     int vlen_enc = vector_length_encoding(this);
 8665     __ vector_popcount_int($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
 8666                            $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
 8667   %}
 8668   ins_pipe( pipe_slow );
 8669 %}
 8670 
 8671 instruct vpopcountL_popcntd(vec dst, vec src) %{
 8672   predicate(VM_Version::supports_avx512_vpopcntdq());
 8673   match(Set dst (PopCountVL src));
 8674   format %{ "vector_popcount_long  $dst, $src\t! vector popcount packedL" %}
 8675   ins_encode %{
 8676     assert(UsePopCountInstruction, "not enabled");
 8677     int vlen_enc = vector_length_encoding(this, $src);
 8678     __ vector_popcount_long($dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg, xnoreg, noreg, vlen_enc);
 8679   %}
 8680   ins_pipe( pipe_slow );
 8681 %}
 8682 
 8683 instruct vpopcountL(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp, rFlagsReg cc) %{
 8684   predicate(!VM_Version::supports_avx512_vpopcntdq());
 8685   match(Set dst (PopCountVL src));
 8686   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp, KILL cc);
 8687   format %{ "vector_popcount_long  $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
 8688   ins_encode %{
 8689     assert(UsePopCountInstruction, "not enabled");
 8690     int vlen_enc = vector_length_encoding(this, $src);
 8691     __ vector_popcount_long($dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister,
 8692                            $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
 8693   %}
 8694   ins_pipe( pipe_slow );
 8695 %}
 8696 
 8697 // --------------------------------- Bitwise Ternary Logic ----------------------------------
 8698 
 8699 instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
 8700   match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
 8701   effect(TEMP dst);
 8702   format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
 8703   ins_encode %{
 8704     int vector_len = vector_length_encoding(this);
 8705     __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len);
 8706   %}
 8707   ins_pipe( pipe_slow );
 8708 %}
 8709 
 8710 instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{
 8711   predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8);
 8712   match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
 8713   effect(TEMP dst);
 8714   format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
 8715   ins_encode %{
 8716     int vector_len = vector_length_encoding(this);

 8951   %}
 8952   ins_pipe( pipe_slow );
 8953 %}
 8954 
 8955 instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
 8956   predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == NULL);
 8957   match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size)));
 8958   match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size)));
 8959   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
 8960   format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
 8961   ins_encode %{
 8962     int opcode = this->ideal_Opcode();
 8963     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
 8964     int mask_len = Matcher::vector_length(this, $mask);
 8965     int vlen_enc = vector_length_encoding(this, $mask);
 8966     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
 8967                              $tmp$$Register, mask_len, mbt, vlen_enc);
 8968   %}
 8969   ins_pipe( pipe_slow );
 8970 %}
 8971 #endif // _LP64
 8972 
 8973 // ---------------------------------- Vector Masked Operations ------------------------------------
 8974 
 8975 instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{
 8976   match(Set dst (AddVB (Binary dst src2) mask));
 8977   match(Set dst (AddVS (Binary dst src2) mask));
 8978   match(Set dst (AddVI (Binary dst src2) mask));
 8979   match(Set dst (AddVL (Binary dst src2) mask));
 8980   match(Set dst (AddVF (Binary dst src2) mask));
 8981   match(Set dst (AddVD (Binary dst src2) mask));
 8982   format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
 8983   ins_encode %{
 8984     int vlen_enc = vector_length_encoding(this);
 8985     BasicType bt = Matcher::vector_element_basic_type(this);
 8986     int opc = this->ideal_Opcode();
 8987     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
 8988                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
 8989   %}
 8990   ins_pipe( pipe_slow );
 8991 %}
 8992 

 1224     case 32: return Assembler::AVX_256bit;
 1225     case 64: return Assembler::AVX_512bit;
 1226 
 1227     default: {
 1228       ShouldNotReachHere();
 1229       return Assembler::AVX_NoVec;
 1230     }
 1231   }
 1232 }
 1233 
 1234 static inline Assembler::AvxVectorLen vector_length_encoding(const Node* n) {
 1235   return vector_length_encoding(Matcher::vector_length_in_bytes(n));
 1236 }
 1237 
 1238 static inline Assembler::AvxVectorLen vector_length_encoding(const MachNode* use, MachOper* opnd) {
 1239   uint def_idx = use->operand_index(opnd);
 1240   Node* def = use->in(def_idx);
 1241   return vector_length_encoding(def);
 1242 }
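
Note the asymmetry between the overloads above: vector_length_encoding(this) encodes the
vector length of the node's own result, while the (use, opnd) overload walks back to the
operand's defining node and encodes that node's length. The distinction matters whenever a
rule's input and output vectors have different widths; a hedged illustration (mirroring the
PopCountVL rules later in this file, where a LONG-lane source can feed an INT-typed result):

    // Illustration only: for a PopCountVL whose result is consumed as an INT
    // vector, the source (LONG lanes) is twice as wide in bytes as the result,
    // so the instruction must be encoded with the source operand's width.
    int res_enc = vector_length_encoding(this);        // width of the result
    int src_enc = vector_length_encoding(this, $src);  // width of the $src input
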
 1243 
 1244 static inline bool is_vector_popcount_predicate(BasicType bt) {
 1245   return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
 1246          (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
 1247 }
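
The predicate above keys the direct-instruction popcount patterns off two distinct AVX-512
extensions: AVX512_BITALG supplies VPOPCNTB/VPOPCNTW for byte and short lanes, while
AVX512_VPOPCNTDQ supplies VPOPCNTD/VPOPCNTQ for int and long lanes. A standalone sketch of
the same dispatch; the enum and feature flags are simplified stand-ins here, not the
HotSpot definitions:

    #include <cstdio>

    // Stand-ins for the HotSpot types and CPU feature queries.
    enum BasicType { T_BYTE, T_SHORT, T_INT, T_LONG };
    static bool supports_avx512_bitalg    = true;  // VPOPCNTB / VPOPCNTW
    static bool supports_avx512_vpopcntdq = true;  // VPOPCNTD / VPOPCNTQ

    static bool is_subword(BasicType bt) { return bt == T_BYTE || bt == T_SHORT; }

    // Mirrors is_vector_popcount_predicate: subword lanes need BITALG,
    // int/long lanes need VPOPCNTDQ; otherwise the AVX fallback rule matches.
    static bool has_direct_vector_popcount(BasicType bt) {
      return (is_subword(bt) && supports_avx512_bitalg) ||
             (!is_subword(bt) && supports_avx512_vpopcntdq);
    }

    int main() {
      std::printf("byte lanes: %d\n", has_direct_vector_popcount(T_BYTE));
      std::printf("long lanes: %d\n", has_direct_vector_popcount(T_LONG));
    }
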
 1248 
 1249 static inline bool is_unsigned_booltest_pred(int bt) {
 1250   return  ((bt & BoolTest::unsigned_compare) == BoolTest::unsigned_compare);
 1251 }
 1252 
 1253 static inline bool is_clz_non_subword_predicate_evex(BasicType bt, int vlen_bytes) {
 1254   return is_non_subword_integral_type(bt) && VM_Version::supports_avx512cd() &&
 1255            (VM_Version::supports_avx512vl() || vlen_bytes == 64);
 1256 }
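
This predicate gates the rules below that use VPLZCNTD/VPLZCNTQ: the instructions come
with AVX512CD, and sub-512-bit operands additionally require AVX512VL. A scalar model of
what the instruction computes per 32-bit lane (illustration only):

    #include <cstdint>

    // Scalar model of VPLZCNTD (AVX512CD): count leading zero bits
    // independently in each lane; an all-zero lane yields the lane width, 32.
    static int lzcnt32(uint32_t x) {
      int n = 0;
      for (uint32_t bit = 0x80000000u; bit != 0 && (x & bit) == 0; bit >>= 1) n++;
      return n;
    }
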
 1257 
 1258 class Node::PD {
 1259 public:
 1260   enum NodeFlags {
 1261     Flag_intel_jcc_erratum = Node::_last_flag << 1,
 1262     _last_flag             = Flag_intel_jcc_erratum
 1263   };
 1264 };
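
Node::PD extends the shared Node flag enumeration with a platform-dependent bit by
continuing from Node::_last_flag. A minimal sketch of the bit-allocation idiom, with
assumed values (the real _last_flag lives in opto/node.hpp):

    // Assumed value for illustration; the shared enum ends at some bit N.
    enum SharedNodeFlags   { _last_shared_flag = 1 << 5 };
    enum PlatformNodeFlags {
      Flag_intel_jcc_erratum = _last_shared_flag << 1,  // next free bit
      _last_platform_flag    = Flag_intel_jcc_erratum
    };
    static_assert((Flag_intel_jcc_erratum & _last_shared_flag) == 0,
                  "platform flag must not collide with shared flags");
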
 1265 
 1266 %} // end source_hpp
 1267 
 1268 source %{
 1269 
 1270 #include "opto/addnode.hpp"
 1271 #include "c2_intelJccErratum_x86.hpp"
 1272 
 1273 void PhaseOutput::pd_perform_mach_node_analysis() {
 1274   if (VM_Version::has_intel_jcc_erratum()) {
 1275     int extra_padding = IntelJccErratum::tag_affected_machnodes(C, C->cfg(), C->regalloc());
 1276     _buf_sizes._code += extra_padding;
 1277   }
 1398 //=============================================================================
 1399 const bool Matcher::match_rule_supported(int opcode) {
 1400   if (!has_match_rule(opcode)) {
 1401     return false; // no match rule present
 1402   }
 1403   const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false);
 1404   switch (opcode) {
 1405     case Op_AbsVL:
 1406     case Op_StoreVectorScatter:
 1407       if (UseAVX < 3) {
 1408         return false;
 1409       }
 1410       break;
 1411     case Op_PopCountI:
 1412     case Op_PopCountL:
 1413       if (!UsePopCountInstruction) {
 1414         return false;
 1415       }
 1416       break;
 1417     case Op_PopCountVI:
 1418       if (UseAVX < 2) {
 1419         return false;
 1420       }
 1421       break;
 1422     case Op_PopCountVL:
 1423       if (UseAVX < 2) {
 1424         return false;
 1425       }
 1426       break;
 1427     case Op_MulVI:
 1428       if ((UseSSE < 4) && (UseAVX < 1)) { // only with SSE4_1 or AVX
 1429         return false;
 1430       }
 1431       break;
 1432     case Op_MulVL:
 1433       if (UseSSE < 4) { // only with SSE4_1 or AVX
 1434         return false;
 1435       }
 1436       break;
 1437     case Op_MulReductionVL:
 1438       if (!VM_Version::supports_avx512dq()) {
 1439         return false;
 1440       }
 1441       break;
 1442     case Op_AddReductionVL:
 1443       if (UseSSE < 2) { // requires at least SSE2

 1618       break;
 1619     case Op_MulAddVS2VI:
 1620     case Op_RShiftVL:
 1621     case Op_AbsVD:
 1622     case Op_NegVD:
 1623       if (UseSSE < 2) {
 1624         return false;
 1625       }
 1626       break;
 1627 #endif // !LP64
 1628     case Op_SignumF:
 1629       if (UseSSE < 1) {
 1630         return false;
 1631       }
 1632       break;
 1633     case Op_SignumD:
 1634       if (UseSSE < 2) {
 1635         return false;
 1636       }
 1637       break;
 1638     case Op_CompressM:
 1639       if (!VM_Version::supports_avx512vl() || !VM_Version::supports_bmi2()) {
 1640         return false;
 1641       }
 1642       break;
 1643     case Op_CompressV:
 1644     case Op_ExpandV:
 1645       if (!VM_Version::supports_avx512vl()) {
 1646         return false;
 1647       }
 1648       break;
 1649     case Op_SqrtF:
 1650       if (UseSSE < 1) {
 1651         return false;
 1652       }
 1653       break;
 1654     case Op_SqrtD:
 1655 #ifdef _LP64
 1656       if (UseSSE < 2) {
 1657         return false;
 1658       }
 1659 #else
 1660       // x86_32.ad has a special match rule for SqrtD.
 1661       // Together with common x86 rules, this handles all UseSSE cases.
 1662 #endif
 1663       break;
 1664   }
 1665   return true;  // Match rules are supported by default.
 1666 }
 1667 
 1668 //------------------------------------------------------------------------
 1669 
 1670 static inline bool is_pop_count_instr_target(BasicType bt) {
 1671   return (is_subword_type(bt) && VM_Version::supports_avx512_bitalg()) ||
 1672          (is_non_subword_integral_type(bt) && VM_Version::supports_avx512_vpopcntdq());
 1673 }
 1674 
 1675 // Identify extra cases in which we might want to provide match rules for vector nodes and
 1676 // other intrinsics, guarded by vector length (vlen) and element type (bt).
 1677 const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
 1678   const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false);
 1679   if (!match_rule_supported(opcode)) {
 1680     return false;
 1681   }
 1682   // Matcher::vector_size_supported() restricts vector sizes in the following way (see Matcher::vector_width_in_bytes):
 1683   //   * SSE2 supports 128bit vectors for all types;
 1684   //   * AVX1 supports 256bit vectors only for FLOAT and DOUBLE types;
 1685   //   * AVX2 supports 256bit vectors for all types;
 1686   //   * AVX512F supports 512bit vectors only for INT, FLOAT, and DOUBLE types;
 1687   //   * AVX512BW supports 512bit vectors for BYTE, SHORT, and CHAR types.
 1688 // There's also a lower limit on the supported vector size: 2 elements (or 4 bytes for BYTE).
 1689   // And MaxVectorSize is taken into account as well.
 1690   if (!vector_size_supported(bt, vlen)) {
 1691     return false;
 1692   }
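
The size policy described above amounts to a per-type maximum vector width. A simplified
model, under the stated rules only (this is not the actual Matcher::vector_width_in_bytes;
BasicType, UseSSE/UseAVX and the feature query are stand-ins, and MaxVectorSize would
clamp the result further):

    enum BasicType { T_BYTE, T_SHORT, T_CHAR, T_INT, T_LONG, T_FLOAT, T_DOUBLE };

    static int  UseSSE = 4, UseAVX = 3;
    static bool supports_avx512bw = true;

    static int max_vector_width_in_bytes(BasicType bt) {
      bool fp      = (bt == T_FLOAT || bt == T_DOUBLE);
      bool subword = (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR);
      if (UseAVX >= 3) {
        if (bt == T_INT || fp)            return 64;  // AVX512F
        if (subword && supports_avx512bw) return 64;  // AVX512BW
      }
      if (UseAVX >= 2)                    return 32;  // AVX2: all types
      if (UseAVX >= 1 && fp)              return 32;  // AVX1: FLOAT/DOUBLE only
      if (UseSSE >= 2)                    return 16;  // SSE2: all types
      return 0;
    }
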
 1693   // Special cases which require vector length follow:
 1694   //   * implementation limitations

 1860       }
 1861       break;
 1862     case Op_RoundVD:
 1863       if (!VM_Version::supports_avx512dq()) {
 1864         return false;
 1865       }
 1866       break;
 1867     case Op_VectorCastF2X:
 1868       if (is_subword_type(bt) || bt == T_LONG) {
 1869         return false;
 1870       }
 1871       break;
 1872     case Op_MulReductionVI:
 1873       if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
 1874         return false;
 1875       }
 1876       break;
 1877     case Op_LoadVectorGatherMasked:
 1878     case Op_StoreVectorScatterMasked:
 1879     case Op_StoreVectorScatter:
 1880       if (is_subword_type(bt)) {
 1881         return false;
 1882       } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 1883         return false;
 1884       }
 1885       // fallthrough
 1886     case Op_LoadVectorGather:
 1887       if (size_in_bits == 64) {
 1888         return false;
 1889       }
 1890       break;
 1891     case Op_MaskAll:
 1892       if (!VM_Version::supports_evex()) {
 1893         return false;
 1894       }
 1895       if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) {
 1896         return false;
 1897       }
 1898       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 1899         return false;
 1900       }
 1901       break;
 1902     case Op_VectorMaskCmp:
 1903       if (vlen < 2 || size_in_bits < 32) {
 1904         return false;
 1905       }
 1906       break;
 1907     case Op_CompressM:
 1908       if (UseAVX < 3 || !VM_Version::supports_bmi2()) {
 1909         return false;
 1910       }
 1911       break;
 1912     case Op_CompressV:
 1913     case Op_ExpandV:
 1914       if (is_subword_type(bt) && !VM_Version::supports_avx512_vbmi2()) {
 1915         return false;
 1916       }
 1917       if (size_in_bits < 128) {
 1918         return false;
 1919       }
 1920       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
 1921         return false;
 1922       }
 1923       break;
 1924     case Op_VectorLongToMask:
 1925       if (UseAVX < 1 || !is_LP64) {
 1926         return false;
 1927       }
 1928       if (UseAVX < 3 && !VM_Version::supports_bmi2()) {
 1929         return false;
 1930       }
 1931       break;
 1932     case Op_PopCountVI:
 1933     case Op_PopCountVL: {
 1934         if (!is_pop_count_instr_target(bt) &&
 1935             (size_in_bits == 512) && !VM_Version::supports_avx512bw()) {
 1936           return false;
 1937         }
 1938       }
 1939       break;
 1940     case Op_ReverseV:
 1941     case Op_ReverseBytesV:
 1942       if (UseAVX < 2) {
 1943         return false;
 1944       }
 1945       break;
 1946     case Op_CountTrailingZerosV:
 1947     case Op_CountLeadingZerosV:
 1948       if (UseAVX < 2) {
 1949         return false;
 1950       }
 1951       break;
 1952   }
 1953   return true;  // Match rules are supported by default.
 1954 }
 1955 
 1956 const bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
 1957   // The ADLC-based match_rule_supported routine checks for the existence of a pattern
 1958   // based on the IR opcode. Most unary/binary/ternary masked operations share the IR
 1959   // nodes of their non-masked counterparts, with the mask edge being the differentiator.
 1960   // This routine therefore checks strictly for the existence of masked operation
 1961   // patterns: it returns false for every opcode apart from the ones whose masked
 1962   // instruction patterns are defined in this file.
 1963   if (!match_rule_supported_vector(opcode, vlen, bt)) {
 1964     return false;
 1965   }
 1966 
 1967   const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false);
 1968   int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;

 2076     case Op_VectorRearrange:
 2077       if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
 2078         return false; // Implementation limitation
 2079       }
 2080       if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
 2081         return false; // Implementation limitation
 2082       } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) {
 2083         return false; // Implementation limitation
 2084       }
 2085       return true;
 2086 
 2087     // Binary Logical operations
 2088     case Op_AndVMask:
 2089     case Op_OrVMask:
 2090     case Op_XorVMask:
 2091       if (vlen > 16 && !VM_Version::supports_avx512bw()) {
 2092         return false; // Implementation limitation
 2093       }
 2094       return true;
 2095 
 2096     case Op_PopCountVI:
 2097     case Op_PopCountVL:
 2098       if (!is_pop_count_instr_target(bt)) {
 2099         return false;
 2100       }
 2101       return true;
 2102 
 2103     case Op_MaskAll:
 2104       return true;
 2105 
 2106     case Op_CountLeadingZerosV:
 2107       if ((bt == T_INT || bt == T_LONG) && VM_Version::supports_avx512cd()) {
 2108         return true;
 2109       }  // intentional fall-through to default
 2110     default:
 2111       return false;
 2112   }
 2113 }
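
Every opcode this routine admits ultimately lowers to an EVEX merge-masked instruction via
evmasked_op (see vadd_reg_masked and the rest of the Vector Masked Operations section
below). The lane-level semantics of merge masking, as a scalar sketch (illustration only):

    #include <cstdint>
    #include <cstdio>

    // Scalar model of an EVEX merge-masked vector add, the semantics behind
    // patterns like (AddVI (Binary dst src2) mask): lanes whose mask bit is
    // clear keep the destination's old value ("merge", as opposed to "zero",
    // masking).
    static void masked_add_epi32(int32_t* dst, const int32_t* src2,
                                 uint16_t kmask, int lanes) {
      for (int i = 0; i < lanes; i++) {
        if (kmask & (1u << i)) {
          dst[i] += src2[i];
        }                       // else: dst[i] is left untouched
      }
    }

    int main() {
      int32_t dst[4]  = {1, 2, 3, 4};
      int32_t src2[4] = {10, 10, 10, 10};
      masked_add_epi32(dst, src2, 0x5 /* lanes 0 and 2 */, 4);
      std::printf("%d %d %d %d\n", dst[0], dst[1], dst[2], dst[3]);  // 11 2 13 4
    }
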
 2114 
 2115 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
 2116   assert(Matcher::is_generic_vector(generic_opnd), "not generic");
 2117   bool legacy = (generic_opnd->opcode() == LEGVEC);
 2118   if (!VM_Version::supports_avx512vlbwdq() && // KNL
 2119       is_temp && !legacy && (ideal_reg == Op_VecZ)) {
 2120     // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL.
 2121     return new legVecZOper();
 2122   }
 2123   if (legacy) {
 2124     switch (ideal_reg) {
 2125       case Op_VecS: return new legVecSOper();
 2126       case Op_VecD: return new legVecDOper();
 2127       case Op_VecX: return new legVecXOper();
 2128       case Op_VecY: return new legVecYOper();
 2129       case Op_VecZ: return new legVecZOper();

 8687   ins_pipe( pipe_slow );
 8688 %}
 8689 
 8690 // --------------------------------- Vector Multiply Add Add ----------------------------------
 8691 
 8692 instruct vmuladdaddS2I_reg(vec dst, vec src1, vec src2) %{
 8693   predicate(VM_Version::supports_avx512_vnni());
 8694   match(Set dst (AddVI (MulAddVS2VI src1 src2) dst));
 8695   format %{ "evpdpwssd $dst,$src1,$src2\t! muladdadd packedStoI" %}
 8696   ins_encode %{
 8697     assert(UseAVX > 2, "required");
 8698     int vlen_enc = vector_length_encoding(this);
 8699     __ evpdpwssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vlen_enc);
 8700   %}
 8701   ins_pipe( pipe_slow );
 8702   ins_cost(10);
 8703 %}
 8704 
 8705 // --------------------------------- PopCount --------------------------------------
 8706 
 8707 instruct vpopcount_integral_reg_evex(vec dst, vec src) %{
 8708   predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
 8709   match(Set dst (PopCountVI src));
 8710   match(Set dst (PopCountVL src));
 8711   ins_cost(400);
 8712   format %{ "vector_popcount_integral $dst, $src" %}
 8713   ins_encode %{
 8714     int opcode = this->ideal_Opcode();
 8715     int vlen_enc = vector_length_encoding(this, $src);
 8716     BasicType bt = Matcher::vector_element_basic_type(this, $src);
 8717     __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, k0, true, vlen_enc);
 8718     // TODO: Once auto-vectorizer supports ConvL2I operation, PopCountVL
 8719     // should be succeeded by its corresponding vector IR and the following
 8720     // special handling should be removed.
 8721     if (opcode == Op_PopCountVL && Matcher::vector_element_basic_type(this) == T_INT) {
 8722       __ evpmovqd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
 8723     }
 8724   %}
 8725   ins_pipe( pipe_slow );
 8726 %}
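
The evpmovqd fix-up above narrows each 64-bit count into a 32-bit lane when the PopCountVL
result is typed as an INT vector; the truncation is lossless because a 64-bit popcount is
at most 64. A scalar model of the combined operation (illustration only):

    #include <bitset>
    #include <cstdint>

    // Scalar model of PopCountVL followed by the evpmovqd narrowing: count
    // the bits of each 64-bit lane, then store each count in a 32-bit lane.
    static void popcount_long_to_int(const uint64_t* src, int32_t* dst, int lanes) {
      for (int i = 0; i < lanes; i++) {
        dst[i] = (int32_t)std::bitset<64>(src[i]).count();  // VPOPCNTQ + VPMOVQD
      }
    }
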
 8727 
 8728 instruct vpopcount_integral_reg_evex_masked(vec dst, vec src, kReg mask) %{
 8729   predicate(is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
 8730   match(Set dst (PopCountVI src mask));
 8731   match(Set dst (PopCountVL src mask));
 8732   format %{ "vector_popcount_integral_masked $dst, $src, $mask" %}
 8733   ins_encode %{
 8734     int vlen_enc = vector_length_encoding(this, $src);
 8735     BasicType bt = Matcher::vector_element_basic_type(this, $src);
 8736     __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
 8737     __ vector_popcount_integral_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, true, vlen_enc);
 8738   %}
 8739   ins_pipe( pipe_slow );
 8740 %}
 8741 
 8742 instruct vpopcount_avx_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegP rtmp) %{
 8743   predicate(!is_vector_popcount_predicate(Matcher::vector_element_basic_type(n->in(1))));
 8744   match(Set dst (PopCountVI src));
 8745   match(Set dst (PopCountVL src));
 8746   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
 8747   format %{ "vector_popcount_integral $dst, $src\t! using $xtmp1, $xtmp2, and $rtmp as TEMP" %}
 8748   ins_encode %{
 8749     int opcode = this->ideal_Opcode();
 8750     int vlen_enc = vector_length_encoding(this, $src);
 8751     BasicType bt = Matcher::vector_element_basic_type(this, $src);
 8752     __ vector_popcount_integral(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
 8753                                 $xtmp2$$XMMRegister, $rtmp$$Register, vlen_enc);
 8754     // TODO: Once auto-vectorizer supports ConvL2I operation, PopCountVL
 8755     // should be succeeded by its corresponding vector IR and the following
 8756     // special handling should be removed.
 8757     if (opcode == Op_PopCountVL && Matcher::vector_element_basic_type(this) == T_INT) {
 8758       if (VM_Version::supports_avx512vl()) {
 8759         __ evpmovqd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
 8760       } else {
 8761         assert(VM_Version::supports_avx2(), "");
 8762         __ vpshufd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
 8763         __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
 8764       }
 8765     }
 8766   %}
 8767   ins_pipe( pipe_slow );
 8768 %}
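
On hardware without AVX512VL the same narrowing is done with two shuffles: vpshufd with
immediate 8 (selector {0,2,0,0}) moves the low dword of each qword to the bottom of its
128-bit lane, and vpermq with immediate 8 then packs the low qwords of both lanes together.
A scalar model for the 256-bit case (illustration only):

    #include <cstdint>
    #include <cstring>

    // Model of the AVX2 narrowing above for a 256-bit vector of 4 longs:
    //   vpshufd(dst, dst, 8): per 128-bit lane, dwords become (0, 2, 0, 0)
    //   vpermq(dst, dst, 8):  qwords become (0, 2, 0, 0) across the vector
    // Net effect: low dwords d0, d2, d4, d6 end up packed in the low 128 bits.
    static void narrow_q_to_d_avx2_model(const uint64_t q[4], uint32_t d[4]) {
      uint32_t dw[8];
      std::memcpy(dw, q, sizeof(dw));  // dword view d0..d7 of q0..q3 (x86 is little-endian)
      d[0] = dw[0]; d[1] = dw[2]; d[2] = dw[4]; d[3] = dw[6];
    }
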
 8769 
 8770 // --------------------------------- Vector Trailing Zeros Count --------------------------------------
 8771 
 8772 instruct vcount_trailing_zeros_reg_evex(vec dst, vec src, vec xtmp, rRegP rtmp) %{
 8773   predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
 8774                                               Matcher::vector_length_in_bytes(n->in(1))));
 8775   match(Set dst (CountTrailingZerosV src));
 8776   effect(TEMP dst, TEMP xtmp, TEMP rtmp);
 8777   ins_cost(400);
 8778   format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp and $rtmp as TEMP" %}
 8779   ins_encode %{
 8780     int vlen_enc = vector_length_encoding(this, $src);
 8781     BasicType bt = Matcher::vector_element_basic_type(this, $src);
 8782     BasicType rbt = Matcher::vector_element_basic_type(this);
 8783     __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
 8784                                         xnoreg, xnoreg, $xtmp$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
 8785     // TODO: Once auto-vectorizer supports ConvL2I operation, CountTrailingZerosV
 8786     // should be succeeded by its corresponding vector IR and the following
 8787     // special handling should be removed.
 8788     if (bt == T_LONG && rbt == T_INT) {
 8789       __ evpmovqd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
 8790     }
 8791   %}
 8792   ins_pipe( pipe_slow );
 8793 %}
 8794 
 8795 instruct vcount_trailing_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
 8796   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
 8797             VM_Version::supports_avx512cd() &&
 8798             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
 8799   match(Set dst (CountTrailingZerosV src));
 8800   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
 8801   ins_cost(400);
 8802   format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3 and $rtmp as TEMP" %}
 8803   ins_encode %{
 8804     int vlen_enc = vector_length_encoding(this, $src);
 8805     BasicType bt = Matcher::vector_element_basic_type(this, $src);
 8806     __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
 8807                                         $xtmp2$$XMMRegister, xnoreg, $xtmp3$$XMMRegister, k0, $rtmp$$Register, vlen_enc);
 8808   %}
 8809   ins_pipe( pipe_slow );
 8810 %}
 8811 
 8812 instruct vcount_trailing_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, vec xtmp4, kReg ktmp, rRegP rtmp) %{
 8813   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
 8814   match(Set dst (CountTrailingZerosV src));
 8815   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP xtmp4, TEMP ktmp, TEMP rtmp);
 8816   ins_cost(400);
 8817   format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $xtmp4, $ktmp and $rtmp as TEMP" %}
 8818   ins_encode %{
 8819     int vlen_enc = vector_length_encoding(this, $src);
 8820     BasicType bt = Matcher::vector_element_basic_type(this, $src);
 8821     __ vector_count_trailing_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
 8822                                         $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $xtmp4$$XMMRegister,
 8823                                         $ktmp$$KRegister, $rtmp$$Register, vlen_enc);
 8824   %}
 8825   ins_pipe( pipe_slow );
 8826 %}
 8827 
 8828 instruct vcount_trailing_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
 8829   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
 8830   match(Set dst (CountTrailingZerosV src));
 8831   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
 8832   format %{ "vector_count_trailing_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
 8833   ins_encode %{
 8834     int vlen_enc = vector_length_encoding(this, $src);
 8835     BasicType bt = Matcher::vector_element_basic_type(this, $src);
 8836     BasicType rbt = Matcher::vector_element_basic_type(this);
 8837     __ vector_count_trailing_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
 8838                                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
 8839     // TODO: Once auto-vectorizer supports ConvL2I operation, CountTrailingZerosV
 8840     // should be succeeded by its corresponding vector IR and the following
 8841     // special handling should be removed.
 8842     if (bt == T_LONG && rbt == T_INT) {
 8843       if (VM_Version::supports_avx512vl()) {
 8844         __ evpmovqd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
 8845       } else {
 8846         assert(VM_Version::supports_avx2(), "");
 8847         __ vpshufd($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
 8848         __ vpermq($dst$$XMMRegister, $dst$$XMMRegister, 8, vlen_enc);
 8849       }
 8850     }
 8851   %}
 8852   ins_pipe( pipe_slow );
 8853 %}
 8854 
 8855 
 8856 // --------------------------------- Bitwise Ternary Logic ----------------------------------
 8857 
 8858 instruct vpternlog(vec dst, vec src2, vec src3, immU8 func) %{
 8859   match(Set dst (MacroLogicV (Binary dst src2) (Binary src3 func)));
 8860   effect(TEMP dst);
 8861   format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
 8862   ins_encode %{
 8863     int vector_len = vector_length_encoding(this);
 8864     __ vpternlogd($dst$$XMMRegister, $func$$constant, $src2$$XMMRegister, $src3$$XMMRegister, vector_len);
 8865   %}
 8866   ins_pipe( pipe_slow );
 8867 %}
 8868 
 8869 instruct vpternlog_mem(vec dst, vec src2, memory src3, immU8 func) %{
 8870   predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) > 8);
 8871   match(Set dst (MacroLogicV (Binary dst src2) (Binary (LoadVector src3) func)));
 8872   effect(TEMP dst);
 8873   format %{ "vpternlogd $dst,$src2,$src3,$func\t! vector ternary logic" %}
 8874   ins_encode %{
 8875     int vector_len = vector_length_encoding(this);

 9110   %}
 9111   ins_pipe( pipe_slow );
 9112 %}
 9113 
 9114 instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
 9115   predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == NULL);
 9116   match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size)));
 9117   match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size)));
 9118   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
 9119   format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
 9120   ins_encode %{
 9121     int opcode = this->ideal_Opcode();
 9122     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
 9123     int mask_len = Matcher::vector_length(this, $mask);
 9124     int vlen_enc = vector_length_encoding(this, $mask);
 9125     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
 9126                              $tmp$$Register, mask_len, mbt, vlen_enc);
 9127   %}
 9128   ins_pipe( pipe_slow );
 9129 %}
 9130 
 9131 // --------------------------------- Compress/Expand Operations ---------------------------
 9132 
 9133 instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{
 9134   match(Set dst (CompressV src mask));
 9135   match(Set dst (ExpandV src mask));
 9136   format %{ "vector_compress_expand $dst, $src, $mask" %}
 9137   ins_encode %{
 9138     int opcode = this->ideal_Opcode();
 9139     int vector_len = vector_length_encoding(this);
 9140     BasicType bt  = Matcher::vector_element_basic_type(this);
 9141     __ vector_compress_expand(opcode, $dst$$XMMRegister, $src$$XMMRegister, $mask$$KRegister, false, bt, vector_len);
 9142   %}
 9143   ins_pipe( pipe_slow );
 9144 %}
 9145 
 9146 instruct vcompress_mask_reg_evex(kReg dst, kReg mask, rRegL rtmp1, rRegL rtmp2, rFlagsReg cr) %{
 9147   match(Set dst (CompressM mask));
 9148   effect(TEMP rtmp1, TEMP rtmp2, KILL cr);
 9149   format %{ "mask_compress_evex $dst, $mask\t! using $rtmp1 and $rtmp2 as TEMP" %}
 9150   ins_encode %{
 9151     assert(this->in(1)->bottom_type()->isa_vectmask(), "");
 9152     int mask_len = Matcher::vector_length(this);
 9153     __ vector_mask_compress($dst$$KRegister, $mask$$KRegister, $rtmp1$$Register, $rtmp2$$Register, mask_len);
 9154   %}
 9155   ins_pipe( pipe_slow );
 9156 %}
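
vector_compress_expand lowers CompressV/ExpandV to the EVEX VPCOMPRESS/VPEXPAND family. A
scalar model of the two dataflows, assuming the unselected destination lanes are zeroed
(reading the 'false' argument above as no-merge is an assumption about the stub):

    #include <cstdint>

    // CompressV: pack the mask-selected lanes contiguously at the bottom.
    static void compress_epi32(const int32_t* src, int32_t* dst,
                               uint16_t kmask, int lanes) {
      int j = 0;
      for (int i = 0; i < lanes; i++)
        if (kmask & (1u << i)) dst[j++] = src[i];
      while (j < lanes) dst[j++] = 0;
    }

    // ExpandV: the inverse, scattering the low lanes out to the mask positions.
    static void expand_epi32(const int32_t* src, int32_t* dst,
                             uint16_t kmask, int lanes) {
      int j = 0;
      for (int i = 0; i < lanes; i++)
        dst[i] = (kmask & (1u << i)) ? src[j++] : 0;
    }
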
 9157 
 9158 #endif // _LP64
 9159 
 9160 // -------------------------------- Bit and Byte Reversal Vector Operations ------------------------
 9161 
 9162 instruct vreverse_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
 9163   predicate(!VM_Version::supports_gfni());
 9164   match(Set dst (ReverseV src));
 9165   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
 9166   format %{ "vector_reverse_bit_evex $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
 9167   ins_encode %{
 9168     int vec_enc = vector_length_encoding(this);
 9169     BasicType bt = Matcher::vector_element_basic_type(this);
 9170     __ vector_reverse_bit(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
 9171                           $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
 9172   %}
 9173   ins_pipe( pipe_slow );
 9174 %}
 9175 
 9176 instruct vreverse_reg_gfni(vec dst, vec src, vec xtmp, rRegI rtmp) %{
 9177   predicate(VM_Version::supports_gfni());
 9178   match(Set dst (ReverseV src));
 9179   effect(TEMP dst, TEMP xtmp, TEMP rtmp);
 9180   format %{ "vector_reverse_bit_gfni $dst, $src\t! using $rtmp and $xtmp as TEMP" %}
 9181   ins_encode %{
 9182     int vec_enc = vector_length_encoding(this);
 9183     BasicType bt  = Matcher::vector_element_basic_type(this);
 9184     InternalAddress addr = $constantaddress(T_LONG, vreplicate_imm(T_LONG, 0x8040201008040201L, 1));
 9185     __ vector_reverse_bit_gfni(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
 9186                                addr, $rtmp$$Register, vec_enc);
 9187   %}
 9188   ins_pipe( pipe_slow );
 9189 %}
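
The broadcast constant above, 0x8040201008040201, is the GF(2) affine matrix for which
GF2P8AFFINEQB reverses the bit order within every byte; reversing a wider lane then only
needs an additional byte swap. A scalar model of both steps (illustration; the byte-order
handling for wider lanes is an assumption about the stub, not code taken from it):

    #include <cstdint>

    // Per-byte bit reversal, the effect of GF2P8AFFINEQB with the matrix above.
    static uint8_t reverse_bits8(uint8_t b) {
      b = (uint8_t)(((b & 0xF0) >> 4) | ((b & 0x0F) << 4));
      b = (uint8_t)(((b & 0xCC) >> 2) | ((b & 0x33) << 2));
      b = (uint8_t)(((b & 0xAA) >> 1) | ((b & 0x55) << 1));
      return b;
    }

    // ReverseV on a 32-bit lane = per-byte bit reversal + byte swap of the lane.
    static uint32_t reverse_bits32(uint32_t x) {
      uint32_t r = 0;
      for (int i = 0; i < 4; i++)
        r = (r << 8) | reverse_bits8((uint8_t)(x >> (8 * i)));
      return r;
    }
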
 9190 
 9191 instruct vreverse_byte_reg(vec dst, vec src, rRegI rtmp) %{
 9192   predicate(VM_Version::supports_avx512bw() || Matcher::vector_length_in_bytes(n) < 64);
 9193   match(Set dst (ReverseBytesV src));
 9194   effect(TEMP dst, TEMP rtmp);
 9195   format %{ "vector_reverse_byte $dst, $src\t! using $rtmp as TEMP" %}
 9196   ins_encode %{
 9197     int vec_enc = vector_length_encoding(this);
 9198     BasicType bt = Matcher::vector_element_basic_type(this);
 9199     __ vector_reverse_byte(bt, $dst$$XMMRegister, $src$$XMMRegister, $rtmp$$Register, vec_enc);
 9200   %}
 9201   ins_pipe( pipe_slow );
 9202 %}
 9203 
 9204 instruct vreverse_byte64_reg(vec dst, vec src, vec xtmp1, vec xtmp2, rRegI rtmp) %{
 9205   predicate(!VM_Version::supports_avx512bw() && Matcher::vector_length_in_bytes(n) == 64);
 9206   match(Set dst (ReverseBytesV src));
 9207   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP rtmp);
 9208   format %{ "vector_reverse_byte $dst, $src\t! using $xtmp1, $xtmp2 and $rtmp as TEMP" %}
 9209   ins_encode %{
 9210     int vec_enc = vector_length_encoding(this);
 9211     BasicType bt = Matcher::vector_element_basic_type(this);
 9212     __ vector_reverse_byte64(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
 9213                              $xtmp2$$XMMRegister, $rtmp$$Register, vec_enc);
 9214   %}
 9215   ins_pipe( pipe_slow );
 9216 %}
 9217 
 9218 // ---------------------------------- Vector Count Leading Zeros -----------------------------------
 9219 
 9220 instruct vcount_leading_zeros_IL_reg_evex(vec dst, vec src) %{
 9221   predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
 9222                                               Matcher::vector_length_in_bytes(n->in(1))));
 9223   match(Set dst (CountLeadingZerosV src));
 9224   format %{ "vector_count_leading_zeros $dst, $src" %}
 9225   ins_encode %{
 9226      int vlen_enc = vector_length_encoding(this, $src);
 9227      BasicType bt = Matcher::vector_element_basic_type(this, $src);
 9228      BasicType rbt = Matcher::vector_element_basic_type(this);
 9229      __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg,
 9230                                         xnoreg, xnoreg, k0, noreg, true, vlen_enc);
 9231      // TODO: Once auto-vectorizer supports ConvL2I operation, CountLeadingZerosV
 9232      // should be succeeded by its corresponding vector IR and the following
 9233      // special handling should be removed.
 9234      if (rbt == T_INT && bt == T_LONG) {
 9235        __ evpmovqd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
 9236      }
 9237   %}
 9238   ins_pipe( pipe_slow );
 9239 %}
 9240 
 9241 instruct vcount_leading_zeros_IL_reg_evex_masked(vec dst, vec src, kReg mask) %{
 9242   predicate(is_clz_non_subword_predicate_evex(Matcher::vector_element_basic_type(n->in(1)),
 9243                                               Matcher::vector_length_in_bytes(n->in(1))));
 9244   match(Set dst (CountLeadingZerosV src mask));
 9245   format %{ "vector_count_leading_zeros $dst, $src, $mask" %}
 9246   ins_encode %{
 9247     int vlen_enc = vector_length_encoding(this, $src);
 9248     BasicType bt = Matcher::vector_element_basic_type(this, $src);
 9249     __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
 9250     __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, xnoreg, xnoreg,
 9251                                        xnoreg, $mask$$KRegister, noreg, true, vlen_enc);
 9252   %}
 9253   ins_pipe( pipe_slow );
 9254 %}
 9255 
 9256 instruct vcount_leading_zeros_short_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2) %{
 9257   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_SHORT &&
 9258             VM_Version::supports_avx512cd() &&
 9259             (VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64));
 9260   match(Set dst (CountLeadingZerosV src));
 9261   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2);
 9262   format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1 and $xtmp2 as TEMP" %}
 9263   ins_encode %{
 9264     int vlen_enc = vector_length_encoding(this, $src);
 9265     BasicType bt = Matcher::vector_element_basic_type(this, $src);
 9266     __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
 9267                                        $xtmp2$$XMMRegister, xnoreg, k0, noreg, true, vlen_enc);
 9268   %}
 9269   ins_pipe( pipe_slow );
 9270 %}
 9271 
 9272 instruct vcount_leading_zeros_byte_reg_evex(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, kReg ktmp, rRegP rtmp) %{
 9273   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_BYTE && VM_Version::supports_avx512vlbw());
 9274   match(Set dst (CountLeadingZerosV src));
 9275   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP ktmp, TEMP rtmp);
 9276   format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, $ktmp and $rtmp as TEMP" %}
 9277   ins_encode %{
 9278     int vlen_enc = vector_length_encoding(this, $src);
 9279     BasicType bt = Matcher::vector_element_basic_type(this, $src);
 9280     __ vector_count_leading_zeros_evex(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
 9281                                        $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $ktmp$$KRegister,
 9282                                        $rtmp$$Register, true, vlen_enc);
 9283   %}
 9284   ins_pipe( pipe_slow );
 9285 %}
 9286 
 9287 instruct vcount_leading_zeros_int_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3) %{
 9288   predicate(Matcher::vector_element_basic_type(n->in(1)) == T_INT &&
 9289             !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
 9290   match(Set dst (CountLeadingZerosV src));
 9291   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3);
 9292   format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2 and $xtmp3 as TEMP" %}
 9293   ins_encode %{
 9294     int vlen_enc = vector_length_encoding(this, $src);
 9295     BasicType bt = Matcher::vector_element_basic_type(this, $src);
 9296     __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
 9297                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, noreg, vlen_enc);
 9298   %}
 9299   ins_pipe( pipe_slow );
 9300 %}
 9301 
 9302 instruct vcount_leading_zeros_reg_avx(vec dst, vec src, vec xtmp1, vec xtmp2, vec xtmp3, rRegP rtmp) %{
 9303   predicate(Matcher::vector_element_basic_type(n->in(1)) != T_INT &&
 9304             !VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n->in(1)) < 64);
 9305   match(Set dst (CountLeadingZerosV src));
 9306   effect(TEMP dst, TEMP xtmp1, TEMP xtmp2, TEMP xtmp3, TEMP rtmp);
 9307   format %{ "vector_count_leading_zeros $dst, $src\t! using $xtmp1, $xtmp2, $xtmp3, and $rtmp as TEMP" %}
 9308   ins_encode %{
 9309     int vlen_enc = vector_length_encoding(this, $src);
 9310     BasicType bt = Matcher::vector_element_basic_type(this, $src);
 9311     BasicType rbt = Matcher::vector_element_basic_type(this);
 9312     __ vector_count_leading_zeros_avx(bt, $dst$$XMMRegister, $src$$XMMRegister, $xtmp1$$XMMRegister,
 9313                                       $xtmp2$$XMMRegister, $xtmp3$$XMMRegister, $rtmp$$Register, vlen_enc);
 9314     // TODO: Once auto-vectorizer supports ConvL2I operation, CountLeadingZerosV
 9315     // should be succeeded by its corresponding vector IR and the following
 9316     // special handling should be removed.
 9317     if (rbt == T_INT && bt == T_LONG) {
 9318       __ evpmovqd($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
 9319     }
 9320   %}
 9321   ins_pipe( pipe_slow );
 9322 %}
 9323 
 9324 // ---------------------------------- Vector Masked Operations ------------------------------------
 9325 
 9326 instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{
 9327   match(Set dst (AddVB (Binary dst src2) mask));
 9328   match(Set dst (AddVS (Binary dst src2) mask));
 9329   match(Set dst (AddVI (Binary dst src2) mask));
 9330   match(Set dst (AddVL (Binary dst src2) mask));
 9331   match(Set dst (AddVF (Binary dst src2) mask));
 9332   match(Set dst (AddVD (Binary dst src2) mask));
 9333   format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
 9334   ins_encode %{
 9335     int vlen_enc = vector_length_encoding(this);
 9336     BasicType bt = Matcher::vector_element_basic_type(this);
 9337     int opc = this->ideal_Opcode();
 9338     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
 9339                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
 9340   %}
 9341   ins_pipe( pipe_slow );
 9342 %}
 9343 