src/hotspot/cpu/x86/x86.ad

1357 
1358 //=============================================================================
1359 
1360   // Float masks come from different places depending on platform.
1361 #ifdef _LP64
1362   static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
1363   static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
1364   static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
1365   static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
1366 #else
1367   static address float_signmask()  { return (address)float_signmask_pool; }
1368   static address float_signflip()  { return (address)float_signflip_pool; }
1369   static address double_signmask() { return (address)double_signmask_pool; }
1370   static address double_signflip() { return (address)double_signflip_pool; }
1371 #endif
1372   static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
1373   static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); }
1374   static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); }
1375   static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); }
1376   static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); }
1377   static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); }
1378   static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); }
1379   static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); }
1380   static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); }
1381   static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); }
1382   static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); }
1383   static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); }
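  // Note (editorial): these stubs point at aligned constants such as
  // 0x7FFFFFFF per float lane (sign mask) and 0x80000000 (sign flip), so a
  // single ANDPS/ANDPD computes |x| and XORPS/XORPD computes -x. A minimal
  // scalar C++ sketch of the same bit trick (hypothetical helper name):
  //
  //   #include <cstdint>
  //   #include <cstring>
  //   double fabs_via_mask(double x) {
  //     uint64_t bits;
  //     std::memcpy(&bits, &x, sizeof bits);
  //     bits &= 0x7FFFFFFFFFFFFFFFULL;   // clear the sign bit
  //     std::memcpy(&x, &bits, sizeof x);
  //     return x;
  //   }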
1384 
1385 //=============================================================================
1386 const bool Matcher::match_rule_supported(int opcode) {
1387   if (!has_match_rule(opcode)) {
1388     return false; // no match rule present
1389   }
1390   const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false);
1391   switch (opcode) {
1392     case Op_AbsVL:
1393     case Op_StoreVectorScatter:
1394       if (UseAVX < 3) {
1395         return false;
1396       }

1539         return false;
1540       }
1541       break;
1542     case Op_MacroLogicV:
1543       if (UseAVX < 3 || !UseVectorMacroLogic) {
1544         return false;
1545       }
1546       break;
1547 
1548     case Op_VectorCmpMasked:
1549     case Op_VectorMaskGen:
1550     case Op_LoadVectorMasked:
1551     case Op_StoreVectorMasked:
1552       if (!is_LP64 || UseAVX < 3 || !VM_Version::supports_bmi2()) {
1553         return false;
1554       }
1555       break;
1556     case Op_VectorMaskFirstTrue:
1557     case Op_VectorMaskLastTrue:
1558     case Op_VectorMaskTrueCount:
1559       if (!is_LP64 || UseAVX < 1) {
1560          return false;
1561       }
1562       break;
1563     case Op_CopySignD:
1564     case Op_CopySignF:
1565       if (UseAVX < 3 || !is_LP64) {
1566         return false;
1567       }
1568       if (!VM_Version::supports_avx512vl()) {
1569         return false;
1570       }
1571       break;
1572 #ifndef _LP64
1573     case Op_AddReductionVF:
1574     case Op_AddReductionVD:
1575     case Op_MulReductionVF:
1576     case Op_MulReductionVD:
1577       if (UseSSE < 1) { // requires at least SSE
1578         return false;

1785       break;
1786     case Op_VectorCastL2X:
1787       if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) {
1788         return false;
1789       } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) {
1790         return false;
1791       }
1792       break;
1793     case Op_VectorCastF2X:
1794     case Op_VectorCastD2X:
1795       if (is_integral_type(bt)) {
1796         // Casts from FP to integral types require special fixup logic not easily
1797         // implementable with vectors.
1798         return false; // Implementation limitation
1799       } // fallthrough
1800     case Op_MulReductionVI:
1801       if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
1802         return false;
1803       }
1804       break;
1805     case Op_StoreVectorScatter:
1806       if (bt == T_BYTE || bt == T_SHORT) {
1807         return false;
1808       } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
1809         return false;
1810       }
1811       // fallthrough
1812     case Op_LoadVectorGather:
1813       if (size_in_bits == 64) {
1814         return false;
1815       }
1816       break;
1817     case Op_VectorMaskCmp:
1818       if (vlen < 2 || size_in_bits < 32) {
1819         return false;
1820       }
1821       break;
1822   }
1823   return true;  // By default, match rules are supported.
1824 }
1825 
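// Note (editorial): the checks above key off HotSpot's UseAVX level, where
// UseAVX == 1 means AVX, 2 means AVX2 and 3 means AVX-512 (EVEX encodings);
// is_LP64 additionally restricts rules to the 64-bit build.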
1826 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
1827   assert(Matcher::is_generic_vector(generic_opnd), "not generic");
1828   bool legacy = (generic_opnd->opcode() == LEGVEC);
1829   if (!VM_Version::supports_avx512vlbwdq() && // KNL
1830       is_temp && !legacy && (ideal_reg == Op_VecZ)) {
1831     // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL.
1832     return new legVecZOper();
1833   }
1834   if (legacy) {
1835     switch (ideal_reg) {
1836       case Op_VecS: return new legVecSOper();
1837       case Op_VecD: return new legVecDOper();
1838       case Op_VecX: return new legVecXOper();
1839       case Op_VecY: return new legVecYOper();
1840       case Op_VecZ: return new legVecZOper();
1841     }
1842   } else {
1843     switch (ideal_reg) {
1844       case Op_VecS: return new vecSOper();
1845       case Op_VecD: return new vecDOper();

1870   }
1871 }
1872 
1873 bool Matcher::is_generic_vector(MachOper* opnd) {
1874   switch (opnd->opcode()) {
1875     case VEC:
1876     case LEGVEC:
1877       return true;
1878     default:
1879       return false;
1880   }
1881 }
1882 
1883 //------------------------------------------------------------------------
1884 
1885 const RegMask* Matcher::predicate_reg_mask(void) {
1886   return &_VECTMASK_REG_mask;
1887 }
1888 
1889 const TypeVect* Matcher::predicate_reg_type(const Type* elemTy, int length) {
1890   return new TypeVectMask(TypeInt::BOOL, length);
1891 }
1892 
1893 // Max vector size in bytes. 0 if not supported.
1894 const int Matcher::vector_width_in_bytes(BasicType bt) {
1895   assert(is_java_primitive(bt), "only primitive type vectors");
1896   if (UseSSE < 2) return 0;
1897   // SSE2 supports 128bit vectors for all types.
1898   // AVX2 supports 256bit vectors for all types.
1899   // AVX512/EVEX supports 512bit vectors for all types.
1900   int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
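  // Worked example: UseAVX == 2 gives (1 << 2) * 8 = 32 bytes (YMM),
  // UseAVX == 3 gives (1 << 3) * 8 = 64 bytes (ZMM); otherwise 16 bytes (XMM).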
1901   // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
1902   if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
1903     size = (UseAVX > 2) ? 64 : 32;
1904   if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
1905     size = (VM_Version::supports_avx512bw()) ? 64 : 32;
1906   // Use flag to limit vector size.
1907   size = MIN2(size, (int)MaxVectorSize);
1908   // Minimum 2 values in vector (or 4 for bytes).
1909   switch (bt) {
1910   case T_DOUBLE:

3293   match(Set dst (SqrtF dst));
3294   format %{ "sqrtss  $dst, $dst" %}
3295   ins_encode %{
3296     __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister);
3297   %}
3298   ins_pipe(pipe_slow);
3299 %}
3300 
3301 // The sqrtsd instruction needs its destination register to be pre-initialized for best performance.
3302 // Therefore, only the instruct rule where the input is pre-loaded into the dst register is defined below.
3303 instruct sqrtD_reg(regD dst) %{
3304   predicate(UseSSE>=2);
3305   match(Set dst (SqrtD dst));
3306   format %{ "sqrtsd  $dst, $dst" %}
3307   ins_encode %{
3308     __ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister);
3309   %}
3310   ins_pipe(pipe_slow);
3311 %}
3312 
3313 // ---------------------------------------- VectorReinterpret ------------------------------------
3314 
3315 instruct reinterpret(vec dst) %{
3316   predicate(Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src
3317   match(Set dst (VectorReinterpret dst));
3318   ins_cost(125);
3319   format %{ "vector_reinterpret $dst\t!" %}
3320   ins_encode %{
3321     // empty
3322   %}
3323   ins_pipe( pipe_slow );
3324 %}
3325 
3326 instruct reinterpret_expand(vec dst, vec src, rRegP scratch) %{
3327   predicate(UseAVX == 0 &&
3328             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
3329   match(Set dst (VectorReinterpret src));
3330   ins_cost(125);
3331   effect(TEMP dst, TEMP scratch);
3332   format %{ "vector_reinterpret_expand $dst,$src\t! using $scratch as TEMP" %}
3333   ins_encode %{
3334     assert(Matcher::vector_length_in_bytes(this)       <= 16, "required");
3335     assert(Matcher::vector_length_in_bytes(this, $src) <=  8, "required");
3336 
3337     int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src);
3338     if (src_vlen_in_bytes == 4) {
3339       __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), $scratch$$Register);
3340     } else {
3341       assert(src_vlen_in_bytes == 8, "");
3342       __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), $scratch$$Register);
3343     }
3344     __ pand($dst$$XMMRegister, $src$$XMMRegister);
3345   %}
3346   ins_pipe( pipe_slow );
3347 %}
3348 
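// Note (editorial): reinterpreting to a wider vector must zero the new upper
// lanes. The 32/64-bit mask stubs presumably hold all-ones in their low 4 or
// 8 bytes, so loading the mask and PANDing it with the source copies the
// payload and clears everything above it, e.g. for an 8-byte source widened
// to 16 bytes: dst = src & 0x00000000_00000000_FFFFFFFF_FFFFFFFF.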
3349 instruct vreinterpret_expand4(legVec dst, vec src, rRegP scratch) %{
3350   predicate(UseAVX > 0 &&
3351             (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src
3352             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
3353   match(Set dst (VectorReinterpret src));
3354   ins_cost(125);
3355   effect(TEMP scratch);
3356   format %{ "vector_reinterpret_expand $dst,$src\t! using $scratch as TEMP" %}
3357   ins_encode %{
3358     __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, $scratch$$Register);
3359   %}
3360   ins_pipe( pipe_slow );
3361 %}
3362 
3363 
3364 instruct vreinterpret_expand(legVec dst, vec src) %{
3365   predicate(UseAVX > 0 &&
3366             (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src
3367             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
3368   match(Set dst (VectorReinterpret src));
3369   ins_cost(125);
3370   format %{ "vector_reinterpret_expand $dst,$src\t!" %}
3371   ins_encode %{
3372     switch (Matcher::vector_length_in_bytes(this, $src)) {
3373       case  8: __ movq   ($dst$$XMMRegister, $src$$XMMRegister); break;
3374       case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
3375       case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
3376       default: ShouldNotReachHere();
3377     }
3378   %}
3379   ins_pipe( pipe_slow );
3380 %}
3381 
3382 instruct reinterpret_shrink(vec dst, legVec src) %{
3383   predicate(Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst
3384   match(Set dst (VectorReinterpret src));
3385   ins_cost(125);
3386   format %{ "vector_reinterpret_shrink $dst,$src\t!" %}
3387   ins_encode %{
3388     switch (Matcher::vector_length_in_bytes(this)) {
3389       case  4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break;
3390       case  8: __ movq   ($dst$$XMMRegister, $src$$XMMRegister); break;
3391       case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
3392       case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
3393       default: ShouldNotReachHere();
3394     }
3395   %}
3396   ins_pipe( pipe_slow );
3397 %}
3398 
3399 // ----------------------------------------------------------------------------------------------------
3400 
3401 #ifdef _LP64
3402 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{
3403   match(Set dst (RoundDoubleMode src rmode));

3565   ins_cost(145);
3566   format %{ "store_vector $mem,$src\n\t" %}
3567   ins_encode %{
3568     switch (Matcher::vector_length_in_bytes(this, $src)) {
3569       case  4: __ movdl    ($mem$$Address, $src$$XMMRegister); break;
3570       case  8: __ movq     ($mem$$Address, $src$$XMMRegister); break;
3571       case 16: __ movdqu   ($mem$$Address, $src$$XMMRegister); break;
3572       case 32: __ vmovdqu  ($mem$$Address, $src$$XMMRegister); break;
3573       case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break;
3574       default: ShouldNotReachHere();
3575     }
3576   %}
3577   ins_pipe( pipe_slow );
3578 %}
3579 
3580 // ---------------------------------------- Gather ------------------------------------
3581 
3582 // Gather INT, LONG, FLOAT, DOUBLE
3583 
3584 instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{
3585   predicate(Matcher::vector_length_in_bytes(n) <= 32);
3586   match(Set dst (LoadVectorGather mem idx));
3587   effect(TEMP dst, TEMP tmp, TEMP mask);
3588   format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %}
3589   ins_encode %{
3590     assert(UseAVX >= 2, "sanity");
3591 
3592     int vlen_enc = vector_length_encoding(this);
3593     BasicType elem_bt = Matcher::vector_element_basic_type(this);
3594 
3595     assert(Matcher::vector_length_in_bytes(this) >= 16, "sanity");
3596     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
3597 
3598     if (vlen_enc == Assembler::AVX_128bit) {
3599       __ movdqu($mask$$XMMRegister, ExternalAddress(vector_all_bits_set()));
3600     } else {
3601       __ vmovdqu($mask$$XMMRegister, ExternalAddress(vector_all_bits_set()));
3602     }
3603     __ lea($tmp$$Register, $mem$$Address);
3604     __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc);
3605   %}
3606   ins_pipe( pipe_slow );
3607 %}
3608 
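// For reference, the AVX2 gather above corresponds roughly to this intrinsics
// sketch (gather8 is a hypothetical helper name):
//
//   #include <immintrin.h>
//   // Gather eight ints through 32-bit indices; scale 4 = sizeof(int).
//   __m256i gather8(const int* base, __m256i idx) {
//     return _mm256_i32gather_epi32(base, idx, /*scale=*/4);
//   }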
3609 instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
3610   predicate(Matcher::vector_length_in_bytes(n) == 64);
3611   match(Set dst (LoadVectorGather mem idx));
3612   effect(TEMP dst, TEMP tmp, TEMP ktmp);
3613   format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $ktmp as TEMP" %}
3614   ins_encode %{
3615     assert(UseAVX > 2, "sanity");
3616 
3617     int vlen_enc = vector_length_encoding(this);
3618     BasicType elem_bt = Matcher::vector_element_basic_type(this);
3619 
3620     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
3621 
3622     __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), $tmp$$Register);
3623     __ lea($tmp$$Register, $mem$$Address);
3624     __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
3625   %}
3626   ins_pipe( pipe_slow );
3627 %}
3628 
3629 // ====================Scatter=======================================
3630 
3631 // Scatter INT, LONG, FLOAT, DOUBLE
3632 
3633 instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
3634   predicate(UseAVX > 2);
3635   match(Set mem (StoreVectorScatter mem (Binary src idx)));
3636   effect(TEMP tmp, TEMP ktmp);
3637   format %{ "store_vector_scatter $mem, $idx, $src\t! using $ktmp and $tmp as TEMP" %}
3638   ins_encode %{
3639     int vlen_enc = vector_length_encoding(this, $src);
3640     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
3641 
3642     assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
3643     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
3644 
3645     __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), $tmp$$Register);
3646     __ lea($tmp$$Register, $mem$$Address);
3647     __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
3648   %}
3649   ins_pipe( pipe_slow );
3650 %}
3651 
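// For reference, the EVEX scatter above corresponds roughly to this intrinsics
// sketch (scatter16 is a hypothetical helper name):
//
//   #include <immintrin.h>
//   // Scatter sixteen ints through 32-bit indices; scale 4 = sizeof(int).
//   void scatter16(int* base, __m512i idx, __m512i val) {
//     _mm512_i32scatter_epi32(base, idx, val, /*scale=*/4);
//   }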
3652 // ====================REPLICATE=======================================
3653 
3654 // Replicate byte scalar to be vector
3655 instruct ReplB_reg(vec dst, rRegI src) %{
3656   match(Set dst (ReplicateB src));
3657   format %{ "replicateB $dst,$src" %}
3658   ins_encode %{
3659     uint vlen = Matcher::vector_length(this);
3660     if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
3661       assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW
3662       int vlen_enc = vector_length_encoding(this);
3663       __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc);
3664     } else if (VM_Version::supports_avx2()) {
3665       int vlen_enc = vector_length_encoding(this);
3666       __ movdl($dst$$XMMRegister, $src$$Register);
3667       __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
3668     } else {
3669       __ movdl($dst$$XMMRegister, $src$$Register);
3670       __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
3671       __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);

3877   ins_pipe( pipe_slow );
3878 %}
3879 
3880 // Replicate integer (4 byte) scalar zero to be vector
3881 instruct ReplI_zero(vec dst, immI_0 zero) %{
3882   match(Set dst (ReplicateI zero));
3883   format %{ "replicateI $dst,$zero" %}
3884   ins_encode %{
3885     uint vlen = Matcher::vector_length(this);
3886     if (vlen <= 4) {
3887       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
3888     } else {
3889       int vlen_enc = vector_length_encoding(this);
3890       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
3891     }
3892   %}
3893   ins_pipe( fpu_reg_reg );
3894 %}
3895 
3896 instruct ReplI_M1(vec dst, immI_M1 con) %{
3897   predicate(UseAVX > 0);
3898   match(Set dst (ReplicateB con));
3899   match(Set dst (ReplicateS con));
3900   match(Set dst (ReplicateI con));
3901   effect(TEMP dst);
3902   format %{ "vallones $dst" %}
3903   ins_encode %{
3904     int vector_len = vector_length_encoding(this);
3905     __ vallones($dst$$XMMRegister, vector_len);
3906   %}
3907   ins_pipe( pipe_slow );
3908 %}
3909 
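// Note (editorial): vallones materializes an all-ones vector without a
// constant load, typically via vpcmpeqd dst,dst,dst (every lane compares
// equal to itself) or, on EVEX targets, vpternlogd with an all-ones table.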
3910 // ====================ReplicateL=======================================
3911 
3912 #ifdef _LP64
3913 // Replicate long (8 byte) scalar to be vector
3914 instruct ReplL_reg(vec dst, rRegL src) %{
3915   match(Set dst (ReplicateL src));
3916   format %{ "replicateL $dst,$src" %}
3917   ins_encode %{

5843 %}
5844 
5845 instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{
5846   match(Set dst (CopySignD dst (Binary src zero)));
5847   ins_cost(100);
5848   effect(TEMP tmp1, TEMP tmp2);
5849   format %{ "CopySignD  $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
5850   ins_encode %{
5851     __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF);
5852     __ movq($tmp1$$XMMRegister, $tmp2$$Register);
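    // imm8 0xE4 selects, per bit, (C ? A : B) with A = $dst, B = $src and
    // C = $tmp1; $tmp1 is set in the 63 magnitude bits only, so the result
    // keeps the magnitude of $dst and takes its sign bit from $src.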
5853     __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
5854   %}
5855   ins_pipe( pipe_slow );
5856 %}
5857 #endif // _LP64
5858 
5859 // --------------------------------- Sqrt --------------------------------------
5860 
5861 instruct vsqrtF_reg(vec dst, vec src) %{
5862   match(Set dst (SqrtVF src));
5863   format %{ "vsqrtps  $dst,$src\t! sqrt packedF" %}
5864   ins_encode %{
5865     assert(UseAVX > 0, "required");
5866     int vlen_enc = vector_length_encoding(this);
5867     __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
5868   %}
5869   ins_pipe( pipe_slow );
5870 %}
5871 
5872 instruct vsqrtF_mem(vec dst, memory mem) %{
5873   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
5874   match(Set dst (SqrtVF (LoadVector mem)));
5875   format %{ "vsqrtps  $dst,$mem\t! sqrt packedF" %}
5876   ins_encode %{
5877     assert(UseAVX > 0, "required");
5878     int vlen_enc = vector_length_encoding(this);
5879     __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc);
5880   %}
5881   ins_pipe( pipe_slow );
5882 %}
5883 
5884 // Floating point vector sqrt
5885 instruct vsqrtD_reg(vec dst, vec src) %{
5886   match(Set dst (SqrtVD src));
5887   format %{ "vsqrtpd  $dst,$src\t! sqrt packedD" %}
5888   ins_encode %{
5889     assert(UseAVX > 0, "required");
5890     int vlen_enc = vector_length_encoding(this);
5891     __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
5892   %}
5893   ins_pipe( pipe_slow );
5894 %}
5895 
5896 instruct vsqrtD_mem(vec dst, memory mem) %{
5897   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
5898   match(Set dst (SqrtVD (LoadVector mem)));
5899   format %{ "vsqrtpd  $dst,$mem\t! sqrt packedD" %}
5900   ins_encode %{
5901     assert(UseAVX > 0, "required");
5902     int vlen_enc = vector_length_encoding(this);
5903     __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc);
5904   %}
5905   ins_pipe( pipe_slow );
5906 %}
5907 
5908 // ------------------------------ Shift ---------------------------------------
5909 
5910 // Left and right shift count vectors are the same on x86
5911 // (only lowest bits of xmm reg are used for count).
5912 instruct vshiftcnt(vec dst, rRegI cnt) %{
5913   match(Set dst (LShiftCntV cnt));
5914   match(Set dst (RShiftCntV cnt));
5915   format %{ "movdl    $dst,$cnt\t! load shift count" %}
5916   ins_encode %{
5917     __ movdl($dst$$XMMRegister, $cnt$$Register);
5918   %}

6887     int vlen_enc = vector_length_encoding(this);
6888     __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
6889   %}
6890   ins_pipe( pipe_slow );
6891 %}
6892 
6893 instruct vcastDtoF_reg(vec dst, vec src) %{
6894   predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
6895   match(Set dst (VectorCastD2X src));
6896   format %{ "vector_cast_d2x  $dst,$src\t!" %}
6897   ins_encode %{
6898     int vlen_enc = vector_length_encoding(this, $src);
6899     __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
6900   %}
6901   ins_pipe( pipe_slow );
6902 %}
6903 
6904 // --------------------------------- VectorMaskCmp --------------------------------------
6905 
6906 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{
6907   predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  8 && // src1
6908             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
6909             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
6910   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
6911   format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
6912   ins_encode %{
6913     int vlen_enc = vector_length_encoding(this, $src1);
6914     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
6915     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
6916       __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
6917     } else {
6918       __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
6919     }
6920   %}
6921   ins_pipe( pipe_slow );
6922 %}
6923 
6924 instruct evcmpFD(vec dst, vec src1, vec src2, immI8 cond, rRegP scratch, kReg ktmp) %{
6925   predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
6926             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
6927   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
6928   effect(TEMP scratch, TEMP ktmp);
6929   format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $scratch as TEMP" %}
6930   ins_encode %{
6931     int vlen_enc = Assembler::AVX_512bit;
6932     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
6933     KRegister mask = k0; // The comparison itself is not being masked.
6934     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
6935       __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
6936       __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, $scratch$$Register);
6937     } else {
6938       __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
6939       __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, $scratch$$Register);
6940     }
6941   %}
6942   ins_pipe( pipe_slow );
6943 %}
6944 
6945 instruct vcmp(legVec dst, legVec src1, legVec src2, immI8 cond, rRegP scratch) %{
6946   predicate((UseAVX <= 2 || !VM_Version::supports_avx512vl()) &&
6947             !is_unsigned_booltest_pred(n->in(2)->get_int()) &&
6948             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
6949             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
6950             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
6951   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
6952   effect(TEMP scratch);
6953   format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $scratch as TEMP" %}
6954   ins_encode %{
6955     int vlen_enc = vector_length_encoding(this, $src1);
6956     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
6957     Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
6958     __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, ww, vlen_enc, $scratch$$Register);
6959   %}
6960   ins_pipe( pipe_slow );
6961 %}
6962 
6963 instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec vtmp1, legVec vtmp2, rRegP scratch) %{
6964   predicate((UseAVX == 2 || !VM_Version::supports_avx512vl()) &&
6965             is_unsigned_booltest_pred(n->in(2)->get_int()) &&
6966             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  8 && // src1
6967             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 16 && // src1
6968             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
6969   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
6970   effect(TEMP vtmp1, TEMP vtmp2, TEMP scratch);
6971   format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $scratch as TEMP" %}
6972   ins_encode %{
6973     int vlen = Matcher::vector_length_in_bytes(this, $src1);
6974     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
6975     BasicType bt = Matcher::vector_element_basic_type(this, $src1);
6976     __ vpcmpu(bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen, $vtmp1$$XMMRegister,
6977               $vtmp2$$XMMRegister, $scratch$$Register);
6978   %}
6979   ins_pipe( pipe_slow );
6980 %}
6981 
6982 instruct vcmpu32(legVec dst, legVec src1, legVec src2, immI8 cond, legVec vtmp1, legVec vtmp2, legVec vtmp3, rRegP scratch) %{
6983   predicate((UseAVX == 2 || !VM_Version::supports_avx512vl()) &&
6984             is_unsigned_booltest_pred(n->in(2)->get_int()) &&
6985             Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 32 && // src1
6986             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
6987   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
6988   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP scratch);
6989   format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $scratch as TEMP" %}
6990   ins_encode %{
6991     int vlen = Matcher::vector_length_in_bytes(this, $src1);
6992     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
6993     BasicType bt = Matcher::vector_element_basic_type(this, $src1);
6994     __ vpcmpu32(bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen, $vtmp1$$XMMRegister,
6995                 $vtmp2$$XMMRegister, $vtmp3$$XMMRegister, $scratch$$Register);
6996   %}
6997   ins_pipe( pipe_slow );
6998 %}
6999 
7000 instruct evcmp(vec dst, vec src1, vec src2, immI8 cond, rRegP scratch, kReg ktmp) %{
7001   predicate(UseAVX > 2 &&
7002             (VM_Version::supports_avx512vl() ||
7003              Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1
7004              is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
7005   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
7006   effect(TEMP scratch, TEMP ktmp);
7007   format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $scratch as TEMP" %}
7008   ins_encode %{
7009     assert(UseAVX > 2, "required");
7010 
7011     int vlen_enc = vector_length_encoding(this, $src1);
7012     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
7013     bool is_unsigned = is_unsigned_booltest_pred($cond$$constant);
7014     KRegister mask = k0; // The comparison itself is not being masked.
7015     bool merge = false;
7016     BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
7017 
7018     switch (src1_elem_bt) {
7019       case T_BYTE: {
7020         __ evpcmpb($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
7021         __ evmovdqub($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, $scratch$$Register);
7022         break;
7023       }
7024       case T_SHORT: {
7025         __ evpcmpw($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
7026         __ evmovdquw($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, $scratch$$Register);
7027         break;
7028       }
7029       case T_INT: {
7030         __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
7031         __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, $scratch$$Register);
7032         break;
7033       }
7034       case T_LONG: {
7035         __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
7036         __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, $scratch$$Register);
7037         break;
7038       }
7039       default: assert(false, "%s", type2name(src1_elem_bt));
7040     }
7041   %}
7042   ins_pipe( pipe_slow );
7043 %}
7044 
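// For reference, the compare-then-expand idiom above corresponds roughly to
// this intrinsics sketch for the T_INT case (cmp_to_vector is a hypothetical
// helper name):
//
//   #include <immintrin.h>
//   __m512i cmp_to_vector(__m512i a, __m512i b) {
//     __mmask16 k = _mm512_cmp_epi32_mask(a, b, _MM_CMPINT_EQ);
//     return _mm512_maskz_set1_epi32(k, -1);  // all-ones lanes where k is set
//   }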
7045 // Extract
7046 
7047 instruct extractI(rRegI dst, legVec src, immU8 idx) %{
7048   predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
7049   match(Set dst (ExtractI src idx));
7050   match(Set dst (ExtractS src idx));
7051 #ifdef _LP64
7052   match(Set dst (ExtractB src idx));
7053 #endif
7054   format %{ "extractI $dst,$src,$idx\t!" %}
7055   ins_encode %{
7056     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

7169 // --------------------------------- Vector Blend --------------------------------------
7170 
7171 instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{
7172   predicate(UseAVX == 0);
7173   match(Set dst (VectorBlend (Binary dst src) mask));
7174   format %{ "vector_blend  $dst,$src,$mask\t! using $tmp as TEMP" %}
7175   effect(TEMP tmp);
7176   ins_encode %{
7177     assert(UseSSE >= 4, "required");
7178 
7179     if ($mask$$XMMRegister != $tmp$$XMMRegister) {
7180       __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister);
7181     }
7182     __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask
7183   %}
7184   ins_pipe( pipe_slow );
7185 %}
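// Note (editorial): SSE4.1 pblendvb takes its mask implicitly in xmm0, which
// is why $tmp is pinned to rxmm0 and the mask is copied into it first.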
7186 
7187 instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{
7188   predicate(UseAVX > 0 &&
7189             Matcher::vector_length_in_bytes(n) <= 32 &&
7190             is_integral_type(Matcher::vector_element_basic_type(n)));
7191   match(Set dst (VectorBlend (Binary src1 src2) mask));
7192   format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
7193   ins_encode %{
7194     int vlen_enc = vector_length_encoding(this);
7195     __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
7196   %}
7197   ins_pipe( pipe_slow );
7198 %}
7199 
7200 instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{
7201   predicate(UseAVX > 0 &&
7202             Matcher::vector_length_in_bytes(n) <= 32 &&
7203             !is_integral_type(Matcher::vector_element_basic_type(n)));
7204   match(Set dst (VectorBlend (Binary src1 src2) mask));
7205   format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
7206   ins_encode %{
7207     int vlen_enc = vector_length_encoding(this);
7208     __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
7209   %}
7210   ins_pipe( pipe_slow );
7211 %}
7212 
7213 instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, rRegP scratch, kReg ktmp) %{
7214   predicate(Matcher::vector_length_in_bytes(n) == 64);
7215   match(Set dst (VectorBlend (Binary src1 src2) mask));
7216   format %{ "vector_blend  $dst,$src1,$src2,$mask\t! using $scratch and $ktmp as TEMP" %}
7217   effect(TEMP scratch, TEMP ktmp);
7218   ins_encode %{
7219     int vlen_enc = Assembler::AVX_512bit;
7220     BasicType elem_bt = Matcher::vector_element_basic_type(this);
7221     __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, $scratch$$Register);
7222     __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
7223   %}
7224   ins_pipe( pipe_slow );
7225 %}
7226 
7227 // --------------------------------- ABS --------------------------------------
7228 // a = |a|
7229 instruct vabsB_reg(vec dst, vec src) %{
7230   match(Set dst (AbsVB  src));
7231   format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %}
7232   ins_encode %{
7233     uint vlen = Matcher::vector_length(this);
7234     if (vlen <= 16) {
7235       __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
7236     } else {
7237       int vlen_enc = vector_length_encoding(this);
7238       __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
7239     }
7240   %}
7241   ins_pipe( pipe_slow );
7242 %}
7243 
7244 instruct vabsS_reg(vec dst, vec src) %{
7245   match(Set dst (AbsVS  src));
7246   format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %}
7247   ins_encode %{
7248     uint vlen = Matcher::vector_length(this);
7249     if (vlen <= 8) {
7250       __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
7251     } else {
7252       int vlen_enc = vector_length_encoding(this);
7253       __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
7254     }
7255   %}
7256   ins_pipe( pipe_slow );
7257 %}
7258 
7259 instruct vabsI_reg(vec dst, vec src) %{
7260   match(Set dst (AbsVI  src));
7261   format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %}
7262   ins_encode %{
7263     uint vlen = Matcher::vector_length(this);
7264     if (vlen <= 4) {
7265       __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
7266     } else {
7267       int vlen_enc = vector_length_encoding(this);
7268       __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
7269     }
7270   %}
7271   ins_pipe( pipe_slow );
7272 %}
7273 
7274 instruct vabsL_reg(vec dst, vec src) %{
7275   match(Set dst (AbsVL  src));
7276   format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %}
7277   ins_encode %{
7278     assert(UseAVX > 2, "required");
7279     int vlen_enc = vector_length_encoding(this);
7280     if (!VM_Version::supports_avx512vl()) {
7281       vlen_enc = Assembler::AVX_512bit;
7282     }
7283     __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
7284   %}
7285   ins_pipe( pipe_slow );
7286 %}
7287 
7288 // --------------------------------- ABSNEG --------------------------------------
7289 
7290 instruct vabsnegF(vec dst, vec src, rRegI scratch) %{
7291   predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F
7292   match(Set dst (AbsVF src));
7293   match(Set dst (NegVF src));
7294   effect(TEMP scratch);
7295   format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %}

7328   effect(TEMP scratch);
7329   format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %}
7330   ins_encode %{
7331     int opcode = this->ideal_Opcode();
7332     uint vlen = Matcher::vector_length(this);
7333     if (vlen == 2) {
7334       assert(UseSSE >= 2, "required");
7335       __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $scratch$$Register);
7336     } else {
7337       int vlen_enc = vector_length_encoding(this);
7338       __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc, $scratch$$Register);
7339     }
7340   %}
7341   ins_pipe( pipe_slow );
7342 %}
7343 
7344 //------------------------------------- VectorTest --------------------------------------------
7345 
7346 #ifdef _LP64
7347 instruct vptest_alltrue_lt16(rRegI dst, legVec src1, legVec src2, legVec vtmp1, legVec vtmp2, rFlagsReg cr) %{
7348   predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 4 &&
7349             Matcher::vector_length_in_bytes(n->in(1)) < 16 &&
7350             static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
7351   match(Set dst (VectorTest src1 src2));
7352   effect(TEMP vtmp1, TEMP vtmp2, KILL cr);
7353   format %{ "vector_test $dst,$src1, $src2\t! using $vtmp1, $vtmp2 and $cr as TEMP" %}
7354   ins_encode %{
7355     int vlen = Matcher::vector_length_in_bytes(this, $src1);
7356     __ vectortest(BoolTest::overflow, vlen, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
7357     __ setb(Assembler::carrySet, $dst$$Register);
7358     __ movzbl($dst$$Register, $dst$$Register);
7359   %}
7360   ins_pipe( pipe_slow );
7361 %}
7362 
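// Note (editorial): BoolTest::overflow maps to (v)ptest, whose carry flag is
// set when the second operand has no bits outside the first; with an all-ones
// second operand CF == 1 exactly when every mask bit is set, hence
// setb(carrySet). The anytrue rules below test BoolTest::ne via ZF instead.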
7363 instruct vptest_alltrue(rRegI dst, legVec src1, legVec src2, rFlagsReg cr) %{
7364   predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16 &&
7365             Matcher::vector_length_in_bytes(n->in(1)) <  64 &&
7366             static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
7367   match(Set dst (VectorTest src1 src2));
7368   effect(KILL cr);
7369   format %{ "vector_test $dst,$src1, $src2\t! using $cr as TEMP" %}
7370   ins_encode %{
7371     int vlen = Matcher::vector_length_in_bytes(this, $src1);
7372     __ vectortest(BoolTest::overflow, vlen, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg, knoreg);
7373     __ setb(Assembler::carrySet, $dst$$Register);
7374     __ movzbl($dst$$Register, $dst$$Register);
7375   %}
7376   ins_pipe( pipe_slow );
7377 %}
7378 
7379 instruct vptest_alltrue_evex(rRegI dst, legVec src1, legVec src2, kReg ktmp, rFlagsReg cr) %{
7380   predicate(Matcher::vector_length_in_bytes(n->in(1)) == 64 &&
7381             static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
7382   match(Set dst (VectorTest src1 src2));
7383   effect(KILL cr, TEMP ktmp);
7384   format %{ "vector_test $dst,$src1, $src2\t! using $cr as TEMP" %}
7385   ins_encode %{
7386     int vlen = Matcher::vector_length_in_bytes(this, $src1);
7387     __ vectortest(BoolTest::overflow, vlen, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg, $ktmp$$KRegister);
7388     __ setb(Assembler::carrySet, $dst$$Register);
7389     __ movzbl($dst$$Register, $dst$$Register);
7390   %}
7391   ins_pipe( pipe_slow );
7392 %}
7393 
7394 instruct vptest_anytrue_lt16(rRegI dst, legVec src1, legVec src2, legVec vtmp, rFlagsReg cr) %{
7395   predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 4 &&
7396             Matcher::vector_length_in_bytes(n->in(1)) < 16 &&
7397             static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
7398   match(Set dst (VectorTest src1 src2));
7399   effect(TEMP vtmp, KILL cr);
7400   format %{ "vector_test_any_true $dst,$src1,$src2\t! using $vtmp, $cr as TEMP" %}
7401   ins_encode %{
7402     int vlen = Matcher::vector_length_in_bytes(this, $src1);
7403     __ vectortest(BoolTest::ne, vlen, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
7404     __ setb(Assembler::notZero, $dst$$Register);
7405     __ movzbl($dst$$Register, $dst$$Register);
7406   %}
7407   ins_pipe( pipe_slow );
7408 %}
7409 
7410 instruct vptest_anytrue(rRegI dst, legVec src1, legVec src2, rFlagsReg cr) %{
7411   predicate(Matcher::vector_length_in_bytes(n->in(1)) >= 16 &&
7412             Matcher::vector_length_in_bytes(n->in(1)) < 64  &&
7413             static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
7414   match(Set dst (VectorTest src1 src2));
7415   effect(KILL cr);
7416   format %{ "vector_test_any_true $dst,$src1,$src2\t! using $cr as TEMP" %}
7417   ins_encode %{
7418     int vlen = Matcher::vector_length_in_bytes(this, $src1);
7419     __ vectortest(BoolTest::ne, vlen, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg, knoreg);
7420     __ setb(Assembler::notZero, $dst$$Register);
7421     __ movzbl($dst$$Register, $dst$$Register);
7422   %}
7423   ins_pipe( pipe_slow );
7424 %}
7425 
7426 instruct vptest_anytrue_evex(rRegI dst, legVec src1, legVec src2, kReg ktmp, rFlagsReg cr) %{
7427   predicate(Matcher::vector_length_in_bytes(n->in(1)) == 64 &&
7428             static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
7429   match(Set dst (VectorTest src1 src2));
7430   effect(KILL cr, TEMP ktmp);
7431   format %{ "vector_test_any_true $dst,$src1,$src2\t! using $cr as TEMP" %}
7432   ins_encode %{
7433     int vlen = Matcher::vector_length_in_bytes(this, $src1);
7434     __ vectortest(BoolTest::ne, vlen, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg, $ktmp$$KRegister);
7435     __ setb(Assembler::notZero, $dst$$Register);
7436     __ movzbl($dst$$Register, $dst$$Register);
7437   %}
7438   ins_pipe( pipe_slow );
7439 %}
7440 
7441 instruct cmpvptest_anytrue_lt16(rFlagsReg cr, legVec src1, legVec src2, immI_0 zero, legVec vtmp) %{
7442   predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 &&
7443             Matcher::vector_length_in_bytes(n->in(1)->in(1)) < 16 &&
7444             static_cast<const VectorTestNode*>(n->in(1))->get_predicate() == BoolTest::ne);
7445   match(Set cr (CmpI (VectorTest src1 src2) zero));
7446   effect(TEMP vtmp);
7447   format %{ "cmp_vector_test_any_true $src1,$src2\t! using $vtmp as TEMP" %}
7448   ins_encode %{
7449     int vlen = Matcher::vector_length_in_bytes(this, $src1);
7450     __ vectortest(BoolTest::ne, vlen, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
7451   %}
7452   ins_pipe( pipe_slow );
7453 %}
7454 
7455 instruct cmpvptest_anytrue(rFlagsReg cr, legVec src1, legVec src2, immI_0 zero) %{
7456   predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 16 &&
7457             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <  64 &&
7458             static_cast<const VectorTestNode*>(n->in(1))->get_predicate() == BoolTest::ne);
7459   match(Set cr (CmpI (VectorTest src1 src2) zero));
7460   format %{ "cmp_vector_test_any_true $src1,$src2\t!" %}
7461   ins_encode %{
7462     int vlen = Matcher::vector_length_in_bytes(this, $src1);
7463     __ vectortest(BoolTest::ne, vlen, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg, knoreg);
7464   %}
7465   ins_pipe( pipe_slow );
7466 %}
7467 
7468 instruct cmpvptest_anytrue_evex(rFlagsReg cr, legVec src1, legVec src2, immI_0 zero, kReg ktmp) %{
7469   predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 &&
7470             static_cast<const VectorTestNode*>(n->in(1))->get_predicate() == BoolTest::ne);
7471   match(Set cr (CmpI (VectorTest src1 src2) zero));
7472   effect(TEMP ktmp);
7473   format %{ "cmp_vector_test_any_true $src1,$src2\t!" %}
7474   ins_encode %{
7475     int vlen = Matcher::vector_length_in_bytes(this, $src1);
7476     __ vectortest(BoolTest::ne, vlen, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg, $ktmp$$KRegister);
7477   %}
7478   ins_pipe( pipe_slow );
7479 %}
7480 #endif
7481 
7482 //------------------------------------- LoadMask --------------------------------------------
7483 
7484 instruct loadMask(legVec dst, legVec src) %{
7485   predicate(!VM_Version::supports_avx512vlbw());
7486   match(Set dst (VectorLoadMask src));
7487   effect(TEMP dst);
7488   format %{ "vector_loadmask_byte $dst,$src\n\t" %}
7489   ins_encode %{
7490     int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
7491     BasicType elem_bt = Matcher::vector_element_basic_type(this);
7492 
7493     __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true);
7494   %}
7495   ins_pipe( pipe_slow );
7496 %}
7497 
7498 instruct loadMask_evex(vec dst, vec src) %{
7499   predicate(VM_Version::supports_avx512vlbw());
7500   match(Set dst (VectorLoadMask src));
7501   effect(TEMP dst);
7502   format %{ "vector_loadmask_byte $dst,$src\n\t" %}
7503   ins_encode %{
7504     int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
7505     BasicType elem_bt = Matcher::vector_element_basic_type(this);
7506 
7507     __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, false);
7508   %}
7509   ins_pipe( pipe_slow );
7510 %}
7511 
7512 //------------------------------------- StoreMask --------------------------------------------
7513 
7514 instruct storeMask1B(vec dst, vec src, immI_1 size) %{
7515   predicate(Matcher::vector_length(n) < 64 || VM_Version::supports_avx512vlbw());
7516   match(Set dst (VectorStoreMask src size));
7517   format %{ "vector_store_mask $dst,$src\t!" %}
7518   ins_encode %{
7519     assert(UseSSE >= 3, "required");
7520     if (Matcher::vector_length_in_bytes(this) <= 16) {
7521       __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
7522     } else {
7523       assert(UseAVX >= 2, "required");
7524       int src_vlen_enc = vector_length_encoding(this, $src);
7525       __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
7526     }
7527   %}
7528   ins_pipe( pipe_slow );
7529 %}
7530 
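// Note (editorial): VectorStoreMask narrows lane-wide masks (0 or -1 per
// element) down to boolean bytes (0 or 1): a set short lane 0xFFFF becomes
// the byte 0x01 via abs plus saturating pack (in either order), while 0x0000
// stays 0x00.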
7531 instruct storeMask2B(vec dst, vec src, immI_2 size) %{
7532   predicate(Matcher::vector_length(n) <= 8);
7533   match(Set dst (VectorStoreMask src size));
7534   format %{ "vector_store_mask $dst,$src\n\t" %}
7535   ins_encode %{
7536     assert(UseSSE >= 3, "required");
7537     __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
7538     __ packsswb($dst$$XMMRegister, $dst$$XMMRegister);
7539   %}
7540   ins_pipe( pipe_slow );
7541 %}
7542 
7543 instruct vstoreMask2B(vec dst, vec src, immI_2 size) %{
7544   predicate(Matcher::vector_length(n) == 16 && !VM_Version::supports_avx512bw());
7545   match(Set dst (VectorStoreMask src size));
7546   effect(TEMP dst);
7547   format %{ "vector_store_mask $dst,$src\t!" %}
7548   ins_encode %{
7549     int vlen_enc = Assembler::AVX_128bit;
7550     __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
7551     __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
7552     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
7553   %}
7554   ins_pipe( pipe_slow );
7555 %}
7556 
7557 instruct vstoreMask2B_evex(vec dst, vec src, immI_2 size) %{
7558   predicate(VM_Version::supports_avx512bw());
7559   match(Set dst (VectorStoreMask src size));
7560   format %{ "vector_store_mask $dst,$src\t!" %}
7561   ins_encode %{
7562     int src_vlen_enc = vector_length_encoding(this, $src);
7563     int dst_vlen_enc = vector_length_encoding(this);
7564     __ evpmovwb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
7565     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
7566   %}
7567   ins_pipe( pipe_slow );
7568 %}
7569 
7570 instruct storeMask4B(vec dst, vec src, immI_4 size) %{
7571   predicate(Matcher::vector_length(n) <= 4 && UseAVX <= 2);
7572   match(Set dst (VectorStoreMask src size));
7573   format %{ "vector_store_mask $dst,$src\t!" %}
7574   ins_encode %{
7575     assert(UseSSE >= 3, "required");
7576     __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
7577     __ packssdw($dst$$XMMRegister, $dst$$XMMRegister);
7578     __ packsswb($dst$$XMMRegister, $dst$$XMMRegister);
7579   %}
7580   ins_pipe( pipe_slow );
7581 %}
7582 
7583 instruct vstoreMask4B(vec dst, vec src, immI_4 size) %{
7584   predicate(Matcher::vector_length(n) == 8 && UseAVX <= 2);
7585   match(Set dst (VectorStoreMask src size));
7586   format %{ "vector_store_mask $dst,$src\t!" %}
7587   effect(TEMP dst);
7588   ins_encode %{
7589     int vlen_enc = Assembler::AVX_128bit;
7590     __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
7591     __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
7592     __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
7593     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
7594   %}
7595   ins_pipe( pipe_slow );
7596 %}
7597 
7598 instruct vstoreMask4B_evex(vec dst, vec src, immI_4 size) %{
7599   predicate(UseAVX > 2);
7600   match(Set dst (VectorStoreMask src size));
7601   format %{ "vector_store_mask $dst,$src\t!" %}
7602   ins_encode %{
7603     int src_vlen_enc = vector_length_encoding(this, $src);
7604     int dst_vlen_enc = vector_length_encoding(this);
7605     if (!VM_Version::supports_avx512vl()) {
7606       src_vlen_enc = Assembler::AVX_512bit;
7607     }
7608     __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
7609     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
7610   %}
7611   ins_pipe( pipe_slow );
7612 %}
7613 
7614 instruct storeMask8B(vec dst, vec src, immI_8 size) %{
7615   predicate(Matcher::vector_length(n) == 2 && UseAVX <= 2);
7616   match(Set dst (VectorStoreMask src size));
7617   format %{ "vector_store_mask $dst,$src\t!" %}
7618   ins_encode %{
7619     assert(UseSSE >= 3, "required");
7620     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8);
7621     __ packssdw($dst$$XMMRegister, $dst$$XMMRegister);
7622     __ packsswb($dst$$XMMRegister, $dst$$XMMRegister);
7623     __ pabsb($dst$$XMMRegister, $dst$$XMMRegister);
7624   %}
7625   ins_pipe( pipe_slow );
7626 %}
7627 
7628 instruct storeMask8B_avx(vec dst, vec src, immI_8 size, legVec vtmp) %{
7629   predicate(Matcher::vector_length(n) == 4 && UseAVX <= 2);
7630   match(Set dst (VectorStoreMask src size));
7631   format %{ "vector_store_mask $dst,$src\t! using $vtmp as TEMP" %}
7632   effect(TEMP dst, TEMP vtmp);
7633   ins_encode %{
7634     int vlen_enc = Assembler::AVX_128bit;
7635     __ vpshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit);
7636     __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
7637     __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc);
7638     __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
7639     __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
7640     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
7641   %}
7642   ins_pipe( pipe_slow );
7643 %}
7644 
7645 instruct vstoreMask8B_evex(vec dst, vec src, immI_8 size) %{
7646   predicate(UseAVX > 2);
7647   match(Set dst (VectorStoreMask src size));
7648   format %{ "vector_store_mask $dst,$src\t!" %}
7649   ins_encode %{
7650     int src_vlen_enc = vector_length_encoding(this, $src);
7651     int dst_vlen_enc = vector_length_encoding(this);
7652     if (!VM_Version::supports_avx512vl()) {
7653       src_vlen_enc = Assembler::AVX_512bit;
7654     }
7655     __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
7656     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
7657   %}
7658   ins_pipe( pipe_slow );
7659 %}
7660 
7661 instruct vmaskcast(vec dst) %{
7662   predicate((Matcher::vector_length(n) == Matcher::vector_length(n->in(1))) &&
7663             (Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))));
7664   match(Set dst (VectorMaskCast dst));
7665   ins_cost(0);
7666   format %{ "vector_mask_cast $dst" %}
7667   ins_encode %{
7668     // empty
7669   %}
7670   ins_pipe(empty);
7671 %}
7672 
7673 //-------------------------------- Load Iota Indices ----------------------------------
7674 
7675 instruct loadIotaIndices(vec dst, immI_0 src, rRegP scratch) %{
7676   predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
7677   match(Set dst (VectorLoadConst src));
7678   effect(TEMP scratch);
7679   format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
7680   ins_encode %{

8210   effect(TEMP temp);
8211   ins_encode %{
8212     __ mov64($temp$$Register, (0xFFFFFFFFFFFFFFFFUL >> (64 - $len$$constant)));
8213     __ kmovql($dst$$KRegister, $temp$$Register);
8214   %}
8215   ins_pipe( pipe_slow );
8216 %}
8217 
8218 instruct vmasked_store64(memory mem, vec src, kReg mask) %{
8219   match(Set mem (StoreVectorMasked mem (Binary src mask)));
8220   format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
8221   ins_encode %{
8222     const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
8223     BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
8224     int vector_len = vector_length_encoding(src_node);
8225     __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, vector_len);
8226   %}
8227   ins_pipe( pipe_slow );
8228 %}
8229 
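// For reference, the masked store above corresponds roughly to this
// intrinsics sketch for 32-bit elements (masked_store16 is a hypothetical
// helper name):
//
//   #include <immintrin.h>
//   void masked_store16(int* mem, __mmask16 k, __m512i v) {
//     _mm512_mask_storeu_epi32(mem, k, v);  // lanes with clear k bits untouched
//   }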
8230 instruct vmask_truecount_evex(rRegI dst, vec mask, rRegL tmp, kReg ktmp, vec xtmp) %{
8231   predicate(VM_Version::supports_avx512vlbw());
8232   match(Set dst (VectorMaskTrueCount mask));
8233   effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp, TEMP xtmp);
8234   format %{ "vector_truecount_evex $mask \t! vector mask true count" %}
8235   ins_encode %{
8236     int opcode = this->ideal_Opcode();
8237     int vlen_enc = vector_length_encoding(this, $mask);
8238     int mask_len = Matcher::vector_length(this, $mask);
8239     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
8240                              $tmp$$Register, $ktmp$$KRegister, mask_len, vlen_enc);
8241   %}
8242   ins_pipe( pipe_slow );
8243 %}
8244 
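// Note (editorial): vector_mask_operation presumably reduces the mask to a
// bitset in a general register (pmovmskb/kmov) and, for TrueCount, applies
// POPCNT; roughly:
//
//   int true_count = __builtin_popcountll(mask_bits);  // GCC/Clang builtin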
8245 instruct vmask_first_or_last_true_evex(rRegI dst, vec mask, rRegL tmp, kReg ktmp, vec xtmp, rFlagsReg cr) %{
8246   predicate(VM_Version::supports_avx512vlbw());
8247   match(Set dst (VectorMaskFirstTrue mask));
8248   match(Set dst (VectorMaskLastTrue mask));
8249   effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp, TEMP xtmp, KILL cr);
8250   format %{ "vector_mask_first_or_last_true_evex $mask \t! vector first/last true location" %}
8251   ins_encode %{
8252     int opcode = this->ideal_Opcode();
8253     int vlen_enc = vector_length_encoding(this, $mask);
8254     int mask_len = Matcher::vector_length(this, $mask);
8255     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
8256                              $tmp$$Register, $ktmp$$KRegister, mask_len, vlen_enc);
8257   %}
8258   ins_pipe( pipe_slow );
8259 %}
8260 
8261 instruct vmask_truecount_avx(rRegI dst, vec mask, rRegL tmp, vec xtmp, vec xtmp1) %{
8262   predicate(!VM_Version::supports_avx512vlbw());
8263   match(Set dst (VectorMaskTrueCount mask));
8264   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, TEMP xtmp1);
8265   format %{ "vector_truecount_avx $mask \t! vector mask true count" %}
8266   ins_encode %{
8267     int opcode = this->ideal_Opcode();
8268     int vlen_enc = vector_length_encoding(this, $mask);
8269     int mask_len = Matcher::vector_length(this, $mask);
8270     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
8271                              $xtmp1$$XMMRegister, $tmp$$Register, mask_len, vlen_enc);
8272   %}
8273   ins_pipe( pipe_slow );
8274 %}
8275 
8276 instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, rRegL tmp, vec xtmp, vec xtmp1, rFlagsReg cr) %{
8277   predicate(!VM_Version::supports_avx512vlbw());
8278   match(Set dst (VectorMaskFirstTrue mask));
8279   match(Set dst (VectorMaskLastTrue mask));
8280   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, TEMP xtmp1, KILL cr);
8281   format %{ "vector_mask_first_or_last_true_avx $mask \t! vector first/last true location" %}
8282   ins_encode %{
8283     int opcode = this->ideal_Opcode();
8284     int vlen_enc = vector_length_encoding(this, $mask);
8285     int mask_len = Matcher::vector_length(this, $mask);
8286     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
8287                              $xtmp1$$XMMRegister, $tmp$$Register, mask_len, vlen_enc);
8288   %}
8289   ins_pipe( pipe_slow );
8290 %}
8291 #endif // _LP64
8292 
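// Scalar reference for the mask queries handled above. The boundary
// conventions are an assumption matching the Vector API: firstTrue() yields
// the lane count when no bit is set, lastTrue() yields -1.
#include <cstdint>

int mask_true_count(uint64_t m, int lanes) {
  int n = 0;
  for (int i = 0; i < lanes; i++) n += (int)((m >> i) & 1);
  return n;
}

int mask_first_true(uint64_t m, int lanes) {
  for (int i = 0; i < lanes; i++) if ((m >> i) & 1) return i;
  return lanes;   // no lane set
}

int mask_last_true(uint64_t m, int lanes) {
  for (int i = lanes - 1; i >= 0; i--) if ((m >> i) & 1) return i;
  return -1;      // no lane set
}
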
8293 instruct castVV(vec dst)
8294 %{
8295   match(Set dst (CastVV dst));
8296 
8297   size(0);
8298   format %{ "# castVV of $dst" %}
8299   ins_encode(/* empty encoding */);
8300   ins_cost(0);
8301   ins_pipe(empty);
8302 %}
8303 
8304 instruct castVVLeg(legVec dst)
8305 %{
8306   match(Set dst (CastVV dst));
8307 
8308   size(0);
8309   format %{ "# castVV of $dst" %}
8310   ins_encode(/* empty encoding */);
8311   ins_cost(0);
8312   ins_pipe(empty);
8313 %}

1357 
1358 //=============================================================================
1359 
1360   // Float masks come from different places depending on platform.
1361 #ifdef _LP64
1362   static address float_signmask()  { return StubRoutines::x86::float_sign_mask(); }
1363   static address float_signflip()  { return StubRoutines::x86::float_sign_flip(); }
1364   static address double_signmask() { return StubRoutines::x86::double_sign_mask(); }
1365   static address double_signflip() { return StubRoutines::x86::double_sign_flip(); }
1366 #else
1367   static address float_signmask()  { return (address)float_signmask_pool; }
1368   static address float_signflip()  { return (address)float_signflip_pool; }
1369   static address double_signmask() { return (address)double_signmask_pool; }
1370   static address double_signflip() { return (address)double_signflip_pool; }
1371 #endif
1372   static address vector_short_to_byte_mask() { return StubRoutines::x86::vector_short_to_byte_mask(); }
1373   static address vector_int_to_byte_mask() { return StubRoutines::x86::vector_int_to_byte_mask(); }
1374   static address vector_byte_perm_mask() { return StubRoutines::x86::vector_byte_perm_mask(); }
1375   static address vector_long_sign_mask() { return StubRoutines::x86::vector_long_sign_mask(); }
1376   static address vector_all_bits_set() { return StubRoutines::x86::vector_all_bits_set(); }
1377   static address vector_int_mask_cmp_bits() { return StubRoutines::x86::vector_int_mask_cmp_bits(); }
1378   static address vector_int_to_short_mask() { return StubRoutines::x86::vector_int_to_short_mask(); }
1379   static address vector_byte_shufflemask() { return StubRoutines::x86::vector_byte_shuffle_mask(); }
1380   static address vector_short_shufflemask() { return StubRoutines::x86::vector_short_shuffle_mask(); }
1381   static address vector_int_shufflemask() { return StubRoutines::x86::vector_int_shuffle_mask(); }
1382   static address vector_long_shufflemask() { return StubRoutines::x86::vector_long_shuffle_mask(); }
1383   static address vector_32_bit_mask() { return StubRoutines::x86::vector_32_bit_mask(); }
1384   static address vector_64_bit_mask() { return StubRoutines::x86::vector_64_bit_mask(); }
1385 
1386 //=============================================================================
1387 const bool Matcher::match_rule_supported(int opcode) {
1388   if (!has_match_rule(opcode)) {
1389     return false; // no match rule present
1390   }
1391   const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false);
1392   switch (opcode) {
1393     case Op_AbsVL:
1394     case Op_StoreVectorScatter:
1395       if (UseAVX < 3) {
1396         return false;
1397       }

1540         return false;
1541       }
1542       break;
1543     case Op_MacroLogicV:
1544       if (UseAVX < 3 || !UseVectorMacroLogic) {
1545         return false;
1546       }
1547       break;
1548 
1549     case Op_VectorCmpMasked:
1550     case Op_VectorMaskGen:
1551     case Op_LoadVectorMasked:
1552     case Op_StoreVectorMasked:
1553       if (!is_LP64  || UseAVX < 3 || !VM_Version::supports_bmi2()) {
1554         return false;
1555       }
1556       break;
1557     case Op_VectorMaskFirstTrue:
1558     case Op_VectorMaskLastTrue:
1559     case Op_VectorMaskTrueCount:
1560     case Op_VectorMaskToLong:
1561       if (!is_LP64 || UseAVX < 1) {
1562          return false;
1563       }
1564       break;
1565     case Op_CopySignD:
1566     case Op_CopySignF:
1567       if (UseAVX < 3 || !is_LP64)  {
1568         return false;
1569       }
1570       if (!VM_Version::supports_avx512vl()) {
1571         return false;
1572       }
1573       break;
1574 #ifndef _LP64
1575     case Op_AddReductionVF:
1576     case Op_AddReductionVD:
1577     case Op_MulReductionVF:
1578     case Op_MulReductionVD:
1579       if (UseSSE < 1) { // requires at least SSE
1580         return false;

1787       break;
1788     case Op_VectorCastL2X:
1789       if (is_integral_type(bt) && size_in_bits == 256 && UseAVX < 2) {
1790         return false;
1791       } else if (!is_integral_type(bt) && !VM_Version::supports_avx512dq()) {
1792         return false;
1793       }
1794       break;
1795     case Op_VectorCastF2X:
1796     case Op_VectorCastD2X:
1797       if (is_integral_type(bt)) {
1798         // Casts from FP to integral types require special fixup logic not easily
1799         // implementable with vectors.
1800         return false; // Implementation limitation
1801       }  // fallthrough
1802     case Op_MulReductionVI:
1803       if (bt == T_BYTE && size_in_bits == 512 && !VM_Version::supports_avx512bw()) {
1804         return false;
1805       }
1806       break;
1807     case Op_LoadVectorGatherMasked:
1808     case Op_StoreVectorScatterMasked:
1809     case Op_StoreVectorScatter:
1810       if (is_subword_type(bt)) {
1811         return false;
1812       } else if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
1813         return false;
1814       }
1815       // fallthrough
1816     case Op_LoadVectorGather:
1817       if (size_in_bits == 64) {
1818         return false;
1819       }
1820       break;
1821     case Op_MaskAll:
1822       if (!is_LP64 || !VM_Version::supports_evex()) {
1823         return false;
1824       }
1825       if ((vlen > 16 || is_subword_type(bt)) && !VM_Version::supports_avx512bw()) {
1826         return false;
1827       }
1828       if (size_in_bits < 512 && !VM_Version::supports_avx512vl()) {
1829         return false;
1830       }
1831       break;
1832     case Op_VectorMaskCmp:
1833       if (vlen < 2 || size_in_bits < 32) {
1834         return false;
1835       }
1836       break;
1837   }
1838   return true;  // Per default match rules are supported.
1839 }
1840 
1841 const bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
1842   // The ADLC-based match_rule_supported routine checks for the existence of a pattern
1843   // based on the IR opcode. Most unary/binary/ternary masked operations share the IR
1844   // nodes of their non-masked counterparts, with the mask edge being the differentiator.
1845   // This routine therefore does a strict check for the existence of masked operation
1846   // patterns: it returns false for every opcode except the ones whose masked
1847   // instruction patterns are defined in this file.
1848   if (!match_rule_supported_vector(opcode, vlen, bt)) {
1849     return false;
1850   }
1851 
1852   const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false);
1853   int size_in_bits = vlen * type2aelembytes(bt) * BitsPerByte;
1854   if (size_in_bits != 512 && !VM_Version::supports_avx512vl()) {
1855     return false;
1856   }
1857   switch(opcode) {
1858     // Unary masked operations
1859     case Op_AbsVB:
1860     case Op_AbsVS:
1861       if (!VM_Version::supports_avx512bw()) {
1862         return false;  // Implementation limitation
1863       }  // fallthrough
1864     case Op_AbsVI:
1865     case Op_AbsVL:
1866       return true;
1867 
1868     // Ternary masked operations
1869     case Op_FmaVF:
1870     case Op_FmaVD:
1871       return true;
1872 
1873     // Binary masked operations
1874     case Op_AddVB:
1875     case Op_AddVS:
1876     case Op_SubVB:
1877     case Op_SubVS:
1878     case Op_MulVS:
1879     case Op_LShiftVS:
1880     case Op_RShiftVS:
1881     case Op_URShiftVS:
1882       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
1883       if (!VM_Version::supports_avx512bw()) {
1884         return false;  // Implementation limitation
1885       }
1886       return true;
1887 
1888     case Op_MulVL:
1889       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
1890       if (!VM_Version::supports_avx512dq()) {
1891         return false;  // Implementation limitation
1892       }
1893       return true;
1894 
1895     case Op_AndV:
1896     case Op_OrV:
1897     case Op_XorV:
1898     case Op_RotateRightV:
1899     case Op_RotateLeftV:
1900       if (bt != T_INT && bt != T_LONG) {
1901         return false; // Implementation limitation
1902       }
1903       return true;
1904 
1905     case Op_VectorLoadMask:
1906       assert(size_in_bits == 512 || VM_Version::supports_avx512vl(), "");
1907       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
1908         return false;
1909       }
1910       return true;
1911 
1912     case Op_AddVI:
1913     case Op_AddVL:
1914     case Op_AddVF:
1915     case Op_AddVD:
1916     case Op_SubVI:
1917     case Op_SubVL:
1918     case Op_SubVF:
1919     case Op_SubVD:
1920     case Op_MulVI:
1921     case Op_MulVF:
1922     case Op_MulVD:
1923     case Op_DivVF:
1924     case Op_DivVD:
1925     case Op_SqrtVF:
1926     case Op_SqrtVD:
1927     case Op_LShiftVI:
1928     case Op_LShiftVL:
1929     case Op_RShiftVI:
1930     case Op_RShiftVL:
1931     case Op_URShiftVI:
1932     case Op_URShiftVL:
1933     case Op_LoadVectorMasked:
1934     case Op_StoreVectorMasked:
1935     case Op_LoadVectorGatherMasked:
1936     case Op_StoreVectorScatterMasked:
1937       return true;
1938 
1939     case Op_MaxV:
1940     case Op_MinV:
1941       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
1942         return false; // Implementation limitation
1943       }
1944       if (is_floating_point_type(bt)) {
1945         return false; // Implementation limitation
1946       }
1947       return true;
1948 
1949     case Op_VectorMaskCmp:
1950       if (is_subword_type(bt) && !VM_Version::supports_avx512bw()) {
1951         return false; // Implementation limitation
1952       }
1953       return true;
1954 
1955     case Op_VectorRearrange:
1956       if (bt == T_SHORT && !VM_Version::supports_avx512bw()) {
1957         return false; // Implementation limitation
1958       }
1959       if (bt == T_BYTE && !VM_Version::supports_avx512_vbmi()) {
1960         return false; // Implementation limitation
1961       } else if ((bt == T_INT || bt == T_FLOAT) && size_in_bits < 256) {
1962         return false; // Implementation limitation
1963       }
1964       return true;
1965 
1966     // Binary Logical operations
1967     case Op_AndVMask:
1968     case Op_OrVMask:
1969     case Op_XorVMask:
1970       if (vlen > 16 && !VM_Version::supports_avx512bw()) {
1971         return false; // Implementation limitation
1972       }
1973       return true;
1974 
1975     case Op_MaskAll:
1976       return true;
1977 
1978     default:
1979       return false;
1980   }
1981 }
1982 
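// Illustrative sketch of the size gate applied above (hypothetical helper, not
// part of the Matcher API): an EVEX-masked operation needs either a full
// 512-bit vector or AVX512VL for the shorter 128/256-bit encodings.

bool masked_size_supported(int vlen, int elem_bytes, bool supports_avx512vl) {
  int size_in_bits = vlen * elem_bytes * 8;      // mirrors vlen * type2aelembytes(bt) * BitsPerByte
  return size_in_bits == 512 || supports_avx512vl;
}
// masked_size_supported(16, 4, false) -> true   (16 ints == 512 bits)
// masked_size_supported( 8, 4, false) -> false  (256 bits needs AVX512VL)
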
1983 MachOper* Matcher::pd_specialize_generic_vector_operand(MachOper* generic_opnd, uint ideal_reg, bool is_temp) {
1984   assert(Matcher::is_generic_vector(generic_opnd), "not generic");
1985   bool legacy = (generic_opnd->opcode() == LEGVEC);
1986   if (!VM_Version::supports_avx512vlbwdq() && // KNL
1987       is_temp && !legacy && (ideal_reg == Op_VecZ)) {
1988     // Conservatively specialize 512bit vec TEMP operands to legVecZ (zmm0-15) on KNL.
1989     return new legVecZOper();
1990   }
1991   if (legacy) {
1992     switch (ideal_reg) {
1993       case Op_VecS: return new legVecSOper();
1994       case Op_VecD: return new legVecDOper();
1995       case Op_VecX: return new legVecXOper();
1996       case Op_VecY: return new legVecYOper();
1997       case Op_VecZ: return new legVecZOper();
1998     }
1999   } else {
2000     switch (ideal_reg) {
2001       case Op_VecS: return new vecSOper();
2002       case Op_VecD: return new vecDOper();

2027   }
2028 }
2029 
2030 bool Matcher::is_generic_vector(MachOper* opnd) {
2031   switch (opnd->opcode()) {
2032     case VEC:
2033     case LEGVEC:
2034       return true;
2035     default:
2036       return false;
2037   }
2038 }
2039 
2040 //------------------------------------------------------------------------
2041 
2042 const RegMask* Matcher::predicate_reg_mask(void) {
2043   return &_VECTMASK_REG_mask;
2044 }
2045 
2046 const TypeVect* Matcher::predicate_reg_type(const Type* elemTy, int length) {
2047   return new TypeVectMask(elemTy, length);
2048 }
2049 
2050 // Max vector size in bytes. 0 if not supported.
2051 const int Matcher::vector_width_in_bytes(BasicType bt) {
2052   assert(is_java_primitive(bt), "only primitive type vectors");
2053   if (UseSSE < 2) return 0;
2054   // SSE2 supports 128bit vectors for all types.
2055   // AVX2 supports 256bit vectors for all types.
2056   // AVX512/EVEX supports 512bit vectors for all types.
2057   int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16;
2058   // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
2059   if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
2060     size = (UseAVX > 2) ? 64 : 32;
2061   if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
2062     size = (VM_Version::supports_avx512bw()) ? 64 : 32;
2063   // Use flag to limit vector size.
2064   size = MIN2(size,(int)MaxVectorSize);
2065   // Minimum 2 values in vector (or 4 for bytes).
2066   switch (bt) {
2067   case T_DOUBLE:

3450   match(Set dst (SqrtF dst));
3451   format %{ "sqrtss  $dst, $dst" %}
3452   ins_encode %{
3453     __ sqrtss($dst$$XMMRegister, $dst$$XMMRegister);
3454   %}
3455   ins_pipe(pipe_slow);
3456 %}
3457 
3458 // The sqrtsd instruction needs its destination register to be pre-initialized for best performance.
3459 // Therefore only the instruct rule where the input is pre-loaded into the dst register is defined below.
3460 instruct sqrtD_reg(regD dst) %{
3461   predicate(UseSSE>=2);
3462   match(Set dst (SqrtD dst));
3463   format %{ "sqrtsd  $dst, $dst" %}
3464   ins_encode %{
3465     __ sqrtsd($dst$$XMMRegister, $dst$$XMMRegister);
3466   %}
3467   ins_pipe(pipe_slow);
3468 %}
3469 
3470 
3471 // ---------------------------------------- VectorReinterpret ------------------------------------
3472 instruct reinterpret_mask(kReg dst) %{
3473   predicate(n->bottom_type()->isa_vectmask() &&
3474             Matcher::vector_length(n) == Matcher::vector_length(n->in(1))); // dst == src
3475   match(Set dst (VectorReinterpret dst));
3476   ins_cost(125);
3477   format %{ "vector_reinterpret $dst\t!" %}
3478   ins_encode %{
3479     // empty
3480   %}
3481   ins_pipe( pipe_slow );
3482 %}
3483 
3484 instruct reinterpret_mask_W2B(kReg dst, kReg src, vec xtmp) %{
3485   predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
3486             n->bottom_type()->isa_vectmask() &&
3487             n->in(1)->bottom_type()->isa_vectmask() &&
3488             n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_SHORT &&
3489             n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src
3490   match(Set dst (VectorReinterpret src));
3491   effect(TEMP xtmp);
3492   format %{ "vector_mask_reinterpret_W2B $dst $src\t!" %}
3493   ins_encode %{
3494      int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_SHORT);
3495      int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
3496      assert(src_sz == dst_sz, "src and dst size mismatch");
3497      int vlen_enc = vector_length_encoding(src_sz);
3498      __  evpmovm2w($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
3499      __  evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
3500   %}
3501   ins_pipe( pipe_slow );
3502 %}
3503 
3504 instruct reinterpret_mask_D2B(kReg dst, kReg src, vec xtmp) %{
3505   predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
3506             n->bottom_type()->isa_vectmask() &&
3507             n->in(1)->bottom_type()->isa_vectmask() &&
3508             (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_INT ||
3509              n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_FLOAT) &&
3510             n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src
3511   match(Set dst (VectorReinterpret src));
3512   effect(TEMP xtmp);
3513   format %{ "vector_mask_reinterpret_D2B $dst $src\t!" %}
3514   ins_encode %{
3515      int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_INT);
3516      int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
3517      assert(src_sz == dst_sz, "src and dst size mismatch");
3518      int vlen_enc = vector_length_encoding(src_sz);
3519      __  evpmovm2d($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
3520      __  evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
3521   %}
3522   ins_pipe( pipe_slow );
3523 %}
3524 
3525 instruct reinterpret_mask_Q2B(kReg dst, kReg src, vec xtmp) %{
3526   predicate(UseAVX > 2 && Matcher::vector_length(n) != Matcher::vector_length(n->in(1)) &&
3527             n->bottom_type()->isa_vectmask() &&
3528             n->in(1)->bottom_type()->isa_vectmask() &&
3529             (n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_LONG ||
3530              n->in(1)->bottom_type()->is_vectmask()->element_basic_type() == T_DOUBLE) &&
3531             n->bottom_type()->is_vectmask()->element_basic_type() == T_BYTE); // dst == src
3532   match(Set dst (VectorReinterpret src));
3533   effect(TEMP xtmp);
3534   format %{ "vector_mask_reinterpret_Q2B $dst $src\t!" %}
3535   ins_encode %{
3536      int src_sz = Matcher::vector_length(this, $src)*type2aelembytes(T_LONG);
3537      int dst_sz = Matcher::vector_length(this)*type2aelembytes(T_BYTE);
3538      assert(src_sz == dst_sz, "src and dst size mismatch");
3539      int vlen_enc = vector_length_encoding(src_sz);
3540      __  evpmovm2q($xtmp$$XMMRegister, $src$$KRegister, vlen_enc);
3541      __  evpmovb2m($dst$$KRegister, $xtmp$$XMMRegister, vlen_enc);
3542   %}
3543   ins_pipe( pipe_slow );
3544 %}
3545 
3546 instruct reinterpret(vec dst) %{
3547   predicate(!n->bottom_type()->isa_vectmask() &&
3548             Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))); // dst == src
3549   match(Set dst (VectorReinterpret dst));
3550   ins_cost(125);
3551   format %{ "vector_reinterpret $dst\t!" %}
3552   ins_encode %{
3553     // empty
3554   %}
3555   ins_pipe( pipe_slow );
3556 %}
3557 
3558 instruct reinterpret_expand(vec dst, vec src, rRegP scratch) %{
3559   predicate(UseAVX == 0 &&
3560             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
3561   match(Set dst (VectorReinterpret src));
3562   ins_cost(125);
3563   effect(TEMP dst, TEMP scratch);
3564   format %{ "vector_reinterpret_expand $dst,$src\t! using $scratch as TEMP" %}
3565   ins_encode %{
3566     assert(Matcher::vector_length_in_bytes(this)       <= 16, "required");
3567     assert(Matcher::vector_length_in_bytes(this, $src) <=  8, "required");
3568 
3569     int src_vlen_in_bytes = Matcher::vector_length_in_bytes(this, $src);
3570     if (src_vlen_in_bytes == 4) {
3571       __ movdqu($dst$$XMMRegister, ExternalAddress(vector_32_bit_mask()), $scratch$$Register);
3572     } else {
3573       assert(src_vlen_in_bytes == 8, "");
3574       __ movdqu($dst$$XMMRegister, ExternalAddress(vector_64_bit_mask()), $scratch$$Register);
3575     }
3576     __ pand($dst$$XMMRegister, $src$$XMMRegister);
3577   %}
3578   ins_pipe( pipe_slow );
3579 %}
3580 
3581 instruct vreinterpret_expand4(legVec dst, vec src, rRegP scratch) %{
3582   predicate(UseAVX > 0 &&
3583             !n->bottom_type()->isa_vectmask() &&
3584             (Matcher::vector_length_in_bytes(n->in(1)) == 4) && // src
3585             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
3586   match(Set dst (VectorReinterpret src));
3587   ins_cost(125);
3588   effect(TEMP scratch);
3589   format %{ "vector_reinterpret_expand $dst,$src\t! using $scratch as TEMP" %}
3590   ins_encode %{
3591     __ vpand($dst$$XMMRegister, $src$$XMMRegister, ExternalAddress(vector_32_bit_mask()), 0, $scratch$$Register);
3592   %}
3593   ins_pipe( pipe_slow );
3594 %}
3595 
3596 
3597 instruct vreinterpret_expand(legVec dst, vec src) %{
3598   predicate(UseAVX > 0 &&
3599             !n->bottom_type()->isa_vectmask() &&
3600             (Matcher::vector_length_in_bytes(n->in(1)) > 4) && // src
3601             (Matcher::vector_length_in_bytes(n->in(1)) < Matcher::vector_length_in_bytes(n))); // src < dst
3602   match(Set dst (VectorReinterpret src));
3603   ins_cost(125);
3604   format %{ "vector_reinterpret_expand $dst,$src\t!" %}
3605   ins_encode %{
3606     switch (Matcher::vector_length_in_bytes(this, $src)) {
3607       case  8: __ movq   ($dst$$XMMRegister, $src$$XMMRegister); break;
3608       case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
3609       case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
3610       default: ShouldNotReachHere();
3611     }
3612   %}
3613   ins_pipe( pipe_slow );
3614 %}
3615 
3616 instruct reinterpret_shrink(vec dst, legVec src) %{
3617   predicate(!n->bottom_type()->isa_vectmask() &&
3618             Matcher::vector_length_in_bytes(n->in(1)) > Matcher::vector_length_in_bytes(n)); // src > dst
3619   match(Set dst (VectorReinterpret src));
3620   ins_cost(125);
3621   format %{ "vector_reinterpret_shrink $dst,$src\t!" %}
3622   ins_encode %{
3623     switch (Matcher::vector_length_in_bytes(this)) {
3624       case  4: __ movfltz($dst$$XMMRegister, $src$$XMMRegister); break;
3625       case  8: __ movq   ($dst$$XMMRegister, $src$$XMMRegister); break;
3626       case 16: __ movdqu ($dst$$XMMRegister, $src$$XMMRegister); break;
3627       case 32: __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister); break;
3628       default: ShouldNotReachHere();
3629     }
3630   %}
3631   ins_pipe( pipe_slow );
3632 %}
3633 
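// Scalar sketch (assumed semantics) of the expand/shrink reinterpret rules
// above: growing a vector copies the payload and zero-fills the upper bytes,
// which is why the expand encodings AND the source against a 32/64-bit mask,
// while shrinking keeps only the low bytes.
#include <cstdint>
#include <cstring>

void reinterpret_resize(uint8_t* dst, int dst_bytes, const uint8_t* src, int src_bytes) {
  if (dst_bytes > src_bytes) {
    std::memcpy(dst, src, src_bytes);
    std::memset(dst + src_bytes, 0, dst_bytes - src_bytes);  // expand: zero the upper part
  } else {
    std::memcpy(dst, src, dst_bytes);                        // shrink: truncate
  }
}
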
3634 // ----------------------------------------------------------------------------------------------------
3635 
3636 #ifdef _LP64
3637 instruct roundD_reg(legRegD dst, legRegD src, immU8 rmode) %{
3638   match(Set dst (RoundDoubleMode src rmode));

3800   ins_cost(145);
3801   format %{ "store_vector $mem,$src\n\t" %}
3802   ins_encode %{
3803     switch (Matcher::vector_length_in_bytes(this, $src)) {
3804       case  4: __ movdl    ($mem$$Address, $src$$XMMRegister); break;
3805       case  8: __ movq     ($mem$$Address, $src$$XMMRegister); break;
3806       case 16: __ movdqu   ($mem$$Address, $src$$XMMRegister); break;
3807       case 32: __ vmovdqu  ($mem$$Address, $src$$XMMRegister); break;
3808       case 64: __ evmovdqul($mem$$Address, $src$$XMMRegister, Assembler::AVX_512bit); break;
3809       default: ShouldNotReachHere();
3810     }
3811   %}
3812   ins_pipe( pipe_slow );
3813 %}
3814 
3815 // ---------------------------------------- Gather ------------------------------------
3816 
3817 // Gather INT, LONG, FLOAT, DOUBLE
3818 
3819 instruct gather(legVec dst, memory mem, legVec idx, rRegP tmp, legVec mask) %{
3820   predicate(!VM_Version::supports_avx512vl() && Matcher::vector_length_in_bytes(n) <= 32);
3821   match(Set dst (LoadVectorGather mem idx));
3822   effect(TEMP dst, TEMP tmp, TEMP mask);
3823   format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $mask as TEMP" %}
3824   ins_encode %{
3825     assert(UseAVX >= 2, "sanity");
3826 
3827     int vlen_enc = vector_length_encoding(this);
3828     BasicType elem_bt = Matcher::vector_element_basic_type(this);
3829 
3830     assert(Matcher::vector_length_in_bytes(this) >= 16, "sanity");
3831     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
3832 
3833     if (vlen_enc == Assembler::AVX_128bit) {
3834       __ movdqu($mask$$XMMRegister, ExternalAddress(vector_all_bits_set()));
3835     } else {
3836       __ vmovdqu($mask$$XMMRegister, ExternalAddress(vector_all_bits_set()));
3837     }
3838     __ lea($tmp$$Register, $mem$$Address);
3839     __ vgather(elem_bt, $dst$$XMMRegister, $tmp$$Register, $idx$$XMMRegister, $mask$$XMMRegister, vlen_enc);
3840   %}
3841   ins_pipe( pipe_slow );
3842 %}
3843 
3844 instruct evgather(vec dst, memory mem, vec idx, rRegP tmp, kReg ktmp) %{
3845   predicate(VM_Version::supports_avx512vl() || Matcher::vector_length_in_bytes(n) == 64);
3846   match(Set dst (LoadVectorGather mem idx));
3847   effect(TEMP dst, TEMP tmp, TEMP ktmp);
3848   format %{ "load_vector_gather $dst, $mem, $idx\t! using $tmp and $ktmp as TEMP" %}
3849   ins_encode %{
3850     assert(UseAVX > 2, "sanity");
3851 
3852     int vlen_enc = vector_length_encoding(this);
3853     BasicType elem_bt = Matcher::vector_element_basic_type(this);
3854 
3855     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
3856 
3857     __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), $tmp$$Register);
3858     __ lea($tmp$$Register, $mem$$Address);
3859     __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
3860   %}
3861   ins_pipe( pipe_slow );
3862 %}
3863 
3864 instruct evgather_masked(vec dst, memory mem, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
3865   match(Set dst (LoadVectorGatherMasked mem (Binary idx mask)));
3866   effect(TEMP_DEF dst, TEMP tmp, TEMP ktmp);
3867   format %{ "load_vector_gather_masked $dst, $mem, $idx, $mask\t! using $tmp and $ktmp as TEMP" %}
3868   ins_encode %{
3869     assert(UseAVX > 2, "sanity");
3870     int vlen_enc = vector_length_encoding(this);
3871     BasicType elem_bt = Matcher::vector_element_basic_type(this);
3872     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
3873     // Note: since the gather instruction partially updates the opmask register used
3874     // for predication, the mask operand is first copied to a temporary register.
3875     __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
3876     __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
3877     __ lea($tmp$$Register, $mem$$Address);
3878     __ evgather(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $tmp$$Register, $idx$$XMMRegister, vlen_enc);
3879   %}
3880   ins_pipe( pipe_slow );
3881 %}
3882 // ====================Scatter=======================================
3883 
3884 // Scatter INT, LONG, FLOAT, DOUBLE
3885 
3886 instruct scatter(memory mem, vec src, vec idx, rRegP tmp, kReg ktmp) %{
3887   predicate(UseAVX > 2);
3888   match(Set mem (StoreVectorScatter mem (Binary src idx)));
3889   effect(TEMP tmp, TEMP ktmp);
3890   format %{ "store_vector_scatter $mem, $idx, $src\t! using $ktmp and $tmp as TEMP" %}
3891   ins_encode %{
3892     int vlen_enc = vector_length_encoding(this, $src);
3893     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
3894 
3895     assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
3896     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
3897 
3898     __ kmovwl($ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), $tmp$$Register);
3899     __ lea($tmp$$Register, $mem$$Address);
3900     __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
3901   %}
3902   ins_pipe( pipe_slow );
3903 %}
3904 
3905 instruct scatter_masked(memory mem, vec src, vec idx, kReg mask, kReg ktmp, rRegP tmp) %{
3906   match(Set mem (StoreVectorScatterMasked mem (Binary src (Binary idx mask))));
3907   effect(TEMP tmp, TEMP ktmp);
3908   format %{ "store_vector_scatter_masked $mem, $idx, $src, $mask\t!" %}
3909   ins_encode %{
3910     int vlen_enc = vector_length_encoding(this, $src);
3911     BasicType elem_bt = Matcher::vector_element_basic_type(this, $src);
3912     assert(Matcher::vector_length_in_bytes(this, $src) >= 16, "sanity");
3913     assert(!is_subword_type(elem_bt), "sanity"); // T_INT, T_LONG, T_FLOAT, T_DOUBLE
3914     // Note: since the scatter instruction partially updates the opmask register used
3915     // for predication, the mask operand is first copied to a temporary register.
3916     __ kmovwl($ktmp$$KRegister, $mask$$KRegister);
3917     __ lea($tmp$$Register, $mem$$Address);
3918     __ evscatter(elem_bt, $tmp$$Register, $idx$$XMMRegister, $ktmp$$KRegister, $src$$XMMRegister, vlen_enc);
3919   %}
3920   ins_pipe( pipe_slow );
3921 %}
3922 
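// Scalar sketch (assumed semantics) of the masked gather/scatter above. The
// hardware clears each opmask bit as the corresponding lane completes, so the
// register handed to the instruction is destroyed; that is why the encodings
// first copy the mask into $ktmp with kmovwl.
#include <cstddef>
#include <cstdint>

template <typename T>
void masked_gather(T* dst, const T* base, const int* idx, uint64_t& mask, size_t lanes) {
  for (size_t i = 0; i < lanes; i++) {
    if ((mask >> i) & 1) {
      dst[i] = base[idx[i]];
      mask &= ~(uint64_t(1) << i);   // lane done: its mask bit is cleared
    }
  }
}
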
3923 // ====================REPLICATE=======================================
3924 
3925 // Replicate byte scalar to be vector
3926 instruct ReplB_reg(vec dst, rRegI src) %{
3927   match(Set dst (ReplicateB src));
3928   format %{ "replicateB $dst,$src" %}
3929   ins_encode %{
3930     uint vlen = Matcher::vector_length(this);
3931     if (vlen == 64 || VM_Version::supports_avx512vlbw()) { // AVX512VL for <512bit operands
3932       assert(VM_Version::supports_avx512bw(), "required"); // 512-bit byte vectors assume AVX512BW
3933       int vlen_enc = vector_length_encoding(this);
3934       __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vlen_enc);
3935     } else if (VM_Version::supports_avx2()) {
3936       int vlen_enc = vector_length_encoding(this);
3937       __ movdl($dst$$XMMRegister, $src$$Register);
3938       __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
3939     } else {
3940       __ movdl($dst$$XMMRegister, $src$$Register);
3941       __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
3942       __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);

4148   ins_pipe( pipe_slow );
4149 %}
4150 
4151 // Replicate integer (4 byte) scalar zero to be vector
4152 instruct ReplI_zero(vec dst, immI_0 zero) %{
4153   match(Set dst (ReplicateI zero));
4154   format %{ "replicateI $dst,$zero" %}
4155   ins_encode %{
4156     uint vlen = Matcher::vector_length(this);
4157     if (vlen <= 4) {
4158       __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
4159     } else {
4160       int vlen_enc = vector_length_encoding(this);
4161       __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
4162     }
4163   %}
4164   ins_pipe( fpu_reg_reg );
4165 %}
4166 
4167 instruct ReplI_M1(vec dst, immI_M1 con) %{
4168   predicate(UseAVX > 0 && Matcher::vector_length_in_bytes(n) >= 16);
4169   match(Set dst (ReplicateB con));
4170   match(Set dst (ReplicateS con));
4171   match(Set dst (ReplicateI con));
4172   effect(TEMP dst);
4173   format %{ "vallones $dst" %}
4174   ins_encode %{
4175     int vector_len = vector_length_encoding(this);
4176     __ vallones($dst$$XMMRegister, vector_len);
4177   %}
4178   ins_pipe( pipe_slow );
4179 %}
4180 
4181 // ====================ReplicateL=======================================
4182 
4183 #ifdef _LP64
4184 // Replicate long (8 byte) scalar to be vector
4185 instruct ReplL_reg(vec dst, rRegL src) %{
4186   match(Set dst (ReplicateL src));
4187   format %{ "replicateL $dst,$src" %}
4188   ins_encode %{

6114 %}
6115 
6116 instruct copySignD_imm(regD dst, regD src, regD tmp1, rRegL tmp2, immD zero) %{
6117   match(Set dst (CopySignD dst (Binary src zero)));
6118   ins_cost(100);
6119   effect(TEMP tmp1, TEMP tmp2);
6120   format %{ "CopySignD  $dst, $src\t! using $tmp1 and $tmp2 as TEMP" %}
6121   ins_encode %{
6122     __ mov64($tmp2$$Register, 0x7FFFFFFFFFFFFFFF);
6123     __ movq($tmp1$$XMMRegister, $tmp2$$Register);
6124     __ vpternlogq($dst$$XMMRegister, 0xE4, $src$$XMMRegister, $tmp1$$XMMRegister, Assembler::AVX_128bit);
6125   %}
6126   ins_pipe( pipe_slow );
6127 %}
6128 #endif // _LP64
6129 
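// Scalar sketch of the bitwise identity behind the vpternlogq immediate 0xE4
// used above (a per-bit select: mask ? dst : src). On this reading the result
// takes its magnitude bits from dst and its sign bit from src; the operand
// roles are an inference from the truth table, not a spec quote.
#include <cstdint>
#include <cstring>

double copy_sign_d(double magnitude, double sign) {
  uint64_t m, s;
  std::memcpy(&m, &magnitude, sizeof m);
  std::memcpy(&s, &sign, sizeof s);
  const uint64_t kMagMask = 0x7FFFFFFFFFFFFFFFULL;   // the same constant loaded into $tmp2
  uint64_t r = (m & kMagMask) | (s & ~kMagMask);     // magnitude bits | sign bit
  double out;
  std::memcpy(&out, &r, sizeof out);
  return out;
}
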
6130 // --------------------------------- Sqrt --------------------------------------
6131 
6132 instruct vsqrtF_reg(vec dst, vec src) %{
6133   match(Set dst (SqrtVF src));
6134   ins_cost(400);
6135   format %{ "vsqrtps  $dst,$src\t! sqrt packedF" %}
6136   ins_encode %{
6137     assert(UseAVX > 0, "required");
6138     int vlen_enc = vector_length_encoding(this);
6139     __ vsqrtps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
6140   %}
6141   ins_pipe( pipe_slow );
6142 %}
6143 
6144 instruct vsqrtF_mem(vec dst, memory mem) %{
6145   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
6146   match(Set dst (SqrtVF (LoadVector mem)));
6147   ins_cost(400);
6148   format %{ "vsqrtps  $dst,$mem\t! sqrt packedF" %}
6149   ins_encode %{
6150     assert(UseAVX > 0, "required");
6151     int vlen_enc = vector_length_encoding(this);
6152     __ vsqrtps($dst$$XMMRegister, $mem$$Address, vlen_enc);
6153   %}
6154   ins_pipe( pipe_slow );
6155 %}
6156 
6157 // Floating point vector sqrt
6158 instruct vsqrtD_reg(vec dst, vec src) %{
6159   match(Set dst (SqrtVD src));
6160   ins_cost(400);
6161   format %{ "vsqrtpd  $dst,$src\t! sqrt packedD" %}
6162   ins_encode %{
6163     assert(UseAVX > 0, "required");
6164     int vlen_enc = vector_length_encoding(this);
6165     __ vsqrtpd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
6166   %}
6167   ins_pipe( pipe_slow );
6168 %}
6169 
6170 instruct vsqrtD_mem(vec dst, memory mem) %{
6171   predicate(Matcher::vector_length_in_bytes(n->in(1)) > 8);
6172   match(Set dst (SqrtVD (LoadVector mem)));
6173   ins_cost(400);
6174   format %{ "vsqrtpd  $dst,$mem\t! sqrt packedD" %}
6175   ins_encode %{
6176     assert(UseAVX > 0, "required");
6177     int vlen_enc = vector_length_encoding(this);
6178     __ vsqrtpd($dst$$XMMRegister, $mem$$Address, vlen_enc);
6179   %}
6180   ins_pipe( pipe_slow );
6181 %}
6182 
6183 // ------------------------------ Shift ---------------------------------------
6184 
6185 // Left and right shift count vectors are the same on x86
6186 // (only lowest bits of xmm reg are used for count).
6187 instruct vshiftcnt(vec dst, rRegI cnt) %{
6188   match(Set dst (LShiftCntV cnt));
6189   match(Set dst (RShiftCntV cnt));
6190   format %{ "movdl    $dst,$cnt\t! load shift count" %}
6191   ins_encode %{
6192     __ movdl($dst$$XMMRegister, $cnt$$Register);
6193   %}

7162     int vlen_enc = vector_length_encoding(this);
7163     __ vcvtps2pd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
7164   %}
7165   ins_pipe( pipe_slow );
7166 %}
7167 
7168 instruct vcastDtoF_reg(vec dst, vec src) %{
7169   predicate(Matcher::vector_element_basic_type(n) == T_FLOAT);
7170   match(Set dst (VectorCastD2X src));
7171   format %{ "vector_cast_d2x  $dst,$src\t!" %}
7172   ins_encode %{
7173     int vlen_enc = vector_length_encoding(this, $src);
7174     __ vcvtpd2ps($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
7175   %}
7176   ins_pipe( pipe_slow );
7177 %}
7178 
7179 // --------------------------------- VectorMaskCmp --------------------------------------
7180 
7181 instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{
7182   predicate(n->bottom_type()->isa_vectmask() == NULL &&
7183             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  8 && // src1
7184             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
7185             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
7186   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
7187   format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
7188   ins_encode %{
7189     int vlen_enc = vector_length_encoding(this, $src1);
7190     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
7191     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
7192       __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
7193     } else {
7194       __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
7195     }
7196   %}
7197   ins_pipe( pipe_slow );
7198 %}
7199 
7200 instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, rRegP scratch, kReg ktmp) %{
7201   predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
7202             n->bottom_type()->isa_vectmask() == NULL &&
7203             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
7204   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
7205   effect(TEMP scratch, TEMP ktmp);
7206   format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $scratch as TEMP" %}
7207   ins_encode %{
7208     int vlen_enc = Assembler::AVX_512bit;
7209     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
7210     KRegister mask = k0; // The comparison itself is not being masked.
7211     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
7212       __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
7213       __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, $scratch$$Register);
7214     } else {
7215       __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
7216       __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, $scratch$$Register);
7217     }
7218   %}
7219   ins_pipe( pipe_slow );
7220 %}
7221 
7222 instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{
7223   predicate(n->bottom_type()->isa_vectmask() &&
7224             is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
7225   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
7226   format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
7227   ins_encode %{
7228     assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
7229     int vlen_enc = vector_length_encoding(this, $src1);
7230     Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
7231     KRegister mask = k0; // The comparison itself is not being masked.
7232     if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
7233       __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
7234     } else {
7235       __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
7236     }
7237   %}
7238   ins_pipe( pipe_slow );
7239 %}
7240 
7241 instruct vcmp(legVec dst, legVec src1, legVec src2, immI8 cond, rRegP scratch) %{
7242   predicate(n->bottom_type()->isa_vectmask() == NULL &&
7243             !is_unsigned_booltest_pred(n->in(2)->get_int()) &&
7244             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
7245             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
7246             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
7247   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
7248   effect(TEMP scratch);
7249   format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $scratch as TEMP" %}
7250   ins_encode %{
7251     int vlen_enc = vector_length_encoding(this, $src1);
7252     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
7253     Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
7254     __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, ww, vlen_enc, $scratch$$Register);
7255   %}
7256   ins_pipe( pipe_slow );
7257 %}
7258 
7259 instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec vtmp1, legVec vtmp2, rRegP scratch) %{
7260   predicate(n->bottom_type()->isa_vectmask() == NULL &&
7261             is_unsigned_booltest_pred(n->in(2)->get_int()) &&
7262             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  8 && // src1
7263             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 16 && // src1
7264             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
7265   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
7266   effect(TEMP vtmp1, TEMP vtmp2, TEMP scratch);
7267   format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $scratch as TEMP" %}
7268   ins_encode %{
7269     int vlen = Matcher::vector_length_in_bytes(this, $src1);
7270     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
7271     BasicType bt = Matcher::vector_element_basic_type(this, $src1);
7272     __ vpcmpu(bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen, $vtmp1$$XMMRegister,
7273               $vtmp2$$XMMRegister, $scratch$$Register);
7274   %}
7275   ins_pipe( pipe_slow );
7276 %}
7277 
7278 instruct vcmpu32(legVec dst, legVec src1, legVec src2, immI8 cond, legVec vtmp1, legVec vtmp2, legVec vtmp3, rRegP scratch) %{
7279   predicate(n->bottom_type()->isa_vectmask() == NULL &&
7280             is_unsigned_booltest_pred(n->in(2)->get_int()) &&
7281             Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 32 && // src1
7282             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
7283   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
7284   effect(TEMP dst, TEMP vtmp1, TEMP vtmp2, TEMP vtmp3, TEMP scratch);
7285   format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $scratch as TEMP" %}
7286   ins_encode %{
7287     int vlen = Matcher::vector_length_in_bytes(this, $src1);
7288     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
7289     BasicType bt = Matcher::vector_element_basic_type(this, $src1);
7290     __ vpcmpu32(bt, $dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen, $vtmp1$$XMMRegister,
7291                 $vtmp2$$XMMRegister, $vtmp3$$XMMRegister, $scratch$$Register);
7292   %}
7293   ins_pipe( pipe_slow );
7294 %}
7295 
7296 instruct vcmpu64(vec dst, vec src1, vec src2, immI8 cond, rRegP scratch, kReg ktmp) %{
7297   predicate((n->bottom_type()->isa_vectmask() == NULL &&
7298              Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1
7299              is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
7300   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
7301   effect(TEMP scratch, TEMP ktmp);
7302   format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $scratch as TEMP" %}
7303   ins_encode %{
7304     assert(UseAVX > 2, "required");
7305 
7306     int vlen_enc = vector_length_encoding(this, $src1);
7307     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
7308     bool is_unsigned = is_unsigned_booltest_pred($cond$$constant);
7309     KRegister mask = k0; // The comparison itself is not being masked.
7310     bool merge = false;
7311     BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
7312 
7313     switch (src1_elem_bt) {
7314       case T_INT: {
7315         __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
7316         __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, $scratch$$Register);
7317         break;
7318       }
7319       case T_LONG: {
7320         __ evpcmpq($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
7321         __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, $scratch$$Register);
7322         break;
7323       }
7324       default: assert(false, "%s", type2name(src1_elem_bt));
7325     }
7326   %}
7327   ins_pipe( pipe_slow );
7328 %}
7329 
7330 
7331 instruct evcmp(kReg dst, vec src1, vec src2, immI8 cond) %{
7332   predicate(n->bottom_type()->isa_vectmask() &&
7333             is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
7334   match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
7335   format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
7336   ins_encode %{
7337     assert(UseAVX > 2, "required");
7338     assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
7339 
7340     int vlen_enc = vector_length_encoding(this, $src1);
7341     Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
7342     bool is_unsigned = is_unsigned_booltest_pred($cond$$constant);
7343     BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
7344 
7345     // Perform the comparison, dispatching on the element type of src1.
7346     switch (src1_elem_bt) {
7347       case T_BYTE: {
7348         __ evpcmpb($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
7349         break;
7350       }
7351       case T_SHORT: {
7352         __ evpcmpw($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
7353         break;
7354       }
7355       case T_INT: {
7356         __ evpcmpd($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
7357         break;
7358       }
7359       case T_LONG: {
7360         __ evpcmpq($dst$$KRegister, k0, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
7361         break;
7362       }
7363       default: assert(false, "%s", type2name(src1_elem_bt));
7364     }
7365   %}
7366   ins_pipe( pipe_slow );
7367 %}
7368 
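// Scalar reference (assumed semantics) for the per-lane comparisons above:
// each lane compares src1[i] with src2[i] under the decoded predicate and
// contributes one bit to the destination mask.
#include <cstddef>
#include <cstdint>

template <typename T, typename Pred>
uint64_t vector_mask_cmp(const T* src1, const T* src2, size_t lanes, Pred pred) {
  uint64_t mask = 0;
  for (size_t i = 0; i < lanes; i++) {
    if (pred(src1[i], src2[i])) {
      mask |= uint64_t(1) << i;
    }
  }
  return mask;
}
// e.g. vector_mask_cmp(a, b, 8, [](int x, int y) { return x < y; });
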
7369 // Extract
7370 
7371 instruct extractI(rRegI dst, legVec src, immU8 idx) %{
7372   predicate(Matcher::vector_length_in_bytes(n->in(1)) <= 16); // src
7373   match(Set dst (ExtractI src idx));
7374   match(Set dst (ExtractS src idx));
7375 #ifdef _LP64
7376   match(Set dst (ExtractB src idx));
7377 #endif
7378   format %{ "extractI $dst,$src,$idx\t!" %}
7379   ins_encode %{
7380     assert($idx$$constant < (int)Matcher::vector_length(this, $src), "out of bounds");

7493 // --------------------------------- Vector Blend --------------------------------------
7494 
7495 instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{
7496   predicate(UseAVX == 0);
7497   match(Set dst (VectorBlend (Binary dst src) mask));
7498   format %{ "vector_blend  $dst,$src,$mask\t! using $tmp as TEMP" %}
7499   effect(TEMP tmp);
7500   ins_encode %{
7501     assert(UseSSE >= 4, "required");
7502 
7503     if ($mask$$XMMRegister != $tmp$$XMMRegister) {
7504       __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister);
7505     }
7506     __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask
7507   %}
7508   ins_pipe( pipe_slow );
7509 %}
7510 
7511 instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{
7512   predicate(UseAVX > 0 &&
7513             n->in(2)->bottom_type()->isa_vectmask() == NULL &&
7514             Matcher::vector_length_in_bytes(n) <= 32 &&
7515             is_integral_type(Matcher::vector_element_basic_type(n)));
7516   match(Set dst (VectorBlend (Binary src1 src2) mask));
7517   format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
7518   ins_encode %{
7519     int vlen_enc = vector_length_encoding(this);
7520     __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
7521   %}
7522   ins_pipe( pipe_slow );
7523 %}
7524 
7525 instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{
7526   predicate(UseAVX > 0 &&
7527             n->in(2)->bottom_type()->isa_vectmask() == NULL &&
7528             Matcher::vector_length_in_bytes(n) <= 32 &&
7529             !is_integral_type(Matcher::vector_element_basic_type(n)));
7530   match(Set dst (VectorBlend (Binary src1 src2) mask));
7531   format %{ "vector_blend  $dst,$src1,$src2,$mask\t!" %}
7532   ins_encode %{
7533     int vlen_enc = vector_length_encoding(this);
7534     __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
7535   %}
7536   ins_pipe( pipe_slow );
7537 %}
7538 
7539 instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, rRegP scratch, kReg ktmp) %{
7540   predicate(Matcher::vector_length_in_bytes(n) == 64 &&
7541             n->in(2)->bottom_type()->isa_vectmask() == NULL);
7542   match(Set dst (VectorBlend (Binary src1 src2) mask));
7543   format %{ "vector_blend  $dst,$src1,$src2,$mask\t! using $scratch and $ktmp as TEMP" %}
7544   effect(TEMP scratch, TEMP ktmp);
7545   ins_encode %{
7546      int vlen_enc = Assembler::AVX_512bit;
7547      BasicType elem_bt = Matcher::vector_element_basic_type(this);
7548     __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, $scratch$$Register);
7549     __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
7550   %}
7551   ins_pipe( pipe_slow );
7552 %}
7553 
7554 
7555 instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask, rRegP scratch) %{
7556   predicate(n->in(2)->bottom_type()->isa_vectmask() &&
7557             (!is_subword_type(Matcher::vector_element_basic_type(n)) ||
7558              VM_Version::supports_avx512bw()));
7559   match(Set dst (VectorBlend (Binary src1 src2) mask));
7560   format %{ "vector_blend  $dst,$src1,$src2,$mask\t! using $scratch as TEMP" %}
7561   effect(TEMP scratch);
7562   ins_encode %{
7563     int vlen_enc = vector_length_encoding(this);
7564     BasicType elem_bt = Matcher::vector_element_basic_type(this);
7565     __ evpblend(elem_bt, $dst$$XMMRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
7566   %}
7567   ins_pipe( pipe_slow );
7568 %}
7569 
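// Scalar sketch of the blend selection above. The convention that a set mask
// bit picks src2 is an assumption consistent with the merge flag passed to
// evpblend: dst[i] = mask[i] ? src2[i] : src1[i].
#include <cstdint>

template <typename T>
void vector_blend(T* dst, const T* src1, const T* src2, uint64_t mask, int lanes) {
  for (int i = 0; i < lanes; i++) {
    dst[i] = ((mask >> i) & 1) ? src2[i] : src1[i];
  }
}
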
7570 // --------------------------------- ABS --------------------------------------
7571 // a = |a|
7572 instruct vabsB_reg(vec dst, vec src) %{
7573   match(Set dst (AbsVB  src));
7574   ins_cost(450);
7575   format %{ "vabsb $dst,$src\t# $dst = |$src| abs packedB" %}
7576   ins_encode %{
7577     uint vlen = Matcher::vector_length(this);
7578     if (vlen <= 16) {
7579       __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
7580     } else {
7581       int vlen_enc = vector_length_encoding(this);
7582       __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
7583     }
7584   %}
7585   ins_pipe( pipe_slow );
7586 %}
7587 
7588 instruct vabsS_reg(vec dst, vec src) %{
7589   match(Set dst (AbsVS  src));
7590   ins_cost(450);
7591   format %{ "vabsw $dst,$src\t# $dst = |$src| abs packedS" %}
7592   ins_encode %{
7593     uint vlen = Matcher::vector_length(this);
7594     if (vlen <= 8) {
7595       __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
7596     } else {
7597       int vlen_enc = vector_length_encoding(this);
7598       __ vpabsw($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
7599     }
7600   %}
7601   ins_pipe( pipe_slow );
7602 %}
7603 
7604 instruct vabsI_reg(vec dst, vec src) %{
7605   match(Set dst (AbsVI  src));
7606   format %{ "pabsd $dst,$src\t# $dst = |$src| abs packedI" %}
7607   ins_cost(250);
7608   ins_encode %{
7609     uint vlen = Matcher::vector_length(this);
7610     if (vlen <= 4) {
7611       __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
7612     } else {
7613       int vlen_enc = vector_length_encoding(this);
7614       __ vpabsd($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
7615     }
7616   %}
7617   ins_pipe( pipe_slow );
7618 %}
7619 
7620 instruct vabsL_reg(vec dst, vec src) %{
7621   match(Set dst (AbsVL  src));
7622   ins_cost(450);
7623   format %{ "evpabsq $dst,$src\t# $dst = |$src| abs packedL" %}
7624   ins_encode %{
7625     assert(UseAVX > 2, "required");
7626     int vlen_enc = vector_length_encoding(this);
7627     if (!VM_Version::supports_avx512vl()) {
7628       vlen_enc = Assembler::AVX_512bit;
7629     }
7630     __ evpabsq($dst$$XMMRegister, $src$$XMMRegister, vlen_enc);
7631   %}
7632   ins_pipe( pipe_slow );
7633 %}
7634 
7635 // --------------------------------- ABSNEG --------------------------------------
7636 
7637 instruct vabsnegF(vec dst, vec src, rRegI scratch) %{
7638   predicate(Matcher::vector_length(n) != 4); // handled by 1-operand instruction vabsneg4F
7639   match(Set dst (AbsVF src));
7640   match(Set dst (NegVF src));
7641   effect(TEMP scratch);
7642   format %{ "vabsnegf $dst,$src,[mask]\t# absneg packedF" %}

7675   effect(TEMP scratch);
7676   format %{ "vabsnegd $dst,$src,[mask]\t# absneg packedD" %}
7677   ins_encode %{
7678     int opcode = this->ideal_Opcode();
7679     uint vlen = Matcher::vector_length(this);
7680     if (vlen == 2) {
7681       assert(UseSSE >= 2, "required");
7682       __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, $scratch$$Register);
7683     } else {
7684       int vlen_enc = vector_length_encoding(this);
7685       __ vabsnegd(opcode, $dst$$XMMRegister, $src$$XMMRegister, vlen_enc, $scratch$$Register);
7686     }
7687   %}
7688   ins_pipe( pipe_slow );
7689 %}
7690 
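// Scalar sketch of the sign-mask technique behind the vabsneg encodings below:
// abs clears the sign bit (AND with the sign mask), neg flips it (XOR with the
// sign-flip constant); the [mask] operand in the formats refers to these
// constants (cf. float_signmask()/float_signflip() earlier in this file).
#include <cstdint>
#include <cstring>

float abs_via_mask(float x) {
  uint32_t b;
  std::memcpy(&b, &x, sizeof b);
  b &= 0x7FFFFFFFu;                  // float_signmask: clear the sign bit
  std::memcpy(&x, &b, sizeof x);
  return x;
}

float neg_via_mask(float x) {
  uint32_t b;
  std::memcpy(&b, &x, sizeof b);
  b ^= 0x80000000u;                  // float_signflip: toggle the sign bit
  std::memcpy(&x, &b, sizeof x);
  return x;
}
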
7691 //------------------------------------- VectorTest --------------------------------------------
7692 
7693 #ifdef _LP64
7694 instruct vptest_alltrue_lt16(rRegI dst, legVec src1, legVec src2, legVec vtmp1, legVec vtmp2, rFlagsReg cr) %{
7695   predicate(!VM_Version::supports_avx512bwdq() &&
7696             Matcher::vector_length_in_bytes(n->in(1)) >= 4 &&
7697             Matcher::vector_length_in_bytes(n->in(1)) < 16 &&
7698             static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
7699   match(Set dst (VectorTest src1 src2 ));
7700   effect(TEMP vtmp1, TEMP vtmp2, KILL cr);
7701   format %{ "vptest_alltrue_lt16 $dst,$src1, $src2\t! using $vtmp1, $vtmp2 and $cr as TEMP" %}
7702   ins_encode %{
7703     int vlen = Matcher::vector_length_in_bytes(this, $src1);
7704     __ vectortest(BoolTest::overflow, vlen, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp1$$XMMRegister, $vtmp2$$XMMRegister);
7705     __ setb(Assembler::carrySet, $dst$$Register);
7706     __ movzbl($dst$$Register, $dst$$Register);
7707   %}
7708   ins_pipe( pipe_slow );
7709 %}
7710 
7711 instruct vptest_alltrue_ge16(rRegI dst, legVec src1, legVec src2, rFlagsReg cr) %{
7712   predicate(!VM_Version::supports_avx512bwdq() &&
7713             Matcher::vector_length_in_bytes(n->in(1)) >= 16 &&
7714             Matcher::vector_length_in_bytes(n->in(1)) <  64 &&
7715             static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow);
7716   match(Set dst (VectorTest src1 src2 ));
7717   effect(KILL cr);
7718   format %{ "vptest_alltrue_ge16  $dst,$src1, $src2\t! using $cr as TEMP" %}
7719   ins_encode %{
7720     int vlen = Matcher::vector_length_in_bytes(this, $src1);
7721     __ vectortest(BoolTest::overflow, vlen, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg, knoreg);
7722     __ setb(Assembler::carrySet, $dst$$Register);
7723     __ movzbl($dst$$Register, $dst$$Register);
7724   %}
7725   ins_pipe( pipe_slow );
7726 %}
7727 
7728 instruct vptest_alltrue_lt8_evex(rRegI dst, kReg src1, kReg src2, kReg kscratch, rFlagsReg cr) %{
7729   predicate(VM_Version::supports_avx512bwdq() &&
7730             static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow &&
7731             n->in(1)->bottom_type()->isa_vectmask() &&
7732             Matcher::vector_length(n->in(1)) < 8);
7733   match(Set dst (VectorTest src1 src2));
7734   effect(KILL cr, TEMP kscratch);
7735   format %{ "vptest_alltrue_lt8_evex $dst,$src1,$src2\t! using $cr as TEMP" %}
7736   ins_encode %{
7737     const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
7738     const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
7739     assert(0 == Type::cmp(mask1->bottom_type(), mask2->bottom_type()), "");
7740     uint masklen = Matcher::vector_length(this, $src1);
7741     __ alltrue($dst$$Register, masklen, $src1$$KRegister, $src2$$KRegister, $kscratch$$KRegister);
7742   %}
7743   ins_pipe( pipe_slow );
7744 %}
7745 
7746 
7747 instruct vptest_alltrue_ge8_evex(rRegI dst, kReg src1, kReg src2, rFlagsReg cr) %{
7748   predicate(VM_Version::supports_avx512bwdq() &&
7749             static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::overflow &&
7750             n->in(1)->bottom_type()->isa_vectmask() &&
7751             Matcher::vector_length(n->in(1)) >= 8);
7752   match(Set dst (VectorTest src1 src2));
7753   effect(KILL cr);
7754   format %{ "vptest_alltrue_ge8_evex $dst,$src1,$src2\t! using $cr as TEMP" %}
7755   ins_encode %{
7756     const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
7757     const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
7758     assert(0 == Type::cmp(mask1->bottom_type(), mask2->bottom_type()), "");
7759     uint masklen = Matcher::vector_length(this, $src1);
7760     __ alltrue($dst$$Register, masklen, $src1$$KRegister, $src2$$KRegister, knoreg);
7761   %}
7762   ins_pipe( pipe_slow );
7763 %}
7764 
7765 
7766 instruct vptest_anytrue_lt16(rRegI dst, legVec src1, legVec src2, legVec vtmp, rFlagsReg cr) %{
7767   predicate(!VM_Version::supports_avx512bwdq() &&
7768             Matcher::vector_length_in_bytes(n->in(1)) >= 4 &&
7769             Matcher::vector_length_in_bytes(n->in(1)) < 16 &&
7770             static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
7771   match(Set dst (VectorTest src1 src2 ));
7772   effect(TEMP vtmp, KILL cr);
7773   format %{ "vptest_anytrue_lt16 $dst,$src1,$src2\t! using $vtmp, $cr as TEMP" %}
7774   ins_encode %{
7775     int vlen = Matcher::vector_length_in_bytes(this, $src1);
7776     __ vectortest(BoolTest::ne, vlen, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
7777     __ setb(Assembler::notZero, $dst$$Register);
7778     __ movzbl($dst$$Register, $dst$$Register);
7779   %}
7780   ins_pipe( pipe_slow );
7781 %}
7782 
7783 instruct vptest_anytrue_ge16(rRegI dst, legVec src1, legVec src2, rFlagsReg cr) %{
7784   predicate(!VM_Version::supports_avx512bwdq() &&
7785             Matcher::vector_length_in_bytes(n->in(1)) >= 16 &&
7786             Matcher::vector_length_in_bytes(n->in(1)) < 64  &&
7787             static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
7788   match(Set dst (VectorTest src1 src2 ));
7789   effect(KILL cr);
7790   format %{ "vptest_anytrue_ge16 $dst,$src1,$src2\t! using $cr as TEMP" %}
7791   ins_encode %{
7792     int vlen = Matcher::vector_length_in_bytes(this, $src1);
7793     __ vectortest(BoolTest::ne, vlen, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg, knoreg);
7794     __ setb(Assembler::notZero, $dst$$Register);
7795     __ movzbl($dst$$Register, $dst$$Register);
7796   %}
7797   ins_pipe( pipe_slow );
7798 %}
7799 
7800 instruct vptest_anytrue_evex(rRegI dst, kReg src1, kReg src2, rFlagsReg cr) %{
7801   predicate(VM_Version::supports_avx512bwdq() &&
7802             static_cast<const VectorTestNode*>(n)->get_predicate() == BoolTest::ne);
7803   match(Set dst (VectorTest src1 src2));
7804   effect(KILL cr);
7805   format %{ "vptest_anytrue_evex $dst,$src1,$src2\t! using $cr as TEMP" %}
7806   ins_encode %{
7807     const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
7808     const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
7809     assert(0 == Type::cmp(mask1->bottom_type(), mask2->bottom_type()), "");
7810     uint  masklen = Matcher::vector_length(this, $src1);
7811     __ anytrue($dst$$Register, masklen, $src1$$KRegister, $src2$$KRegister);
7812   %}
7813   ins_pipe( pipe_slow );
7814 %}
7815 
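// The cmpvptest_* variants below match the fused (CmpI (VectorTest src1 src2) 0)
// shape: when only the condition flags are consumed, the setb/movzbl boolean
// materialization of the patterns above can be omitted.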
7816 instruct cmpvptest_anytrue_lt16(rFlagsReg cr, legVec src1, legVec src2, immI_0 zero, legVec vtmp) %{
7817   predicate(!VM_Version::supports_avx512bwdq() &&
7818             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 &&
7819             Matcher::vector_length_in_bytes(n->in(1)->in(1)) < 16 &&
7820             static_cast<const VectorTestNode*>(n->in(1))->get_predicate() == BoolTest::ne);
7821   match(Set cr (CmpI (VectorTest src1 src2) zero));
7822   effect(TEMP vtmp);
7823   format %{ "cmpvptest_anytrue_lt16 $src1,$src2\t! using $vtmp as TEMP" %}
7824   ins_encode %{
7825     int vlen = Matcher::vector_length_in_bytes(this, $src1);
7826     __ vectortest(BoolTest::ne, vlen, $src1$$XMMRegister, $src2$$XMMRegister, $vtmp$$XMMRegister);
7827   %}
7828   ins_pipe( pipe_slow );
7829 %}
7830 
7831 instruct cmpvptest_anytrue_ge16(rFlagsReg cr, legVec src1, legVec src2, immI_0 zero) %{
7832   predicate(!VM_Version::supports_avx512bwdq() &&
7833             Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 16 &&
7834             Matcher::vector_length_in_bytes(n->in(1)->in(1)) <  64 &&
7835             static_cast<const VectorTestNode*>(n->in(1))->get_predicate() == BoolTest::ne);
7836   match(Set cr (CmpI (VectorTest src1 src2) zero));
7837   format %{ "cmpvptest_anytrue_ge16 $src1,$src2\t!" %}
7838   ins_encode %{
7839     int vlen = Matcher::vector_length_in_bytes(this, $src1);
7840     __ vectortest(BoolTest::ne, vlen, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, xnoreg, knoreg);
7841   %}
7842   ins_pipe( pipe_slow );
7843 %}
7844 
7845 instruct cmpvptest_anytrue_evex(rFlagsReg cr, kReg src1, kReg src2, immI_0 zero) %{
7846   predicate(VM_Version::supports_avx512bwdq() &&
7847             static_cast<const VectorTestNode*>(n->in(1))->get_predicate() == BoolTest::ne);
7848   match(Set cr (CmpI (VectorTest src1 src2) zero));
7849   format %{ "cmpvptest_anytrue_evex $src1,$src2\t!" %}
7850   ins_encode %{
7851     uint masklen = Matcher::vector_length(this, $src1);
7852     const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
7853     const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
7854     assert(0 == Type::cmp(mask1->bottom_type(), mask2->bottom_type()), "");
7855     masklen = masklen < 8 ? 8 : masklen;
7856     __ ktest(masklen, $src1$$KRegister, $src2$$KRegister);
7857   %}
7858   ins_pipe( pipe_slow );
7859 %}
7860 #endif
7861 
7862 //------------------------------------- LoadMask --------------------------------------------
7863 
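// VectorLoadMask widens a boolean vector (one 0/1 byte per lane) into a
// lane-sized mask: either a vector register whose lanes are all-zeros/all-ones,
// or an AVX-512 opmask (kReg) when the node's type is a true vectmask.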
7864 instruct loadMask(legVec dst, legVec src) %{
7865   predicate(n->bottom_type()->isa_vectmask() == NULL && !VM_Version::supports_avx512vlbw());
7866   match(Set dst (VectorLoadMask src));
7867   effect(TEMP dst);
7868   format %{ "vector_loadmask_byte $dst, $src" %}
7869   ins_encode %{
7870     int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
7871     BasicType elem_bt = Matcher::vector_element_basic_type(this);
7872     __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true);
7873   %}
7874   ins_pipe( pipe_slow );
7875 %}
7876 
7877 instruct loadMask64(kReg dst, vec src, vec xtmp, rRegI tmp) %{
7878   predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
7879   match(Set dst (VectorLoadMask src));
7880   effect(TEMP xtmp, TEMP tmp);
7881   format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp and $tmp as TEMP" %}
7882   ins_encode %{
7883     __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
7884                         $tmp$$Register, true, Assembler::AVX_512bit);
7885   %}
7886   ins_pipe( pipe_slow );
7887 %}
7888 
7889 instruct loadMask_evex(kReg dst, vec src,  vec xtmp) %{
7890   predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
7891   match(Set dst (VectorLoadMask src));
7892   effect(TEMP xtmp);
7893   format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %}
7894   ins_encode %{
7895     int vlen_enc = vector_length_encoding(in(1));
7896     __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
7897                         noreg, false, vlen_enc);
7898   %}
7899   ins_pipe( pipe_slow );
7900 %}
7901 
7902 //------------------------------------- StoreMask --------------------------------------------
7903 
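// VectorStoreMask is the inverse of VectorLoadMask: lane-sized masks (0 or
// all-ones) are narrowed back to 0/1 bytes. The trailing pabsb/vpabsb in each
// variant maps a byte of -1 to +1; the pack/extract steps only narrow lanes.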
7904 instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{
7905   predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == NULL);
7906   match(Set dst (VectorStoreMask src size));
7907   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
7908   ins_encode %{
7909     int vlen = Matcher::vector_length(this);
7910     if (vlen <= 16 && UseAVX <= 2) {
7911       assert(UseSSE >= 3, "required");
7912       __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
7913     } else {
7914       assert(UseAVX > 0, "required");
7915       int src_vlen_enc = vector_length_encoding(this, $src);
7916       __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
7917     }
7918   %}
7919   ins_pipe( pipe_slow );
7920 %}
7921 
7922 instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{
7923   predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == NULL);
7924   match(Set dst (VectorStoreMask src size));
7925   effect(TEMP_DEF dst, TEMP xtmp);
7926   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
7927   ins_encode %{
7928     int vlen_enc = Assembler::AVX_128bit;
7929     int vlen = Matcher::vector_length(this);
7930     if (vlen <= 8) {
7931       assert(UseSSE >= 3, "required");
7932       __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
7933       __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
7934       __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
7935     } else {
7936       assert(UseAVX > 0, "required");
7937       __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
7938       __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
7939       __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
7940     }
7941   %}
7942   ins_pipe( pipe_slow );
7943 %}
7944 
7945 instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{
7946   predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == NULL);
7947   match(Set dst (VectorStoreMask src size));
7948   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
7949   effect(TEMP_DEF dst, TEMP xtmp);
7950   ins_encode %{
7951     int vlen_enc = Assembler::AVX_128bit;
7952     int vlen = Matcher::vector_length(this);
7953     if (vlen <= 4) {
7954       assert(UseSSE >= 3, "required");
7955       __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
7956       __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
7957       __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
7958       __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
7959     } else {
7960       assert(UseAVX > 0, "required");
7961       __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
7962       __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
7963       __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
7964       __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
7965       __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
7966     }
7967   %}
7968   ins_pipe( pipe_slow );
7969 %}
7970 
7971 instruct storeMask8B(vec dst, vec src, vec xtmp, immI_8 size) %{
7972   predicate(UseAVX <= 2 && Matcher::vector_length(n) == 2);
7973   match(Set dst (VectorStoreMask src size));
7974   effect(TEMP_DEF dst, TEMP xtmp);
7975   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
7976   ins_encode %{
7977     assert(UseSSE >= 3, "required");
7978     __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
7979     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x8);
7980     __ pabsd($dst$$XMMRegister, $dst$$XMMRegister);
7981     __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
7982     __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
7983   %}
7984   ins_pipe( pipe_slow );
7985 %}
7986 
7987 instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{
7988   predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4);
7989   match(Set dst (VectorStoreMask src size));
7990   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %}
7991   effect(TEMP_DEF dst, TEMP vtmp);
7992   ins_encode %{
7993     int vlen_enc = Assembler::AVX_128bit;
7994     __ vpshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit);
7995     __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
7996     __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc);
7997     __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
7998     __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
7999     __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
8000     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
8001   %}
8002   ins_pipe( pipe_slow );
8003 %}
8004 
8005 instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{
8006   predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == NULL);
8007   match(Set dst (VectorStoreMask src size));
8008   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
8009   ins_encode %{
8010     int src_vlen_enc = vector_length_encoding(this, $src);
8011     int dst_vlen_enc = vector_length_encoding(this);
8012     if (!VM_Version::supports_avx512vl()) {
8013       src_vlen_enc = Assembler::AVX_512bit;
8014     }
8015     __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
8016     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
8017   %}
8018   ins_pipe( pipe_slow );
8019 %}
8020 
8021 instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{
8022   predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == NULL);
8023   match(Set dst (VectorStoreMask src size));
8024   format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
8025   ins_encode %{
8026     int src_vlen_enc = vector_length_encoding(this, $src);
8027     int dst_vlen_enc = vector_length_encoding(this);
8028     if (!VM_Version::supports_avx512vl()) {
8029       src_vlen_enc = Assembler::AVX_512bit;
8030     }
8031     __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
8032     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
8033   %}
8034   ins_pipe( pipe_slow );
8035 %}
8036 
8037 instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size, rRegI tmp) %{
8038   predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
8039   match(Set dst (VectorStoreMask mask size));
8040   effect(TEMP_DEF dst, TEMP tmp);
8041   format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
8042   ins_encode %{
8043     assert(Matcher::vector_length_in_bytes(this, $mask) == 64, "");
8044     __ evmovdqul($dst$$XMMRegister, $mask$$KRegister, ExternalAddress(vector_int_mask_cmp_bits()),
8045                  false, Assembler::AVX_512bit, $tmp$$Register);
8046     __ evpmovdb($dst$$XMMRegister, $dst$$XMMRegister, Assembler::AVX_512bit);
8047   %}
8048   ins_pipe( pipe_slow );
8049 %}
8050 
8051 instruct vstoreMask_evex(vec dst, kReg mask, immI size) %{
8052   predicate(n->in(1)->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
8053   match(Set dst (VectorStoreMask mask size));
8054   effect(TEMP_DEF dst);
8055   format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
8056   ins_encode %{
8057     int dst_vlen_enc = vector_length_encoding(this);
8058     __ evpmovm2b($dst$$XMMRegister, $mask$$KRegister, dst_vlen_enc);
8059     __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
8060   %}
8061   ins_pipe( pipe_slow );
8062 %}
8063 
8064 instruct vmaskcast_evex(kReg dst) %{
8065   predicate(Matcher::vector_length(n) == Matcher::vector_length(n->in(1)));
8066   match(Set dst (VectorMaskCast dst));
8067   ins_cost(0);
8068   format %{ "vector_mask_cast $dst" %}
8069   ins_encode %{
8070     // empty
8071   %}
8072   ins_pipe(empty);
8073 %}
8074 
8075 instruct vmaskcast(vec dst) %{
8076   predicate((Matcher::vector_length(n) == Matcher::vector_length(n->in(1))) &&
8077             (Matcher::vector_length_in_bytes(n) == Matcher::vector_length_in_bytes(n->in(1))));
8078   match(Set dst (VectorMaskCast dst));
8079   ins_cost(0);
8080   format %{ "vector_mask_cast $dst" %}
8081   ins_encode %{
8082     // empty
8083   %}
8084   ins_pipe(empty);
8085 %}
8086 
8087 //-------------------------------- Load Iota Indices ----------------------------------
8088 
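// VectorLoadConst with a zero source materializes the iota constant
// {0, 1, 2, ...}, the identity permutation used by shuffle/rearrange.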
8089 instruct loadIotaIndices(vec dst, immI_0 src, rRegP scratch) %{
8090   predicate(Matcher::vector_element_basic_type(n) == T_BYTE);
8091   match(Set dst (VectorLoadConst src));
8092   effect(TEMP scratch);
8093   format %{ "vector_load_iota $dst CONSTANT_MEMORY\t! load iota indices" %}
8094   ins_encode %{

8624   effect(TEMP temp);
8625   ins_encode %{
8626     __ mov64($temp$$Register, (0xFFFFFFFFFFFFFFFFUL >> (64 - $len$$constant)));
8627     __ kmovql($dst$$KRegister, $temp$$Register);
8628   %}
8629   ins_pipe( pipe_slow );
8630 %}
8631 
8632 instruct vmasked_store64(memory mem, vec src, kReg mask) %{
8633   match(Set mem (StoreVectorMasked mem (Binary src mask)));
8634   format %{ "vector_masked_store $mem, $src, $mask \t! vector masked store" %}
8635   ins_encode %{
8636     const MachNode* src_node = static_cast<const MachNode*>(this->in(this->operand_index($src)));
8637     BasicType elmType = src_node->bottom_type()->is_vect()->element_basic_type();
8638     int vector_len = vector_length_encoding(src_node);
8639     __ evmovdqu(elmType, $mask$$KRegister, $mem$$Address, $src$$XMMRegister, vector_len);
8640   %}
8641   ins_pipe( pipe_slow );
8642 %}
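// Note: an opmask-qualified evmovdqu store writes only the lanes whose mask bit
// is set; masked-off lanes are skipped rather than zeroed in memory.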
8643 
8644 instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{
8645   predicate(n->in(1)->bottom_type()->isa_vectmask());
8646   match(Set dst (VectorMaskToLong mask));
8647   effect(TEMP dst, KILL cr);
8648   format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %}
8649   ins_encode %{
8650     int mask_len = Matcher::vector_length(this, $mask);
8651     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
8652     if (VM_Version::supports_avx512vlbw()) {
8653       __ kmovql($dst$$Register, $mask$$KRegister);
8654     } else {
8655       assert(mask_len <= 16, "");
8656       __ kmovwl($dst$$Register, $mask$$KRegister);
8657     }
8658     // A mask produced by a partial vector comparison, replicate, or mask-manipulation
8659     // operation needs to be clipped to the actual mask length.
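    // For example, with mask_len == 4 the clip constant is ((jlong)1 << 4) - 1 == 0xF.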
8660     int mask_size = mask_len * type2aelembytes(mbt);
8661     if (mask_size < 16) {
8662       __ andq($dst$$Register, (((jlong)1 << mask_len) - 1));
8663     }
8664   %}
8665   ins_pipe( pipe_slow );
8666 %}
8667 
8668 instruct vmask_tolong_avx(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{
8669   predicate(n->in(1)->bottom_type()->isa_vectmask() == NULL &&
8670             n->in(1)->bottom_type()->is_vect()->element_basic_type() == T_BOOLEAN);
8671   match(Set dst (VectorMaskToLong mask));
8672   format %{ "vector_tolong_avx $dst, $mask \t! using $xtmp as TEMP" %}
8673   effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
8674   ins_encode %{
8675     int mask_len = Matcher::vector_length(this, $mask);
8676     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
8677     int vlen_enc = vector_length_encoding(this, $mask);
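    // The boolean lanes are 0/1 bytes: (0 - mask) turns them into 0/0xFF so
    // that vpmovmskb can gather the sign bits into a scalar bitmask.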
8678     __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
8679     __ vpsubb($xtmp$$XMMRegister, $xtmp$$XMMRegister, $mask$$XMMRegister, vlen_enc);
8680     __ vpmovmskb($dst$$Register, $xtmp$$XMMRegister, vlen_enc);
8681     // A mask produced by a partial vector comparison, replicate, or mask-manipulation
8682     // operation needs to be clipped to the actual mask length.
8683     int mask_size = mask_len * type2aelembytes(mbt);
8684     if (mask_size < 16) {
8685       __ andq($dst$$Register, (((jlong)1 << mask_len) - 1));
8686     }
8687   %}
8688   ins_pipe( pipe_slow );
8689 %}
8690 
8691 instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
8692   predicate(n->in(1)->bottom_type()->isa_vectmask());
8693   match(Set dst (VectorMaskTrueCount mask));
8694   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
8695   format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %}
8696   ins_encode %{
8697     int opcode = this->ideal_Opcode();
8698     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
8699     int mask_len = Matcher::vector_length(this, $mask);
8700     int mask_size = mask_len * type2aelembytes(mbt);
8701     int vlen_enc = vector_length_encoding(this, $mask);
8702     __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister, $tmp$$Register,
8703                              mask_len, mask_size, vlen_enc);
8704   %}
8705   ins_pipe( pipe_slow );
8706 %}
8707 
8708 instruct vmask_truecount_avx(rRegI dst, vec mask, rRegL tmp, vec xtmp, vec xtmp1, rFlagsReg cr) %{
8709   predicate(n->in(1)->bottom_type()->isa_vectmask() == NULL);
8710   match(Set dst (VectorMaskTrueCount mask));
8711   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, TEMP xtmp1, KILL cr);
8712   format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp and $xtmp1 as TEMP" %}
8713   ins_encode %{
8714     int opcode = this->ideal_Opcode();
8715     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
8716     int mask_len = Matcher::vector_length(this, $mask);
8717     int mask_size = mask_len * type2aelembytes(mbt);
8718     int vlen_enc = vector_length_encoding(this, $mask);
8719     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
8720                              $xtmp1$$XMMRegister, $tmp$$Register, mask_len, mask_size, vlen_enc);
8721   %}
8722   ins_pipe( pipe_slow );
8723 %}
8724 
8725 instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
8726   predicate(n->in(1)->bottom_type()->isa_vectmask());
8727   match(Set dst (VectorMaskFirstTrue mask));
8728   match(Set dst (VectorMaskLastTrue mask));
8729   effect(TEMP_DEF dst, TEMP tmp, KILL cr);
8730   format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! using $tmp as TEMP" %}
8731   ins_encode %{
8732     int opcode = this->ideal_Opcode();
8733     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
8734     int mask_len = Matcher::vector_length(this, $mask);
8735     int mask_size = mask_len * type2aelembytes(mbt);
8736     int vlen_enc = vector_length_encoding(this, $mask);
8737     __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister, $tmp$$Register, mask_len,
8738                              mask_size, vlen_enc);
8739   %}
8740   ins_pipe( pipe_slow );
8741 %}
8742 
8743 instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, rRegL tmp, vec xtmp, vec xtmp1, rFlagsReg cr) %{
8744   predicate(n->in(1)->bottom_type()->isa_vectmask() == NULL);
8745   match(Set dst (VectorMaskFirstTrue mask));
8746   match(Set dst (VectorMaskLastTrue mask));
8747   effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, TEMP xtmp1, KILL cr);
8748   format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp and $xtmp1 as TEMP" %}
8749   ins_encode %{
8750     int opcode = this->ideal_Opcode();
8751     BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
8752     int mask_len = Matcher::vector_length(this, $mask);
8753     int mask_size = mask_len * type2aelembytes(mbt);
8754     int vlen_enc = vector_length_encoding(this, $mask);
8755     __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
8756                              $xtmp1$$XMMRegister, $tmp$$Register, mask_len, mask_size, vlen_enc);
8757   %}
8758   ins_pipe( pipe_slow );
8759 %}
8760 #endif // _LP64
8761 
8762 // ---------------------------------- Vector Masked Operations ------------------------------------
8763 
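// The masked instructs below all funnel into MacroAssembler::evmasked_op, which
// picks the EVEX encoding for the ideal opcode and element type. $dst doubles
// as the first source, and the trailing boolean (presumably a merge flag)
// requests merge-masking so lanes with a clear mask bit keep their old value.
// In Java Vector API terms, an expression such as a.lanewise(ADD, b, m) is
// expected to arrive here as (AddVI (Binary dst src2) mask).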
8764 instruct vadd_reg_masked(vec dst, vec src2, kReg mask) %{
8765   match(Set dst (AddVB (Binary dst src2) mask));
8766   match(Set dst (AddVS (Binary dst src2) mask));
8767   match(Set dst (AddVI (Binary dst src2) mask));
8768   match(Set dst (AddVL (Binary dst src2) mask));
8769   match(Set dst (AddVF (Binary dst src2) mask));
8770   match(Set dst (AddVD (Binary dst src2) mask));
8771   format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
8772   ins_encode %{
8773     int vlen_enc = vector_length_encoding(this);
8774     BasicType bt = Matcher::vector_element_basic_type(this);
8775     int opc = this->ideal_Opcode();
8776     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
8777                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
8778   %}
8779   ins_pipe( pipe_slow );
8780 %}
8781 
8782 instruct vadd_mem_masked(vec dst, memory src2, kReg mask) %{
8783   match(Set dst (AddVB (Binary dst (LoadVector src2)) mask));
8784   match(Set dst (AddVS (Binary dst (LoadVector src2)) mask));
8785   match(Set dst (AddVI (Binary dst (LoadVector src2)) mask));
8786   match(Set dst (AddVL (Binary dst (LoadVector src2)) mask));
8787   match(Set dst (AddVF (Binary dst (LoadVector src2)) mask));
8788   match(Set dst (AddVD (Binary dst (LoadVector src2)) mask));
8789   format %{ "vpadd_masked $dst, $dst, $src2, $mask\t! add masked operation" %}
8790   ins_encode %{
8791     int vlen_enc = vector_length_encoding(this);
8792     BasicType bt = Matcher::vector_element_basic_type(this);
8793     int opc = this->ideal_Opcode();
8794     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
8795                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
8796   %}
8797   ins_pipe( pipe_slow );
8798 %}
8799 
8800 instruct vxor_reg_masked(vec dst, vec src2, kReg mask) %{
8801   match(Set dst (XorV (Binary dst src2) mask));
8802   format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
8803   ins_encode %{
8804     int vlen_enc = vector_length_encoding(this);
8805     BasicType bt = Matcher::vector_element_basic_type(this);
8806     int opc = this->ideal_Opcode();
8807     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
8808                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
8809   %}
8810   ins_pipe( pipe_slow );
8811 %}
8812 
8813 instruct vxor_mem_masked(vec dst, memory src2, kReg mask) %{
8814   match(Set dst (XorV (Binary dst (LoadVector src2)) mask));
8815   format %{ "vxor_masked $dst, $dst, $src2, $mask\t! xor masked operation" %}
8816   ins_encode %{
8817     int vlen_enc = vector_length_encoding(this);
8818     BasicType bt = Matcher::vector_element_basic_type(this);
8819     int opc = this->ideal_Opcode();
8820     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
8821                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
8822   %}
8823   ins_pipe( pipe_slow );
8824 %}
8825 
8826 instruct vor_reg_masked(vec dst, vec src2, kReg mask) %{
8827   match(Set dst (OrV (Binary dst src2) mask));
8828   format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
8829   ins_encode %{
8830     int vlen_enc = vector_length_encoding(this);
8831     BasicType bt = Matcher::vector_element_basic_type(this);
8832     int opc = this->ideal_Opcode();
8833     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
8834                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
8835   %}
8836   ins_pipe( pipe_slow );
8837 %}
8838 
8839 instruct vor_mem_masked(vec dst, memory src2, kReg mask) %{
8840   match(Set dst (OrV (Binary dst (LoadVector src2)) mask));
8841   format %{ "vor_masked $dst, $dst, $src2, $mask\t! or masked operation" %}
8842   ins_encode %{
8843     int vlen_enc = vector_length_encoding(this);
8844     BasicType bt = Matcher::vector_element_basic_type(this);
8845     int opc = this->ideal_Opcode();
8846     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
8847                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
8848   %}
8849   ins_pipe( pipe_slow );
8850 %}
8851 
8852 instruct vand_reg_masked(vec dst, vec src2, kReg mask) %{
8853   match(Set dst (AndV (Binary dst src2) mask));
8854   format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
8855   ins_encode %{
8856     int vlen_enc = vector_length_encoding(this);
8857     BasicType bt = Matcher::vector_element_basic_type(this);
8858     int opc = this->ideal_Opcode();
8859     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
8860                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
8861   %}
8862   ins_pipe( pipe_slow );
8863 %}
8864 
8865 instruct vand_mem_masked(vec dst, memory src2, kReg mask) %{
8866   match(Set dst (AndV (Binary dst (LoadVector src2)) mask));
8867   format %{ "vand_masked $dst, $dst, $src2, $mask\t! and masked operation" %}
8868   ins_encode %{
8869     int vlen_enc = vector_length_encoding(this);
8870     BasicType bt = Matcher::vector_element_basic_type(this);
8871     int opc = this->ideal_Opcode();
8872     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
8873                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
8874   %}
8875   ins_pipe( pipe_slow );
8876 %}
8877 
8878 instruct vsub_reg_masked(vec dst, vec src2, kReg mask) %{
8879   match(Set dst (SubVB (Binary dst src2) mask));
8880   match(Set dst (SubVS (Binary dst src2) mask));
8881   match(Set dst (SubVI (Binary dst src2) mask));
8882   match(Set dst (SubVL (Binary dst src2) mask));
8883   match(Set dst (SubVF (Binary dst src2) mask));
8884   match(Set dst (SubVD (Binary dst src2) mask));
8885   format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
8886   ins_encode %{
8887     int vlen_enc = vector_length_encoding(this);
8888     BasicType bt = Matcher::vector_element_basic_type(this);
8889     int opc = this->ideal_Opcode();
8890     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
8891                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
8892   %}
8893   ins_pipe( pipe_slow );
8894 %}
8895 
8896 instruct vsub_mem_masked(vec dst, memory src2, kReg mask) %{
8897   match(Set dst (SubVB (Binary dst (LoadVector src2)) mask));
8898   match(Set dst (SubVS (Binary dst (LoadVector src2)) mask));
8899   match(Set dst (SubVI (Binary dst (LoadVector src2)) mask));
8900   match(Set dst (SubVL (Binary dst (LoadVector src2)) mask));
8901   match(Set dst (SubVF (Binary dst (LoadVector src2)) mask));
8902   match(Set dst (SubVD (Binary dst (LoadVector src2)) mask));
8903   format %{ "vpsub_masked $dst, $dst, $src2, $mask\t! sub masked operation" %}
8904   ins_encode %{
8905     int vlen_enc = vector_length_encoding(this);
8906     BasicType bt = Matcher::vector_element_basic_type(this);
8907     int opc = this->ideal_Opcode();
8908     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
8909                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
8910   %}
8911   ins_pipe( pipe_slow );
8912 %}
8913 
8914 instruct vmul_reg_masked(vec dst, vec src2, kReg mask) %{
8915   match(Set dst (MulVS (Binary dst src2) mask));
8916   match(Set dst (MulVI (Binary dst src2) mask));
8917   match(Set dst (MulVL (Binary dst src2) mask));
8918   match(Set dst (MulVF (Binary dst src2) mask));
8919   match(Set dst (MulVD (Binary dst src2) mask));
8920   format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
8921   ins_encode %{
8922     int vlen_enc = vector_length_encoding(this);
8923     BasicType bt = Matcher::vector_element_basic_type(this);
8924     int opc = this->ideal_Opcode();
8925     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
8926                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
8927   %}
8928   ins_pipe( pipe_slow );
8929 %}
8930 
8931 instruct vmul_mem_masked(vec dst, memory src2, kReg mask) %{
8932   match(Set dst (MulVS (Binary dst (LoadVector src2)) mask));
8933   match(Set dst (MulVI (Binary dst (LoadVector src2)) mask));
8934   match(Set dst (MulVL (Binary dst (LoadVector src2)) mask));
8935   match(Set dst (MulVF (Binary dst (LoadVector src2)) mask));
8936   match(Set dst (MulVD (Binary dst (LoadVector src2)) mask));
8937   format %{ "vpmul_masked $dst, $dst, $src2, $mask\t! mul masked operation" %}
8938   ins_encode %{
8939     int vlen_enc = vector_length_encoding(this);
8940     BasicType bt = Matcher::vector_element_basic_type(this);
8941     int opc = this->ideal_Opcode();
8942     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
8943                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
8944   %}
8945   ins_pipe( pipe_slow );
8946 %}
8947 
8948 instruct vsqrt_reg_masked(vec dst, kReg mask) %{
8949   match(Set dst (SqrtVF dst mask));
8950   match(Set dst (SqrtVD dst mask));
8951   ins_cost(100);
8952   format %{ "vpsqrt_masked $dst, $mask\t! sqrt masked operation" %}
8953   ins_encode %{
8954     int vlen_enc = vector_length_encoding(this);
8955     BasicType bt = Matcher::vector_element_basic_type(this);
8956     int opc = this->ideal_Opcode();
8957     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
8958                    $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
8959   %}
8960   ins_pipe( pipe_slow );
8961 %}
8962 
8963 instruct vdiv_reg_masked(vec dst, vec src2, kReg mask) %{
8964   match(Set dst (DivVF (Binary dst src2) mask));
8965   match(Set dst (DivVD (Binary dst src2) mask));
8966   format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
8967   ins_encode %{
8968     int vlen_enc = vector_length_encoding(this);
8969     BasicType bt = Matcher::vector_element_basic_type(this);
8970     int opc = this->ideal_Opcode();
8971     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
8972                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
8973   %}
8974   ins_pipe( pipe_slow );
8975 %}
8976 
8977 instruct vdiv_mem_masked(vec dst, memory src2, kReg mask) %{
8978   match(Set dst (DivVF (Binary dst (LoadVector src2)) mask));
8979   match(Set dst (DivVD (Binary dst (LoadVector src2)) mask));
8980   format %{ "vpdiv_masked $dst, $dst, $src2, $mask\t! div masked operation" %}
8981   ins_encode %{
8982     int vlen_enc = vector_length_encoding(this);
8983     BasicType bt = Matcher::vector_element_basic_type(this);
8984     int opc = this->ideal_Opcode();
8985     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
8986                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
8987   %}
8988   ins_pipe( pipe_slow );
8989 %}
8990 
8991 
8992 instruct vrol_imm_masked(vec dst, immI8 shift, kReg mask) %{
8993   match(Set dst (RotateLeftV (Binary dst shift) mask));
8994   match(Set dst (RotateRightV (Binary dst shift) mask));
8995   format %{ "vprotate_imm_masked $dst, $dst, $shift, $mask\t! rotate masked operation" %}
8996   ins_encode %{
8997     int vlen_enc = vector_length_encoding(this);
8998     BasicType bt = Matcher::vector_element_basic_type(this);
8999     int opc = this->ideal_Opcode();
9000     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
9001                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
9002   %}
9003   ins_pipe( pipe_slow );
9004 %}
9005 
9006 instruct vrol_reg_masked(vec dst, vec src2, kReg mask) %{
9007   match(Set dst (RotateLeftV (Binary dst src2) mask));
9008   match(Set dst (RotateRightV (Binary dst src2) mask));
9009   format %{ "vrotate_masked $dst, $dst, $src2, $mask\t! rotate masked operation" %}
9010   ins_encode %{
9011     int vlen_enc = vector_length_encoding(this);
9012     BasicType bt = Matcher::vector_element_basic_type(this);
9013     int opc = this->ideal_Opcode();
9014     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
9015                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
9016   %}
9017   ins_pipe( pipe_slow );
9018 %}
9019 
9020 instruct vlshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
9021   match(Set dst (LShiftVS (Binary dst (LShiftCntV shift)) mask));
9022   match(Set dst (LShiftVI (Binary dst (LShiftCntV shift)) mask));
9023   match(Set dst (LShiftVL (Binary dst (LShiftCntV shift)) mask));
9024   format %{ "vplshift_imm_masked $dst, $dst, $shift, $mask\t! lshift masked operation" %}
9025   ins_encode %{
9026     int vlen_enc = vector_length_encoding(this);
9027     BasicType bt = Matcher::vector_element_basic_type(this);
9028     int opc = this->ideal_Opcode();
9029     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
9030                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
9031   %}
9032   ins_pipe( pipe_slow );
9033 %}
9034 
9035 instruct vlshift_reg_masked(vec dst, vec src2, kReg mask) %{
9036   match(Set dst (LShiftVS (Binary dst src2) mask));
9037   match(Set dst (LShiftVI (Binary dst src2) mask));
9038   match(Set dst (LShiftVL (Binary dst src2) mask));
9039   format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
9040   ins_encode %{
9041     int vlen_enc = vector_length_encoding(this);
9042     BasicType bt = Matcher::vector_element_basic_type(this);
9043     int opc = this->ideal_Opcode();
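    // A shift count that is not produced by a broadcast *ShiftCntV node is a
    // per-lane variable shift, encoded with the vpsllv/vpsrlv/vpsrav family.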
9044     bool is_varshift = !VectorNode::is_vshift_cnt_opcode(in(2)->isa_Mach()->ideal_Opcode());
9045     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
9046                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, is_varshift);
9047   %}
9048   ins_pipe( pipe_slow );
9049 %}
9050 
9051 instruct vlshift_mem_masked(vec dst, memory src2, kReg mask) %{
9052   match(Set dst (LShiftVS (Binary dst (LoadVector src2)) mask));
9053   match(Set dst (LShiftVI (Binary dst (LoadVector src2)) mask));
9054   match(Set dst (LShiftVL (Binary dst (LoadVector src2)) mask));
9055   format %{ "vplshift_masked $dst, $dst, $src2, $mask\t! lshift masked operation" %}
9056   ins_encode %{
9057     int vlen_enc = vector_length_encoding(this);
9058     BasicType bt = Matcher::vector_element_basic_type(this);
9059     int opc = this->ideal_Opcode();
9060     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
9061                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
9062   %}
9063   ins_pipe( pipe_slow );
9064 %}
9065 
9066 instruct vrshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
9067   match(Set dst (RShiftVS (Binary dst (RShiftCntV shift)) mask));
9068   match(Set dst (RShiftVI (Binary dst (RShiftCntV shift)) mask));
9069   match(Set dst (RShiftVL (Binary dst (RShiftCntV shift)) mask));
9070   format %{ "vprshift_imm_masked $dst, $dst, $shift, $mask\t! rshift masked operation" %}
9071   ins_encode %{
9072     int vlen_enc = vector_length_encoding(this);
9073     BasicType bt = Matcher::vector_element_basic_type(this);
9074     int opc = this->ideal_Opcode();
9075     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
9076                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
9077   %}
9078   ins_pipe( pipe_slow );
9079 %}
9080 
9081 instruct vrshift_reg_masked(vec dst, vec src2, kReg mask) %{
9082   match(Set dst (RShiftVS (Binary dst src2) mask));
9083   match(Set dst (RShiftVI (Binary dst src2) mask));
9084   match(Set dst (RShiftVL (Binary dst src2) mask));
9085   format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
9086   ins_encode %{
9087     int vlen_enc = vector_length_encoding(this);
9088     BasicType bt = Matcher::vector_element_basic_type(this);
9089     int opc = this->ideal_Opcode();
9090     bool is_varshift = !VectorNode::is_vshift_cnt_opcode(in(2)->isa_Mach()->ideal_Opcode());
9091     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
9092                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, is_varshift);
9093   %}
9094   ins_pipe( pipe_slow );
9095 %}
9096 
9097 instruct vrshift_mem_masked(vec dst, memory src2, kReg mask) %{
9098   match(Set dst (RShiftVS (Binary dst (LoadVector src2)) mask));
9099   match(Set dst (RShiftVI (Binary dst (LoadVector src2)) mask));
9100   match(Set dst (RShiftVL (Binary dst (LoadVector src2)) mask));
9101   format %{ "vprshift_masked $dst, $dst, $src2, $mask\t! rshift masked operation" %}
9102   ins_encode %{
9103     int vlen_enc = vector_length_encoding(this);
9104     BasicType bt = Matcher::vector_element_basic_type(this);
9105     int opc = this->ideal_Opcode();
9106     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
9107                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
9108   %}
9109   ins_pipe( pipe_slow );
9110 %}
9111 
9112 instruct vurshift_imm_masked(vec dst, immI8 shift, kReg mask) %{
9113   match(Set dst (URShiftVS (Binary dst (RShiftCntV shift)) mask));
9114   match(Set dst (URShiftVI (Binary dst (RShiftCntV shift)) mask));
9115   match(Set dst (URShiftVL (Binary dst (RShiftCntV shift)) mask));
9116   format %{ "vpurshift_imm_masked $dst, $dst, $shift, $mask\t! urshift masked operation" %}
9117   ins_encode %{
9118     int vlen_enc = vector_length_encoding(this);
9119     BasicType bt = Matcher::vector_element_basic_type(this);
9120     int opc = this->ideal_Opcode();
9121     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
9122                    $dst$$XMMRegister, $shift$$constant, true, vlen_enc);
9123   %}
9124   ins_pipe( pipe_slow );
9125 %}
9126 
9127 instruct vurshift_reg_masked(vec dst, vec src2, kReg mask) %{
9128   match(Set dst (URShiftVS (Binary dst src2) mask));
9129   match(Set dst (URShiftVI (Binary dst src2) mask));
9130   match(Set dst (URShiftVL (Binary dst src2) mask));
9131   format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
9132   ins_encode %{
9133     int vlen_enc = vector_length_encoding(this);
9134     BasicType bt = Matcher::vector_element_basic_type(this);
9135     int opc = this->ideal_Opcode();
9136     bool is_varshift = !VectorNode::is_vshift_cnt_opcode(in(2)->isa_Mach()->ideal_Opcode());
9137     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
9138                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc, is_varshift);
9139   %}
9140   ins_pipe( pipe_slow );
9141 %}
9142 
9143 instruct vurshift_mem_masked(vec dst, memory src2, kReg mask) %{
9144   match(Set dst (URShiftVS (Binary dst (LoadVector src2)) mask));
9145   match(Set dst (URShiftVI (Binary dst (LoadVector src2)) mask));
9146   match(Set dst (URShiftVL (Binary dst (LoadVector src2)) mask));
9147   format %{ "vpurshift_masked $dst, $dst, $src2, $mask\t! urshift masked operation" %}
9148   ins_encode %{
9149     int vlen_enc = vector_length_encoding(this);
9150     BasicType bt = Matcher::vector_element_basic_type(this);
9151     int opc = this->ideal_Opcode();
9152     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
9153                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
9154   %}
9155   ins_pipe( pipe_slow );
9156 %}
9157 
9158 instruct vmaxv_reg_masked(vec dst, vec src2, kReg mask) %{
9159   match(Set dst (MaxV (Binary dst src2) mask));
9160   format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
9161   ins_encode %{
9162     int vlen_enc = vector_length_encoding(this);
9163     BasicType bt = Matcher::vector_element_basic_type(this);
9164     int opc = this->ideal_Opcode();
9165     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
9166                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
9167   %}
9168   ins_pipe( pipe_slow );
9169 %}
9170 
9171 instruct vmaxv_mem_masked(vec dst, memory src2, kReg mask) %{
9172   match(Set dst (MaxV (Binary dst (LoadVector src2)) mask));
9173   format %{ "vpmax_masked $dst, $dst, $src2, $mask\t! max masked operation" %}
9174   ins_encode %{
9175     int vlen_enc = vector_length_encoding(this);
9176     BasicType bt = Matcher::vector_element_basic_type(this);
9177     int opc = this->ideal_Opcode();
9178     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
9179                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
9180   %}
9181   ins_pipe( pipe_slow );
9182 %}
9183 
9184 instruct vminv_reg_masked(vec dst, vec src2, kReg mask) %{
9185   match(Set dst (MinV (Binary dst src2) mask));
9186   format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
9187   ins_encode %{
9188     int vlen_enc = vector_length_encoding(this);
9189     BasicType bt = Matcher::vector_element_basic_type(this);
9190     int opc = this->ideal_Opcode();
9191     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
9192                    $dst$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
9193   %}
9194   ins_pipe( pipe_slow );
9195 %}
9196 
9197 instruct vminv_mem_masked(vec dst, memory src2, kReg mask) %{
9198   match(Set dst (MinV (Binary dst (LoadVector src2)) mask));
9199   format %{ "vpmin_masked $dst, $dst, $src2, $mask\t! min masked operation" %}
9200   ins_encode %{
9201     int vlen_enc = vector_length_encoding(this);
9202     BasicType bt = Matcher::vector_element_basic_type(this);
9203     int opc = this->ideal_Opcode();
9204     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
9205                    $dst$$XMMRegister, $src2$$Address, true, vlen_enc);
9206   %}
9207   ins_pipe( pipe_slow );
9208 %}
9209 
9210 instruct vrearrangev_reg_masked(vec dst, vec src2, kReg mask) %{
9211   match(Set dst (VectorRearrange (Binary dst src2) mask));
9212   format %{ "vprearrange_masked $dst, $dst, $src2, $mask\t! rearrange masked operation" %}
9213   ins_encode %{
9214     int vlen_enc = vector_length_encoding(this);
9215     BasicType bt = Matcher::vector_element_basic_type(this);
9216     int opc = this->ideal_Opcode();
9217     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
9218                    $dst$$XMMRegister, $src2$$XMMRegister, false, vlen_enc);
9219   %}
9220   ins_pipe( pipe_slow );
9221 %}
9222 
9223 instruct vabs_masked(vec dst, kReg mask) %{
9224   match(Set dst (AbsVB dst mask));
9225   match(Set dst (AbsVS dst mask));
9226   match(Set dst (AbsVI dst mask));
9227   match(Set dst (AbsVL dst mask));
9228   format %{ "vabs_masked $dst, $mask \t! vabs masked operation" %}
9229   ins_cost(100);
9230   ins_encode %{
9231     int vlen_enc = vector_length_encoding(this);
9232     BasicType bt = Matcher::vector_element_basic_type(this);
9233     int opc = this->ideal_Opcode();
9234     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
9235                    $dst$$XMMRegister, $dst$$XMMRegister, true, vlen_enc);
9236   %}
9237   ins_pipe( pipe_slow );
9238 %}
9239 
9240 instruct vfma_reg_masked(vec dst, vec src2, vec src3, kReg mask) %{
9241   match(Set dst (FmaVF (Binary dst src2) (Binary src3 mask)));
9242   match(Set dst (FmaVD (Binary dst src2) (Binary src3 mask)));
9243   format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
9244   ins_encode %{
9245     int vlen_enc = vector_length_encoding(this);
9246     BasicType bt = Matcher::vector_element_basic_type(this);
9247     int opc = this->ideal_Opcode();
9248     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
9249                    $src2$$XMMRegister, $src3$$XMMRegister, true, vlen_enc);
9250   %}
9251   ins_pipe( pipe_slow );
9252 %}
9253 
9254 instruct vfma_mem_masked(vec dst, vec src2, memory src3, kReg mask) %{
9255   match(Set dst (FmaVF (Binary dst src2) (Binary (LoadVector src3) mask)));
9256   match(Set dst (FmaVD (Binary dst src2) (Binary (LoadVector src3) mask)));
9257   format %{ "vfma_masked $dst, $src2, $src3, $mask \t! vfma masked operation" %}
9258   ins_encode %{
9259     int vlen_enc = vector_length_encoding(this);
9260     BasicType bt = Matcher::vector_element_basic_type(this);
9261     int opc = this->ideal_Opcode();
9262     __ evmasked_op(opc, bt, $mask$$KRegister, $dst$$XMMRegister,
9263                    $src2$$XMMRegister, $src3$$Address, true, vlen_enc);
9264   %}
9265   ins_pipe( pipe_slow );
9266 %}
9267 
9268 instruct evcmp_masked(kReg dst, vec src1, vec src2, immI8 cond, kReg mask, rRegP scratch) %{
9269   match(Set dst (VectorMaskCmp (Binary src1 src2) (Binary cond mask)));
9270   effect(TEMP scratch);
9271   format %{ "vcmp_masked $dst, $src1, $src2, $cond, $mask\t! using $scratch as TEMP" %}
9272   ins_encode %{
9273     assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
9274     int vlen_enc = vector_length_encoding(this, $src1);
9275     BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
9276 
9277     // Comparison is dispatched on the element basic type of src1.
9278     switch (src1_elem_bt) {
9279       case T_BYTE: {
9280         bool is_unsigned = is_unsigned_booltest_pred($cond$$constant);
9281         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
9282         __ evpcmpb($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
9283         break;
9284       }
9285       case T_SHORT: {
9286         bool is_unsigned = is_unsigned_booltest_pred($cond$$constant);
9287         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
9288         __ evpcmpw($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
9289         break;
9290       }
9291       case T_INT: {
9292         bool is_unsigned = is_unsigned_booltest_pred($cond$$constant);
9293         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
9294         __ evpcmpd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
9295         break;
9296       }
9297       case T_LONG: {
9298         bool is_unsigned = is_unsigned_booltest_pred($cond$$constant);
9299         Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
9300         __ evpcmpq($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
9301         break;
9302       }
9303       case T_FLOAT: {
9304         Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
9305         __ evcmpps($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
9306         break;
9307       }
9308       case T_DOUBLE: {
9309         Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
9310         __ evcmppd($dst$$KRegister, $mask$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
9311         break;
9312       }
9313       default: assert(false, "%s", type2name(src1_elem_bt)); break;
9314     }
9315   %}
9316   ins_pipe( pipe_slow );
9317 %}
9318 
9319 #ifdef _LP64
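// MaskAll broadcasts a scalar boolean across all mask lanes: the scalar is
// moved into a kReg and the unused high bits are cleared by shifting right
// from the register width (64 or 16 bits) down to the vector length.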
9320 instruct mask_all_evexI_imm(kReg dst, immI cnt, rRegL tmp) %{
9321   match(Set dst (MaskAll cnt));
9322   effect(TEMP_DEF dst, TEMP tmp);
9323   format %{ "mask_all_evexI $dst, $cnt \t! using $tmp as TEMP" %}
9324   ins_encode %{
9325     int vec_len = Matcher::vector_length(this);
9326     if (VM_Version::supports_avx512bw()) {
9327       __ movq($tmp$$Register, $cnt$$constant);
9328       __ kmovql($dst$$KRegister, $tmp$$Register);
9329       __ kshiftrql($dst$$KRegister, $dst$$KRegister, 64 - vec_len);
9330     } else {
9331       assert(vec_len <= 16, "");
9332       __ movq($tmp$$Register, $cnt$$constant);
9333       __ kmovwl($dst$$KRegister, $tmp$$Register);
9334       __ kshiftrwl($dst$$KRegister, $dst$$KRegister, 16 - vec_len);
9335     }
9336   %}
9337   ins_pipe( pipe_slow );
9338 %}
9339 
9340 instruct mask_all_evexI(kReg dst, rRegI src, rRegL tmp) %{
9341   match(Set dst (MaskAll src));
9342   effect(TEMP_DEF dst, TEMP tmp);
9343   format %{ "mask_all_evexI $dst, $src \t! using $tmp as TEMP" %}
9344   ins_encode %{
9345     int vec_len = Matcher::vector_length(this);
9346     if (VM_Version::supports_avx512bw()) {
9347       __ movslq($tmp$$Register, $src$$Register);
9348       __ kmovql($dst$$KRegister, $tmp$$Register);
9349       __ kshiftrql($dst$$KRegister, $dst$$KRegister, 64 - vec_len);
9350     } else {
9351       assert(vec_len <= 16, "");
9352       __ kmovwl($dst$$KRegister, $src$$Register);
9353       __ kshiftrwl($dst$$KRegister, $dst$$KRegister, 16 - vec_len);
9354     }
9355   %}
9356   ins_pipe( pipe_slow );
9357 %}
9358 
9359 instruct mask_all_evexL(kReg dst, rRegL src) %{
9360   match(Set dst (MaskAll src));
9361   effect(TEMP_DEF dst);
9362   format %{ "mask_all_evexL $dst, $src \t! mask all operation" %}
9363   ins_encode %{
9364     int vec_len = Matcher::vector_length(this);
9365     if (VM_Version::supports_avx512bw()) {
9366       __ kmovql($dst$$KRegister, $src$$Register);
9367       __ kshiftrql($dst$$KRegister, $dst$$KRegister, 64 - vec_len);
9368     } else {
9369       assert(vec_len <= 16, "");
9370       __ kmovwl($dst$$KRegister, $src$$Register);
9371       __ kshiftrwl($dst$$KRegister, $dst$$KRegister, 16 - vec_len);
9372     }
9373   %}
9374   ins_pipe( pipe_slow );
9375 %}
9376 
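// Mask negation: (XorVMask src (MaskAll -1)) xors with an all-true mask, which
// the two instructs below lower to a single knot on the opmask register.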
9377 instruct mask_not_immLT8(kReg dst, kReg src, rRegI rtmp, kReg ktmp, immI_M1 cnt) %{
9378   predicate(Matcher::vector_length(n) < 8 && VM_Version::supports_avx512dq());
9379   match(Set dst (XorVMask src (MaskAll cnt)));
9380   effect(TEMP_DEF dst, TEMP rtmp, TEMP ktmp);
9381   format %{ "mask_not_LT8 $dst, $src, $cnt \t! using $ktmp and $rtmp as TEMP" %}
9382   ins_encode %{
9383     uint masklen = Matcher::vector_length(this);
9384     __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register);
9385   %}
9386   ins_pipe( pipe_slow );
9387 %}
9388 
9389 instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{
9390   predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) ||
9391             (Matcher::vector_length(n) == 16) ||
9392             (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw()));
9393   match(Set dst (XorVMask src (MaskAll cnt)));
9394   format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %}
9395   ins_encode %{
9396     uint masklen = Matcher::vector_length(this);
9397     __ knot(masklen, $dst$$KRegister, $src$$KRegister);
9398   %}
9399   ins_pipe( pipe_slow );
9400 %}
9401 #endif
9402 
9403 instruct mask_opers_evex(kReg dst, kReg src1, kReg src2, kReg kscratch) %{
9404   match(Set dst (AndVMask src1 src2));
9405   match(Set dst (OrVMask src1 src2));
9406   match(Set dst (XorVMask src1 src2));
9407   effect(TEMP kscratch);
9408   format %{ "mask_opers_evex $dst, $src1, $src2\t! using $kscratch as TEMP" %}
9409   ins_encode %{
9410     const MachNode* mask1 = static_cast<const MachNode*>(this->in(this->operand_index($src1)));
9411     const MachNode* mask2 = static_cast<const MachNode*>(this->in(this->operand_index($src2)));
9412     assert(0 == Type::cmp(mask1->bottom_type(), mask2->bottom_type()), "");
9413     uint masklen = Matcher::vector_length(this);
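    // Byte-granular k-ops (kandb/korb/kxorb) require AVX512DQ; without it,
    // sub-16-bit masks are widened so the word-sized forms can be used.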
9414     masklen = (masklen < 16 && !VM_Version::supports_avx512dq()) ? 16 : masklen;
9415     __ masked_op(this->ideal_Opcode(), masklen, $dst$$KRegister, $src1$$KRegister, $src2$$KRegister);
9416   %}
9417   ins_pipe( pipe_slow );
9418 %}
9419 
9420 instruct castMM(kReg dst)
9421 %{
9422   match(Set dst (CastVV dst));
9423 
9424   size(0);
9425   format %{ "# castVV of $dst" %}
9426   ins_encode(/* empty encoding */);
9427   ins_cost(0);
9428   ins_pipe(empty);
9429 %}
9430 
9431 instruct castVV(vec dst)
9432 %{
9433   match(Set dst (CastVV dst));
9434 
9435   size(0);
9436   format %{ "# castVV of $dst" %}
9437   ins_encode(/* empty encoding */);
9438   ins_cost(0);
9439   ins_pipe(empty);
9440 %}
9441 
9442 instruct castVVLeg(legVec dst)
9443 %{
9444   match(Set dst (CastVV dst));
9445 
9446   size(0);
9447   format %{ "# castVV of $dst" %}
9448   ins_encode(/* empty encoding */);
9449   ins_cost(0);
9450   ins_pipe(empty);
9451 %}