src/hotspot/cpu/x86/c2_MacroAssembler_x86.cpp

  } else {
    assert(!is_legacy || !is_subword_type(elem_bt) || vlen_in_bytes < 64, "");
    int vlen_enc = vector_length_encoding(vlen_in_bytes);

    vpxor (dst, dst, dst, vlen_enc);
    vpsubb(dst, dst, src, is_legacy ? AVX_256bit : vlen_enc);

    switch (elem_bt) {
      case T_BYTE:   /* nothing to do */            break;
      case T_SHORT:  vpmovsxbw(dst, dst, vlen_enc); break;
      case T_INT:    vpmovsxbd(dst, dst, vlen_enc); break;
      case T_FLOAT:  vpmovsxbd(dst, dst, vlen_enc); break;
      case T_LONG:   vpmovsxbq(dst, dst, vlen_enc); break;
      case T_DOUBLE: vpmovsxbq(dst, dst, vlen_enc); break;

      default: assert(false, "%s", type2name(elem_bt));
    }
  }
}
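// Note: the vpxor/vpsubb pair above relies on booleans being stored as
// bytes 0/1: 0 - 1 == 0xFF, so each lane becomes 0x00 or 0xFF and the
// vpmovsx* sign extensions then broadcast that to an all-zeros/all-ones
// lane of the target element width. Illustrative scalar model (an
// explanatory sketch, not part of this file):
//   int8_t  b    = 1;                // boolean lane, 0 or 1
//   int8_t  m    = (int8_t)(0 - b);  // 0x00 or 0xFF
//   int32_t lane = (int32_t)m;       // 0 or -1 after sign extension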
1463 
void C2_MacroAssembler::load_vector_mask(KRegister dst, XMMRegister src, XMMRegister xtmp,
                                         Register tmp, bool novlbwdq, int vlen_enc) {
  if (novlbwdq) {
    vpmovsxbd(xtmp, src, vlen_enc);
    evpcmpd(dst, k0, xtmp, ExternalAddress(StubRoutines::x86::vector_int_mask_cmp_bits()),
            Assembler::eq, true, vlen_enc, tmp);
  } else {
    vpxor(xtmp, xtmp, xtmp, vlen_enc);
    vpsubb(xtmp, xtmp, src, vlen_enc);
    evpmovb2m(dst, xtmp, vlen_enc);
  }
}
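// load_vector_mask materializes a KRegister opmask from a boolean byte
// vector. With AVX512BW/DQ/VL available, 0 - b turns each byte lane into
// 0x00/0xFF and evpmovb2m copies the sign bit of every lane into the
// opmask. On targets without those extensions (novlbwdq) the bytes are
// first widened to dwords and compared for equality against a per-lane
// constant table, setting one mask bit per dword lane.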
1476 
void C2_MacroAssembler::load_iota_indices(XMMRegister dst, Register scratch, int vlen_in_bytes) {
  ExternalAddress addr(StubRoutines::x86::vector_iota_indices());
  if (vlen_in_bytes == 4) {
    movdl(dst, addr);
  } else if (vlen_in_bytes == 8) {
    movq(dst, addr);
  } else if (vlen_in_bytes == 16) {
    movdqu(dst, addr, scratch);
  } else if (vlen_in_bytes == 32) {
    vmovdqu(dst, addr, scratch);
  } else {
    assert(vlen_in_bytes == 64, "%d", vlen_in_bytes);
    evmovdqub(dst, k0, addr, false /*merge*/, Assembler::AVX_512bit, scratch);
  }
}
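// The iota stub is a table of consecutive byte indices (0, 1, 2, ...);
// only the first vlen_in_bytes bytes are loaded, using the narrowest move
// that covers the vector. The scratch register is there for the overloads
// that may need a temporary to materialize the external address.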
1492 
// Reductions for vectors of bytes, shorts, ints, longs, floats, and doubles.

void C2_MacroAssembler::reduce_operation_128(BasicType typ, int opcode, XMMRegister dst, XMMRegister src) {
  int vector_len = Assembler::AVX_128bit;

    load_unsigned_byte(limit, Address(ary2, 0));
    cmpl(chr, limit);
    jccb(Assembler::notEqual, FALSE_LABEL);
  }
  bind(TRUE_LABEL);
  movl(result, 1);   // return true
  jmpb(DONE);

  bind(FALSE_LABEL);
  xorl(result, result); // return false

  // That's it
  bind(DONE);
  if (UseAVX >= 2) {
    // clean upper bits of YMM registers to avoid AVX-SSE transition penalties
    vpxor(vec1, vec1);
    vpxor(vec2, vec2);
  }
}
3842 
void C2_MacroAssembler::evmasked_op(int ideal_opc, BasicType eType, KRegister mask, XMMRegister dst,
                                    XMMRegister src1, int imm8, bool merge, int vlen_enc) {
  switch(ideal_opc) {
    case Op_LShiftVS:
      Assembler::evpsllw(dst, mask, src1, imm8, merge, vlen_enc); break;
    case Op_LShiftVI:
      Assembler::evpslld(dst, mask, src1, imm8, merge, vlen_enc); break;
    case Op_LShiftVL:
      Assembler::evpsllq(dst, mask, src1, imm8, merge, vlen_enc); break;
    case Op_RShiftVS:
      Assembler::evpsraw(dst, mask, src1, imm8, merge, vlen_enc); break;
    case Op_RShiftVI:
      Assembler::evpsrad(dst, mask, src1, imm8, merge, vlen_enc); break;
    case Op_RShiftVL:
      Assembler::evpsraq(dst, mask, src1, imm8, merge, vlen_enc); break;
    case Op_URShiftVS:
      Assembler::evpsrlw(dst, mask, src1, imm8, merge, vlen_enc); break;
    case Op_URShiftVI:
      Assembler::evpsrld(dst, mask, src1, imm8, merge, vlen_enc); break;
    case Op_URShiftVL:
      Assembler::evpsrlq(dst, mask, src1, imm8, merge, vlen_enc); break;
    case Op_RotateRightV:
      evrord(eType, dst, mask, src1, imm8, merge, vlen_enc); break;
    case Op_RotateLeftV:
      evrold(eType, dst, mask, src1, imm8, merge, vlen_enc); break;
    default:
      fatal("Unsupported masked operation"); break;
  }
}
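// Per-lane semantics of the masked ops above, as an illustrative scalar
// sketch (not part of this file): for every lane i,
//   if (mask[i])    dst[i] = op(src1[i], imm8);
//   else if (merge) dst[i] = dst[i];  // merge-masking keeps the old value
//   else            dst[i] = 0;       // zero-masking clears the lane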
3872 
void C2_MacroAssembler::evmasked_op(int ideal_opc, BasicType eType, KRegister mask, XMMRegister dst,
                                    XMMRegister src1, XMMRegister src2, bool merge, int vlen_enc,
                                    bool is_varshift) {
  switch (ideal_opc) {
    case Op_AddVB:
      evpaddb(dst, mask, src1, src2, merge, vlen_enc); break;
    case Op_AddVS:
      evpaddw(dst, mask, src1, src2, merge, vlen_enc); break;
    case Op_AddVI:
      evpaddd(dst, mask, src1, src2, merge, vlen_enc); break;
    case Op_AddVL:
      evpaddq(dst, mask, src1, src2, merge, vlen_enc); break;
    case Op_AddVF:
      evaddps(dst, mask, src1, src2, merge, vlen_enc); break;
    case Op_AddVD:
      evaddpd(dst, mask, src1, src2, merge, vlen_enc); break;
    case Op_SubVB:
      evpsubb(dst, mask, src1, src2, merge, vlen_enc); break;
    case Op_SubVS:
      evpsubw(dst, mask, src1, src2, merge, vlen_enc); break;
    case Op_SubVI:
      evpsubd(dst, mask, src1, src2, merge, vlen_enc); break;
    case Op_SubVL:
      evpsubq(dst, mask, src1, src2, merge, vlen_enc); break;
    case Op_SubVF:
      evsubps(dst, mask, src1, src2, merge, vlen_enc); break;
    case Op_SubVD:
      evsubpd(dst, mask, src1, src2, merge, vlen_enc); break;
    case Op_MulVS:
      evpmullw(dst, mask, src1, src2, merge, vlen_enc); break;
    case Op_MulVI:
      evpmulld(dst, mask, src1, src2, merge, vlen_enc); break;
    case Op_MulVL:
      evpmullq(dst, mask, src1, src2, merge, vlen_enc); break;
    case Op_MulVF:
      evmulps(dst, mask, src1, src2, merge, vlen_enc); break;
    case Op_MulVD:
      evmulpd(dst, mask, src1, src2, merge, vlen_enc); break;
    case Op_DivVF:
      evdivps(dst, mask, src1, src2, merge, vlen_enc); break;
    case Op_DivVD:
      evdivpd(dst, mask, src1, src2, merge, vlen_enc); break;
    case Op_SqrtVF:
      evsqrtps(dst, mask, src1, src2, merge, vlen_enc); break;
    case Op_SqrtVD:
      evsqrtpd(dst, mask, src1, src2, merge, vlen_enc); break;
    case Op_AbsVB:
      evpabsb(dst, mask, src2, merge, vlen_enc); break;
    case Op_AbsVS:
      evpabsw(dst, mask, src2, merge, vlen_enc); break;
    case Op_AbsVI:
      evpabsd(dst, mask, src2, merge, vlen_enc); break;
    case Op_AbsVL:
      evpabsq(dst, mask, src2, merge, vlen_enc); break;
    case Op_FmaVF:
      evpfma213ps(dst, mask, src1, src2, merge, vlen_enc); break;
    case Op_FmaVD:
      evpfma213pd(dst, mask, src1, src2, merge, vlen_enc); break;
    case Op_VectorRearrange:
      evperm(eType, dst, mask, src2, src1, merge, vlen_enc); break;
    case Op_LShiftVS:
      evpsllw(dst, mask, src1, src2, merge, vlen_enc, is_varshift); break;
    case Op_LShiftVI:
      evpslld(dst, mask, src1, src2, merge, vlen_enc, is_varshift); break;
    case Op_LShiftVL:
      evpsllq(dst, mask, src1, src2, merge, vlen_enc, is_varshift); break;
    case Op_RShiftVS:
      evpsraw(dst, mask, src1, src2, merge, vlen_enc, is_varshift); break;
    case Op_RShiftVI:
      evpsrad(dst, mask, src1, src2, merge, vlen_enc, is_varshift); break;
    case Op_RShiftVL:
      evpsraq(dst, mask, src1, src2, merge, vlen_enc, is_varshift); break;
    case Op_URShiftVS:
      evpsrlw(dst, mask, src1, src2, merge, vlen_enc, is_varshift); break;
    case Op_URShiftVI:
      evpsrld(dst, mask, src1, src2, merge, vlen_enc, is_varshift); break;
    case Op_URShiftVL:
      evpsrlq(dst, mask, src1, src2, merge, vlen_enc, is_varshift); break;
    case Op_RotateLeftV:
      evrold(eType, dst, mask, src1, src2, merge, vlen_enc); break;
    case Op_RotateRightV:
      evrord(eType, dst, mask, src1, src2, merge, vlen_enc); break;
    case Op_MaxV:
      evpmaxs(eType, dst, mask, src1, src2, merge, vlen_enc); break;
    case Op_MinV:
      evpmins(eType, dst, mask, src1, src2, merge, vlen_enc); break;
    case Op_XorV:
      evxor(eType, dst, mask, src1, src2, merge, vlen_enc); break;
    case Op_OrV:
      evor(eType, dst, mask, src1, src2, merge, vlen_enc); break;
    case Op_AndV:
      evand(eType, dst, mask, src1, src2, merge, vlen_enc); break;
    default:
      fatal("Unsupported masked operation"); break;
  }
}
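// Note: the unary cases (Op_AbsVB..Op_AbsVL) take their input from src2
// and ignore src1, and Op_VectorRearrange passes its operands to evperm
// in swapped order; the remaining cases are plain three-operand masked
// ops, with is_varshift selecting the variable-shift forms of the shifts.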
3969 
void C2_MacroAssembler::evmasked_op(int ideal_opc, BasicType eType, KRegister mask, XMMRegister dst,
                                    XMMRegister src1, Address src2, bool merge, int vlen_enc) {
  switch (ideal_opc) {
    case Op_AddVB:
      evpaddb(dst, mask, src1, src2, merge, vlen_enc); break;
    case Op_AddVS:
      evpaddw(dst, mask, src1, src2, merge, vlen_enc); break;
    case Op_AddVI:
      evpaddd(dst, mask, src1, src2, merge, vlen_enc); break;
    case Op_AddVL:
      evpaddq(dst, mask, src1, src2, merge, vlen_enc); break;
    case Op_AddVF:
      evaddps(dst, mask, src1, src2, merge, vlen_enc); break;
    case Op_AddVD:
      evaddpd(dst, mask, src1, src2, merge, vlen_enc); break;
    case Op_SubVB:
      evpsubb(dst, mask, src1, src2, merge, vlen_enc); break;
    case Op_SubVS:
      evpsubw(dst, mask, src1, src2, merge, vlen_enc); break;
    case Op_SubVI:
      evpsubd(dst, mask, src1, src2, merge, vlen_enc); break;
    case Op_SubVL:
      evpsubq(dst, mask, src1, src2, merge, vlen_enc); break;
    case Op_SubVF:
      evsubps(dst, mask, src1, src2, merge, vlen_enc); break;
    case Op_SubVD:
      evsubpd(dst, mask, src1, src2, merge, vlen_enc); break;
    case Op_MulVS:
      evpmullw(dst, mask, src1, src2, merge, vlen_enc); break;
    case Op_MulVI:
      evpmulld(dst, mask, src1, src2, merge, vlen_enc); break;
    case Op_MulVL:
      evpmullq(dst, mask, src1, src2, merge, vlen_enc); break;
    case Op_MulVF:
      evmulps(dst, mask, src1, src2, merge, vlen_enc); break;
    case Op_MulVD:
      evmulpd(dst, mask, src1, src2, merge, vlen_enc); break;
    case Op_DivVF:
      evdivps(dst, mask, src1, src2, merge, vlen_enc); break;
    case Op_DivVD:
      evdivpd(dst, mask, src1, src2, merge, vlen_enc); break;
    case Op_FmaVF:
      evpfma213ps(dst, mask, src1, src2, merge, vlen_enc); break;
    case Op_FmaVD:
      evpfma213pd(dst, mask, src1, src2, merge, vlen_enc); break;
    case Op_MaxV:
      evpmaxs(eType, dst, mask, src1, src2, merge, vlen_enc); break;
    case Op_MinV:
      evpmins(eType, dst, mask, src1, src2, merge, vlen_enc); break;
    case Op_XorV:
      evxor(eType, dst, mask, src1, src2, merge, vlen_enc); break;
    case Op_OrV:
      evor(eType, dst, mask, src1, src2, merge, vlen_enc); break;
    case Op_AndV:
      evand(eType, dst, mask, src1, src2, merge, vlen_enc); break;
    default:
      fatal("Unsupported masked operation"); break;
  }
}
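// Same dispatch as the register-register variant above, but with the
// second operand taken from memory; only ops with a memory form are
// handled here (no shift, rotate, sqrt, abs or rearrange cases).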
4029 
void C2_MacroAssembler::masked_op(int ideal_opc, int mask_len, KRegister dst,
                                  KRegister src1, KRegister src2) {
  BasicType etype = T_ILLEGAL;
  switch(mask_len) {
    case 2:
    case 4:
    case 8:  etype = T_BYTE; break;
    case 16: etype = T_SHORT; break;
    case 32: etype = T_INT; break;
    case 64: etype = T_LONG; break;
    default: fatal("Unsupported type"); break;
  }
  assert(etype != T_ILLEGAL, "");
  switch(ideal_opc) {
    case Op_AndVMask:
      kand(etype, dst, src1, src2); break;
    case Op_OrVMask:
      kor(etype, dst, src1, src2); break;
    case Op_XorVMask:
      kxor(etype, dst, src1, src2); break;
    default:
      fatal("Unsupported masked operation"); break;
  }
}
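// The mask_len -> etype mapping above picks the width of the k-register
// logical instruction: masks of up to 8 lanes use the byte form (e.g.
// kandb), 16 lanes the word form, 32 the dword form and 64 the qword
// form, so every mask bit is covered by the chosen operation.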
4054 
#ifdef _LP64
void C2_MacroAssembler::vector_mask_operation(int opc, Register dst, KRegister mask,
                                              Register tmp, int masklen, int masksize,
                                              int vec_enc) {
  if (VM_Version::supports_avx512bw()) {
    kmovql(tmp, mask);
  } else {
    assert(masklen <= 16, "");
    kmovwl(tmp, mask);
  }
  if (masksize < 16) {
    andq(tmp, (((jlong)1 << masklen) - 1));
  }
  switch(opc) {
    case Op_VectorMaskTrueCount:
      popcntq(dst, tmp);
      break;
    case Op_VectorMaskLastTrue:
      mov64(dst, -1);
      bsrq(tmp, tmp);
      cmov(Assembler::notZero, dst, tmp);
      break;
    case Op_VectorMaskFirstTrue:
      mov64(dst, masklen);
      bsfq(tmp, tmp);
      cmov(Assembler::notZero, dst, tmp);
      break;
    default: assert(false, "Unhandled mask operation");
  }
}
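// bsfq/bsrq leave their destination undefined and set ZF when the source
// is zero, so dst is preloaded with the "no bit set" answer (masklen for
// firstTrue, -1 for lastTrue) and cmov(notZero, ...) only overwrites it
// when at least one mask bit was set.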
4085 
void C2_MacroAssembler::vector_mask_operation(int opc, Register dst, XMMRegister mask, XMMRegister xtmp,
                                              XMMRegister xtmp1, Register tmp, int masklen, int masksize,
                                              int vec_enc) {
  assert(VM_Version::supports_avx(), "");
  vpxor(xtmp, xtmp, xtmp, vec_enc);
  vpsubb(xtmp, xtmp, mask, vec_enc);
  vpmovmskb(tmp, xtmp, vec_enc);
  if (masksize < 16) {
    andq(tmp, (((jlong)1 << masklen) - 1));
  }
  switch(opc) {
    case Op_VectorMaskTrueCount:
      popcntq(dst, tmp);
      break;
    case Op_VectorMaskLastTrue:
      mov64(dst, -1);
      bsrq(tmp, tmp);
      cmov(Assembler::notZero, dst, tmp);
      break;
    case Op_VectorMaskFirstTrue:
      mov64(dst, masklen);
      bsfq(tmp, tmp);
      cmov(Assembler::notZero, dst, tmp);
      break;
    default: assert(false, "Unhandled mask operation");
  }
}
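// AVX fallback: 0 - b turns boolean bytes into 0x00/0xFF and vpmovmskb
// gathers each byte's sign bit into a GPR bitmask; when the mask does not
// fill a full register (masksize < 16) the bits above masklen are cleared
// before the same popcnt/bsf/bsr logic as in the AVX-512 variant above.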
#endif