1295 int MachNode::compute_padding(int current_offset) const {
1296 if (flags() & Node::PD::Flag_intel_jcc_erratum) {
1297 Compile* C = Compile::current();
1298 PhaseOutput* output = C->output();
1299 Block* block = output->block();
1300 int index = output->index();
1301 return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc());
1302 } else {
1303 return 0;
1304 }
1305 }
1306
1307 // Emit exception handler code.
1308 // Stuff framesize into a register and call a VM stub routine.
1309 int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {
1310
1311 // Note that the code buffer's insts_mark is always relative to insts.
1312 // That's why we must use the macroassembler to generate a handler.
1313 C2_MacroAssembler _masm(&cbuf);
1314 address base = __ start_a_stub(size_exception_handler());
1315 if (base == NULL) {
1316 ciEnv::current()->record_failure("CodeCache is full");
1317 return 0; // CodeBuffer::expand failed
1318 }
1319 int offset = __ offset();
1320 __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
1321 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
1322 __ end_a_stub();
1323 return offset;
1324 }
1325
1326 // Emit deopt handler code.
1327 int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {
1328
1329 // Note that the code buffer's insts_mark is always relative to insts.
1330 // That's why we must use the macroassembler to generate a handler.
1331 C2_MacroAssembler _masm(&cbuf);
1332 address base = __ start_a_stub(size_deopt_handler());
1333 if (base == NULL) {
1334 ciEnv::current()->record_failure("CodeCache is full");
1335 return 0; // CodeBuffer::expand failed
1336 }
1337 int offset = __ offset();
1338
1339 #ifdef _LP64
1340 address the_pc = (address) __ pc();
1341 Label next;
1342 // push a "the_pc" on the stack without destroying any registers
1343 // as they all may be live.
1344
1345 // push address of "next"
1346 __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
1347 __ bind(next);
1348 // adjust it so it matches "the_pc"
1349 __ subptr(Address(rsp, 0), __ offset() - offset);
1350 #else
1351 InternalAddress here(__ pc());
1352 __ pushptr(here.addr(), noreg);
1353 #endif
2169 return new legVecZOper();
2170 }
2171 if (legacy) {
2172 switch (ideal_reg) {
2173 case Op_VecS: return new legVecSOper();
2174 case Op_VecD: return new legVecDOper();
2175 case Op_VecX: return new legVecXOper();
2176 case Op_VecY: return new legVecYOper();
2177 case Op_VecZ: return new legVecZOper();
2178 }
2179 } else {
2180 switch (ideal_reg) {
2181 case Op_VecS: return new vecSOper();
2182 case Op_VecD: return new vecDOper();
2183 case Op_VecX: return new vecXOper();
2184 case Op_VecY: return new vecYOper();
2185 case Op_VecZ: return new vecZOper();
2186 }
2187 }
2188 ShouldNotReachHere();
2189 return NULL;
2190 }
2191
2192 bool Matcher::is_reg2reg_move(MachNode* m) {
2193 switch (m->rule()) {
2194 case MoveVec2Leg_rule:
2195 case MoveLeg2Vec_rule:
2196 case MoveF2VL_rule:
2197 case MoveF2LEG_rule:
2198 case MoveVL2F_rule:
2199 case MoveLEG2F_rule:
2200 case MoveD2VL_rule:
2201 case MoveD2LEG_rule:
2202 case MoveVL2D_rule:
2203 case MoveLEG2D_rule:
2204 return true;
2205 default:
2206 return false;
2207 }
2208 }
2209
2338 }
2339 return false;
2340 }
2341
2342 // This function identifies sub-graphs in which a 'load' node is
2343 // input to two different nodes, and such that it can be matched
2344 // with BMI instructions like blsi, blsr, etc.
2345 // Example : for b = -a[i] & a[i] can be matched to blsi r32, m32.
2346 // The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL*
2347 // refers to the same node.
2348 //
2349 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop)
2350 // This is a temporary solution until we make DAGs expressible in ADL.
2351 template<typename ConType>
2352 class FusedPatternMatcher {
2353 Node* _op1_node;
2354 Node* _mop_node;
2355 int _con_op;
2356
2357 static int match_next(Node* n, int next_op, int next_op_idx) {
2358 if (n->in(1) == NULL || n->in(2) == NULL) {
2359 return -1;
2360 }
2361
2362 if (next_op_idx == -1) { // n is commutative, try rotations
2363 if (n->in(1)->Opcode() == next_op) {
2364 return 1;
2365 } else if (n->in(2)->Opcode() == next_op) {
2366 return 2;
2367 }
2368 } else {
2369 assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index");
2370 if (n->in(next_op_idx)->Opcode() == next_op) {
2371 return next_op_idx;
2372 }
2373 }
2374 return -1;
2375 }
2376
2377 public:
2378 FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) :
2405 if (op2_con_idx == -1) {
2406 return false;
2407 }
2408 // Memory operation must be the other edge
2409 int op2_mop_idx = (op2_con_idx & 1) + 1;
2410 // Check that the memory operation is the same node
2411 if (op2_node->in(op2_mop_idx) == _mop_node) {
2412 // Now check the constant
2413 const Type* con_type = op2_node->in(op2_con_idx)->bottom_type();
2414 if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) {
2415 return true;
2416 }
2417 }
2418 }
2419 return false;
2420 }
2421 };
2422
2423 static bool is_bmi_pattern(Node* n, Node* m) {
2424 assert(UseBMI1Instructions, "sanity");
2425 if (n != NULL && m != NULL) {
2426 if (m->Opcode() == Op_LoadI) {
2427 FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
2428 return bmii.match(Op_AndI, -1, Op_SubI, 1, 0) ||
2429 bmii.match(Op_AndI, -1, Op_AddI, -1, -1) ||
2430 bmii.match(Op_XorI, -1, Op_AddI, -1, -1);
2431 } else if (m->Opcode() == Op_LoadL) {
2432 FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL);
2433 return bmil.match(Op_AndL, -1, Op_SubL, 1, 0) ||
2434 bmil.match(Op_AndL, -1, Op_AddL, -1, -1) ||
2435 bmil.match(Op_XorL, -1, Op_AddL, -1, -1);
2436 }
2437 }
2438 return false;
2439 }
2440
2441 // Should the matcher clone input 'm' of node 'n'?
2442 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
2443 // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'.
2444 if (UseBMI1Instructions && is_bmi_pattern(n, m)) {
2445 mstack.push(m, Visit);
// Size in bytes of the breakpoint pseudo-instruction; delegate to the
// generic MachNode size computation.
uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
  return MachNode::size(ra_);
}
2774
2775 %}
2776
2777 encode %{
2778
  // Debug-only call epilog: with -XX:+VerifyStackAtCalls, re-read the
  // 0xbadb100d "majik cookie" planted below the frame and trap (int3) if
  // the stack depth changed across the call. No code is emitted otherwise.
  enc_class call_epilog %{
    C2_MacroAssembler _masm(&cbuf);
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
      Label L;
      __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
      __ jccb(Assembler::equal, L);
      // Die if stack mismatch
      __ int3();
      __ bind(L);
    }
  %}
2792
2793 %}
2794
2795 // Operands for bound floating pointer register arguments
// Bound operand: a VecX value constrained to register xmm0. Used by
// instructions with an implicit xmm0 input (e.g. SSE4.1 pblendvb in
// 'blendvp' below, which takes its mask from xmm0).
operand rxmm0() %{
  constraint(ALLOC_IN_RC(xmm0_reg));
  match(VecX);
  format%{%}
  interface(REG_INTER);
%}
2802
2803 //----------OPERANDS-----------------------------------------------------------
2804 // Operand definitions must precede instruction definitions for correct parsing
2805 // in the ADLC because operands constitute user defined types which are used in
2806 // instruction definitions.
2807
2808 // Vectors
2809
2810 // Dummy generic vector class. Should be used for all vector operands.
// EVEX rounding of a double vector to T_LONG elements (RoundVD).
// Loads an MXCSR value from the constant table (variant selected by
// EnableX86ECoreOpts) and delegates to the macroassembler stub, which
// needs a GPR temp, two XMM temps and two mask-register temps.
instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (RoundVD src));
  effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
  format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    // MXCSR control word from the constant table; 0x3FBF vs 0x3F80 chosen by
    // EnableX86ECoreOpts — exact semantics live in vector_round_double_evex.
    InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
    __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister,
                                ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc,
                                $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
7455
7456 #endif // _LP64
7457
7458 // --------------------------------- VectorMaskCmp --------------------------------------
7459
// Float/double vector compare producing a boolean vector result (no
// vectmask type), for 8..32-byte vectors in legacy AVX registers.
// Emits vcmpps for T_FLOAT elements, vcmppd for T_DOUBLE.
instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{
  predicate(n->bottom_type()->isa_vectmask() == NULL &&
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  8 && // src1
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
            is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src1);
    // Translate the BoolTest condition constant into an FP compare predicate.
    Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
    if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
      __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
    } else {
      __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
7478
// 512-bit float/double compare with a boolean-vector (non-vectmask) result:
// compares into the temporary k-register, then expands the k-mask into an
// all-bits-set / all-zero lane pattern in dst via a masked evmovdq load.
instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
            n->bottom_type()->isa_vectmask() == NULL &&
            is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  effect(TEMP ktmp);
  format %{ "vector_compare $dst,$src1,$src2,$cond" %}
  ins_encode %{
    int vlen_enc = Assembler::AVX_512bit;
    Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
    KRegister mask = k0; // The comparison itself is not being masked.
    if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
      __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
      // Materialize the k-mask as -1/0 int lanes in dst (merge = false).
      __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
    } else {
      __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
      __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
    }
  %}
  ins_pipe( pipe_slow );
%}
7500
// Float/double compare whose result type is a vectmask: the EVEX compare
// writes the predicate mask directly into the destination k-register,
// with no expansion into a boolean vector.
instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{
  predicate(n->bottom_type()->isa_vectmask() &&
            is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
  ins_encode %{
    assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
    KRegister mask = k0; // The comparison itself is not being masked.
    if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
      __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
    } else {
      __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
7519
// Signed integer vector compare for conditions the pcmp family supports
// directly (eq/lt/gt), on 4..32-byte vectors. No temp needed: vpcmpCCW is
// called with xnoreg since the raw compare result is already correct.
instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{
  predicate(n->bottom_type()->isa_vectmask() == NULL &&
            !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
            (n->in(2)->get_int() == BoolTest::eq ||
             n->in(2)->get_int() == BoolTest::lt ||
             n->in(2)->get_int() == BoolTest::gt)); // cond
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    // Element width selects the b/w/d/q variant of the compare.
    Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
    __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
7539
// Signed integer vector compare for the complementary conditions
// (ne/le/ge) that pcmp cannot express directly. Unlike vcmp_direct, an
// extra XMM temp is passed to vpcmpCCW — presumably used to negate the
// result of the complementary direct compare; confirm in the
// macroassembler implementation of vpcmpCCW.
instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
  predicate(n->bottom_type()->isa_vectmask() == NULL &&
            !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
            (n->in(2)->get_int() == BoolTest::ne ||
             n->in(2)->get_int() == BoolTest::le ||
             n->in(2)->get_int() == BoolTest::ge)); // cond
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  effect(TEMP dst, TEMP xtmp);
  format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
    __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
7560
// Unsigned integer vector compare on 4..32-byte vectors. x86 has no
// unsigned pcmp, so both operands are biased by XOR-ing in the element
// sign bit (broadcast from the constant table), after which a signed
// compare gives the unsigned ordering.
instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
  predicate(n->bottom_type()->isa_vectmask() == NULL &&
            Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) >=  4 && // src1
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  effect(TEMP dst, TEMP xtmp);
  format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
  ins_encode %{
    // 64-bit pattern holding the per-element sign bit, broadcast below.
    InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1)));
    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));

    if (vlen_enc == Assembler::AVX_128bit) {
      __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
    } else {
      __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
    }
    // Flip the sign bit of both operands, then compare as signed.
    __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc);
    __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc);
    __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
7587
7588 instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
7589 predicate((n->bottom_type()->isa_vectmask() == NULL &&
7590 Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1
7591 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
7592 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
7593 effect(TEMP ktmp);
7594 format %{ "vector_compare $dst,$src1,$src2,$cond" %}
7595 ins_encode %{
7596 assert(UseAVX > 2, "required");
7597
7598 int vlen_enc = vector_length_encoding(this, $src1);
7599 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
7600 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
7601 KRegister mask = k0; // The comparison itself is not being masked.
7602 bool merge = false;
7603 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
7604
7605 switch (src1_elem_bt) {
7606 case T_INT: {
7607 __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
7608 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
7609 break;
7785 // --------------------------------- Vector Blend --------------------------------------
7786
// SSE4.1 vector blend (UseAVX == 0). pblendvb takes its mask implicitly
// from xmm0, hence the bound rxmm0 temp: the mask is copied there first
// unless it already lives in xmm0.
instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{
  predicate(UseAVX == 0);
  match(Set dst (VectorBlend (Binary dst src) mask));
  format %{ "vector_blend $dst,$src,$mask\t! using $tmp as TEMP" %}
  effect(TEMP tmp);
  ins_encode %{
    assert(UseSSE >= 4, "required");

    if ($mask$$XMMRegister != $tmp$$XMMRegister) {
      __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister);
    }
    __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask
  %}
  ins_pipe( pipe_slow );
%}
7802
// AVX byte-wise blend for integral element types, up to 32-byte vectors,
// when the mask is a boolean vector (not a vectmask) and the E-core
// sequence below is not enabled.
instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{
  predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
            n->in(2)->bottom_type()->isa_vectmask() == NULL &&
            Matcher::vector_length_in_bytes(n) <= 32 &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
7816
// AVX blend for floating-point element types (counterpart of vblendvpI),
// up to 32-byte vectors, boolean-vector mask, E-core sequence disabled.
// Uses the ps variant regardless of float vs double element size.
instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{
  predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
            n->in(2)->bottom_type()->isa_vectmask() == NULL &&
            Matcher::vector_length_in_bytes(n) <= 32 &&
            !is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
7830
// Blend via bitwise ops when EnableX86ECoreOpts is set:
// dst = (~mask & src1) | (mask & src2). Covers any element type since the
// mask lanes are all-ones/all-zeros boolean-vector values.
instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{
  predicate(UseAVX > 0 && EnableX86ECoreOpts &&
            n->in(2)->bottom_type()->isa_vectmask() == NULL &&
            Matcher::vector_length_in_bytes(n) <= 32);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %}
  effect(TEMP vtmp, TEMP dst);
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc);
    __ vpand ($dst$$XMMRegister,  $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc);
    __ vpor  ($dst$$XMMRegister,  $dst$$XMMRegister,  $vtmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
7846
// 512-bit blend with a boolean-vector (non-vectmask) mask: first convert
// the mask to a k-register by comparing it against all-bits-set, then use
// the EVEX masked blend (merge semantics).
instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{
  predicate(Matcher::vector_length_in_bytes(n) == 64 &&
            n->in(2)->bottom_type()->isa_vectmask() == NULL);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask\t! using k2 as TEMP" %}
  effect(TEMP ktmp);
  ins_encode %{
    int vlen_enc = Assembler::AVX_512bit;
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg);
    __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
7861
7862
7863 instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{
7864 predicate(n->in(2)->bottom_type()->isa_vectmask() &&
7865 (!is_subword_type(Matcher::vector_element_basic_type(n)) ||
7866 VM_Version::supports_avx512bw()));
7867 match(Set dst (VectorBlend (Binary src1 src2) mask));
7868 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using k2 as TEMP" %}
7869 ins_encode %{
8046 %}
8047 ins_pipe( pipe_slow );
8048 %}
8049
// VectorTest on k-register masks of length >= 16 (or 8 with AVX-512DQ),
// setting the condition flags via kortest.
// NOTE(review): kortest is applied to $src1 twice and $src2 is never read
// in the encoding — presumably intentional because the VectorTest second
// input is a constant all-ones/all-zeros mask and kortest src1,src1 sets
// ZF/CF from src1 alone; confirm against the ideal-graph contract.
instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{
  predicate(Matcher::vector_length(n->in(1)) >= 16 ||
            (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq()));
  match(Set cr (VectorTest src1 src2));
  format %{ "ktest_ge8 $src1, $src2\n\t" %}
  ins_encode %{
    uint masklen = Matcher::vector_length(this, $src1);
    __ kortest(masklen, $src1$$KRegister, $src1$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
8061 #endif
8062
8063 //------------------------------------- LoadMask --------------------------------------------
8064
// VectorLoadMask producing a boolean vector (no vectmask type) when
// AVX-512VL/BW is unavailable. Delegates to load_vector_mask with
// element size and byte length; the trailing 'true' flag's meaning is
// defined by the macroassembler helper — confirm there.
instruct loadMask(legVec dst, legVec src) %{
  predicate(n->bottom_type()->isa_vectmask() == NULL && !VM_Version::supports_avx512vlbw());
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "vector_loadmask_byte $dst, $src\n\t" %}
  ins_encode %{
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true);
  %}
  ins_pipe( pipe_slow );
%}
8077
// VectorLoadMask into a k-register (vectmask result) without AVX-512VL/BW:
// always operates at 512-bit width, using one XMM temp.
instruct loadMask64(kReg dst, vec src, vec xtmp) %{
  predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
  match(Set dst (VectorLoadMask src));
  effect(TEMP xtmp);
  format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %}
  ins_encode %{
    __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
                        true, Assembler::AVX_512bit);
  %}
  ins_pipe( pipe_slow );
%}
8089
// VectorLoadMask into a k-register when AVX-512VL/BW is available: uses
// the actual vector length encoding of the input instead of forcing
// 512-bit, and passes 'false' for the flag used in loadMask64.
instruct loadMask_evex(kReg dst, vec src, vec xtmp) %{
  predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
  match(Set dst (VectorLoadMask src));
  effect(TEMP xtmp);
  format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(in(1));
    __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
                        false, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
8102
8103 //------------------------------------- StoreMask --------------------------------------------
8104
// VectorStoreMask for 1-byte elements: boolean-vector lanes are 0/-1, so
// a byte-wise absolute value (pabsb/vpabsb) maps them to the stored 0/1
// representation in place.
instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{
  predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == NULL);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    int vlen = Matcher::vector_length(this);
    if (vlen <= 16 && UseAVX <= 2) {
      assert(UseSSE >= 3, "required");
      __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
    } else {
      assert(UseAVX > 0, "required");
      int src_vlen_enc = vector_length_encoding(this, $src);
      __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
8122
// VectorStoreMask for 2-byte elements (<= 16 lanes): narrow the 0/-1
// shorts down to bytes (pack against zero, or pack the two 128-bit
// halves), then take byte-wise abs to get 0/1 values.
instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{
  predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == NULL);
  match(Set dst (VectorStoreMask src size));
  effect(TEMP_DEF dst, TEMP xtmp);
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    int vlen_enc = Assembler::AVX_128bit;
    int vlen = Matcher::vector_length(this);
    if (vlen <= 8) {
      assert(UseSSE >= 3, "required");
      // abs(-1) = 1 for the short lanes, then pack the low halves to bytes.
      __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
      __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
      __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
    } else {
      assert(UseAVX > 0, "required");
      // Pack the upper 128-bit half against the lower, then abs the bytes.
      __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
      __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
      __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
8145
8146 instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{
8147 predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == NULL);
8148 match(Set dst (VectorStoreMask src size));
8149 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
8150 effect(TEMP_DEF dst, TEMP xtmp);
8151 ins_encode %{
8152 int vlen_enc = Assembler::AVX_128bit;
8153 int vlen = Matcher::vector_length(this);
8154 if (vlen <= 4) {
8155 assert(UseSSE >= 3, "required");
8156 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
8157 __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
8158 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
8159 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
8160 } else {
8161 assert(UseAVX > 0, "required");
8162 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
8163 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
8164 __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
8165 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
8166 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
8167 }
8187
// VectorStoreMask for 8-byte elements, 4 lanes, without AVX-512: shuffle
// and blend the four 64-bit lanes' low dwords into one 128-bit vector,
// then pack dwords -> words -> bytes against zero and abs to 0/1.
instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{
  predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %}
  effect(TEMP_DEF dst, TEMP vtmp);
  ins_encode %{
    int vlen_enc = Assembler::AVX_128bit;
    // 0x88 selects even dwords: gather the low dword of each 64-bit lane.
    __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit);
    __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
    __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc);
    __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
    __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
    __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
8205
// VectorStoreMask for 4-byte elements with AVX-512: truncate dwords to
// bytes with evpmovdb, then abs to map -1 -> 1. Without AVX-512VL the
// narrowing must be done at 512-bit width.
instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{
  predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == NULL);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    int src_vlen_enc = vector_length_encoding(this, $src);
    int dst_vlen_enc = vector_length_encoding(this);
    if (!VM_Version::supports_avx512vl()) {
      src_vlen_enc = Assembler::AVX_512bit;
    }
    __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
8221
// VectorStoreMask for 8-byte elements with AVX-512: same scheme as the
// 4-byte variant but narrowing qwords to bytes via evpmovqb.
instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{
  predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == NULL);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    int src_vlen_enc = vector_length_encoding(this, $src);
    int dst_vlen_enc = vector_length_encoding(this);
    if (!VM_Version::supports_avx512vl()) {
      src_vlen_enc = Assembler::AVX_512bit;
    }
    __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
    __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
8237
8238 instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{
8239 predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
8240 match(Set dst (VectorStoreMask mask size));
8241 effect(TEMP_DEF dst);
8242 format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
8243 ins_encode %{
9025 %}
9026
// VectorMaskToLong from a k-register mask: delegates to the generic
// vector_mask_operation helper, passing dst twice (result and scratch).
instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask());
  match(Set dst (VectorMaskToLong mask));
  effect(TEMP dst, KILL cr);
  format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int mask_size = mask_len * type2aelembytes(mbt);
    int vlen_enc = vector_length_encoding(this, $mask);
    // dst doubles as the temp register here (TEMP dst in effects).
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
                             $dst$$Register, mask_len, mask_size, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
9043
// VectorMaskToLong from a boolean-vector mask (no vectmask type): the
// XMM-mask overload of vector_mask_operation, with one XMM temp.
instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask() == NULL);
  match(Set dst (VectorMaskToLong mask));
  format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %}
  effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $dst$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
9059
// VectorMaskToLong fused with an intermediate VectorStoreMask: matches the
// (VectorMaskToLong (VectorStoreMask mask size)) shape so the store-mask
// conversion is skipped and the raw boolean vector is consumed directly.
instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == NULL);
  match(Set dst (VectorMaskToLong (VectorStoreMask mask size)));
  format %{ "vector_tolong_avx $dst, $mask \t! using $xtmp as TEMP" %}
  effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $dst$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
9075
// VectorMaskTrueCount (population count of set mask lanes) from a
// k-register mask, using a separate long temp register.
instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask());
  match(Set dst (VectorMaskTrueCount mask));
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int mask_size = mask_len * type2aelembytes(mbt);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
                             $tmp$$Register, mask_len, mask_size, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
9092
// VectorMaskTrueCount from a boolean-vector mask: XMM-mask overload with
// a long temp and an XMM temp.
instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  predicate(n->in(1)->bottom_type()->isa_vectmask() == NULL);
  match(Set dst (VectorMaskTrueCount mask));
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
9108
// Count the set lanes of a mask materialized through VectorStoreMask (AVX path).
instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  // Input of VectorStoreMask is not an opmask (use nullptr per file convention).
  predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size)));
  // dst is written early (TEMP_DEF); tmp/xtmp are scratch; flags are clobbered.
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
9124
// Find the index of the first or last set lane of an opmask-register mask.
// One instruct serves both ideal ops; ideal_Opcode() selects the variant.
instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
  // EVEX path only: mask type is a genuine vectmask (opmask register).
  predicate(n->in(1)->bottom_type()->isa_vectmask());
  match(Set dst (VectorMaskFirstTrue mask));
  match(Set dst (VectorMaskLastTrue mask));
  effect(TEMP_DEF dst, TEMP tmp, KILL cr);
  format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! using $tmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    // Mask footprint in bytes: lane count times lane width.
    int mask_size = mask_len * type2aelembytes(mbt);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
                             $tmp$$Register, mask_len, mask_size, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
9142
// Find the index of the first or last set lane of a boolean-vector mask.
// One instruct serves both ideal ops; ideal_Opcode() selects the variant.
instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  // Non-EVEX path: mask is not in a k-register (use nullptr per file convention).
  predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorMaskFirstTrue mask));
  match(Set dst (VectorMaskLastTrue mask));
  // dst is written early (TEMP_DEF); tmp/xtmp are scratch; flags are clobbered.
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
9159
// Find the index of the first or last set lane of a mask materialized through
// VectorStoreMask (AVX path). ideal_Opcode() selects first vs. last.
instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
  // Input of VectorStoreMask is not an opmask (use nullptr per file convention).
  predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size)));
  match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size)));
  // dst is written early (TEMP_DEF); tmp/xtmp are scratch; flags are clobbered.
  effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
  format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
  ins_encode %{
    int opcode = this->ideal_Opcode();
    BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
    int mask_len = Matcher::vector_length(this, $mask);
    int vlen_enc = vector_length_encoding(this, $mask);
    __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
                             $tmp$$Register, mask_len, mbt, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
9176
9177 // --------------------------------- Compress/Expand Operations ---------------------------
9178
9179 instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{
9180 match(Set dst (CompressV src mask));
9181 match(Set dst (ExpandV src mask));
9978 uint masklen = Matcher::vector_length(this);
9979 __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register);
9980 %}
9981 ins_pipe( pipe_slow );
9982 %}
9983
// Bitwise NOT of an opmask register, matched from XOR with an all-ones mask
// (MaskAll of -1). Restricted to mask lengths where KNOT of the required
// width is available for the current CPU features.
instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{
  predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) ||
            (Matcher::vector_length(n) == 16) ||
            (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw()));
  match(Set dst (XorVMask src (MaskAll cnt)));
  format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %}
  ins_encode %{
    uint masklen = Matcher::vector_length(this);
    __ knot(masklen, $dst$$KRegister, $src$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
9996
// Convert the low bits of a long into a boolean-vector mask, for mask
// lengths of at most 8 lanes (no AVX512 opmask register involved).
instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp) %{
  predicate(n->bottom_type()->isa_vectmask() == NULL && Matcher::vector_length(n) <= 8);
  match(Set dst (VectorLongToMask src));
  effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp);
  format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp as TEMP" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    int vec_enc = vector_length_encoding(mask_len);
    // NOTE(review): xnoreg is passed where $xtmp could go, so the declared
    // $xtmp TEMP is unused in this <=8-lane encoding — confirm intentional.
    __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
                              $rtmp2$$Register, xnoreg, mask_len, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
10010
10011
// Convert the low bits of a long into a boolean-vector mask, for mask
// lengths greater than 8 lanes (up to 32); uses $xtmp1 as vector scratch.
instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{
  predicate(n->bottom_type()->isa_vectmask() == NULL && Matcher::vector_length(n) > 8);
  match(Set dst (VectorLongToMask src));
  effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr);
  format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1, as TEMP" %}
  ins_encode %{
    int mask_len = Matcher::vector_length(this);
    // Lengths beyond 32 lanes are handled by other (opmask) rules.
    assert(mask_len <= 32, "invalid mask length");
    int vec_enc = vector_length_encoding(mask_len);
    __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
                              $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc);
  %}
  ins_pipe( pipe_slow );
%}
10026
10027 instruct long_to_mask_evex(kReg dst, rRegL src) %{
10028 predicate(n->bottom_type()->isa_vectmask());
10029 match(Set dst (VectorLongToMask src));
10030 format %{ "long_to_mask_evex $dst, $src\t!" %}
10031 ins_encode %{
10032 __ kmov($dst$$KRegister, $src$$Register);
10033 %}
|
// Return the number of padding bytes needed before this node at the given
// code offset. Only nodes flagged for the Intel JCC erratum mitigation
// ever require padding; all others return 0.
int MachNode::compute_padding(int current_offset) const {
  if (flags() & Node::PD::Flag_intel_jcc_erratum) {
    Compile* C = Compile::current();
    PhaseOutput* output = C->output();
    Block* block = output->block();
    int index = output->index();
    // Delegate to the erratum helper, which knows the node's position
    // within its block and the register assignment.
    return IntelJccErratum::compute_padding(current_offset, this, block, index, C->regalloc());
  } else {
    return 0;
  }
}
1306
// Emit exception handler code.
// Stuff framesize into a register and call a VM stub routine.
// Returns the offset of the handler within the stub section, or 0 when the
// code cache is full (a bailout is recorded on the compilation in that case).
int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) {

  // Note that the code buffer's insts_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  C2_MacroAssembler _masm(&cbuf);
  address base = __ start_a_stub(size_exception_handler());
  if (base == nullptr) {
    ciEnv::current()->record_failure("CodeCache is full");
    return 0; // CodeBuffer::expand failed
  }
  int offset = __ offset();
  // The handler is a single jump into the shared exception blob.
  __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
  __ end_a_stub();
  return offset;
}
1325
1326 // Emit deopt handler code.
1327 int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) {
1328
1329 // Note that the code buffer's insts_mark is always relative to insts.
1330 // That's why we must use the macroassembler to generate a handler.
1331 C2_MacroAssembler _masm(&cbuf);
1332 address base = __ start_a_stub(size_deopt_handler());
1333 if (base == nullptr) {
1334 ciEnv::current()->record_failure("CodeCache is full");
1335 return 0; // CodeBuffer::expand failed
1336 }
1337 int offset = __ offset();
1338
1339 #ifdef _LP64
1340 address the_pc = (address) __ pc();
1341 Label next;
1342 // push a "the_pc" on the stack without destroying any registers
1343 // as they all may be live.
1344
1345 // push address of "next"
1346 __ call(next, relocInfo::none); // reloc none is fine since it is a disp32
1347 __ bind(next);
1348 // adjust it so it matches "the_pc"
1349 __ subptr(Address(rsp, 0), __ offset() - offset);
1350 #else
1351 InternalAddress here(__ pc());
1352 __ pushptr(here.addr(), noreg);
1353 #endif
2169 return new legVecZOper();
2170 }
2171 if (legacy) {
2172 switch (ideal_reg) {
2173 case Op_VecS: return new legVecSOper();
2174 case Op_VecD: return new legVecDOper();
2175 case Op_VecX: return new legVecXOper();
2176 case Op_VecY: return new legVecYOper();
2177 case Op_VecZ: return new legVecZOper();
2178 }
2179 } else {
2180 switch (ideal_reg) {
2181 case Op_VecS: return new vecSOper();
2182 case Op_VecD: return new vecDOper();
2183 case Op_VecX: return new vecXOper();
2184 case Op_VecY: return new vecYOper();
2185 case Op_VecZ: return new vecZOper();
2186 }
2187 }
2188 ShouldNotReachHere();
2189 return nullptr;
2190 }
2191
// Return true if the given mach node is a pure register-to-register move,
// i.e. one of the generated move rules between vector/legacy-vector and
// scalar float/double registers. Such moves can be elided or coalesced.
bool Matcher::is_reg2reg_move(MachNode* m) {
  switch (m->rule()) {
    case MoveVec2Leg_rule:
    case MoveLeg2Vec_rule:
    case MoveF2VL_rule:
    case MoveF2LEG_rule:
    case MoveVL2F_rule:
    case MoveLEG2F_rule:
    case MoveD2VL_rule:
    case MoveD2LEG_rule:
    case MoveVL2D_rule:
    case MoveLEG2D_rule:
      return true;
    default:
      return false;
  }
}
2209
2338 }
2339 return false;
2340 }
2341
2342 // This function identifies sub-graphs in which a 'load' node is
2343 // input to two different nodes, and such that it can be matched
2344 // with BMI instructions like blsi, blsr, etc.
2345 // Example : for b = -a[i] & a[i] can be matched to blsi r32, m32.
2346 // The graph is (AndL (SubL Con0 LoadL*) LoadL*), where LoadL*
2347 // refers to the same node.
2348 //
2349 // Match the generic fused operations pattern (op1 (op2 Con{ConType} mop) mop)
2350 // This is a temporary solution until we make DAGs expressible in ADL.
2351 template<typename ConType>
2352 class FusedPatternMatcher {
2353 Node* _op1_node;
2354 Node* _mop_node;
2355 int _con_op;
2356
  // Return the input index (1 or 2) of n whose opcode is next_op, or -1 if
  // no such input exists. When next_op_idx == -1 the operation is treated
  // as commutative and both inputs are tried; otherwise only the given
  // index is checked.
  static int match_next(Node* n, int next_op, int next_op_idx) {
    // Both inputs must be present for a binary pattern to match.
    if (n->in(1) == nullptr || n->in(2) == nullptr) {
      return -1;
    }

    if (next_op_idx == -1) { // n is commutative, try rotations
      if (n->in(1)->Opcode() == next_op) {
        return 1;
      } else if (n->in(2)->Opcode() == next_op) {
        return 2;
      }
    } else {
      assert(next_op_idx > 0 && next_op_idx <= 2, "Bad argument index");
      if (n->in(next_op_idx)->Opcode() == next_op) {
        return next_op_idx;
      }
    }
    return -1;
  }
2376
2377 public:
2378 FusedPatternMatcher(Node* op1_node, Node* mop_node, int con_op) :
2405 if (op2_con_idx == -1) {
2406 return false;
2407 }
2408 // Memory operation must be the other edge
2409 int op2_mop_idx = (op2_con_idx & 1) + 1;
2410 // Check that the memory operation is the same node
2411 if (op2_node->in(op2_mop_idx) == _mop_node) {
2412 // Now check the constant
2413 const Type* con_type = op2_node->in(op2_con_idx)->bottom_type();
2414 if (con_type != Type::TOP && ConType::as_self(con_type)->get_con() == con_value) {
2415 return true;
2416 }
2417 }
2418 }
2419 return false;
2420 }
2421 };
2422
// Return true if nodes n (a logic op) and m (a load feeding it twice) form
// one of the BMI1-fusable shapes: blsi (And(Sub(0, x), x)), blsr
// (And(Add(x, -1), x)) or blsmsk (Xor(Add(x, -1), x)), for int or long.
static bool is_bmi_pattern(Node* n, Node* m) {
  assert(UseBMI1Instructions, "sanity");
  if (n != nullptr && m != nullptr) {
    if (m->Opcode() == Op_LoadI) {
      FusedPatternMatcher<TypeInt> bmii(n, m, Op_ConI);
      // Sub's constant must be input 1 with value 0; Add's constant is -1
      // and may be on either input (commutative).
      return bmii.match(Op_AndI, -1, Op_SubI, 1, 0)    ||
             bmii.match(Op_AndI, -1, Op_AddI, -1, -1)  ||
             bmii.match(Op_XorI, -1, Op_AddI, -1, -1);
    } else if (m->Opcode() == Op_LoadL) {
      FusedPatternMatcher<TypeLong> bmil(n, m, Op_ConL);
      return bmil.match(Op_AndL, -1, Op_SubL, 1, 0)    ||
             bmil.match(Op_AndL, -1, Op_AddL, -1, -1)  ||
             bmil.match(Op_XorL, -1, Op_AddL, -1, -1);
    }
  }
  return false;
}
2440
2441 // Should the matcher clone input 'm' of node 'n'?
2442 bool Matcher::pd_clone_node(Node* n, Node* m, Matcher::MStack& mstack) {
2443 // If 'n' and 'm' are part of a graph for BMI instruction, clone the input 'm'.
2444 if (UseBMI1Instructions && is_bmi_pattern(n, m)) {
2445 mstack.push(m, Visit);
// Size of a breakpoint node's emitted code; defers to the generic
// MachNode::size computation.
uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const {
  return MachNode::size(ra_);
}
2774
2775 %}
2776
2777 encode %{
2778
  // Emitted after every call. Optionally verifies the stack-depth cookie,
  // then (for calls returning an inline type as fields) materializes the
  // IsInit flag and normalizes rax for null-checking by compiled code.
  enc_class call_epilog %{
    C2_MacroAssembler _masm(&cbuf);
    if (VerifyStackAtCalls) {
      // Check that stack depth is unchanged: find majik cookie on stack
      int framesize = ra_->reg2offset_unchecked(OptoReg::add(ra_->_matcher._old_SP, -3*VMRegImpl::slots_per_word));
      Label L;
      __ cmpptr(Address(rsp, framesize), (int32_t)0xbadb100d);
      __ jccb(Assembler::equal, L);
      // Die if stack mismatch
      __ int3();
      __ bind(L);
    }
    if (tf()->returns_inline_type_as_fields() && !_method->is_method_handle_intrinsic()) {
      C2_MacroAssembler _masm(&cbuf);
      // The last return value is not set by the callee but used to pass IsInit information to compiled code.
      // Search for the corresponding projection, get the register and emit code that initialized it.
      uint con = (tf()->range_cc()->cnt() - 1);
      for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
        ProjNode* proj = fast_out(i)->as_Proj();
        if (proj->_con == con) {
          // Set IsInit if rax is non-null (a non-null value is returned buffered or scalarized)
          OptoReg::Name optoReg = ra_->get_reg_first(proj);
          VMReg reg = OptoReg::as_VMReg(optoReg, ra_->_framesize, OptoReg::reg2stack(ra_->_matcher._new_SP));
          // Stack-assigned projections go through rscratch1 before the store.
          Register toReg = reg->is_reg() ? reg->as_Register() : rscratch1;
          __ testq(rax, rax);
          __ setb(Assembler::notZero, toReg);
          __ movzbl(toReg, toReg);
          if (reg->is_stack()) {
            int st_off = reg->reg2stack() * VMRegImpl::stack_slot_size;
            __ movq(Address(rsp, st_off), toReg);
          }
          break;
        }
      }
      if (return_value_is_used()) {
        // An inline type is returned as fields in multiple registers.
        // Rax either contains an oop if the inline type is buffered or a pointer
        // to the corresponding InlineKlass with the lowest bit set to 1. Zero rax
        // if the lowest bit is set to allow C2 to use the oop after null checking.
        // rax &= (rax & 1) - 1
        __ movptr(rscratch1, rax);
        __ andptr(rscratch1, 0x1);
        __ subptr(rscratch1, 0x1);
        __ andptr(rax, rscratch1);
      }
    }
  %}
2826
2827 %}
2828
2829 // Operands for bound floating pointer register arguments
2830 operand rxmm0() %{
2831 constraint(ALLOC_IN_RC(xmm0_reg));
2832 match(VecX);
2833 format%{%}
2834 interface(REG_INTER);
2835 %}
2836
2837 //----------OPERANDS-----------------------------------------------------------
2838 // Operand definitions must precede instruction definitions for correct parsing
2839 // in the ADLC because operands constitute user defined types which are used in
2840 // instruction definitions.
2841
2842 // Vectors
2843
2844 // Dummy generic vector class. Should be used for all vector operands.
// Round a vector of doubles to longs (RoundVD) on EVEX-capable hardware,
// using a custom MXCSR rounding mode loaded from the constant table.
instruct vround_reg_evex(vec dst, vec src, rRegP tmp, vec xtmp1, vec xtmp2, kReg ktmp1, kReg ktmp2, rFlagsReg cr) %{
  predicate(Matcher::vector_element_basic_type(n) == T_LONG);
  match(Set dst (RoundVD src));
  effect(TEMP dst, TEMP tmp, TEMP xtmp1, TEMP xtmp2, TEMP ktmp1, TEMP ktmp2, KILL cr);
  format %{ "vector_round_long $dst,$src\t! using $tmp, $xtmp1, $xtmp2, $ktmp1, $ktmp2 as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    // MXCSR image selecting round-to-nearest; E-core builds also mask
    // exception flags (0x3FBF vs 0x3F80).
    InternalAddress new_mxcsr = $constantaddress((jint)(EnableX86ECoreOpts ? 0x3FBF : 0x3F80));
    __ vector_round_double_evex($dst$$XMMRegister, $src$$XMMRegister,
                                ExternalAddress(StubRoutines::x86::vector_double_sign_flip()), new_mxcsr, vlen_enc,
                                $tmp$$Register, $xtmp1$$XMMRegister, $xtmp2$$XMMRegister, $ktmp1$$KRegister, $ktmp2$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
7489
7490 #endif // _LP64
7491
7492 // --------------------------------- VectorMaskCmp --------------------------------------
7493
// Float/double vector compare producing a boolean-vector result, for
// 8..32-byte vectors (AVX encodings; no opmask register result).
instruct vcmpFD(legVec dst, legVec src1, legVec src2, immI8 cond) %{
  predicate(n->bottom_type()->isa_vectmask() == nullptr &&
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 8 && // src1
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
            is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src1);
    // Translate the ideal BoolTest condition into an x86 FP predicate.
    Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
    if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
      __ vcmpps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
    } else {
      __ vcmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
7512
// 64-byte float/double vector compare that must deliver a boolean-vector
// result: compare into a scratch opmask, then expand to all-ones lanes.
instruct evcmpFD64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
  predicate(Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64 && // src1
            n->bottom_type()->isa_vectmask() == nullptr &&
            is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  effect(TEMP ktmp);
  format %{ "vector_compare $dst,$src1,$src2,$cond" %}
  ins_encode %{
    int vlen_enc = Assembler::AVX_512bit;
    Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
    KRegister mask = k0; // The comparison itself is not being masked.
    if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
      __ evcmpps($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
      // Expand the opmask into -1/0 lanes in the destination vector.
      __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
    } else {
      __ evcmppd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
      __ evmovdquq($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), false, vlen_enc, noreg);
    }
  %}
  ins_pipe( pipe_slow );
%}
7534
// Float/double vector compare producing an opmask-register result directly.
instruct evcmpFD(kReg dst, vec src1, vec src2, immI8 cond) %{
  predicate(n->bottom_type()->isa_vectmask() &&
            is_floating_point_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1 T_FLOAT, T_DOUBLE
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vector_compare_evex $dst,$src1,$src2,$cond\t!" %}
  ins_encode %{
    assert(bottom_type()->isa_vectmask(), "TypeVectMask expected");
    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicateFP cmp = booltest_pred_to_comparison_pred_fp($cond$$constant);
    KRegister mask = k0; // The comparison itself is not being masked.
    if (Matcher::vector_element_basic_type(this, $src1) == T_FLOAT) {
      __ evcmpps($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
    } else {
      __ evcmppd($dst$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
7553
// Integral vector compare for the signed conditions x86 supports directly
// (eq/lt/gt); result is a boolean vector in an xmm/ymm register.
instruct vcmp_direct(legVec dst, legVec src1, legVec src2, immI8 cond) %{
  predicate(n->bottom_type()->isa_vectmask() == nullptr &&
            !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
            (n->in(2)->get_int() == BoolTest::eq ||
             n->in(2)->get_int() == BoolTest::lt ||
             n->in(2)->get_int() == BoolTest::gt)); // cond
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  format %{ "vector_compare $dst,$src1,$src2,$cond\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
    // xnoreg: no scratch needed since no negation of the result is required.
    __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, xnoreg, cmp, ww, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
7573
// Integral vector compare for ne/le/ge: implemented as the inverse direct
// compare followed by a negation, which needs a vector scratch register.
instruct vcmp_negate(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
  predicate(n->bottom_type()->isa_vectmask() == nullptr &&
            !Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1))) &&
            (n->in(2)->get_int() == BoolTest::ne ||
             n->in(2)->get_int() == BoolTest::le ||
             n->in(2)->get_int() == BoolTest::ge)); // cond
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  effect(TEMP dst, TEMP xtmp);
  format %{ "vector_compare $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));
    __ vpcmpCCW($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
7594
// Unsigned integral vector compare: x86 pre-AVX512 has only signed vector
// compares, so both operands are biased by flipping their sign bit before
// a signed compare is performed.
instruct vcmpu(legVec dst, legVec src1, legVec src2, immI8 cond, legVec xtmp) %{
  predicate(n->bottom_type()->isa_vectmask() == nullptr &&
            Matcher::is_unsigned_booltest_pred(n->in(2)->get_int()) &&
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) >= 4 && // src1
            Matcher::vector_length_in_bytes(n->in(1)->in(1)) <= 32 && // src1
            is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
  match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
  effect(TEMP dst, TEMP xtmp);
  format %{ "vector_compareu $dst,$src1,$src2,$cond\t! using $xtmp as TEMP" %}
  ins_encode %{
    // Per-element sign-bit pattern for the element type, from the constant table.
    InternalAddress flip_bit = $constantaddress(high_bit_set(Matcher::vector_element_basic_type(this, $src1)));
    int vlen_enc = vector_length_encoding(this, $src1);
    Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
    Assembler::Width ww = widthForType(Matcher::vector_element_basic_type(this, $src1));

    // Broadcast the sign-flip constant into xtmp.
    if (vlen_enc == Assembler::AVX_128bit) {
      __ vmovddup($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
    } else {
      __ vbroadcastsd($xtmp$$XMMRegister, flip_bit, vlen_enc, noreg);
    }
    // Bias both operands, then do the signed compare.
    __ vpxor($dst$$XMMRegister, $xtmp$$XMMRegister, $src1$$XMMRegister, vlen_enc);
    __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $src2$$XMMRegister, vlen_enc);
    __ vpcmpCCW($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, cmp, ww, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
7621
7622 instruct vcmp64(vec dst, vec src1, vec src2, immI8 cond, kReg ktmp) %{
7623 predicate((n->bottom_type()->isa_vectmask() == nullptr &&
7624 Matcher::vector_length_in_bytes(n->in(1)->in(1)) == 64) && // src1
7625 is_integral_type(Matcher::vector_element_basic_type(n->in(1)->in(1)))); // src1
7626 match(Set dst (VectorMaskCmp (Binary src1 src2) cond));
7627 effect(TEMP ktmp);
7628 format %{ "vector_compare $dst,$src1,$src2,$cond" %}
7629 ins_encode %{
7630 assert(UseAVX > 2, "required");
7631
7632 int vlen_enc = vector_length_encoding(this, $src1);
7633 Assembler::ComparisonPredicate cmp = booltest_pred_to_comparison_pred($cond$$constant);
7634 bool is_unsigned = Matcher::is_unsigned_booltest_pred($cond$$constant);
7635 KRegister mask = k0; // The comparison itself is not being masked.
7636 bool merge = false;
7637 BasicType src1_elem_bt = Matcher::vector_element_basic_type(this, $src1);
7638
7639 switch (src1_elem_bt) {
7640 case T_INT: {
7641 __ evpcmpd($ktmp$$KRegister, mask, $src1$$XMMRegister, $src2$$XMMRegister, cmp, !is_unsigned, vlen_enc);
7642 __ evmovdqul($dst$$XMMRegister, $ktmp$$KRegister, ExternalAddress(vector_all_bits_set()), merge, vlen_enc, noreg);
7643 break;
7819 // --------------------------------- Vector Blend --------------------------------------
7820
// SSE4.1 vector blend: pblendvb reads its mask implicitly from xmm0, so
// the mask is first copied into the xmm0-bound operand if necessary.
instruct blendvp(vec dst, vec src, vec mask, rxmm0 tmp) %{
  predicate(UseAVX == 0);
  match(Set dst (VectorBlend (Binary dst src) mask));
  format %{ "vector_blend $dst,$src,$mask\t! using $tmp as TEMP" %}
  effect(TEMP tmp);
  ins_encode %{
    assert(UseSSE >= 4, "required");

    if ($mask$$XMMRegister != $tmp$$XMMRegister) {
      __ movdqu($tmp$$XMMRegister, $mask$$XMMRegister);
    }
    __ pblendvb($dst$$XMMRegister, $src$$XMMRegister); // uses xmm0 as mask
  %}
  ins_pipe( pipe_slow );
%}
7836
// AVX vector blend for integral element types (up to 32 bytes) using the
// explicit-mask vpblendvb form. Disabled under EnableX86ECoreOpts, where
// the and/andn/or sequence in vblendvp is preferred.
instruct vblendvpI(legVec dst, legVec src1, legVec src2, legVec mask) %{
  predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
            n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
            Matcher::vector_length_in_bytes(n) <= 32 &&
            is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vpblendvb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
7850
// AVX vector blend for floating-point element types (up to 32 bytes) using
// vblendvps. Disabled under EnableX86ECoreOpts (see vblendvp below).
instruct vblendvpFD(legVec dst, legVec src1, legVec src2, legVec mask) %{
  predicate(UseAVX > 0 && !EnableX86ECoreOpts &&
            n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
            Matcher::vector_length_in_bytes(n) <= 32 &&
            !is_integral_type(Matcher::vector_element_basic_type(n)));
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask\t!" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    __ vblendvps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $mask$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
7864
// E-core-optimized vector blend: dst = (mask & src2) | (~mask & src1),
// built from andn/and/or, which is preferred over vblendv* on those parts.
instruct vblendvp(legVec dst, legVec src1, legVec src2, legVec mask, legVec vtmp) %{
  // nullptr comparison for consistency with the sibling blend rules.
  predicate(UseAVX > 0 && EnableX86ECoreOpts &&
            n->in(2)->bottom_type()->isa_vectmask() == nullptr &&
            Matcher::vector_length_in_bytes(n) <= 32);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask\t! using $vtmp as TEMP" %}
  effect(TEMP vtmp, TEMP dst);
  ins_encode %{
    int vlen_enc = vector_length_encoding(this);
    // vtmp = ~mask & src1; dst = mask & src2; dst |= vtmp.
    __ vpandn($vtmp$$XMMRegister, $mask$$XMMRegister, $src1$$XMMRegister, vlen_enc);
    __ vpand ($dst$$XMMRegister, $mask$$XMMRegister, $src2$$XMMRegister, vlen_enc);
    __ vpor  ($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
7880
// 64-byte vector blend with a boolean-vector mask: convert the mask into an
// opmask by comparing against all-ones, then use a masked blend.
instruct evblendvp64(vec dst, vec src1, vec src2, vec mask, kReg ktmp) %{
  predicate(Matcher::vector_length_in_bytes(n) == 64 &&
            n->in(2)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorBlend (Binary src1 src2) mask));
  format %{ "vector_blend $dst,$src1,$src2,$mask\t! using k2 as TEMP" %}
  effect(TEMP ktmp);
  ins_encode %{
    int vlen_enc = Assembler::AVX_512bit;
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    // ktmp = (mask == all-ones) per lane, then blend src1/src2 under ktmp.
    __ evpcmp(elem_bt, $ktmp$$KRegister, k0, $mask$$XMMRegister, ExternalAddress(vector_all_bits_set()), Assembler::eq, vlen_enc, noreg);
    __ evpblend(elem_bt, $dst$$XMMRegister, $ktmp$$KRegister, $src1$$XMMRegister, $src2$$XMMRegister, true, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
7895
7896
7897 instruct evblendvp64_masked(vec dst, vec src1, vec src2, kReg mask) %{
7898 predicate(n->in(2)->bottom_type()->isa_vectmask() &&
7899 (!is_subword_type(Matcher::vector_element_basic_type(n)) ||
7900 VM_Version::supports_avx512bw()));
7901 match(Set dst (VectorBlend (Binary src1 src2) mask));
7902 format %{ "vector_blend $dst,$src1,$src2,$mask\t! using k2 as TEMP" %}
7903 ins_encode %{
8080 %}
8081 ins_pipe( pipe_slow );
8082 %}
8083
// Set condition flags from a VectorTest over opmask registers when a
// suitably wide KORTEST is available (>=16 lanes, or 8 with AVX512DQ).
instruct ktest_ge8(rFlagsRegU cr, kReg src1, kReg src2) %{
  predicate(Matcher::vector_length(n->in(1)) >= 16 ||
            (Matcher::vector_length(n->in(1)) == 8 && VM_Version::supports_avx512dq()));
  match(Set cr (VectorTest src1 src2));
  format %{ "ktest_ge8 $src1, $src2\n\t" %}
  ins_encode %{
    uint masklen = Matcher::vector_length(this, $src1);
    // NOTE(review): kortest of $src1 with itself — $src2 is matched but not
    // used by the encoding; confirm this is intentional for VectorTest.
    __ kortest(masklen, $src1$$KRegister, $src1$$KRegister);
  %}
  ins_pipe( pipe_slow );
%}
8095 #endif
8096
8097 //------------------------------------- LoadMask --------------------------------------------
8098
// Load a boolean-vector mask into vector form on hardware without
// AVX512VL/BW; result lanes become all-ones/zero per source byte.
instruct loadMask(legVec dst, legVec src) %{
  predicate(n->bottom_type()->isa_vectmask() == nullptr && !VM_Version::supports_avx512vlbw());
  match(Set dst (VectorLoadMask src));
  effect(TEMP dst);
  format %{ "vector_loadmask_byte $dst, $src\n\t" %}
  ins_encode %{
    int vlen_in_bytes = Matcher::vector_length_in_bytes(this);
    BasicType elem_bt = Matcher::vector_element_basic_type(this);
    __ load_vector_mask($dst$$XMMRegister, $src$$XMMRegister, vlen_in_bytes, elem_bt, true);
  %}
  ins_pipe( pipe_slow );
%}
8111
// Load a boolean vector into an opmask register on hardware without
// AVX512VL/BW; always uses the full 512-bit encoding.
instruct loadMask64(kReg dst, vec src, vec xtmp) %{
  predicate(n->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
  match(Set dst (VectorLoadMask src));
  effect(TEMP xtmp);
  format %{ "vector_loadmask_64byte $dst, $src\t! using $xtmp as TEMP" %}
  ins_encode %{
    __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
                        true, Assembler::AVX_512bit);
  %}
  ins_pipe( pipe_slow );
%}
8123
// Load a boolean vector into an opmask register when AVX512VL/BW are
// available; uses the source's natural vector-length encoding.
instruct loadMask_evex(kReg dst, vec src, vec xtmp) %{
  predicate(n->bottom_type()->isa_vectmask() && VM_Version::supports_avx512vlbw());
  match(Set dst (VectorLoadMask src));
  effect(TEMP xtmp);
  format %{ "vector_loadmask_byte $dst, $src\t! using $xtmp as TEMP" %}
  ins_encode %{
    int vlen_enc = vector_length_encoding(in(1));
    __ load_vector_mask($dst$$KRegister, $src$$XMMRegister, $xtmp$$XMMRegister,
                        false, vlen_enc);
  %}
  ins_pipe( pipe_slow );
%}
8136
8137 //------------------------------------- StoreMask --------------------------------------------
8138
// Store a boolean-vector mask with byte elements: normalize -1/0 lanes
// to 1/0 via an absolute-value of the bytes.
instruct vstoreMask1B(vec dst, vec src, immI_1 size) %{
  predicate(Matcher::vector_length(n) < 64 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    int vlen = Matcher::vector_length(this);
    if (vlen <= 16 && UseAVX <= 2) {
      assert(UseSSE >= 3, "required");
      __ pabsb($dst$$XMMRegister, $src$$XMMRegister);
    } else {
      assert(UseAVX > 0, "required");
      int src_vlen_enc = vector_length_encoding(this, $src);
      __ vpabsb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
8156
// Store a boolean-vector mask with short (2-byte) elements: narrow each
// 16-bit lane to a byte and normalize -1/0 lanes to 1/0.
instruct vstoreMask2B(vec dst, vec src, vec xtmp, immI_2 size) %{
  predicate(Matcher::vector_length(n) <= 16 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
  match(Set dst (VectorStoreMask src size));
  effect(TEMP_DEF dst, TEMP xtmp);
  format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
  ins_encode %{
    int vlen_enc = Assembler::AVX_128bit;
    int vlen = Matcher::vector_length(this);
    if (vlen <= 8) {
      assert(UseSSE >= 3, "required");
      // abs to 1/0, then pack the shorts down to bytes against zero.
      __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
      __ pabsw($dst$$XMMRegister, $src$$XMMRegister);
      __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
    } else {
      assert(UseAVX > 0, "required");
      // Pack the high 128-bit half against the low half, then normalize.
      __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
      __ vpacksswb($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
      __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
    }
  %}
  ins_pipe( pipe_slow );
%}
8179
8180 instruct vstoreMask4B(vec dst, vec src, vec xtmp, immI_4 size) %{
8181 predicate(UseAVX <= 2 && Matcher::vector_length(n) <= 8 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
8182 match(Set dst (VectorStoreMask src size));
8183 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
8184 effect(TEMP_DEF dst, TEMP xtmp);
8185 ins_encode %{
8186 int vlen_enc = Assembler::AVX_128bit;
8187 int vlen = Matcher::vector_length(this);
8188 if (vlen <= 4) {
8189 assert(UseSSE >= 3, "required");
8190 __ pxor($xtmp$$XMMRegister, $xtmp$$XMMRegister);
8191 __ pabsd($dst$$XMMRegister, $src$$XMMRegister);
8192 __ packusdw($dst$$XMMRegister, $xtmp$$XMMRegister);
8193 __ packuswb($dst$$XMMRegister, $xtmp$$XMMRegister);
8194 } else {
8195 assert(UseAVX > 0, "required");
8196 __ vpxor($xtmp$$XMMRegister, $xtmp$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
8197 __ vextracti128($dst$$XMMRegister, $src$$XMMRegister, 0x1);
8198 __ vpackssdw($dst$$XMMRegister, $src$$XMMRegister, $dst$$XMMRegister, vlen_enc);
8199 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $xtmp$$XMMRegister, vlen_enc);
8200 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
8201 }
8221
// VectorStoreMask with 8-byte elements on AVX/AVX2 (4 lanes, 256-bit src):
// compress the four 64-bit mask lanes down to four 0/1 bytes.
8222 instruct storeMask8B_avx(vec dst, vec src, immI_8 size, vec vtmp) %{
8223 predicate(UseAVX <= 2 && Matcher::vector_length(n) == 4);
8224 match(Set dst (VectorStoreMask src size));
8225 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s], using $vtmp as TEMP" %}
8226 effect(TEMP_DEF dst, TEMP vtmp);
8227 ins_encode %{
8228 int vlen_enc = Assembler::AVX_128bit;
// Select one 32-bit word per 64-bit lane (shuffle imm 0x88), then merge
// the upper 128-bit half into the lower (vblendps imm 0xC keeps its two
// relevant dwords).
8229 __ vshufps($dst$$XMMRegister, $src$$XMMRegister, $src$$XMMRegister, 0x88, Assembler::AVX_256bit);
8230 __ vextracti128($vtmp$$XMMRegister, $dst$$XMMRegister, 0x1);
8231 __ vblendps($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, 0xC, vlen_enc);
// Narrow dwords -> words -> bytes against a zeroed temp, then normalize
// all-ones bytes to 1.
8232 __ vpxor($vtmp$$XMMRegister, $vtmp$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
8233 __ vpackssdw($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
8234 __ vpacksswb($dst$$XMMRegister, $dst$$XMMRegister, $vtmp$$XMMRegister, vlen_enc);
8235 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, vlen_enc);
8236 %}
8237 ins_pipe( pipe_slow );
8238 %}
8239
// VectorStoreMask with 4-byte elements on AVX-512 (boolean-vector input,
// not an opmask): truncate each int lane to a byte with evpmovdb, then
// normalize all-ones bytes to 1 with vpabsb.
8240 instruct vstoreMask4B_evex_novectmask(vec dst, vec src, immI_4 size) %{
8241 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
8242 match(Set dst (VectorStoreMask src size));
8243 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
8244 ins_encode %{
8245 int src_vlen_enc = vector_length_encoding(this, $src);
8246 int dst_vlen_enc = vector_length_encoding(this);
// Without AVX512VL the narrowing instruction only exists in 512-bit form.
8247 if (!VM_Version::supports_avx512vl()) {
8248 src_vlen_enc = Assembler::AVX_512bit;
8249 }
8250 __ evpmovdb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
8251 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
8252 %}
8253 ins_pipe( pipe_slow );
8254 %}
8255
// VectorStoreMask with 8-byte elements on AVX-512 (boolean-vector input,
// not an opmask): truncate each long lane to a byte with evpmovqb, then
// normalize all-ones bytes to 1 with vpabsb.
8256 instruct vstoreMask8B_evex_novectmask(vec dst, vec src, immI_8 size) %{
8257 predicate(UseAVX > 2 && n->in(1)->bottom_type()->isa_vectmask() == nullptr);
8258 match(Set dst (VectorStoreMask src size));
8259 format %{ "vector_store_mask $dst, $src \t! elem size is $size byte[s]" %}
8260 ins_encode %{
8261 int src_vlen_enc = vector_length_encoding(this, $src);
8262 int dst_vlen_enc = vector_length_encoding(this);
// Without AVX512VL the narrowing instruction only exists in 512-bit form.
8263 if (!VM_Version::supports_avx512vl()) {
8264 src_vlen_enc = Assembler::AVX_512bit;
8265 }
8266 __ evpmovqb($dst$$XMMRegister, $src$$XMMRegister, src_vlen_enc);
8267 __ vpabsb($dst$$XMMRegister, $dst$$XMMRegister, dst_vlen_enc);
8268 %}
8269 ins_pipe( pipe_slow );
8270 %}
8271
8272 instruct vstoreMask_evex_vectmask(vec dst, kReg mask, immI size) %{
8273 predicate(n->in(1)->bottom_type()->isa_vectmask() && !VM_Version::supports_avx512vlbw());
8274 match(Set dst (VectorStoreMask mask size));
8275 effect(TEMP_DEF dst);
8276 format %{ "vector_store_mask $dst, $mask \t! elem size is $size byte[s]" %}
8277 ins_encode %{
9059 %}
9060
// VectorMaskToLong for opmask (k-register) inputs: delegate to
// C2_MacroAssembler::vector_mask_operation, dispatching on the ideal opcode.
9061 instruct vmask_tolong_evex(rRegL dst, kReg mask, rFlagsReg cr) %{
9062 predicate(n->in(1)->bottom_type()->isa_vectmask());
9063 match(Set dst (VectorMaskToLong mask));
9064 effect(TEMP dst, KILL cr);
9065 format %{ "vector_tolong_evex $dst, $mask \t! vector mask tolong" %}
9066 ins_encode %{
9067 int opcode = this->ideal_Opcode();
9068 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
9069 int mask_len = Matcher::vector_length(this, $mask);
9070 int mask_size = mask_len * type2aelembytes(mbt);
9071 int vlen_enc = vector_length_encoding(this, $mask);
// Note: $dst is passed twice — once as the result register and once as
// the scratch register argument (hence effect(TEMP dst)).
9072 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
9073 $dst$$Register, mask_len, mask_size, vlen_enc);
9074 %}
9075 ins_pipe( pipe_slow );
9076 %}
9077
// VectorMaskToLong for boolean-vector (XMM) inputs: delegate to the XMM
// overload of C2_MacroAssembler::vector_mask_operation, using $xtmp as a
// vector scratch and $dst as both result and scratch GPR.
9078 instruct vmask_tolong_bool(rRegL dst, vec mask, vec xtmp, rFlagsReg cr) %{
9079 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
9080 match(Set dst (VectorMaskToLong mask));
9081 format %{ "vector_tolong_bool $dst, $mask \t! using $xtmp as TEMP" %}
9082 effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
9083 ins_encode %{
9084 int opcode = this->ideal_Opcode();
9085 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
9086 int mask_len = Matcher::vector_length(this, $mask);
9087 int vlen_enc = vector_length_encoding(this, $mask);
9088 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
9089 $dst$$Register, mask_len, mbt, vlen_enc);
9090 %}
9091 ins_pipe( pipe_slow );
9092 %}
9093
// VectorMaskToLong matched through an intervening VectorStoreMask: the
// store-mask conversion is folded away and the raw mask vector is consumed
// directly. Encode body is identical to vmask_tolong_bool.
9094 instruct vmask_tolong_avx(rRegL dst, vec mask, immI size, vec xtmp, rFlagsReg cr) %{
9095 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
9096 match(Set dst (VectorMaskToLong (VectorStoreMask mask size)));
9097 format %{ "vector_tolong_avx $dst, $mask \t! using $xtmp as TEMP" %}
9098 effect(TEMP_DEF dst, TEMP xtmp, KILL cr);
9099 ins_encode %{
9100 int opcode = this->ideal_Opcode();
9101 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
9102 int mask_len = Matcher::vector_length(this, $mask);
9103 int vlen_enc = vector_length_encoding(this, $mask);
9104 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
9105 $dst$$Register, mask_len, mbt, vlen_enc);
9106 %}
9107 ins_pipe( pipe_slow );
9108 %}
9109
// VectorMaskTrueCount for opmask (k-register) inputs: delegate to
// C2_MacroAssembler::vector_mask_operation with $tmp as the scratch GPR.
9110 instruct vmask_truecount_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
9111 predicate(n->in(1)->bottom_type()->isa_vectmask());
9112 match(Set dst (VectorMaskTrueCount mask));
9113 effect(TEMP_DEF dst, TEMP tmp, KILL cr);
9114 format %{ "vector_truecount_evex $dst, $mask \t! using $tmp as TEMP" %}
9115 ins_encode %{
9116 int opcode = this->ideal_Opcode();
9117 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
9118 int mask_len = Matcher::vector_length(this, $mask);
9119 int mask_size = mask_len * type2aelembytes(mbt);
9120 int vlen_enc = vector_length_encoding(this, $mask);
9121 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
9122 $tmp$$Register, mask_len, mask_size, vlen_enc);
9123 %}
9124 ins_pipe( pipe_slow );
9125 %}
9126
// VectorMaskTrueCount for boolean-vector (XMM) inputs: delegate to the XMM
// overload of vector_mask_operation with $tmp (GPR) and $xtmp (vector) as
// scratch registers.
9127 instruct vmask_truecount_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
9128 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
9129 match(Set dst (VectorMaskTrueCount mask));
9130 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
9131 format %{ "vector_truecount_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
9132 ins_encode %{
9133 int opcode = this->ideal_Opcode();
9134 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
9135 int mask_len = Matcher::vector_length(this, $mask);
9136 int vlen_enc = vector_length_encoding(this, $mask);
9137 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
9138 $tmp$$Register, mask_len, mbt, vlen_enc);
9139 %}
9140 ins_pipe( pipe_slow );
9141 %}
9142
// VectorMaskTrueCount matched through an intervening VectorStoreMask: the
// store-mask conversion is folded away and the raw mask vector is consumed
// directly. Encode body is identical to vmask_truecount_bool.
9143 instruct vmask_truecount_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
9144 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
9145 match(Set dst (VectorMaskTrueCount (VectorStoreMask mask size)));
9146 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
9147 format %{ "vector_truecount_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
9148 ins_encode %{
9149 int opcode = this->ideal_Opcode();
9150 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
9151 int mask_len = Matcher::vector_length(this, $mask);
9152 int vlen_enc = vector_length_encoding(this, $mask);
9153 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
9154 $tmp$$Register, mask_len, mbt, vlen_enc);
9155 %}
9156 ins_pipe( pipe_slow );
9157 %}
9158
// VectorMaskFirstTrue / VectorMaskLastTrue for opmask (k-register) inputs.
// Both ideal ops match here; vector_mask_operation dispatches on the ideal
// opcode passed in.
9159 instruct vmask_first_or_last_true_evex(rRegI dst, kReg mask, rRegL tmp, rFlagsReg cr) %{
9160 predicate(n->in(1)->bottom_type()->isa_vectmask());
9161 match(Set dst (VectorMaskFirstTrue mask));
9162 match(Set dst (VectorMaskLastTrue mask));
9163 effect(TEMP_DEF dst, TEMP tmp, KILL cr);
9164 format %{ "vector_mask_first_or_last_true_evex $dst, $mask \t! using $tmp as TEMP" %}
9165 ins_encode %{
9166 int opcode = this->ideal_Opcode();
9167 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
9168 int mask_len = Matcher::vector_length(this, $mask);
9169 int mask_size = mask_len * type2aelembytes(mbt);
9170 int vlen_enc = vector_length_encoding(this, $mask);
9171 __ vector_mask_operation(opcode, $dst$$Register, $mask$$KRegister,
9172 $tmp$$Register, mask_len, mask_size, vlen_enc);
9173 %}
9174 ins_pipe( pipe_slow );
9175 %}
9176
// VectorMaskFirstTrue / VectorMaskLastTrue for boolean-vector (XMM) inputs.
// Both ideal ops match here; vector_mask_operation dispatches on the ideal
// opcode passed in.
9177 instruct vmask_first_or_last_true_bool(rRegI dst, vec mask, rRegL tmp, vec xtmp, rFlagsReg cr) %{
9178 predicate(n->in(1)->bottom_type()->isa_vectmask() == nullptr);
9179 match(Set dst (VectorMaskFirstTrue mask));
9180 match(Set dst (VectorMaskLastTrue mask));
9181 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
9182 format %{ "vector_mask_first_or_last_true_bool $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
9183 ins_encode %{
9184 int opcode = this->ideal_Opcode();
9185 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
9186 int mask_len = Matcher::vector_length(this, $mask);
9187 int vlen_enc = vector_length_encoding(this, $mask);
9188 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
9189 $tmp$$Register, mask_len, mbt, vlen_enc);
9190 %}
9191 ins_pipe( pipe_slow );
9192 %}
9193
// VectorMaskFirstTrue / VectorMaskLastTrue matched through an intervening
// VectorStoreMask: the store-mask conversion is folded away and the raw mask
// vector is consumed directly.
9194 instruct vmask_first_or_last_true_avx(rRegI dst, vec mask, immI size, rRegL tmp, vec xtmp, rFlagsReg cr) %{
9195 predicate(n->in(1)->in(1)->bottom_type()->isa_vectmask() == nullptr);
9196 match(Set dst (VectorMaskFirstTrue (VectorStoreMask mask size)));
9197 match(Set dst (VectorMaskLastTrue (VectorStoreMask mask size)));
9198 effect(TEMP_DEF dst, TEMP tmp, TEMP xtmp, KILL cr);
9199 format %{ "vector_mask_first_or_last_true_avx $dst, $mask \t! using $tmp, $xtmp as TEMP" %}
9200 ins_encode %{
9201 int opcode = this->ideal_Opcode();
9202 BasicType mbt = Matcher::vector_element_basic_type(this, $mask);
9203 int mask_len = Matcher::vector_length(this, $mask);
9204 int vlen_enc = vector_length_encoding(this, $mask);
9205 __ vector_mask_operation(opcode, $dst$$Register, $mask$$XMMRegister, $xtmp$$XMMRegister,
9206 $tmp$$Register, mask_len, mbt, vlen_enc);
9207 %}
9208 ins_pipe( pipe_slow );
9209 %}
9210
9211 // --------------------------------- Compress/Expand Operations ---------------------------
9212
9213 instruct vcompress_expand_reg_evex(vec dst, vec src, kReg mask) %{
9214 match(Set dst (CompressV src mask));
9215 match(Set dst (ExpandV src mask));
10012 uint masklen = Matcher::vector_length(this);
10013 __ knot(masklen, $dst$$KRegister, $src$$KRegister, $ktmp$$KRegister, $rtmp$$Register);
10014 %}
10015 ins_pipe( pipe_slow );
10016 %}
10017
// Mask NOT via XorVMask with an all-ones MaskAll (cnt == -1): lowered to the
// two-operand knot form (no temps needed). The predicate limits this to
// mask lengths where a native knot of the required width exists (kD needs
// AVX512DQ for 8, kW always, kQ/kD>16 need AVX512BW).
10018 instruct mask_not_imm(kReg dst, kReg src, immI_M1 cnt) %{
10019 predicate((Matcher::vector_length(n) == 8 && VM_Version::supports_avx512dq()) ||
10020 (Matcher::vector_length(n) == 16) ||
10021 (Matcher::vector_length(n) > 16 && VM_Version::supports_avx512bw()));
10022 match(Set dst (XorVMask src (MaskAll cnt)));
10023 format %{ "mask_not $dst, $src, $cnt \t! mask not operation" %}
10024 ins_encode %{
10025 uint masklen = Matcher::vector_length(this);
10026 __ knot(masklen, $dst$$KRegister, $src$$KRegister);
10027 %}
10028 ins_pipe( pipe_slow );
10029 %}
10030
// VectorLongToMask (<= 8 lanes, non-opmask result): expand the low bits of a
// long into a boolean mask vector via
// C2_MacroAssembler::vector_long_to_maskvec.
10031 instruct long_to_maskLE8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp) %{
10032 predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) <= 8);
10033 match(Set dst (VectorLongToMask src));
10034 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp);
10035 format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp as TEMP" %}
10036 ins_encode %{
10037 int mask_len = Matcher::vector_length(this);
10038 int vec_enc = vector_length_encoding(mask_len);
// NOTE(review): xnoreg is passed for the XMM temp even though $xtmp is
// reserved via effect(TEMP xtmp) — presumably the <= 8-lane path does not
// need a vector scratch; confirm against vector_long_to_maskvec whether
// the TEMP (and the operand) could be dropped.
10039 __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
10040 $rtmp2$$Register, xnoreg, mask_len, vec_enc);
10041 %}
10042 ins_pipe( pipe_slow );
10043 %}
10044
10045
// VectorLongToMask (> 8 lanes, non-opmask result): expand the low bits of a
// long into a boolean mask vector via
// C2_MacroAssembler::vector_long_to_maskvec. Unlike the <= 8-lane variant,
// this path supplies a real XMM scratch ($xtmp1).
// Fix: removed the stray comma in the format string ("$xtmp1, as TEMP" ->
// "$xtmp1 as TEMP"); format strings only affect printed disassembly.
10046 instruct long_to_maskGT8_avx(vec dst, rRegL src, rRegL rtmp1, rRegL rtmp2, vec xtmp1, rFlagsReg cr) %{
10047 predicate(n->bottom_type()->isa_vectmask() == nullptr && Matcher::vector_length(n) > 8);
10048 match(Set dst (VectorLongToMask src));
10049 effect(TEMP dst, TEMP rtmp1, TEMP rtmp2, TEMP xtmp1, KILL cr);
10050 format %{ "long_to_mask_avx $dst, $src\t! using $rtmp1, $rtmp2, $xtmp1 as TEMP" %}
10051 ins_encode %{
10052 int mask_len = Matcher::vector_length(this);
10053 assert(mask_len <= 32, "invalid mask length");
10054 int vec_enc = vector_length_encoding(mask_len);
10055 __ vector_long_to_maskvec($dst$$XMMRegister, $src$$Register, $rtmp1$$Register,
10056 $rtmp2$$Register, $xtmp1$$XMMRegister, mask_len, vec_enc);
10057 %}
10058 ins_pipe( pipe_slow );
10059 %}
10060
10061 instruct long_to_mask_evex(kReg dst, rRegL src) %{
10062 predicate(n->bottom_type()->isa_vectmask());
10063 match(Set dst (VectorLongToMask src));
10064 format %{ "long_to_mask_evex $dst, $src\t!" %}
10065 ins_encode %{
10066 __ kmov($dst$$KRegister, $src$$Register);
10067 %}
|