      return VM_Version::supports_on_spin_wait();
    case Op_CacheWB:
    case Op_CacheWBPreSync:
    case Op_CacheWBPostSync:
      if (!VM_Version::supports_data_cache_line_flush()) {
        ret_value = false;
      }
      break;
    case Op_LoadVectorMasked:
    case Op_StoreVectorMasked:
    case Op_LoadVectorGatherMasked:
    case Op_StoreVectorScatterMasked:
    case Op_MaskAll:
    case Op_AndVMask:
    case Op_OrVMask:
    case Op_XorVMask:
      if (UseSVE == 0) {
        ret_value = false;
      }
      break;
  }

  return ret_value; // Per default match rules are supported.
}

// Identify extra cases that we might want to provide match rules for vector nodes and
// other intrinsics guarded with vector length (vlen) and element type (bt).
const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
  if (!match_rule_supported(opcode)) {
    return false;
  }
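  // Total vector width in bits for this (vlen, bt) pair. NEON registers are at
  // most 128 bits wide, so without SVE any wider vector cannot be matched.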
  int bit_size = vlen * type2aelembytes(bt) * 8;
  if (UseSVE == 0 && bit_size > 128) {
    return false;
  }
  if (UseSVE > 0) {
    return op_sve_supported(opcode, vlen, bt);
  } else { // NEON
    // Special cases
    switch (opcode) {
    case Op_VectorMaskCmp:
      if (vlen < 2 || bit_size < 64) {
        return false;
      }
      break;
    case Op_MulAddVS2VI:
      if (bit_size < 128) {
        return false;
      }
      break;
    case Op_MulVL:
      return false;
    case Op_VectorLoadShuffle:
    case Op_VectorRearrange:
      if (vlen < 4) {
        return false;
      }
      break;
    case Op_LoadVectorGather:
    case Op_StoreVectorScatter:
      return false;
    default:
      break;
    }
  }
  return vector_size_supported(bt, vlen);
}

const bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
  // Only SVE supports masked operations.
  if (UseSVE == 0) {
    return false;
  }
  return match_rule_supported(opcode) &&
         masked_op_sve_supported(opcode, vlen, bt);
}

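// Mask of the SVE predicate (P) registers available for holding vector masks.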
const RegMask* Matcher::predicate_reg_mask(void) {
  return &_PR_REG_mask;
}
%}

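// Count trailing zeros by bit-reversing the operand (rbit) and then counting
// leading zeros (clz) of the reversed value.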
instruct countTrailingZerosL(iRegINoSp dst, iRegL src) %{
  match(Set dst (CountTrailingZerosL src));

  ins_cost(INSN_COST * 2);
  format %{ "rbit $dst, $src\n\t"
            "clz $dst, $dst" %}
  ins_encode %{
    __ rbit(as_Register($dst$$reg), as_Register($src$$reg));
    __ clz(as_Register($dst$$reg), as_Register($dst$$reg));
  %}

  ins_pipe(ialu_reg);
%}

//---------- Population Count Instructions -------------------------------------
//

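// Population count is implemented with the NEON CNT instruction, which counts
// the set bits in each byte of an 8B vector; ADDV then sums the eight byte
// counts into a single lane, and the result is moved back to a general register.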
instruct popCountI(iRegINoSp dst, iRegIorL2I src, vRegF tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "movw $src, $src\n\t"
            "mov $tmp, $src\t# vector (1D)\n\t"
            "cnt $tmp, $tmp\t# vector (8B)\n\t"
            "addv $tmp, $tmp\t# vector (8B)\n\t"
            "mov $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    __ movw($src$$Register, $src$$Register); // ensure top 32 bits 0
    __ mov($tmp$$FloatRegister, __ D, 0, $src$$Register);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ D, 0);
  %}

  ins_pipe(pipe_class_default);
%}

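// Memory variant: load the 32-bit operand straight into the SIMD register with
// ldrs, avoiding the separate general-register zero-extension and transfer.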
instruct popCountI_mem(iRegINoSp dst, memory4 mem, vRegF tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "ldrs $tmp, $mem\n\t"
            "cnt $tmp, $tmp\t# vector (8B)\n\t"
            "addv $tmp, $tmp\t# vector (8B)\n\t"
            "mov $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrs, tmp_reg, $mem->opcode(),
              as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ D, 0);
  %}

  ins_pipe(pipe_class_default);
%}

// Note: Long.bitCount(long) returns an int.
instruct popCountL(iRegINoSp dst, iRegL src, vRegD tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "mov $tmp, $src\t# vector (1D)\n\t"
            "cnt $tmp, $tmp\t# vector (8B)\n\t"
            "addv $tmp, $tmp\t# vector (8B)\n\t"
            "mov $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    __ mov($tmp$$FloatRegister, __ D, 0, $src$$Register);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ D, 0);
  %}

  ins_pipe(pipe_class_default);
%}

instruct popCountL_mem(iRegINoSp dst, memory8 mem, vRegD tmp) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "ldrd $tmp, $mem\n\t"
            "cnt $tmp, $tmp\t# vector (8B)\n\t"
            "addv $tmp, $tmp\t# vector (8B)\n\t"
            "mov $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrd, tmp_reg, $mem->opcode(),
              as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 8);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ D, 0);
  %}

  ins_pipe(pipe_class_default);
%}

// ============================================================================
// MemBar Instruction

instruct load_fence() %{
  match(LoadFence);
  ins_cost(VOLATILE_REF_COST);

  format %{ "load_fence" %}

      return VM_Version::supports_on_spin_wait();
    case Op_CacheWB:
    case Op_CacheWBPreSync:
    case Op_CacheWBPostSync:
      if (!VM_Version::supports_data_cache_line_flush()) {
        ret_value = false;
      }
      break;
    case Op_LoadVectorMasked:
    case Op_StoreVectorMasked:
    case Op_LoadVectorGatherMasked:
    case Op_StoreVectorScatterMasked:
    case Op_MaskAll:
    case Op_AndVMask:
    case Op_OrVMask:
    case Op_XorVMask:
      if (UseSVE == 0) {
        ret_value = false;
      }
      break;
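    // Scalar and vector population count nodes are only matched when the
    // CNT-based sequence is enabled via UsePopCountInstruction.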
    case Op_PopCountI:
    case Op_PopCountL:
    case Op_PopCountVI:
    case Op_PopCountVL:
      if (!UsePopCountInstruction) {
        ret_value = false;
      }
      break;
  }

  return ret_value; // Per default match rules are supported.
}

// Identify extra cases that we might want to provide match rules for vector nodes and
// other intrinsics guarded with vector length (vlen) and element type (bt).
const bool Matcher::match_rule_supported_vector(int opcode, int vlen, BasicType bt) {
  if (!match_rule_supported(opcode)) {
    return false;
  }
  int bit_size = vlen * type2aelembytes(bt) * 8;
  if (UseSVE == 0 && bit_size > 128) {
    return false;
  }
  if (UseSVE > 0) {
    return op_sve_supported(opcode, vlen, bt);
  } else { // NEON
    // Special cases
    switch (opcode) {
    case Op_VectorMaskCmp:
      if (vlen < 2 || bit_size < 64) {
        return false;
      }
      break;
    case Op_MulAddVS2VI:
      if (bit_size < 128) {
        return false;
      }
      break;
    case Op_MulVL:
      return false;
    case Op_VectorLoadShuffle:
    case Op_VectorRearrange:
      if (vlen < 4) {
        return false;
      }
      break;
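    // Gather/scatter and vector compress/expand are not supported with NEON;
    // with SVE they are handled by op_sve_supported() above.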
    case Op_LoadVectorGather:
    case Op_StoreVectorScatter:
    case Op_CompressV:
    case Op_CompressM:
    case Op_ExpandV:
      return false;
    default:
      break;
    }
  }
  return vector_size_supported(bt, vlen);
}

const bool Matcher::match_rule_supported_vector_masked(int opcode, int vlen, BasicType bt) {
  // Only SVE supports masked operations.
  if (UseSVE == 0) {
    return false;
  }
  return match_rule_supported(opcode) &&
         masked_op_sve_supported(opcode, vlen, bt);
}

const RegMask* Matcher::predicate_reg_mask(void) {
  return &_PR_REG_mask;
}
%}

instruct countTrailingZerosL(iRegINoSp dst, iRegL src) %{
  match(Set dst (CountTrailingZerosL src));

  ins_cost(INSN_COST * 2);
  format %{ "rbit $dst, $src\n\t"
            "clz $dst, $dst" %}
  ins_encode %{
    __ rbit(as_Register($dst$$reg), as_Register($src$$reg));
    __ clz(as_Register($dst$$reg), as_Register($dst$$reg));
  %}

  ins_pipe(ialu_reg);
%}

//---------- Population Count Instructions -------------------------------------
//

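// Population count uses the NEON CNT/ADDV sequence. The UsePopCountInstruction
// guard is applied in Matcher::match_rule_supported, so these rules only assert
// that the flag is set when they are selected.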
instruct popCountI(iRegINoSp dst, iRegIorL2I src, vRegF tmp) %{
  match(Set dst (PopCountI src));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "movw $src, $src\n\t"
            "mov $tmp, $src\t# vector (1D)\n\t"
            "cnt $tmp, $tmp\t# vector (8B)\n\t"
            "addv $tmp, $tmp\t# vector (8B)\n\t"
            "mov $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    assert(UsePopCountInstruction, "unsupported");
    __ movw($src$$Register, $src$$Register); // ensure top 32 bits 0
    __ mov($tmp$$FloatRegister, __ D, 0, $src$$Register);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ D, 0);
  %}

  ins_pipe(pipe_class_default);
%}

instruct popCountI_mem(iRegINoSp dst, memory4 mem, vRegF tmp) %{
  match(Set dst (PopCountI (LoadI mem)));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "ldrs $tmp, $mem\n\t"
            "cnt $tmp, $tmp\t# vector (8B)\n\t"
            "addv $tmp, $tmp\t# vector (8B)\n\t"
            "mov $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    assert(UsePopCountInstruction, "unsupported");
    FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrs, tmp_reg, $mem->opcode(),
              as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 4);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ D, 0);
  %}

  ins_pipe(pipe_class_default);
%}

// Note: Long.bitCount(long) returns an int.
instruct popCountL(iRegINoSp dst, iRegL src, vRegD tmp) %{
  match(Set dst (PopCountL src));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "mov $tmp, $src\t# vector (1D)\n\t"
            "cnt $tmp, $tmp\t# vector (8B)\n\t"
            "addv $tmp, $tmp\t# vector (8B)\n\t"
            "mov $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    assert(UsePopCountInstruction, "unsupported");
    __ mov($tmp$$FloatRegister, __ D, 0, $src$$Register);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ D, 0);
  %}

  ins_pipe(pipe_class_default);
%}

instruct popCountL_mem(iRegINoSp dst, memory8 mem, vRegD tmp) %{
  match(Set dst (PopCountL (LoadL mem)));
  effect(TEMP tmp);
  ins_cost(INSN_COST * 13);

  format %{ "ldrd $tmp, $mem\n\t"
            "cnt $tmp, $tmp\t# vector (8B)\n\t"
            "addv $tmp, $tmp\t# vector (8B)\n\t"
            "mov $dst, $tmp\t# vector (1D)" %}
  ins_encode %{
    assert(UsePopCountInstruction, "unsupported");
    FloatRegister tmp_reg = as_FloatRegister($tmp$$reg);
    loadStore(C2_MacroAssembler(&cbuf), &MacroAssembler::ldrd, tmp_reg, $mem->opcode(),
              as_Register($mem$$base), $mem$$index, $mem$$scale, $mem$$disp, 8);
    __ cnt($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ addv($tmp$$FloatRegister, __ T8B, $tmp$$FloatRegister);
    __ mov($dst$$Register, $tmp$$FloatRegister, __ D, 0);
  %}

  ins_pipe(pipe_class_default);
%}

// ============================================================================
// MemBar Instruction

instruct load_fence() %{
  match(LoadFence);
  ins_cost(VOLATILE_REF_COST);

  format %{ "load_fence" %}
