< prev index next >

src/hotspot/share/opto/vectorIntrinsics.cpp

Print this page
*** 57,50 ***
  
    return true;
  }
  #endif
  
! bool LibraryCallKit::arch_supports_vector_rotate(int opc, int num_elem, BasicType elem_bt, bool has_scalar_args) {
      // Reports whether a vector rotate (opc) over num_elem lanes of elem_bt can be
      // handled: either the matcher supports the rotate itself (and, when
      // has_scalar_args is set, the replicate op for elem_bt is supported too), or,
      // failing that, the vector left shift, unsigned right shift and OrV ops are
      // all supported so the rotate can be expressed through them.
!     bool is_supported = true;
!     // has_scalar_args flag is true only for non-constant scalar shift count,
!     // since in this case shift needs to be broadcasted.
!     if (!Matcher::match_rule_supported_vector(opc, num_elem, elem_bt) ||
!          (has_scalar_args &&
!            !arch_supports_vector(VectorNode::replicate_opcode(elem_bt), num_elem, elem_bt, VecMaskNotUsed))) {
!       is_supported = false;
!     }
! 
      // Select the scalar shift opcodes for elem_bt; they are mapped to vector
      // opcodes below and only matter for the fallback check.
!     int lshiftopc, rshiftopc;
!     switch(elem_bt) {
!       case T_BYTE:
!         lshiftopc = Op_LShiftI;
!         rshiftopc = Op_URShiftB;
!         break;
!       case T_SHORT:
!         lshiftopc = Op_LShiftI;
!         rshiftopc = Op_URShiftS;
!         break;
!       case T_INT:
!         lshiftopc = Op_LShiftI;
!         rshiftopc = Op_URShiftI;
!         break;
!       case T_LONG:
!         lshiftopc = Op_LShiftL;
!         rshiftopc = Op_URShiftL;
!         break;
!       default:
          // NOTE(review): if assert() is compiled out in some build flavor, lshiftopc and
          // rshiftopc appear to be read uninitialized below for an unexpected elem_bt - confirm.
!         assert(false, "Unexpected type");
!     }
!     int lshiftvopc = VectorNode::opcode(lshiftopc, elem_bt);
!     int rshiftvopc = VectorNode::opcode(rshiftopc, elem_bt);
      // Fallback: consulted only when the direct rotate check above failed.
!     if (!is_supported &&
!         arch_supports_vector(lshiftvopc, num_elem, elem_bt, VecMaskNotUsed, has_scalar_args) &&
!         arch_supports_vector(rshiftvopc, num_elem, elem_bt, VecMaskNotUsed, has_scalar_args) &&
!         arch_supports_vector(Op_OrV, num_elem, elem_bt, VecMaskNotUsed)) {
!       is_supported = true;
!     }
!     return is_supported;
  }
  
  Node* GraphKit::box_vector(Node* vector, const TypeInstPtr* vbox_type, BasicType elem_bt, int num_elem, bool deoptimize_on_exception) {
    assert(EnableVectorSupport, "");
  
--- 57,88 ---
  
    return true;
  }
  #endif
  
! static bool is_vector_mask(ciKlass* klass) {
    // File-local helper: true when klass is a subclass of the VectorMask klass
    // obtained from the current ciEnv.
!   return klass->is_subclass_of(ciEnv::current()->vector_VectorMask_klass());
! }
! 
! static bool is_vector_shuffle(ciKlass* klass) {
    // File-local helper: true when klass is a subclass of the VectorShuffle klass
    // obtained from the current ciEnv.
!   return klass->is_subclass_of(ciEnv::current()->vector_VectorShuffle_klass());
! }
! 
! bool LibraryCallKit::arch_supports_vector_rotate(int opc, int num_elem, BasicType elem_bt,
!                                                  VectorMaskUseType mask_use_type, bool has_scalar_args) {
    // Reports whether a vector rotate (opc) over num_elem lanes of elem_bt can be
    // handled. Two routes are considered:
    //   1. Direct: the matcher supports the rotate (and the replicate op when
    //      has_scalar_args is set). On this route mask_use_type is tested as a bit
    //      set and any unsupported mask use rejects the request immediately.
    //   2. Fallback: when the direct route is unavailable, the vector left shift,
    //      unsigned right shift and OrV ops must all be supported.
!   bool is_supported = true;
! 
!   // has_scalar_args flag is true only for non-constant scalar shift count,
!   // since in this case shift needs to be broadcasted.
!   if (!Matcher::match_rule_supported_vector(opc, num_elem, elem_bt) ||
!        (has_scalar_args &&
!          !arch_supports_vector(VectorNode::replicate_opcode(elem_bt), num_elem, elem_bt, VecMaskNotUsed))) {
!     is_supported = false;
!   }
! 
!   if (is_supported) {
!     // Check whether mask unboxing is supported.
!     if ((mask_use_type & VecMaskUseLoad) != 0) {
!       if (!Matcher::match_rule_supported_vector(Op_VectorLoadMask, num_elem, elem_bt)) {
!       #ifndef PRODUCT
!         if (C->print_intrinsics()) {
!           tty->print_cr("  ** Rejected vector mask loading (%s,%s,%d) because architecture does not support it",
!                         NodeClassNames[Op_VectorLoadMask], type2name(elem_bt), num_elem);
!         }
!       #endif
!         return false;
!       }
!     }
! 
      // Check whether the predicated (masked) form of opc is supported.
!     if ((mask_use_type & VecMaskUsePred) != 0) {
!       if (!Matcher::has_predicated_vectors() ||
!           !Matcher::match_rule_supported_vector_masked(opc, num_elem, elem_bt)) {
!       #ifndef PRODUCT
!         if (C->print_intrinsics()) {
            // NOTE(review): this message has no leading "  ** ", unlike the mask-loading
            // message above - confirm whether that is intentional.
!           tty->print_cr("Rejected vector mask predicate using (%s,%s,%d) because architecture does not support it",
+                         NodeClassNames[opc], type2name(elem_bt), num_elem);
+         }
+       #endif
+         return false;
+       }
+     }
+   }
+ 
    // Select the scalar shift opcodes for elem_bt; they are mapped to vector
    // opcodes below and only matter for the fallback check.
+   int lshiftopc, rshiftopc;
+   switch(elem_bt) {
+     case T_BYTE:
+       lshiftopc = Op_LShiftI;
+       rshiftopc = Op_URShiftB;
+       break;
+     case T_SHORT:
+       lshiftopc = Op_LShiftI;
+       rshiftopc = Op_URShiftS;
+       break;
+     case T_INT:
+       lshiftopc = Op_LShiftI;
+       rshiftopc = Op_URShiftI;
+       break;
+     case T_LONG:
+       lshiftopc = Op_LShiftL;
+       rshiftopc = Op_URShiftL;
+       break;
+     default:
        // NOTE(review): if assert() is compiled out in some build flavor, lshiftopc and
        // rshiftopc appear to be read uninitialized below for an unexpected elem_bt - confirm.
+       assert(false, "Unexpected type");
+   }
+   int lshiftvopc = VectorNode::opcode(lshiftopc, elem_bt);
+   int rshiftvopc = VectorNode::opcode(rshiftopc, elem_bt);
    // Fallback: consulted only when the direct rotate check above failed.
    // NOTE(review): every check here passes VecMaskNotUsed, so mask_use_type is not
    // consulted on this route - confirm that mask support is verified elsewhere.
+   if (!is_supported &&
+       arch_supports_vector(lshiftvopc, num_elem, elem_bt, VecMaskNotUsed, has_scalar_args) &&
+       arch_supports_vector(rshiftvopc, num_elem, elem_bt, VecMaskNotUsed, has_scalar_args) &&
+       arch_supports_vector(Op_OrV, num_elem, elem_bt, VecMaskNotUsed)) {
+     is_supported = true;
+   }
+   return is_supported;
  }
  
  Node* GraphKit::box_vector(Node* vector, const TypeInstPtr* vbox_type, BasicType elem_bt, int num_elem, bool deoptimize_on_exception) {
    assert(EnableVectorSupport, "");
  

*** 113,11 ***
    set_i_o(gvn().transform( new ProjNode(alloc, TypeFunc::I_O) ));
    set_all_memory(gvn().transform( new ProjNode(alloc, TypeFunc::Memory) ));
    Node* ret = gvn().transform(new ProjNode(alloc, TypeFunc::Parms));
  
    assert(check_vbox(vbox_type), "");
!   const TypeVect* vt = TypeVect::make(elem_bt, num_elem);
    VectorBoxNode* vbox = new VectorBoxNode(C, ret, vector, vbox_type, vt);
    return gvn().transform(vbox);
  }
  
  Node* GraphKit::unbox_vector(Node* v, const TypeInstPtr* vbox_type, BasicType elem_bt, int num_elem, bool shuffle_to_vector) {
--- 151,11 ---
    set_i_o(gvn().transform( new ProjNode(alloc, TypeFunc::I_O) ));
    set_all_memory(gvn().transform( new ProjNode(alloc, TypeFunc::Memory) ));
    Node* ret = gvn().transform(new ProjNode(alloc, TypeFunc::Parms));
  
    assert(check_vbox(vbox_type), "");
!   const TypeVect* vt = TypeVect::make(elem_bt, num_elem, is_vector_mask(vbox_type->klass()));
    VectorBoxNode* vbox = new VectorBoxNode(C, ret, vector, vbox_type, vt);
    return gvn().transform(vbox);
  }
  
  Node* GraphKit::unbox_vector(Node* v, const TypeInstPtr* vbox_type, BasicType elem_bt, int num_elem, bool shuffle_to_vector) {

*** 128,11 ***
    }
    if (vbox_type_v->maybe_null()) {
      return NULL; // no nulls are allowed
    }
    assert(check_vbox(vbox_type), "");
!   const TypeVect* vt = TypeVect::make(elem_bt, num_elem);
    Node* unbox = gvn().transform(new VectorUnboxNode(C, vt, v, merged_memory(), shuffle_to_vector));
    return unbox;
  }
  
  Node* GraphKit::vector_shift_count(Node* cnt, int shift_op, BasicType bt, int num_elem) {
--- 166,11 ---
    }
    if (vbox_type_v->maybe_null()) {
      return NULL; // no nulls are allowed
    }
    assert(check_vbox(vbox_type), "");
!   const TypeVect* vt = TypeVect::make(elem_bt, num_elem, is_vector_mask(vbox_type->klass()));
    Node* unbox = gvn().transform(new VectorUnboxNode(C, vt, v, merged_memory(), shuffle_to_vector));
    return unbox;
  }
  
  Node* GraphKit::vector_shift_count(Node* cnt, int shift_op, BasicType bt, int num_elem) {

*** 153,11 ***
  #endif
      return false;
    }
  
    if (VectorNode::is_vector_rotate(sopc)) {
!     if(!arch_supports_vector_rotate(sopc, num_elem, type, has_scalar_args)) {
  #ifndef PRODUCT
        if (C->print_intrinsics()) {
          tty->print_cr("  ** Rejected vector op (%s,%s,%d) because architecture does not support variable vector shifts",
                        NodeClassNames[sopc], type2name(type), num_elem);
        }
--- 191,11 ---
  #endif
      return false;
    }
  
    if (VectorNode::is_vector_rotate(sopc)) {
!     if(!arch_supports_vector_rotate(sopc, num_elem, type, mask_use_type, has_scalar_args)) {
  #ifndef PRODUCT
        if (C->print_intrinsics()) {
          tty->print_cr("  ** Rejected vector op (%s,%s,%d) because architecture does not support variable vector shifts",
                        NodeClassNames[sopc], type2name(type), num_elem);
        }

*** 211,11 ***
      }
      return false;
    }
  
    // Check whether mask unboxing is supported.
!   if (mask_use_type == VecMaskUseAll || mask_use_type == VecMaskUseLoad) {
      if (!Matcher::match_rule_supported_vector(Op_VectorLoadMask, num_elem, type)) {
      #ifndef PRODUCT
        if (C->print_intrinsics()) {
          tty->print_cr("  ** Rejected vector mask loading (%s,%s,%d) because architecture does not support it",
                        NodeClassNames[Op_VectorLoadMask], type2name(type), num_elem);
--- 249,11 ---
      }
      return false;
    }
  
    // Check whether mask unboxing is supported.
!   if ((mask_use_type & VecMaskUseLoad) != 0) {
      if (!Matcher::match_rule_supported_vector(Op_VectorLoadMask, num_elem, type)) {
      #ifndef PRODUCT
        if (C->print_intrinsics()) {
          tty->print_cr("  ** Rejected vector mask loading (%s,%s,%d) because architecture does not support it",
                        NodeClassNames[Op_VectorLoadMask], type2name(type), num_elem);

*** 224,11 ***
        return false;
      }
    }
  
    // Check whether mask boxing is supported.
!   if (mask_use_type == VecMaskUseAll || mask_use_type == VecMaskUseStore) {
      if (!Matcher::match_rule_supported_vector(Op_VectorStoreMask, num_elem, type)) {
      #ifndef PRODUCT
        if (C->print_intrinsics()) {
          tty->print_cr("Rejected vector mask storing (%s,%s,%d) because architecture does not support it",
                        NodeClassNames[Op_VectorStoreMask], type2name(type), num_elem);
--- 262,11 ---
        return false;
      }
    }
  
    // Check whether mask boxing is supported.
!   if ((mask_use_type & VecMaskUseStore) != 0) {
      if (!Matcher::match_rule_supported_vector(Op_VectorStoreMask, num_elem, type)) {
      #ifndef PRODUCT
        if (C->print_intrinsics()) {
          tty->print_cr("Rejected vector mask storing (%s,%s,%d) because architecture does not support it",
                        NodeClassNames[Op_VectorStoreMask], type2name(type), num_elem);

*** 236,19 ***
      #endif
        return false;
      }
    }
  
!   return true;
! }
! 
! static bool is_vector_mask(ciKlass* klass) {
    // File-local helper: true when klass is a subclass of the VectorMask klass
    // obtained from the current ciEnv.
!   return klass->is_subclass_of(ciEnv::current()->vector_VectorMask_klass());
! }
  
! static bool is_vector_shuffle(ciKlass* klass) {
    // File-local helper: true when klass is a subclass of the VectorShuffle klass
    // obtained from the current ciEnv.
-   return klass->is_subclass_of(ciEnv::current()->vector_VectorShuffle_klass());
  }
  
  static bool is_klass_initialized(const TypeInstPtr* vec_klass) {
    if (vec_klass->const_oop() == NULL) {
      return false; // uninitialized or some kind of unsafe access
--- 274,24 ---
      #endif
        return false;
      }
    }
  
!   if ((mask_use_type & VecMaskUsePred) != 0) {
!     if (!Matcher::has_predicated_vectors() ||
!         !Matcher::match_rule_supported_vector_masked(sopc, num_elem, type)) {
!     #ifndef PRODUCT
!       if (C->print_intrinsics()) {
!         tty->print_cr("Rejected vector mask predicate using (%s,%s,%d) because architecture does not support it",
+                       NodeClassNames[sopc], type2name(type), num_elem);
+       }
+     #endif
+       return false;
+     }
+   }
  
!   return true;
  }
  
  static bool is_klass_initialized(const TypeInstPtr* vec_klass) {
    if (vec_klass->const_oop() == NULL) {
      return false; // uninitialized or some kind of unsafe access

*** 257,44 ***
    ciInstanceKlass* klass =  vec_klass->const_oop()->as_instance()->java_lang_Class_klass()->as_instance_klass();
    return klass->is_initialized();
  }
  
  // public static
! // <VM>
! // VM unaryOp(int oprId, Class<? extends VM> vmClass, Class<?> elementType, int length,
! //            VM vm,
! //            Function<VM, VM> defaultImpl) {
  //
  // public static
! // <VM>
! // VM binaryOp(int oprId, Class<? extends VM> vmClass, Class<?> elementType, int length,
! //             VM vm1, VM vm2,
! //             BiFunction<VM, VM, VM> defaultImpl) {
  //
  // public static
! // <VM>
! // VM ternaryOp(int oprId, Class<? extends VM> vmClass, Class<?> elementType, int length,
! //              VM vm1, VM vm2, VM vm3,
! //              TernaryOperation<VM> defaultImpl) {
  //
  bool LibraryCallKit::inline_vector_nary_operation(int n) {
    const TypeInt*     opr          = gvn().type(argument(0))->isa_int();
    const TypeInstPtr* vector_klass = gvn().type(argument(1))->isa_instptr();
!   const TypeInstPtr* elem_klass   = gvn().type(argument(2))->isa_instptr();
!   const TypeInt*     vlen         = gvn().type(argument(3))->isa_int();
  
    if (opr == NULL || vector_klass == NULL || elem_klass == NULL || vlen == NULL ||
        !opr->is_con() || vector_klass->const_oop() == NULL || elem_klass->const_oop() == NULL || !vlen->is_con()) {
      if (C->print_intrinsics()) {
        tty->print_cr("  ** missing constant: opr=%s vclass=%s etype=%s vlen=%s",
                      NodeClassNames[argument(0)->Opcode()],
                      NodeClassNames[argument(1)->Opcode()],
!                     NodeClassNames[argument(2)->Opcode()],
!                     NodeClassNames[argument(3)->Opcode()]);
      }
      return false; // not enough info for intrinsification
    }
    ciType* elem_type = elem_klass->const_oop()->as_instance()->java_mirror_type();
    if (!elem_type->is_primitive_type()) {
      if (C->print_intrinsics()) {
        tty->print_cr("  ** not a primitive bt=%d", elem_type->basic_type());
      }
--- 300,52 ---
    ciInstanceKlass* klass =  vec_klass->const_oop()->as_instance()->java_lang_Class_klass()->as_instance_klass();
    return klass->is_initialized();
  }
  
  // public static
! // <V extends Vector<E>,
! //  M extends VectorMask<E>,
! //  E>
! // V unaryOp(int oprId, Class<? extends V> vmClass, Class<? extends M> maskClass, Class<E> elementType,
+ //           int length, V v, M m,
+ //           UnaryOperation<V, M> defaultImpl)
  //
  // public static
! // <V,
! //  M extends VectorMask<E>,
! //  E>
! // V binaryOp(int oprId, Class<? extends V> vmClass, Class<? extends M> maskClass, Class<E> elementType,
+ //            int length, V v1, V v2, M m,
+ //            BinaryOperation<V, M> defaultImpl)
  //
  // public static
! // <V extends Vector<E>,
! //  M extends VectorMask<E>,
! //  E>
! // V ternaryOp(int oprId, Class<? extends V> vmClass, Class<? extends M> maskClass, Class<E> elementType,
+ //             int length, V v1, V v2, V v3, M m,
+ //             TernaryOperation<V, M> defaultImpl)
  //
  bool LibraryCallKit::inline_vector_nary_operation(int n) {
    const TypeInt*     opr          = gvn().type(argument(0))->isa_int();
    const TypeInstPtr* vector_klass = gvn().type(argument(1))->isa_instptr();
!   const TypeInstPtr* mask_klass   = gvn().type(argument(2))->isa_instptr();
!   const TypeInstPtr* elem_klass   = gvn().type(argument(3))->isa_instptr();
+   const TypeInt*     vlen         = gvn().type(argument(4))->isa_int();
  
    if (opr == NULL || vector_klass == NULL || elem_klass == NULL || vlen == NULL ||
        !opr->is_con() || vector_klass->const_oop() == NULL || elem_klass->const_oop() == NULL || !vlen->is_con()) {
      if (C->print_intrinsics()) {
        tty->print_cr("  ** missing constant: opr=%s vclass=%s etype=%s vlen=%s",
                      NodeClassNames[argument(0)->Opcode()],
                      NodeClassNames[argument(1)->Opcode()],
!                     NodeClassNames[argument(3)->Opcode()],
!                     NodeClassNames[argument(4)->Opcode()]);
      }
      return false; // not enough info for intrinsification
    }
+ 
    ciType* elem_type = elem_klass->const_oop()->as_instance()->java_mirror_type();
    if (!elem_type->is_primitive_type()) {
      if (C->print_intrinsics()) {
        tty->print_cr("  ** not a primitive bt=%d", elem_type->basic_type());
      }

*** 304,10 ***
--- 355,38 ---
      if (C->print_intrinsics()) {
        tty->print_cr("  ** klass argument not initialized");
      }
      return false;
    }
+ 
+   // "argument(n + 5)" should be the mask object. We assume it is "null" when no mask
+   // is used to control this operation.
+   const Type* vmask_type = gvn().type(argument(n + 5));
+   bool is_masked_op = vmask_type != TypePtr::NULL_PTR;
+   if (is_masked_op) {
+     if (mask_klass == NULL || mask_klass->const_oop() == NULL) {
+       if (C->print_intrinsics()) {
+         tty->print_cr("  ** missing constant: maskclass=%s", NodeClassNames[argument(2)->Opcode()]);
+       }
+       return false; // not enough info for intrinsification
+     }
+ 
+     if (!is_klass_initialized(mask_klass)) {
+       if (C->print_intrinsics()) {
+         tty->print_cr("  ** mask klass argument not initialized");
+       }
+       return false;
+     }
+ 
+     if (vmask_type->maybe_null()) {
+       if (C->print_intrinsics()) {
+         tty->print_cr("  ** null mask values are not allowed for masked op");
+       }
+       return false;
+     }
+   }
+ 
    BasicType elem_bt = elem_type->basic_type();
    int num_elem = vlen->get_con();
    int opc = VectorSupport::vop2ideal(opr->get_con(), elem_bt);
    int sopc = VectorNode::opcode(opc, elem_bt);
    if ((opc != Op_CallLeafVector) && (sopc == 0)) {

*** 326,10 ***
--- 405,14 ---
      }
    }
    ciKlass* vbox_klass = vector_klass->const_oop()->as_instance()->java_lang_Class_klass();
    const TypeInstPtr* vbox_type = TypeInstPtr::make_exact(TypePtr::NotNull, vbox_klass);
  
+   if (is_vector_mask(vbox_klass)) {
+     assert(!is_masked_op, "mask operations do not need mask to control");
+   }
+ 
    if (opc == Op_CallLeafVector) {
      if (!UseVectorStubs) {
        if (C->print_intrinsics()) {
          tty->print_cr("  ** vector stubs support is disabled");
        }

*** 348,59 ***
        }
        return false;
      }
    }
  
!   // TODO When mask usage is supported, VecMaskNotUsed needs to be VecMaskUseLoad.
!   if ((sopc != 0) &&
!       !arch_supports_vector(sopc, num_elem, elem_bt, is_vector_mask(vbox_klass) ? VecMaskUseAll : VecMaskNotUsed)) {
      if (C->print_intrinsics()) {
!       tty->print_cr("  ** not supported: arity=%d opc=%d vlen=%d etype=%s ismask=%d",
                      n, sopc, num_elem, type2name(elem_bt),
!                     is_vector_mask(vbox_klass) ? 1 : 0);
      }
      return false; // not supported
    }
  
    Node* opd1 = NULL; Node* opd2 = NULL; Node* opd3 = NULL;
    switch (n) {
      case 3: {
!       opd3 = unbox_vector(argument(6), vbox_type, elem_bt, num_elem);
        if (opd3 == NULL) {
          if (C->print_intrinsics()) {
            tty->print_cr("  ** unbox failed v3=%s",
!                         NodeClassNames[argument(6)->Opcode()]);
          }
          return false;
        }
        // fall-through
      }
      case 2: {
!       opd2 = unbox_vector(argument(5), vbox_type, elem_bt, num_elem);
        if (opd2 == NULL) {
          if (C->print_intrinsics()) {
            tty->print_cr("  ** unbox failed v2=%s",
!                         NodeClassNames[argument(5)->Opcode()]);
          }
          return false;
        }
        // fall-through
      }
      case 1: {
!       opd1 = unbox_vector(argument(4), vbox_type, elem_bt, num_elem);
        if (opd1 == NULL) {
          if (C->print_intrinsics()) {
            tty->print_cr("  ** unbox failed v1=%s",
!                         NodeClassNames[argument(4)->Opcode()]);
          }
          return false;
        }
        break;
      }
      default: fatal("unsupported arity: %d", n);
    }
  
    Node* operation = NULL;
    if (opc == Op_CallLeafVector) {
      assert(UseVectorStubs, "sanity");
      operation = gen_call_to_svml(opr->get_con(), elem_bt, num_elem, opd1, opd2);
      if (operation == NULL) {
--- 431,85 ---
        }
        return false;
      }
    }
  
!   // When using mask, mask use type needs to be VecMaskUseLoad.
!   VectorMaskUseType mask_use_type = is_vector_mask(vbox_klass) ? VecMaskUseAll
!                                       : is_masked_op ? VecMaskUseLoad : VecMaskNotUsed;
+   if ((sopc != 0) && !arch_supports_vector(sopc, num_elem, elem_bt, mask_use_type)) {
      if (C->print_intrinsics()) {
!       tty->print_cr("  ** not supported: arity=%d opc=%d vlen=%d etype=%s ismask=%d is_masked_op=%d",
                      n, sopc, num_elem, type2name(elem_bt),
!                     is_vector_mask(vbox_klass) ? 1 : 0, is_masked_op ? 1 : 0);
      }
      return false; // not supported
    }
  
+   // Return true if current platform has implemented the masked operation with predicate feature.
+   bool use_predicate = is_masked_op && sopc != 0 && arch_supports_vector(sopc, num_elem, elem_bt, VecMaskUsePred);
+   if (is_masked_op && !use_predicate && !arch_supports_vector(Op_VectorBlend, num_elem, elem_bt, VecMaskUseLoad)) {
+     if (C->print_intrinsics()) {
+       tty->print_cr("  ** not supported: arity=%d opc=%d vlen=%d etype=%s ismask=0 is_masked_op=1",
+                     n, sopc, num_elem, type2name(elem_bt));
+     }
+     return false;
+   }
+ 
    Node* opd1 = NULL; Node* opd2 = NULL; Node* opd3 = NULL;
    switch (n) {
      case 3: {
!       opd3 = unbox_vector(argument(7), vbox_type, elem_bt, num_elem);
        if (opd3 == NULL) {
          if (C->print_intrinsics()) {
            tty->print_cr("  ** unbox failed v3=%s",
!                         NodeClassNames[argument(7)->Opcode()]);
          }
          return false;
        }
        // fall-through
      }
      case 2: {
!       opd2 = unbox_vector(argument(6), vbox_type, elem_bt, num_elem);
        if (opd2 == NULL) {
          if (C->print_intrinsics()) {
            tty->print_cr("  ** unbox failed v2=%s",
!                         NodeClassNames[argument(6)->Opcode()]);
          }
          return false;
        }
        // fall-through
      }
      case 1: {
!       opd1 = unbox_vector(argument(5), vbox_type, elem_bt, num_elem);
        if (opd1 == NULL) {
          if (C->print_intrinsics()) {
            tty->print_cr("  ** unbox failed v1=%s",
!                         NodeClassNames[argument(5)->Opcode()]);
          }
          return false;
        }
        break;
      }
      default: fatal("unsupported arity: %d", n);
    }
  
+   Node* mask = NULL;
+   if (is_masked_op) {
+     ciKlass* mbox_klass = mask_klass->const_oop()->as_instance()->java_lang_Class_klass();
+     assert(is_vector_mask(mbox_klass), "argument(2) should be a mask class");
+     const TypeInstPtr* mbox_type = TypeInstPtr::make_exact(TypePtr::NotNull, mbox_klass);
+     mask = unbox_vector(argument(n + 5), mbox_type, elem_bt, num_elem);
+     if (mask == NULL) {
+       if (C->print_intrinsics()) {
+         tty->print_cr("  ** unbox failed mask=%s",
+                       NodeClassNames[argument(n + 5)->Opcode()]);
+       }
+       return false;
+     }
+   }
+ 
    Node* operation = NULL;
    if (opc == Op_CallLeafVector) {
      assert(UseVectorStubs, "sanity");
      operation = gen_call_to_svml(opr->get_con(), elem_bt, num_elem, opd1, opd2);
      if (operation == NULL) {

*** 411,33 ***
                           num_elem * type2aelembytes(elem_bt));
        }
        return false;
       }
    } else {
!     const TypeVect* vt = TypeVect::make(elem_bt, num_elem);
      switch (n) {
        case 1:
        case 2: {
!         operation = gvn().transform(VectorNode::make(sopc, opd1, opd2, vt));
          break;
        }
        case 3: {
!         operation = gvn().transform(VectorNode::make(sopc, opd1, opd2, opd3, vt));
          break;
        }
        default: fatal("unsupported arity: %d", n);
      }
    }
    // Wrap it up in VectorBox to keep object type information.
    Node* vbox = box_vector(operation, vbox_type, elem_bt, num_elem);
    set_result(vbox);
    C->set_max_vector_size(MAX2(C->max_vector_size(), (uint)(num_elem * type2aelembytes(elem_bt))));
    return true;
  }
  
  // <Sh extends VectorShuffle<E>,  E>
! //  Sh ShuffleIota(Class<?> E, Class<?> ShuffleClass, Vector.Species<E> s, int length,
  //                  int start, int step, int wrap, ShuffleIotaOperation<Sh, E> defaultImpl)
  bool LibraryCallKit::inline_vector_shuffle_iota() {
    const TypeInstPtr* shuffle_klass = gvn().type(argument(1))->isa_instptr();
    const TypeInt*     vlen          = gvn().type(argument(3))->isa_int();
    const TypeInt*     start_val     = gvn().type(argument(4))->isa_int();
--- 520,45 ---
                           num_elem * type2aelembytes(elem_bt));
        }
        return false;
       }
    } else {
!     const TypeVect* vt = TypeVect::make(elem_bt, num_elem, is_vector_mask(vbox_klass));
      switch (n) {
        case 1:
        case 2: {
!         operation = VectorNode::make(sopc, opd1, opd2, vt, is_vector_mask(vbox_klass));
          break;
        }
        case 3: {
!         operation = VectorNode::make(sopc, opd1, opd2, opd3, vt);
          break;
        }
        default: fatal("unsupported arity: %d", n);
      }
    }
+ 
+   if (is_masked_op && mask != NULL) {
+     if (use_predicate) {
+       operation->add_req(mask);
+       operation->add_flag(Node::Flag_is_predicated_vector);
+     } else {
+       operation = gvn().transform(operation);
+       operation = new VectorBlendNode(opd1, operation, mask);
+     }
+   }
+   operation = gvn().transform(operation);
+ 
    // Wrap it up in VectorBox to keep object type information.
    Node* vbox = box_vector(operation, vbox_type, elem_bt, num_elem);
    set_result(vbox);
    C->set_max_vector_size(MAX2(C->max_vector_size(), (uint)(num_elem * type2aelembytes(elem_bt))));
    return true;
  }
  
  // <Sh extends VectorShuffle<E>,  E>
! //  Sh ShuffleIota(Class<?> E, Class<?> shuffleClass, Vector.Species<E> s, int length,
  //                  int start, int step, int wrap, ShuffleIotaOperation<Sh, E> defaultImpl)
  bool LibraryCallKit::inline_vector_shuffle_iota() {
    const TypeInstPtr* shuffle_klass = gvn().type(argument(1))->isa_instptr();
    const TypeInt*     vlen          = gvn().type(argument(3))->isa_int();
    const TypeInt*     start_val     = gvn().type(argument(4))->isa_int();

*** 507,14 ***
    Node * bcast_mod  = gvn().transform(VectorNode::scalar2vector(mod_val, num_elem, type_bt));
    if(do_wrap)  {
      // Wrap the indices greater than lane count.
      res = gvn().transform(VectorNode::make(Op_AndI, res, bcast_mod, num_elem, elem_bt));
    } else {
!     ConINode* pred_node = (ConINode*)gvn().makecon(TypeInt::make(1));
      Node * lane_cnt  = gvn().makecon(TypeInt::make(num_elem));
      Node * bcast_lane_cnt = gvn().transform(VectorNode::scalar2vector(lane_cnt, num_elem, type_bt));
!     Node* mask = gvn().transform(new VectorMaskCmpNode(BoolTest::ge, bcast_lane_cnt, res, pred_node, vt));
  
      // Make the indices greater than lane count as -ve values. This matches the java side implementation.
      res = gvn().transform(VectorNode::make(Op_AndI, res, bcast_mod, num_elem, elem_bt));
      Node * biased_val = gvn().transform(VectorNode::make(Op_SubI, res, bcast_lane_cnt, num_elem, elem_bt));
      res = gvn().transform(new VectorBlendNode(biased_val, res, mask));
--- 628,15 ---
    Node * bcast_mod  = gvn().transform(VectorNode::scalar2vector(mod_val, num_elem, type_bt));
    if(do_wrap)  {
      // Wrap the indices greater than lane count.
      res = gvn().transform(VectorNode::make(Op_AndI, res, bcast_mod, num_elem, elem_bt));
    } else {
!     ConINode* pred_node = (ConINode*)gvn().makecon(TypeInt::make(BoolTest::ge));
      Node * lane_cnt  = gvn().makecon(TypeInt::make(num_elem));
      Node * bcast_lane_cnt = gvn().transform(VectorNode::scalar2vector(lane_cnt, num_elem, type_bt));
!     const TypeVect* vmask_type = TypeVect::makemask(elem_bt, num_elem);
+     Node* mask = gvn().transform(new VectorMaskCmpNode(BoolTest::ge, bcast_lane_cnt, res, pred_node, vmask_type));
  
      // Make the indices greater than lane count as -ve values. This matches the java side implementation.
      res = gvn().transform(VectorNode::make(Op_AndI, res, bcast_mod, num_elem, elem_bt));
      Node * biased_val = gvn().transform(VectorNode::make(Op_SubI, res, bcast_lane_cnt, num_elem, elem_bt));
      res = gvn().transform(new VectorBlendNode(biased_val, res, mask));

*** 529,11 ***
    C->set_max_vector_size(MAX2(C->max_vector_size(), (uint)(num_elem * type2aelembytes(elem_bt))));
    return true;
  }
  
  // <E, M>
! // int maskReductionCoerced(int oper, Class<? extends M> maskClass, Class<?> elemClass,
  //                          int length, M m, VectorMaskOp<M> defaultImpl)
  bool LibraryCallKit::inline_vector_mask_operation() {
    const TypeInt*     oper       = gvn().type(argument(0))->isa_int();
    const TypeInstPtr* mask_klass = gvn().type(argument(1))->isa_instptr();
    const TypeInstPtr* elem_klass = gvn().type(argument(2))->isa_instptr();
--- 651,11 ---
    C->set_max_vector_size(MAX2(C->max_vector_size(), (uint)(num_elem * type2aelembytes(elem_bt))));
    return true;
  }
  
  // <E, M>
! // long maskReductionCoerced(int oper, Class<? extends M> maskClass, Class<?> elemClass,
  //                          int length, M m, VectorMaskOp<M> defaultImpl)
  bool LibraryCallKit::inline_vector_mask_operation() {
    const TypeInt*     oper       = gvn().type(argument(0))->isa_int();
    const TypeInstPtr* mask_klass = gvn().type(argument(1))->isa_instptr();
    const TypeInstPtr* elem_klass = gvn().type(argument(2))->isa_instptr();

*** 574,21 ***
  
    const Type* elem_ty = Type::get_const_basic_type(elem_bt);
    ciKlass* mbox_klass = mask_klass->const_oop()->as_instance()->java_lang_Class_klass();
    const TypeInstPtr* mask_box_type = TypeInstPtr::make_exact(TypePtr::NotNull, mbox_klass);
    Node* mask_vec = unbox_vector(mask, mask_box_type, elem_bt, num_elem, true);
!   Node* store_mask = gvn().transform(VectorStoreMaskNode::make(gvn(), mask_vec, elem_bt, num_elem));
!   Node* maskoper = gvn().transform(VectorMaskOpNode::make(store_mask, TypeInt::INT, mopc));
    set_result(maskoper);
  
    C->set_max_vector_size(MAX2(C->max_vector_size(), (uint)(num_elem * type2aelembytes(elem_bt))));
    return true;
  }
  
! // <VM ,Sh extends VectorShuffle<E>, E>
! // VM shuffleToVector(Class<VM> VecClass, Class<?>E , Class<?> ShuffleClass, Sh s, int length,
! //                    ShuffleToVectorOperation<VM,Sh,E> defaultImpl)
  bool LibraryCallKit::inline_vector_shuffle_to_vector() {
    const TypeInstPtr* vector_klass  = gvn().type(argument(0))->isa_instptr();
    const TypeInstPtr* elem_klass    = gvn().type(argument(1))->isa_instptr();
    const TypeInstPtr* shuffle_klass = gvn().type(argument(2))->isa_instptr();
    Node*              shuffle       = argument(3);
--- 696,31 ---
  
    const Type* elem_ty = Type::get_const_basic_type(elem_bt);
    ciKlass* mbox_klass = mask_klass->const_oop()->as_instance()->java_lang_Class_klass();
    const TypeInstPtr* mask_box_type = TypeInstPtr::make_exact(TypePtr::NotNull, mbox_klass);
    Node* mask_vec = unbox_vector(mask, mask_box_type, elem_bt, num_elem, true);
!   if (mask_vec->bottom_type()->isa_vectmask() == NULL) {
!     mask_vec = gvn().transform(VectorStoreMaskNode::make(gvn(), mask_vec, elem_bt, num_elem));
+   }
+   const Type* maskoper_ty = mopc == Op_VectorMaskToLong ? (const Type*)TypeLong::LONG : (const Type*)TypeInt::INT;
+   Node* maskoper = gvn().transform(VectorMaskOpNode::make(mask_vec, maskoper_ty, mopc));
+   if (mopc != Op_VectorMaskToLong) {
+     maskoper = ConvI2L(maskoper);
+   }
    set_result(maskoper);
  
    C->set_max_vector_size(MAX2(C->max_vector_size(), (uint)(num_elem * type2aelembytes(elem_bt))));
    return true;
  }
  
! // public static
! // <V,
! //  Sh extends VectorShuffle<E>,
+ //  E>
+ // V shuffleToVector(Class<? extends Vector<E>> vclass, Class<E> elementType,
+ //                   Class<? extends Sh> shuffleClass, Sh s, int length,
+ //                   ShuffleToVectorOperation<V, Sh, E> defaultImpl)
  bool LibraryCallKit::inline_vector_shuffle_to_vector() {
    const TypeInstPtr* vector_klass  = gvn().type(argument(0))->isa_instptr();
    const TypeInstPtr* elem_klass    = gvn().type(argument(1))->isa_instptr();
    const TypeInstPtr* shuffle_klass = gvn().type(argument(2))->isa_instptr();
    Node*              shuffle       = argument(3);

*** 643,14 ***
    set_result(res);
    C->set_max_vector_size(MAX2(C->max_vector_size(), (uint)(num_elem * type2aelembytes(elem_bt))));
    return true;
  }
  
! // <V extends Vector<?,?>>
! // V broadcastCoerced(Class<?> vectorClass, Class<?> elementType, int vlen,
! //                    long bits,
! //                    LongFunction<V> defaultImpl)
  bool LibraryCallKit::inline_vector_broadcast_coerced() {
    const TypeInstPtr* vector_klass = gvn().type(argument(0))->isa_instptr();
    const TypeInstPtr* elem_klass   = gvn().type(argument(1))->isa_instptr();
    const TypeInt*     vlen         = gvn().type(argument(2))->isa_int();
  
--- 775,17 ---
    set_result(res);
    C->set_max_vector_size(MAX2(C->max_vector_size(), (uint)(num_elem * type2aelembytes(elem_bt))));
    return true;
  }
  
! // public static
! // <M,
! //  S extends VectorSpecies<E>,
! //  E>
+ // M broadcastCoerced(Class<? extends M> vmClass, Class<E> elementType, int length,
+ //                    long bits, S s,
+ //                    BroadcastOperation<M, E, S> defaultImpl)
  bool LibraryCallKit::inline_vector_broadcast_coerced() {
    const TypeInstPtr* vector_klass = gvn().type(argument(0))->isa_instptr();
    const TypeInstPtr* elem_klass   = gvn().type(argument(1))->isa_instptr();
    const TypeInt*     vlen         = gvn().type(argument(2))->isa_int();
  

*** 693,11 ***
      }
      return false; // not supported
    }
  
    Node* bits = argument(3); // long
- 
    Node* elem = NULL;
    switch (elem_bt) {
      case T_BOOLEAN: // fall-through
      case T_BYTE:    // fall-through
      case T_SHORT:   // fall-through
--- 828,10 ---

*** 720,11 ***
        break;
      }
      default: fatal("%s", type2name(elem_bt));
    }
  
!   Node* broadcast = VectorNode::scalar2vector(elem, num_elem, Type::get_const_basic_type(elem_bt));
    broadcast = gvn().transform(broadcast);
  
    Node* box = box_vector(broadcast, vbox_type, elem_bt, num_elem);
    set_result(box);
    C->set_max_vector_size(MAX2(C->max_vector_size(), (uint)(num_elem * type2aelembytes(elem_bt))));
--- 854,11 ---
        break;
      }
      default: fatal("%s", type2name(elem_bt));
    }
  
!   Node* broadcast = VectorNode::scalar2vector(elem, num_elem, Type::get_const_basic_type(elem_bt), is_vector_mask(vbox_klass));
    broadcast = gvn().transform(broadcast);
  
    Node* box = box_vector(broadcast, vbox_type, elem_bt, num_elem);
    set_result(box);
    C->set_max_vector_size(MAX2(C->max_vector_size(), (uint)(num_elem * type2aelembytes(elem_bt))));

*** 745,23 ***
    } else {
      return false;
    }
  }
  
! //    <C, V extends Vector<?,?>>
! //    V load(Class<?> vectorClass, Class<?> elementType, int vlen,
! //           Object base, long offset,
! //           /* Vector.Mask<E,S> m*/
! //           Object container, int index,
! //           LoadOperation<C, VM> defaultImpl) {
  //
! //    <C, V extends Vector<?,?>>
! //    void store(Class<?> vectorClass, Class<?> elementType, int vlen,
! //               Object base, long offset,
! //               V v, /*Vector.Mask<E,S> m*/
! //               Object container, int index,
! //               StoreVectorOperation<C, V> defaultImpl) {
  
  bool LibraryCallKit::inline_vector_mem_operation(bool is_store) {
    const TypeInstPtr* vector_klass = gvn().type(argument(0))->isa_instptr();
    const TypeInstPtr* elem_klass   = gvn().type(argument(1))->isa_instptr();
    const TypeInt*     vlen         = gvn().type(argument(2))->isa_int();
--- 879,28 ---
    } else {
      return false;
    }
  }
  
! // public static
! // <C,
! //  VM,
! //  E,
! //  S extends VectorSpecies<E>>
! // VM load(Class<? extends VM> vmClass, Class<E> elementType, int length,
+ //         Object base, long offset,    // Unsafe addressing
+ //         C container, int index, S s,     // Arguments for default implementation
+ //         LoadOperation<C, VM, E, S> defaultImpl)
  //
! // public static
! // <C,
! //  V extends Vector<?>>
! // void store(Class<?> vectorClass, Class<?> elementType, int length,
! //            Object base, long offset,    // Unsafe addressing
! //            V v,
+ //            C container, int index,      // Arguments for default implementation
+ //            StoreVectorOperation<C, V> defaultImpl)
  
  bool LibraryCallKit::inline_vector_mem_operation(bool is_store) {
    const TypeInstPtr* vector_klass = gvn().type(argument(0))->isa_instptr();
    const TypeInstPtr* elem_klass   = gvn().type(argument(1))->isa_instptr();
    const TypeInt*     vlen         = gvn().type(argument(2))->isa_int();

*** 812,16 ***
    // Save state and restore on bailout
    uint old_sp = sp();
    SafePointNode* old_map = clone_map();
  
    Node* addr = make_unsafe_address(base, offset, (is_mask ? T_BOOLEAN : elem_bt), true);
!   // Can base be NULL? Otherwise, always on-heap access.
!   bool can_access_non_heap = TypePtr::NULL_PTR->higher_equal(gvn().type(base));
  
    const TypePtr *addr_type = gvn().type(addr)->isa_ptr();
    const TypeAryPtr* arr_type = addr_type->isa_aryptr();
  
    // Now handle special case where load/store happens from/to byte array but element type is not byte.
    bool using_byte_array = arr_type != NULL && arr_type->elem()->array_element_basic_type() == T_BYTE && elem_bt != T_BYTE;
    // Handle loading masks.
    // If there is no consistency between array and vector element types, it must be special byte array case or loading masks
    if (arr_type != NULL && !using_byte_array && !is_mask && !elem_consistent_with_arr(elem_bt, arr_type)) {
--- 951,27 ---
    // Save state and restore on bailout
    uint old_sp = sp();
    SafePointNode* old_map = clone_map();
  
    Node* addr = make_unsafe_address(base, offset, (is_mask ? T_BOOLEAN : elem_bt), true);
! 
!   // The memory barrier checks are based on ones for unsafe access.
+   // This is not a 1-1 implementation.
+   const Type *const base_type = gvn().type(base);
  
    const TypePtr *addr_type = gvn().type(addr)->isa_ptr();
    const TypeAryPtr* arr_type = addr_type->isa_aryptr();
  
+   const bool in_native = TypePtr::NULL_PTR == base_type; // base always null
+   const bool in_heap   = !TypePtr::NULL_PTR->higher_equal(base_type); // base never null
+ 
+   const bool is_mixed_access = !in_heap && !in_native;
+ 
+   const bool is_mismatched_access = in_heap && (addr_type->isa_aryptr() == NULL);
+ 
+   const bool needs_cpu_membar = is_mixed_access || is_mismatched_access;
+ 
    // Now handle special case where load/store happens from/to byte array but element type is not byte.
    bool using_byte_array = arr_type != NULL && arr_type->elem()->array_element_basic_type() == T_BYTE && elem_bt != T_BYTE;
    // Handle loading masks.
    // If there is no consistency between array and vector element types, it must be special byte array case or loading masks
    if (arr_type != NULL && !using_byte_array && !is_mask && !elem_consistent_with_arr(elem_bt, arr_type)) {

*** 875,11 ***
      }
    }
  
    const TypeInstPtr* vbox_type = TypeInstPtr::make_exact(TypePtr::NotNull, vbox_klass);
  
!   if (can_access_non_heap) {
      insert_mem_bar(Op_MemBarCPUOrder);
    }
  
    if (is_store) {
      Node* val = unbox_vector(argument(6), vbox_type, elem_bt, num_elem);
--- 1025,11 ---
      }
    }
  
    const TypeInstPtr* vbox_type = TypeInstPtr::make_exact(TypePtr::NotNull, vbox_klass);
  
!   if (needs_cpu_membar) {
      insert_mem_bar(Op_MemBarCPUOrder);
    }
  
    if (is_store) {
      Node* val = unbox_vector(argument(6), vbox_type, elem_bt, num_elem);

*** 910,99 ***
        vload = gvn().transform(new VectorReinterpretNode(vload, vload->bottom_type()->is_vect(), to_vect_type));
      } else {
        // Special handle for masks
        if (is_mask) {
          vload = gvn().transform(LoadVectorNode::make(0, control(), memory(addr), addr, addr_type, num_elem, T_BOOLEAN));
!         const TypeVect* to_vect_type = TypeVect::make(elem_bt, num_elem);
-         vload = gvn().transform(new VectorLoadMaskNode(vload, to_vect_type));
        } else {
          vload = gvn().transform(LoadVectorNode::make(0, control(), memory(addr), addr, addr_type, num_elem, elem_bt));
        }
      }
      Node* box = box_vector(vload, vbox_type, elem_bt, num_elem);
      set_result(box);
    }
  
    old_map->destruct(&_gvn);
  
    if (can_access_non_heap) {
      insert_mem_bar(Op_MemBarCPUOrder);
    }
  
    C->set_max_vector_size(MAX2(C->max_vector_size(), (uint)(num_elem * type2aelembytes(elem_bt))));
    return true;
  }
  
! //   <C, V extends Vector<?>, W extends IntVector, E, S extends VectorSpecies<E>>
! //   void loadWithMap(Class<?> vectorClass, Class<E> E, int length, Class<?> vectorIndexClass,
! //                    Object base, long offset, // Unsafe addressing
! //                    W index_vector,
! //                    C container, int index, int[] indexMap, int indexM, S s, // Arguments for default implementation
! //                    LoadVectorOperationWithMap<C, V, E, S> defaultImpl)
  //
! //    <C, V extends Vector<?>, W extends IntVector>
! //    void storeWithMap(Class<?> vectorClass, Class<?> elementType, int length, Class<?> vectorIndexClass,
! //                      Object base, long offset,    // Unsafe addressing
! //                      W index_vector, V v,
! //                      C container, int index, int[] indexMap, int indexM, // Arguments for default implementation
! //                      StoreVectorOperationWithMap<C, V> defaultImpl) {
  //
  bool LibraryCallKit::inline_vector_gather_scatter(bool is_scatter) {
    const TypeInstPtr* vector_klass     = gvn().type(argument(0))->isa_instptr();
!   const TypeInstPtr* elem_klass       = gvn().type(argument(1))->isa_instptr();
!   const TypeInt*     vlen             = gvn().type(argument(2))->isa_int();
!   const TypeInstPtr* vector_idx_klass = gvn().type(argument(3))->isa_instptr();
  
    if (vector_klass == NULL || elem_klass == NULL || vector_idx_klass == NULL || vlen == NULL ||
        vector_klass->const_oop() == NULL || elem_klass->const_oop() == NULL || vector_idx_klass->const_oop() == NULL || !vlen->is_con()) {
      if (C->print_intrinsics()) {
        tty->print_cr("  ** missing constant: vclass=%s etype=%s vlen=%s viclass=%s",
                      NodeClassNames[argument(0)->Opcode()],
-                     NodeClassNames[argument(1)->Opcode()],
                      NodeClassNames[argument(2)->Opcode()],
!                     NodeClassNames[argument(3)->Opcode()]);
      }
      return false; // not enough info for intrinsification
    }
  
    if (!is_klass_initialized(vector_klass) || !is_klass_initialized(vector_idx_klass)) {
      if (C->print_intrinsics()) {
        tty->print_cr("  ** klass argument not initialized");
      }
      return false;
    }
    ciType* elem_type = elem_klass->const_oop()->as_instance()->java_mirror_type();
    if (!elem_type->is_primitive_type()) {
      if (C->print_intrinsics()) {
        tty->print_cr("  ** not a primitive bt=%d", elem_type->basic_type());
      }
      return false; // should be primitive type
    }
    BasicType elem_bt = elem_type->basic_type();
    int num_elem = vlen->get_con();
  
!   if (!arch_supports_vector(is_scatter ? Op_StoreVectorScatter : Op_LoadVectorGather, num_elem, elem_bt, VecMaskNotUsed)) {
!     if (C->print_intrinsics()) {
!       tty->print_cr("  ** not supported: arity=%d op=%s vlen=%d etype=%s ismask=no",
!                     is_scatter, is_scatter ? "scatter" : "gather",
!                     num_elem, type2name(elem_bt));
      }
-     return false; // not supported
    }
  
    // Check that the vector holding indices is supported by architecture
    if (!arch_supports_vector(Op_LoadVector, num_elem, T_INT, VecMaskNotUsed)) {
        if (C->print_intrinsics()) {
!         tty->print_cr("  ** not supported: arity=%d op=%s/loadindex vlen=%d etype=int ismask=no",
                        is_scatter, is_scatter ? "scatter" : "gather",
!                       num_elem);
        }
        return false; // not supported
!     }
  
!   Node* base = argument(4);
!   Node* offset = ConvL2X(argument(5));
  
    // Save state and restore on bailout
    uint old_sp = sp();
    SafePointNode* old_map = clone_map();
  
--- 1060,385 ---
        vload = gvn().transform(new VectorReinterpretNode(vload, vload->bottom_type()->is_vect(), to_vect_type));
      } else {
        // Special handle for masks
        if (is_mask) {
          vload = gvn().transform(LoadVectorNode::make(0, control(), memory(addr), addr, addr_type, num_elem, T_BOOLEAN));
!         vload = gvn().transform(new VectorLoadMaskNode(vload, TypeVect::makemask(elem_bt, num_elem)));
        } else {
          vload = gvn().transform(LoadVectorNode::make(0, control(), memory(addr), addr, addr_type, num_elem, elem_bt));
        }
      }
      Node* box = box_vector(vload, vbox_type, elem_bt, num_elem);
      set_result(box);
    }
  
    old_map->destruct(&_gvn);
  
+   if (needs_cpu_membar) {
+     insert_mem_bar(Op_MemBarCPUOrder);
+   }
+ 
+   C->set_max_vector_size(MAX2(C->max_vector_size(), (uint)(num_elem * type2aelembytes(elem_bt))));
+   return true;
+ }
+ 
+ // public static
+ // <C,
+ //  V extends Vector<?>,
+ //  E,
+ //  S extends VectorSpecies<E>,
+ //  M extends VectorMask<E>>
+ // V loadMasked(Class<? extends V> vectorClass, Class<M> maskClass, Class<E> elementType,
+ //              int length, Object base, long offset, M m,
+ //              C container, int index, S s,  // Arguments for default implementation
+ //              LoadVectorMaskedOperation<C, V, S, M> defaultImpl) {
+ //
+ // public static
+ // <C,
+ //  V extends Vector<E>,
+ //  M extends VectorMask<E>,
+ //  E>
+ // void storeMasked(Class<? extends V> vectorClass, Class<M> maskClass, Class<E> elementType,
+ //                  int length, Object base, long offset,
+ //                  V v, M m,
+ //                  C container, int index,  // Arguments for default implementation
+ //                  StoreVectorMaskedOperation<C, V, M, E> defaultImpl) {
+ // Intrinsifies loadMasked (is_store == false) / storeMasked (is_store == true); returns false when not intrinsified.
+ bool LibraryCallKit::inline_vector_mem_masked_operation(bool is_store) {
+   const TypeInstPtr* vector_klass = gvn().type(argument(0))->isa_instptr(); // vectorClass
+   const TypeInstPtr* mask_klass   = gvn().type(argument(1))->isa_instptr(); // maskClass
+   const TypeInstPtr* elem_klass   = gvn().type(argument(2))->isa_instptr(); // elementType
+   const TypeInt*     vlen         = gvn().type(argument(3))->isa_int();     // length
+ 
+   if (vector_klass == NULL || mask_klass == NULL || elem_klass == NULL || vlen == NULL ||
+       vector_klass->const_oop() == NULL || mask_klass->const_oop() == NULL ||
+       elem_klass->const_oop() == NULL || !vlen->is_con()) { // all four leading arguments must be constants
+     if (C->print_intrinsics()) {
+       tty->print_cr("  ** missing constant: vclass=%s mclass=%s etype=%s vlen=%s",
+                     NodeClassNames[argument(0)->Opcode()],
+                     NodeClassNames[argument(1)->Opcode()],
+                     NodeClassNames[argument(2)->Opcode()],
+                     NodeClassNames[argument(3)->Opcode()]);
+     }
+     return false; // not enough info for intrinsification
+   }
+   if (!is_klass_initialized(vector_klass)) {
+     if (C->print_intrinsics()) {
+       tty->print_cr("  ** klass argument not initialized");
+     }
+     return false;
+   }
+ 
+   if (!is_klass_initialized(mask_klass)) {
+     if (C->print_intrinsics()) {
+       tty->print_cr("  ** mask klass argument not initialized");
+     }
+     return false;
+   }
+ 
+   ciType* elem_type = elem_klass->const_oop()->as_instance()->java_mirror_type();
+   if (!elem_type->is_primitive_type()) {
+     if (C->print_intrinsics()) {
+       tty->print_cr("  ** not a primitive bt=%d", elem_type->basic_type());
+     }
+     return false; // should be primitive type
+   }
+ 
+   BasicType elem_bt = elem_type->basic_type();
+   int num_elem = vlen->get_con();
+ 
+   Node* base = argument(4);             // Object base
+   Node* offset = ConvL2X(argument(5));  // long offset
+ 
+   // Save state and restore on bailout: every "return false" below first resets the map and sp saved here.
+   uint old_sp = sp();
+   SafePointNode* old_map = clone_map();
+ 
+   Node* addr = make_unsafe_address(base, offset, elem_bt, true);
+   const TypePtr *addr_type = gvn().type(addr)->isa_ptr();
+   const TypeAryPtr* arr_type = addr_type->isa_aryptr();
+ 
+   // Now handle the special case where the load/store happens from/to a byte array but the element type is not byte.
+   bool using_byte_array = arr_type != NULL && arr_type->elem()->array_element_basic_type() == T_BYTE && elem_bt != T_BYTE;
+   // If there is no consistency between array and vector element types, it must be the special byte array case; otherwise bail out.
+   if (arr_type != NULL && !using_byte_array && !elem_consistent_with_arr(elem_bt, arr_type)) {
+     if (C->print_intrinsics()) {
+       tty->print_cr("  ** not supported: arity=%d op=%s vlen=%d etype=%s atype=%s",
+                     is_store, is_store ? "storeMasked" : "loadMasked",
+                     num_elem, type2name(elem_bt), type2name(arr_type->elem()->array_element_basic_type()));
+     }
+     set_map(old_map);
+     set_sp(old_sp);
+     return false;
+   }
+ 
+   int mem_num_elem = using_byte_array ? num_elem * type2aelembytes(elem_bt) : num_elem; // byte-array case: one lane per byte
+   BasicType mem_elem_bt = using_byte_array ? T_BYTE : elem_bt;                          // lane type used for the memory access
+   bool use_predicate = arch_supports_vector(is_store ? Op_StoreVectorMasked : Op_LoadVectorMasked,
+                                             mem_num_elem, mem_elem_bt,
+                                             (VectorMaskUseType) (VecMaskUseLoad | VecMaskUsePred));
+   // Masked vector store operation needs the architecture predicate feature. We need to check
+   // whether the predicated vector operation is supported by backend.
+   if (is_store && !use_predicate) {
+     if (C->print_intrinsics()) {
+       tty->print_cr("  ** not supported: op=storeMasked vlen=%d etype=%s using_byte_array=%d",
+                     num_elem, type2name(elem_bt), using_byte_array ? 1 : 0);
+     }
+     set_map(old_map);
+     set_sp(old_sp);
+     return false;
+   }
+ 
+   // This only happens for masked vector load. If predicate is not supported, then check whether
+   // the normal vector load and blend operations are supported by backend.
+   if (!use_predicate && (!arch_supports_vector(Op_LoadVector, mem_num_elem, mem_elem_bt, VecMaskNotUsed) ||
+       !arch_supports_vector(Op_VectorBlend, mem_num_elem, mem_elem_bt, VecMaskUseLoad))) {
+     if (C->print_intrinsics()) {
+       tty->print_cr("  ** not supported: op=loadMasked vlen=%d etype=%s using_byte_array=%d",
+                     num_elem, type2name(elem_bt), using_byte_array ? 1 : 0);
+     }
+     set_map(old_map);
+     set_sp(old_sp);
+     return false;
+   }
+ 
+   // Since we are using byte array, we need to double check that the vector reinterpret operation
+   // with byte type is supported by backend.
+   if (using_byte_array) {
+     if (!arch_supports_vector(Op_VectorReinterpret, mem_num_elem, T_BYTE, VecMaskNotUsed)) {
+       if (C->print_intrinsics()) {
+         tty->print_cr("  ** not supported: arity=%d op=%s vlen=%d etype=%s using_byte_array=1",
+                       is_store, is_store ? "storeMasked" : "loadMasked",
+                       num_elem, type2name(elem_bt));
+       }
+       set_map(old_map);
+       set_sp(old_sp);
+       return false;
+     }
+   }
+ 
+   // Since it needs to unbox the mask, we need to double check that the related load operations
+   // for mask are supported by backend.
+   if (!arch_supports_vector(Op_LoadVector, num_elem, elem_bt, VecMaskUseLoad)) {
+     if (C->print_intrinsics()) {
+       tty->print_cr("  ** not supported: arity=%d op=%s vlen=%d etype=%s",
+                       is_store, is_store ? "storeMasked" : "loadMasked",
+                       num_elem, type2name(elem_bt));
+     }
+     set_map(old_map);
+     set_sp(old_sp);
+     return false;
+   }
+ 
+   // Can base be NULL? Otherwise, always on-heap access. If it can, the access is bracketed by CPU-order barriers (here and before returning).
+   bool can_access_non_heap = TypePtr::NULL_PTR->higher_equal(gvn().type(base));
+   if (can_access_non_heap) {
+     insert_mem_bar(Op_MemBarCPUOrder);
+   }
+ 
+   ciKlass* vbox_klass = vector_klass->const_oop()->as_instance()->java_lang_Class_klass();
+   ciKlass* mbox_klass = mask_klass->const_oop()->as_instance()->java_lang_Class_klass();
+   assert(!is_vector_mask(vbox_klass) && is_vector_mask(mbox_klass), "Invalid class type");
+   const TypeInstPtr* vbox_type = TypeInstPtr::make_exact(TypePtr::NotNull, vbox_klass);
+   const TypeInstPtr* mbox_type = TypeInstPtr::make_exact(TypePtr::NotNull, mbox_klass);
+ 
+   Node* mask = unbox_vector(is_store ? argument(8) : argument(7), mbox_type, elem_bt, num_elem); // store: (V v, M m) are args 7, 8; load: M m is arg 7
+   if (mask == NULL) {
+     if (C->print_intrinsics()) {
+       tty->print_cr("  ** unbox failed mask=%s",
+                     is_store ? NodeClassNames[argument(8)->Opcode()]
+                              : NodeClassNames[argument(7)->Opcode()]);
+     }
+     set_map(old_map);
+     set_sp(old_sp);
+     return false;
+   }
+ 
+   if (is_store) {
+     Node* val = unbox_vector(argument(7), vbox_type, elem_bt, num_elem); // V v, the vector to store
+     if (val == NULL) {
+       if (C->print_intrinsics()) {
+         tty->print_cr("  ** unbox failed vector=%s",
+                       NodeClassNames[argument(7)->Opcode()]);
+       }
+       set_map(old_map);
+       set_sp(old_sp);
+       return false; // operand unboxing failed
+     }
+     set_all_memory(reset_memory());
+ 
+     if (using_byte_array) {
+       // Reinterpret the incoming vector to byte vector.
+       const TypeVect* to_vect_type = TypeVect::make(mem_elem_bt, mem_num_elem);
+       val = gvn().transform(new VectorReinterpretNode(val, val->bottom_type()->is_vect(), to_vect_type));
+       // Reinterpret the vector mask to byte type.
+       const TypeVect* from_mask_type = TypeVect::makemask(elem_bt, num_elem);
+       const TypeVect* to_mask_type = TypeVect::makemask(mem_elem_bt, mem_num_elem);
+       mask = gvn().transform(new VectorReinterpretNode(mask, from_mask_type, to_mask_type));
+     }
+     Node* vstore = gvn().transform(new StoreVectorMaskedNode(control(), memory(addr), addr, val, addr_type, mask));
+     set_memory(vstore, addr_type);
+   } else {
+     Node* vload = NULL;
+ 
+     if (using_byte_array) {
+       // Reinterpret the vector mask to byte type.
+       const TypeVect* from_mask_type = TypeVect::makemask(elem_bt, num_elem);
+       const TypeVect* to_mask_type = TypeVect::makemask(mem_elem_bt, mem_num_elem);
+       mask = gvn().transform(new VectorReinterpretNode(mask, from_mask_type, to_mask_type));
+     }
+ 
+     if (use_predicate) {
+       // Generate masked load vector node if predicate feature is supported.
+       const TypeVect* vt = TypeVect::make(mem_elem_bt, mem_num_elem);
+       vload = gvn().transform(new LoadVectorMaskedNode(control(), memory(addr), addr, addr_type, vt, mask));
+     } else {
+       // Use a full vector load blended with a zero vector under the mask to implement the masked load; the non-loaded lanes are zeros.
+       Node* zero = gvn().transform(gvn().zerocon(mem_elem_bt));
+       zero = gvn().transform(VectorNode::scalar2vector(zero, mem_num_elem, Type::get_const_basic_type(mem_elem_bt)));
+       vload = gvn().transform(LoadVectorNode::make(0, control(), memory(addr), addr, addr_type, mem_num_elem, mem_elem_bt));
+       vload = gvn().transform(new VectorBlendNode(zero, vload, mask));
+     }
+ 
+     if (using_byte_array) {
+       const TypeVect* to_vect_type = TypeVect::make(elem_bt, num_elem); // reinterpret the byte lanes back to the requested element type
+       vload = gvn().transform(new VectorReinterpretNode(vload, vload->bottom_type()->is_vect(), to_vect_type));
+     }
+ 
+     Node* box = box_vector(vload, vbox_type, elem_bt, num_elem);
+     set_result(box);
+   }
+ 
+   old_map->destruct(&_gvn); // no bailout past this point, so the saved map is released
+ 
    if (can_access_non_heap) {
      insert_mem_bar(Op_MemBarCPUOrder);
    }
  
    C->set_max_vector_size(MAX2(C->max_vector_size(), (uint)(num_elem * type2aelembytes(elem_bt))));
    return true;
  }
  
! // <C,
! //  V extends Vector<?>,
! //  W extends Vector<Integer>,
! //  S extends VectorSpecies<E>,
! //  M extends VectorMask<E>,
! //  E>
+ // V loadWithMap(Class<? extends V> vectorClass, Class<M> maskClass, Class<E> elementType, int length,
+ //               Class<? extends Vector<Integer>> vectorIndexClass,
+ //               Object base, long offset, // Unsafe addressing
+ //               W index_vector, M m,
+ //               C container, int index, int[] indexMap, int indexM, S s, // Arguments for default implementation
+ //               LoadVectorOperationWithMap<C, V, E, S, M> defaultImpl)
  //
! //  <C,
! //   V extends Vector<E>,
! //   W extends Vector<Integer>,
! //   M extends VectorMask<E>,
! //   E>
! //  void storeWithMap(Class<? extends V> vectorClass, Class<M> maskClass, Class<E> elementType,
+ //                    int length, Class<? extends Vector<Integer>> vectorIndexClass, Object base, long offset,    // Unsafe addressing
+ //                    W index_vector, V v, M m,
+ //                    C container, int index, int[] indexMap, int indexM, // Arguments for default implementation
+ //                    StoreVectorOperationWithMap<C, V, M, E> defaultImpl)
  //
  bool LibraryCallKit::inline_vector_gather_scatter(bool is_scatter) {
    const TypeInstPtr* vector_klass     = gvn().type(argument(0))->isa_instptr();
!   const TypeInstPtr* mask_klass       = gvn().type(argument(1))->isa_instptr();
!   const TypeInstPtr* elem_klass       = gvn().type(argument(2))->isa_instptr();
!   const TypeInt*     vlen             = gvn().type(argument(3))->isa_int();
+   const TypeInstPtr* vector_idx_klass = gvn().type(argument(4))->isa_instptr();
  
    if (vector_klass == NULL || elem_klass == NULL || vector_idx_klass == NULL || vlen == NULL ||
        vector_klass->const_oop() == NULL || elem_klass->const_oop() == NULL || vector_idx_klass->const_oop() == NULL || !vlen->is_con()) {
      if (C->print_intrinsics()) {
        tty->print_cr("  ** missing constant: vclass=%s etype=%s vlen=%s viclass=%s",
                      NodeClassNames[argument(0)->Opcode()],
                      NodeClassNames[argument(2)->Opcode()],
!                     NodeClassNames[argument(3)->Opcode()],
+                     NodeClassNames[argument(4)->Opcode()]);
      }
      return false; // not enough info for intrinsification
    }
  
    if (!is_klass_initialized(vector_klass) || !is_klass_initialized(vector_idx_klass)) {
      if (C->print_intrinsics()) {
        tty->print_cr("  ** klass argument not initialized");
      }
      return false;
    }
+ 
    ciType* elem_type = elem_klass->const_oop()->as_instance()->java_mirror_type();
    if (!elem_type->is_primitive_type()) {
      if (C->print_intrinsics()) {
        tty->print_cr("  ** not a primitive bt=%d", elem_type->basic_type());
      }
      return false; // should be primitive type
    }
+ 
    BasicType elem_bt = elem_type->basic_type();
    int num_elem = vlen->get_con();
  
!   const Type* vmask_type = gvn().type(is_scatter ? argument(10) : argument(9));
!   bool is_masked_op = vmask_type != TypePtr::NULL_PTR;
!   if (is_masked_op) {
!     if (mask_klass == NULL || mask_klass->const_oop() == NULL) {
!       if (C->print_intrinsics()) {
+         tty->print_cr("  ** missing constant: maskclass=%s", NodeClassNames[argument(1)->Opcode()]);
+       }
+       return false; // not enough info for intrinsification
+     }
+ 
+     if (!is_klass_initialized(mask_klass)) {
+       if (C->print_intrinsics()) {
+         tty->print_cr("  ** mask klass argument not initialized");
+       }
+       return false;
+     }
+ 
+     if (vmask_type->maybe_null()) {
+       if (C->print_intrinsics()) {
+         tty->print_cr("  ** null mask values are not allowed for masked op");
+       }
+       return false;
+     }
+ 
+     // Check whether the predicated gather/scatter node is supported by architecture.
+     if (!arch_supports_vector(is_scatter ? Op_StoreVectorScatterMasked : Op_LoadVectorGatherMasked, num_elem, elem_bt,
+                               (VectorMaskUseType) (VecMaskUseLoad | VecMaskUsePred))) {
+       if (C->print_intrinsics()) {
+         tty->print_cr("  ** not supported: arity=%d op=%s vlen=%d etype=%s is_masked_op=1",
+                       is_scatter, is_scatter ? "scatterMasked" : "gatherMasked",
+                       num_elem, type2name(elem_bt));
+       }
+       return false; // not supported
+     }
+   } else {
+     // Check whether the normal gather/scatter node is supported for non-masked operation.
+     if (!arch_supports_vector(is_scatter ? Op_StoreVectorScatter : Op_LoadVectorGather, num_elem, elem_bt, VecMaskNotUsed)) {
+       if (C->print_intrinsics()) {
+         tty->print_cr("  ** not supported: arity=%d op=%s vlen=%d etype=%s is_masked_op=0",
+                       is_scatter, is_scatter ? "scatter" : "gather",
+                       num_elem, type2name(elem_bt));
+       }
+       return false; // not supported
      }
    }
  
    // Check that the vector holding indices is supported by architecture
    if (!arch_supports_vector(Op_LoadVector, num_elem, T_INT, VecMaskNotUsed)) {
        if (C->print_intrinsics()) {
!         tty->print_cr("  ** not supported: arity=%d op=%s/loadindex vlen=%d etype=int is_masked_op=%d",
                        is_scatter, is_scatter ? "scatter" : "gather",
!                       num_elem, is_masked_op ? 1 : 0);
        }
        return false; // not supported
!   }
  
!   Node* base = argument(5);
!   Node* offset = ConvL2X(argument(6));
  
    // Save state and restore on bailout
    uint old_sp = sp();
    SafePointNode* old_map = clone_map();
  

*** 1020,73 ***
      }
      set_map(old_map);
      set_sp(old_sp);
      return false;
    }
    ciKlass* vbox_klass = vector_klass->const_oop()->as_instance()->java_lang_Class_klass();
    const TypeInstPtr* vbox_type = TypeInstPtr::make_exact(TypePtr::NotNull, vbox_klass);
- 
    ciKlass* vbox_idx_klass = vector_idx_klass->const_oop()->as_instance()->java_lang_Class_klass();
- 
    if (vbox_idx_klass == NULL) {
      set_map(old_map);
      set_sp(old_sp);
      return false;
    }
  
    const TypeInstPtr* vbox_idx_type = TypeInstPtr::make_exact(TypePtr::NotNull, vbox_idx_klass);
! 
-   Node* index_vect = unbox_vector(argument(7), vbox_idx_type, T_INT, num_elem);
    if (index_vect == NULL) {
      set_map(old_map);
      set_sp(old_sp);
      return false;
    }
    const TypeVect* vector_type = TypeVect::make(elem_bt, num_elem);
    if (is_scatter) {
!     Node* val = unbox_vector(argument(8), vbox_type, elem_bt, num_elem);
      if (val == NULL) {
        set_map(old_map);
        set_sp(old_sp);
        return false; // operand unboxing failed
      }
      set_all_memory(reset_memory());
  
!     Node* vstore = gvn().transform(new StoreVectorScatterNode(control(), memory(addr), addr, addr_type, val, index_vect));
      set_memory(vstore, addr_type);
    } else {
!     Node* vload = gvn().transform(new LoadVectorGatherNode(control(), memory(addr), addr, addr_type, vector_type, index_vect));
! 
      Node* box = box_vector(vload, vbox_type, elem_bt, num_elem);
      set_result(box);
    }
  
    old_map->destruct(&_gvn);
  
    C->set_max_vector_size(MAX2(C->max_vector_size(), (uint)(num_elem * type2aelembytes(elem_bt))));
    return true;
  }
  
! // <V extends Vector<?,?>>
! // long reductionCoerced(int oprId, Class<?> vectorClass, Class<?> elementType, int vlen,
! //                       V v,
! //                       Function<V,Long> defaultImpl)
! 
  bool LibraryCallKit::inline_vector_reduction() {
    const TypeInt*     opr          = gvn().type(argument(0))->isa_int();
    const TypeInstPtr* vector_klass = gvn().type(argument(1))->isa_instptr();
!   const TypeInstPtr* elem_klass   = gvn().type(argument(2))->isa_instptr();
!   const TypeInt*     vlen         = gvn().type(argument(3))->isa_int();
  
    if (opr == NULL || vector_klass == NULL || elem_klass == NULL || vlen == NULL ||
        !opr->is_con() || vector_klass->const_oop() == NULL || elem_klass->const_oop() == NULL || !vlen->is_con()) {
      if (C->print_intrinsics()) {
        tty->print_cr("  ** missing constant: opr=%s vclass=%s etype=%s vlen=%s",
                      NodeClassNames[argument(0)->Opcode()],
                      NodeClassNames[argument(1)->Opcode()],
!                     NodeClassNames[argument(2)->Opcode()],
!                     NodeClassNames[argument(3)->Opcode()]);
      }
      return false; // not enough info for intrinsification
    }
    if (!is_klass_initialized(vector_klass)) {
      if (C->print_intrinsics()) {
--- 1456,101 ---
      }
      set_map(old_map);
      set_sp(old_sp);
      return false;
    }
+ 
    ciKlass* vbox_klass = vector_klass->const_oop()->as_instance()->java_lang_Class_klass();
    const TypeInstPtr* vbox_type = TypeInstPtr::make_exact(TypePtr::NotNull, vbox_klass);
    ciKlass* vbox_idx_klass = vector_idx_klass->const_oop()->as_instance()->java_lang_Class_klass();
    if (vbox_idx_klass == NULL) {
      set_map(old_map);
      set_sp(old_sp);
      return false;
    }
  
    const TypeInstPtr* vbox_idx_type = TypeInstPtr::make_exact(TypePtr::NotNull, vbox_idx_klass);
!   Node* index_vect = unbox_vector(argument(8), vbox_idx_type, T_INT, num_elem);
    if (index_vect == NULL) {
      set_map(old_map);
      set_sp(old_sp);
      return false;
    }
+ 
+   Node* mask = NULL;
+   if (is_masked_op) {
+     ciKlass* mbox_klass = mask_klass->const_oop()->as_instance()->java_lang_Class_klass();
+     const TypeInstPtr* mbox_type = TypeInstPtr::make_exact(TypePtr::NotNull, mbox_klass);
+     mask = unbox_vector(is_scatter ? argument(10) : argument(9), mbox_type, elem_bt, num_elem);
+     if (mask == NULL) {
+       if (C->print_intrinsics()) {
+         tty->print_cr("  ** unbox failed mask=%s",
+                     is_scatter ? NodeClassNames[argument(10)->Opcode()]
+                                : NodeClassNames[argument(9)->Opcode()]);
+       }
+       set_map(old_map);
+       set_sp(old_sp);
+       return false;
+     }
+   }
+ 
    const TypeVect* vector_type = TypeVect::make(elem_bt, num_elem);
    if (is_scatter) {
!     Node* val = unbox_vector(argument(9), vbox_type, elem_bt, num_elem);
      if (val == NULL) {
        set_map(old_map);
        set_sp(old_sp);
        return false; // operand unboxing failed
      }
      set_all_memory(reset_memory());
  
!     Node* vstore = NULL;
+     if (mask != NULL) {
+       vstore = gvn().transform(new StoreVectorScatterMaskedNode(control(), memory(addr), addr, addr_type, val, index_vect, mask));
+     } else {
+       vstore = gvn().transform(new StoreVectorScatterNode(control(), memory(addr), addr, addr_type, val, index_vect));
+     }
      set_memory(vstore, addr_type);
    } else {
!     Node* vload = NULL;
!     if (mask != NULL) {
+       vload = gvn().transform(new LoadVectorGatherMaskedNode(control(), memory(addr), addr, addr_type, vector_type, index_vect, mask));
+     } else {
+       vload = gvn().transform(new LoadVectorGatherNode(control(), memory(addr), addr, addr_type, vector_type, index_vect));
+     }
      Node* box = box_vector(vload, vbox_type, elem_bt, num_elem);
      set_result(box);
    }
  
    old_map->destruct(&_gvn);
  
    C->set_max_vector_size(MAX2(C->max_vector_size(), (uint)(num_elem * type2aelembytes(elem_bt))));
    return true;
  }
  
! // public static
! // <V extends Vector<E>,
! //  M extends VectorMask<E>,
! //  E>
! // long reductionCoerced(int oprId, Class<? extends V> vectorClass, Class<? extends M> maskClass,
+ //                       Class<E> elementType, int length, V v, M m,
+ //                       ReductionOperation<V, M> defaultImpl)
  bool LibraryCallKit::inline_vector_reduction() {
    const TypeInt*     opr          = gvn().type(argument(0))->isa_int();
    const TypeInstPtr* vector_klass = gvn().type(argument(1))->isa_instptr();
!   const TypeInstPtr* mask_klass   = gvn().type(argument(2))->isa_instptr();
!   const TypeInstPtr* elem_klass   = gvn().type(argument(3))->isa_instptr();
+   const TypeInt*     vlen         = gvn().type(argument(4))->isa_int();
  
    if (opr == NULL || vector_klass == NULL || elem_klass == NULL || vlen == NULL ||
        !opr->is_con() || vector_klass->const_oop() == NULL || elem_klass->const_oop() == NULL || !vlen->is_con()) {
      if (C->print_intrinsics()) {
        tty->print_cr("  ** missing constant: opr=%s vclass=%s etype=%s vlen=%s",
                      NodeClassNames[argument(0)->Opcode()],
                      NodeClassNames[argument(1)->Opcode()],
!                     NodeClassNames[argument(3)->Opcode()],
!                     NodeClassNames[argument(4)->Opcode()]);
      }
      return false; // not enough info for intrinsification
    }
    if (!is_klass_initialized(vector_klass)) {
      if (C->print_intrinsics()) {

*** 1099,55 ***
      if (C->print_intrinsics()) {
        tty->print_cr("  ** not a primitive bt=%d", elem_type->basic_type());
      }
      return false; // should be primitive type
    }
    BasicType elem_bt = elem_type->basic_type();
    int num_elem = vlen->get_con();
- 
    int opc  = VectorSupport::vop2ideal(opr->get_con(), elem_bt);
    int sopc = ReductionNode::opcode(opc, elem_bt);
  
!   // TODO When mask usage is supported, VecMaskNotUsed needs to be VecMaskUseLoad.
!   if (!arch_supports_vector(sopc, num_elem, elem_bt, VecMaskNotUsed)) {
      if (C->print_intrinsics()) {
!       tty->print_cr("  ** not supported: arity=1 op=%d/reduce vlen=%d etype=%s ismask=no",
                      sopc, num_elem, type2name(elem_bt));
      }
      return false;
    }
  
    ciKlass* vbox_klass = vector_klass->const_oop()->as_instance()->java_lang_Class_klass();
    const TypeInstPtr* vbox_type = TypeInstPtr::make_exact(TypePtr::NotNull, vbox_klass);
  
!   Node* opd = unbox_vector(argument(4), vbox_type, elem_bt, num_elem);
    if (opd == NULL) {
      return false; // operand unboxing failed
    }
  
    Node* init = ReductionNode::make_reduction_input(gvn(), opc, elem_bt);
!   Node* rn = gvn().transform(ReductionNode::make(opc, NULL, init, opd, elem_bt));
  
    Node* bits = NULL;
    switch (elem_bt) {
      case T_BYTE:
      case T_SHORT:
      case T_INT: {
!       bits = gvn().transform(new ConvI2LNode(rn));
        break;
      }
      case T_FLOAT: {
!       rn   = gvn().transform(new MoveF2INode(rn));
!       bits = gvn().transform(new ConvI2LNode(rn));
        break;
      }
      case T_DOUBLE: {
!       bits = gvn().transform(new MoveD2LNode(rn));
        break;
      }
      case T_LONG: {
!       bits = rn; // no conversion needed
        break;
      }
      default: fatal("%s", type2name(elem_bt));
    }
    set_result(bits);
--- 1563,120 ---
      if (C->print_intrinsics()) {
        tty->print_cr("  ** not a primitive bt=%d", elem_type->basic_type());
      }
      return false; // should be primitive type
    }
+ 
+   const Type* vmask_type = gvn().type(argument(6));
+   bool is_masked_op = vmask_type != TypePtr::NULL_PTR;
+   if (is_masked_op) {
+     if (mask_klass == NULL || mask_klass->const_oop() == NULL) {
+       if (C->print_intrinsics()) {
+         tty->print_cr("  ** missing constant: maskclass=%s", NodeClassNames[argument(2)->Opcode()]);
+       }
+       return false; // not enough info for intrinsification
+     }
+ 
+     if (!is_klass_initialized(mask_klass)) {
+       if (C->print_intrinsics()) {
+         tty->print_cr("  ** mask klass argument not initialized");
+       }
+       return false;
+     }
+ 
+     if (vmask_type->maybe_null()) {
+       if (C->print_intrinsics()) {
+         tty->print_cr("  ** null mask values are not allowed for masked op");
+       }
+       return false;
+     }
+   }
+ 
    BasicType elem_bt = elem_type->basic_type();
    int num_elem = vlen->get_con();
    int opc  = VectorSupport::vop2ideal(opr->get_con(), elem_bt);
    int sopc = ReductionNode::opcode(opc, elem_bt);
  
!   // When using mask, mask use type needs to be VecMaskUseLoad.
!   if (!arch_supports_vector(sopc, num_elem, elem_bt, is_masked_op ? VecMaskUseLoad : VecMaskNotUsed)) {
+     if (C->print_intrinsics()) {
+       tty->print_cr("  ** not supported: arity=1 op=%d/reduce vlen=%d etype=%s is_masked_op=%d",
+                     sopc, num_elem, type2name(elem_bt), is_masked_op ? 1 : 0);
+     }
+     return false;
+   }
+ 
+   // Return true if current platform has implemented the masked operation with predicate feature.
+   bool use_predicate = is_masked_op && arch_supports_vector(sopc, num_elem, elem_bt, VecMaskUsePred);
+   if (is_masked_op && !use_predicate && !arch_supports_vector(Op_VectorBlend, num_elem, elem_bt, VecMaskUseLoad)) {
      if (C->print_intrinsics()) {
!       tty->print_cr("  ** not supported: arity=1 op=%d/reduce vlen=%d etype=%s is_masked_op=1",
                      sopc, num_elem, type2name(elem_bt));
      }
      return false;
    }
  
    ciKlass* vbox_klass = vector_klass->const_oop()->as_instance()->java_lang_Class_klass();
    const TypeInstPtr* vbox_type = TypeInstPtr::make_exact(TypePtr::NotNull, vbox_klass);
  
!   Node* opd = unbox_vector(argument(5), vbox_type, elem_bt, num_elem);
    if (opd == NULL) {
      return false; // operand unboxing failed
    }
  
+   Node* mask = NULL;
+   if (is_masked_op) {
+     ciKlass* mbox_klass = mask_klass->const_oop()->as_instance()->java_lang_Class_klass();
+     assert(is_vector_mask(mbox_klass), "argument(2) should be a mask class");
+     const TypeInstPtr* mbox_type = TypeInstPtr::make_exact(TypePtr::NotNull, mbox_klass);
+     mask = unbox_vector(argument(6), mbox_type, elem_bt, num_elem);
+     if (mask == NULL) {
+       if (C->print_intrinsics()) {
+         tty->print_cr("  ** unbox failed mask=%s",
+                       NodeClassNames[argument(6)->Opcode()]);
+       }
+       return false;
+     }
+   }
+ 
    Node* init = ReductionNode::make_reduction_input(gvn(), opc, elem_bt);
!   Node* value = NULL;
+   if (mask == NULL) {
+     assert(!is_masked_op, "Masked op needs the mask value never null");
+     value = ReductionNode::make(opc, NULL, init, opd, elem_bt);
+   } else {
+     if (use_predicate) {
+       value = ReductionNode::make(opc, NULL, init, opd, elem_bt);
+       value->add_req(mask);
+       value->add_flag(Node::Flag_is_predicated_vector);
+     } else {
+       Node* reduce_identity = gvn().transform(VectorNode::scalar2vector(init, num_elem, Type::get_const_basic_type(elem_bt)));
+       value = gvn().transform(new VectorBlendNode(reduce_identity, opd, mask));
+       value = ReductionNode::make(opc, NULL, init, value, elem_bt);
+     }
+   }
+   value = gvn().transform(value);
  
    Node* bits = NULL;
    switch (elem_bt) {
      case T_BYTE:
      case T_SHORT:
      case T_INT: {
!       bits = gvn().transform(new ConvI2LNode(value));
        break;
      }
      case T_FLOAT: {
!       value = gvn().transform(new MoveF2INode(value));
!       bits  = gvn().transform(new ConvI2LNode(value));
        break;
      }
      case T_DOUBLE: {
!       bits = gvn().transform(new MoveD2LNode(value));
        break;
      }
      case T_LONG: {
!       bits = value; // no conversion needed
        break;
      }
      default: fatal("%s", type2name(elem_bt));
    }
    set_result(bits);

*** 1155,11 ***
    return true;
  }
  
  // public static <V> boolean test(int cond, Class<?> vectorClass, Class<?> elementType, int vlen,
  //                                V v1, V v2,
! //                                BiFunction<V, V, Boolean> defaultImpl) {
  //
  bool LibraryCallKit::inline_vector_test() {
    const TypeInt*     cond         = gvn().type(argument(0))->isa_int();
    const TypeInstPtr* vector_klass = gvn().type(argument(1))->isa_instptr();
    const TypeInstPtr* elem_klass   = gvn().type(argument(2))->isa_instptr();
--- 1684,11 ---
    return true;
  }
  
  // public static <V> boolean test(int cond, Class<?> vectorClass, Class<?> elementType, int vlen,
  //                                V v1, V v2,
! //                                BiFunction<V, V, Boolean> defaultImpl)
  //
  bool LibraryCallKit::inline_vector_test() {
    const TypeInt*     cond         = gvn().type(argument(0))->isa_int();
    const TypeInstPtr* vector_klass = gvn().type(argument(1))->isa_instptr();
    const TypeInstPtr* elem_klass   = gvn().type(argument(2))->isa_instptr();

*** 1216,15 ***
    C->set_max_vector_size(MAX2(C->max_vector_size(), (uint)(num_elem * type2aelembytes(elem_bt))));
    return true;
  }
  
  // public static
! // <V extends Vector, M extends Mask>
! // V blend(Class<V> vectorClass, Class<M> maskClass, Class<?> elementType, int vlen,
  //         V v1, V v2, M m,
! //         VectorBlendOp<V,M> defaultImpl) { ...
- //
  bool LibraryCallKit::inline_vector_blend() {
    const TypeInstPtr* vector_klass = gvn().type(argument(0))->isa_instptr();
    const TypeInstPtr* mask_klass   = gvn().type(argument(1))->isa_instptr();
    const TypeInstPtr* elem_klass   = gvn().type(argument(2))->isa_instptr();
    const TypeInt*     vlen         = gvn().type(argument(3))->isa_int();
--- 1745,16 ---
    C->set_max_vector_size(MAX2(C->max_vector_size(), (uint)(num_elem * type2aelembytes(elem_bt))));
    return true;
  }
  
  // public static
! // <V extends Vector<E>,
! //  M extends VectorMask<E>,
+ //  E>
+ // V blend(Class<? extends V> vectorClass, Class<M> maskClass, Class<E> elementType, int vlen,
  //         V v1, V v2, M m,
! //         VectorBlendOp<V, M, E> defaultImpl)
  bool LibraryCallKit::inline_vector_blend() {
    const TypeInstPtr* vector_klass = gvn().type(argument(0))->isa_instptr();
    const TypeInstPtr* mask_klass   = gvn().type(argument(1))->isa_instptr();
    const TypeInstPtr* elem_klass   = gvn().type(argument(2))->isa_instptr();
    const TypeInt*     vlen         = gvn().type(argument(3))->isa_int();

*** 1287,17 ***
    set_result(box);
    C->set_max_vector_size(MAX2(C->max_vector_size(), (uint)(num_elem * type2aelembytes(elem_bt))));
    return true;
  }
  
! //  public static <V extends Vector<E,S>,
! //          M extends Vector.Mask<E,S>,
! //          S extends Vector.Shape, E>
! //  M compare(int cond, Class<V> vectorClass, Class<M> maskClass, Class<?> elementType, int vlen,
! //            V v1, V v2,
! //            VectorCompareOp<V,M> defaultImpl) { ...
! //
  bool LibraryCallKit::inline_vector_compare() {
    const TypeInt*     cond         = gvn().type(argument(0))->isa_int();
    const TypeInstPtr* vector_klass = gvn().type(argument(1))->isa_instptr();
    const TypeInstPtr* mask_klass   = gvn().type(argument(2))->isa_instptr();
    const TypeInstPtr* elem_klass   = gvn().type(argument(3))->isa_instptr();
--- 1817,17 ---
    set_result(box);
    C->set_max_vector_size(MAX2(C->max_vector_size(), (uint)(num_elem * type2aelembytes(elem_bt))));
    return true;
  }
  
! //  public static
! //  <V extends Vector<E>,
! //   M extends VectorMask<E>,
! //   E>
! //  M compare(int cond, Class<? extends V> vectorClass, Class<M> maskClass, Class<E> elementType, int vlen,
! //            V v1, V v2, M m,
! //            VectorCompareOp<V,M> defaultImpl)
  bool LibraryCallKit::inline_vector_compare() {
    const TypeInt*     cond         = gvn().type(argument(0))->isa_int();
    const TypeInstPtr* vector_klass = gvn().type(argument(1))->isa_instptr();
    const TypeInstPtr* mask_klass   = gvn().type(argument(2))->isa_instptr();
    const TypeInstPtr* elem_klass   = gvn().type(argument(3))->isa_instptr();

*** 1361,52 ***
    const TypeInstPtr* mbox_type = TypeInstPtr::make_exact(TypePtr::NotNull, mbox_klass);
  
    Node* v1 = unbox_vector(argument(5), vbox_type, elem_bt, num_elem);
    Node* v2 = unbox_vector(argument(6), vbox_type, elem_bt, num_elem);
  
    if (v1 == NULL || v2 == NULL) {
      return false; // operand unboxing failed
    }
    BoolTest::mask pred = (BoolTest::mask)cond->get_con();
    ConINode* pred_node = (ConINode*)gvn().makecon(cond);
  
!   const TypeVect* vt = TypeVect::make(mask_bt, num_elem);
!   Node* operation = gvn().transform(new VectorMaskCmpNode(pred, v1, v2, pred_node, vt));
  
    Node* box = box_vector(operation, mbox_type, mask_bt, num_elem);
    set_result(box);
    C->set_max_vector_size(MAX2(C->max_vector_size(), (uint)(num_elem * type2aelembytes(elem_bt))));
    return true;
  }
  
  // public static
! // <V extends Vector, Sh extends Shuffle>
! //  V rearrangeOp(Class<V> vectorClass, Class<Sh> shuffleClass, Class< ? > elementType, int vlen,
! //    V v1, Sh sh,
! //    VectorSwizzleOp<V, Sh, S, E> defaultImpl) { ...
! 
  bool LibraryCallKit::inline_vector_rearrange() {
    const TypeInstPtr* vector_klass  = gvn().type(argument(0))->isa_instptr();
    const TypeInstPtr* shuffle_klass = gvn().type(argument(1))->isa_instptr();
!   const TypeInstPtr* elem_klass    = gvn().type(argument(2))->isa_instptr();
!   const TypeInt*     vlen          = gvn().type(argument(3))->isa_int();
  
!   if (vector_klass == NULL || shuffle_klass == NULL || elem_klass == NULL || vlen == NULL) {
      return false; // dead code
    }
!   if (shuffle_klass->const_oop() == NULL || vector_klass->const_oop() == NULL ||
!     elem_klass->const_oop() == NULL || !vlen->is_con()) {
      if (C->print_intrinsics()) {
        tty->print_cr("  ** missing constant: vclass=%s sclass=%s etype=%s vlen=%s",
                      NodeClassNames[argument(0)->Opcode()],
                      NodeClassNames[argument(1)->Opcode()],
!                     NodeClassNames[argument(2)->Opcode()],
!                     NodeClassNames[argument(3)->Opcode()]);
      }
      return false; // not enough info for intrinsification
    }
!   if (!is_klass_initialized(vector_klass) || !is_klass_initialized(shuffle_klass)) {
      if (C->print_intrinsics()) {
        tty->print_cr("  ** klass argument not initialized");
      }
      return false;
    }
--- 1891,89 ---
    const TypeInstPtr* mbox_type = TypeInstPtr::make_exact(TypePtr::NotNull, mbox_klass);
  
    Node* v1 = unbox_vector(argument(5), vbox_type, elem_bt, num_elem);
    Node* v2 = unbox_vector(argument(6), vbox_type, elem_bt, num_elem);
  
+   bool is_masked_op = argument(7)->bottom_type() != TypePtr::NULL_PTR;
+   Node* mask = is_masked_op ? unbox_vector(argument(7), mbox_type, elem_bt, num_elem) : NULL;
+   if (is_masked_op && mask == NULL) {
+     if (C->print_intrinsics()) {
+       tty->print_cr("  ** not supported: mask = null arity=2 op=comp/%d vlen=%d etype=%s ismask=usestore is_masked_op=1",
+                     cond->get_con(), num_elem, type2name(elem_bt));
+     }
+     return false;
+   }
+ 
+   bool use_predicate = is_masked_op && arch_supports_vector(Op_VectorMaskCmp, num_elem, elem_bt, VecMaskUsePred);
+   if (is_masked_op && !use_predicate && !arch_supports_vector(Op_AndV, num_elem, elem_bt, VecMaskUseLoad)) {
+     if (C->print_intrinsics()) {
+       tty->print_cr("  ** not supported: arity=2 op=comp/%d vlen=%d etype=%s ismask=usestore is_masked_op=1",
+                     cond->get_con(), num_elem, type2name(elem_bt));
+     }
+     return false;
+   }
+ 
    if (v1 == NULL || v2 == NULL) {
      return false; // operand unboxing failed
    }
    BoolTest::mask pred = (BoolTest::mask)cond->get_con();
    ConINode* pred_node = (ConINode*)gvn().makecon(cond);
  
!   const TypeVect* vmask_type = TypeVect::makemask(mask_bt, num_elem);
!   Node* operation = new VectorMaskCmpNode(pred, v1, v2, pred_node, vmask_type);
+ 
+   if (is_masked_op) {
+     if (use_predicate) {
+       operation->add_req(mask);
+       operation->add_flag(Node::Flag_is_predicated_vector);
+     } else {
+       operation = gvn().transform(operation);
+       operation = VectorNode::make(Op_AndV, operation, mask, vmask_type);
+     }
+   }
+ 
+   operation = gvn().transform(operation);
  
    Node* box = box_vector(operation, mbox_type, mask_bt, num_elem);
    set_result(box);
    C->set_max_vector_size(MAX2(C->max_vector_size(), (uint)(num_elem * type2aelembytes(elem_bt))));
    return true;
  }
  
  // public static
! // <V extends Vector<E>,
! //  Sh extends VectorShuffle<E>,
! //  M extends VectorMask<E>,
! //  E>
! // V rearrangeOp(Class<? extends V> vectorClass, Class<Sh> shuffleClass, Class<M> maskClass, Class<E> elementType, int vlen,
+ //               V v1, Sh sh, M m,
+ //               VectorRearrangeOp<V, Sh, M, E> defaultImpl)
  bool LibraryCallKit::inline_vector_rearrange() {
    const TypeInstPtr* vector_klass  = gvn().type(argument(0))->isa_instptr();
    const TypeInstPtr* shuffle_klass = gvn().type(argument(1))->isa_instptr();
!   const TypeInstPtr* mask_klass    = gvn().type(argument(2))->isa_instptr();
!   const TypeInstPtr* elem_klass    = gvn().type(argument(3))->isa_instptr();
+   const TypeInt*     vlen          = gvn().type(argument(4))->isa_int();
  
!   if (vector_klass == NULL  || shuffle_klass == NULL ||  elem_klass == NULL || vlen == NULL) {
      return false; // dead code
    }
!   if (shuffle_klass->const_oop() == NULL ||
!       vector_klass->const_oop()  == NULL ||
+       elem_klass->const_oop()    == NULL ||
+       !vlen->is_con()) {
      if (C->print_intrinsics()) {
        tty->print_cr("  ** missing constant: vclass=%s sclass=%s etype=%s vlen=%s",
                      NodeClassNames[argument(0)->Opcode()],
                      NodeClassNames[argument(1)->Opcode()],
!                     NodeClassNames[argument(3)->Opcode()],
!                     NodeClassNames[argument(4)->Opcode()]);
      }
      return false; // not enough info for intrinsification
    }
!   if (!is_klass_initialized(vector_klass)  ||
+       !is_klass_initialized(shuffle_klass)) {
      if (C->print_intrinsics()) {
        tty->print_cr("  ** klass argument not initialized");
      }
      return false;
    }

*** 1426,31 ***
        tty->print_cr("  ** not supported: arity=0 op=load/shuffle vlen=%d etype=%s ismask=no",
                      num_elem, type2name(elem_bt));
      }
      return false; // not supported
    }
!   if (!arch_supports_vector(Op_VectorRearrange, num_elem, elem_bt, VecMaskNotUsed)) {
      if (C->print_intrinsics()) {
!       tty->print_cr("  ** not supported: arity=2 op=shuffle/rearrange vlen=%d etype=%s ismask=no",
!                     num_elem, type2name(elem_bt));
      }
-     return false; // not supported
    }
    ciKlass* vbox_klass = vector_klass->const_oop()->as_instance()->java_lang_Class_klass();
    const TypeInstPtr* vbox_type = TypeInstPtr::make_exact(TypePtr::NotNull, vbox_klass);
  
    ciKlass* shbox_klass = shuffle_klass->const_oop()->as_instance()->java_lang_Class_klass();
    const TypeInstPtr* shbox_type = TypeInstPtr::make_exact(TypePtr::NotNull, shbox_klass);
  
!   Node* v1 = unbox_vector(argument(4), vbox_type, elem_bt, num_elem);
!   Node* shuffle = unbox_vector(argument(5), shbox_type, shuffle_bt, num_elem);
  
    if (v1 == NULL || shuffle == NULL) {
      return false; // operand unboxing failed
    }
  
!   Node* rearrange = gvn().transform(new VectorRearrangeNode(v1, shuffle));
  
    Node* box = box_vector(rearrange, vbox_type, elem_bt, num_elem);
    set_result(box);
    C->set_max_vector_size(MAX2(C->max_vector_size(), (uint)(num_elem * type2aelembytes(elem_bt))));
    return true;
--- 1993,76 ---
        tty->print_cr("  ** not supported: arity=0 op=load/shuffle vlen=%d etype=%s ismask=no",
                      num_elem, type2name(elem_bt));
      }
      return false; // not supported
    }
! 
+   bool is_masked_op = argument(7)->bottom_type() != TypePtr::NULL_PTR;
+   bool use_predicate = is_masked_op;
+   if (is_masked_op &&
+       (mask_klass == NULL ||
+        mask_klass->const_oop() == NULL ||
+        !is_klass_initialized(mask_klass))) {
      if (C->print_intrinsics()) {
!       tty->print_cr("  ** mask_klass argument not initialized");
!     }
+   }
+   VectorMaskUseType checkFlags = (VectorMaskUseType)(is_masked_op ? (VecMaskUseLoad | VecMaskUsePred) : VecMaskNotUsed);
+   if (!arch_supports_vector(Op_VectorRearrange, num_elem, elem_bt, checkFlags)) {
+     use_predicate = false;
+     if(!is_masked_op ||
+        (!arch_supports_vector(Op_VectorRearrange, num_elem, elem_bt, VecMaskNotUsed) ||
+         !arch_supports_vector(Op_VectorBlend, num_elem, elem_bt, VecMaskUseLoad)     ||
+         !arch_supports_vector(VectorNode::replicate_opcode(elem_bt), num_elem, elem_bt, VecMaskNotUsed))) {
+       if (C->print_intrinsics()) {
+         tty->print_cr("  ** not supported: arity=2 op=shuffle/rearrange vlen=%d etype=%s ismask=no",
+                       num_elem, type2name(elem_bt));
+       }
+       return false; // not supported
      }
    }
    ciKlass* vbox_klass = vector_klass->const_oop()->as_instance()->java_lang_Class_klass();
    const TypeInstPtr* vbox_type = TypeInstPtr::make_exact(TypePtr::NotNull, vbox_klass);
  
    ciKlass* shbox_klass = shuffle_klass->const_oop()->as_instance()->java_lang_Class_klass();
    const TypeInstPtr* shbox_type = TypeInstPtr::make_exact(TypePtr::NotNull, shbox_klass);
  
!   Node* v1 = unbox_vector(argument(5), vbox_type, elem_bt, num_elem);
!   Node* shuffle = unbox_vector(argument(6), shbox_type, shuffle_bt, num_elem);
  
    if (v1 == NULL || shuffle == NULL) {
      return false; // operand unboxing failed
    }
  
!   Node* mask = NULL;
+   if (is_masked_op) {
+     ciKlass* mbox_klass = mask_klass->const_oop()->as_instance()->java_lang_Class_klass();
+     const TypeInstPtr* mbox_type = TypeInstPtr::make_exact(TypePtr::NotNull, mbox_klass);
+     mask = unbox_vector(argument(7), mbox_type, elem_bt, num_elem);
+     if (mask == NULL) {
+       if (C->print_intrinsics()) {
+         tty->print_cr("  ** not supported: arity=3 op=shuffle/rearrange vlen=%d etype=%s ismask=useload is_masked_op=1",
+                       num_elem, type2name(elem_bt));
+       }
+       return false;
+     }
+   }
+ 
+   Node* rearrange = new VectorRearrangeNode(v1, shuffle);
+   if (is_masked_op) {
+     if (use_predicate) {
+       rearrange->add_req(mask);
+       rearrange->add_flag(Node::Flag_is_predicated_vector);
+     } else {
+       const TypeVect* vt = v1->bottom_type()->is_vect();
+       rearrange = gvn().transform(rearrange);
+       Node* zero = gvn().makecon(Type::get_zero_type(elem_bt));
+       Node* zerovec = gvn().transform(VectorNode::scalar2vector(zero, num_elem, Type::get_const_basic_type(elem_bt)));
+       rearrange = new VectorBlendNode(zerovec, rearrange, mask);
+     }
+   }
+   rearrange = gvn().transform(rearrange);
  
    Node* box = box_vector(rearrange, vbox_type, elem_bt, num_elem);
    set_result(box);
    C->set_max_vector_size(MAX2(C->max_vector_size(), (uint)(num_elem * type2aelembytes(elem_bt))));
    return true;

*** 1512,82 ***
                                        opd2);
    return gvn().transform(new ProjNode(gvn().transform(operation), TypeFunc::Parms));
  }
  
  //  public static
! //  <V extends Vector<?,?>>
! //  V broadcastInt(int opr, Class<V> vectorClass, Class<?> elementType, int vlen,
! //                 V v, int i,
! //                 VectorBroadcastIntOp<V> defaultImpl) {
! //
  bool LibraryCallKit::inline_vector_broadcast_int() {
    const TypeInt*     opr          = gvn().type(argument(0))->isa_int();
    const TypeInstPtr* vector_klass = gvn().type(argument(1))->isa_instptr();
!   const TypeInstPtr* elem_klass   = gvn().type(argument(2))->isa_instptr();
!   const TypeInt*     vlen         = gvn().type(argument(3))->isa_int();
  
    if (opr == NULL || vector_klass == NULL || elem_klass == NULL || vlen == NULL) {
      return false; // dead code
    }
    if (!opr->is_con() || vector_klass->const_oop() == NULL || elem_klass->const_oop() == NULL || !vlen->is_con()) {
      if (C->print_intrinsics()) {
        tty->print_cr("  ** missing constant: opr=%s vclass=%s etype=%s vlen=%s",
                      NodeClassNames[argument(0)->Opcode()],
                      NodeClassNames[argument(1)->Opcode()],
!                     NodeClassNames[argument(2)->Opcode()],
!                     NodeClassNames[argument(3)->Opcode()]);
      }
      return false; // not enough info for intrinsification
    }
    if (!is_klass_initialized(vector_klass)) {
      if (C->print_intrinsics()) {
        tty->print_cr("  ** klass argument not initialized");
      }
      return false;
    }
    ciType* elem_type = elem_klass->const_oop()->as_instance()->java_mirror_type();
    if (!elem_type->is_primitive_type()) {
      if (C->print_intrinsics()) {
        tty->print_cr("  ** not a primitive bt=%d", elem_type->basic_type());
      }
      return false; // should be primitive type
    }
!   BasicType elem_bt = elem_type->basic_type();
    int num_elem = vlen->get_con();
    int opc = VectorSupport::vop2ideal(opr->get_con(), elem_bt);
    bool is_shift  = VectorNode::is_shift_opcode(opc);
    bool is_rotate = VectorNode::is_rotate_opcode(opc);
    if (opc == 0 || (!is_shift && !is_rotate)) {
      if (C->print_intrinsics()) {
        tty->print_cr("  ** operation not supported: op=%d bt=%s", opr->get_con(), type2name(elem_bt));
      }
      return false; // operation not supported
    }
    int sopc = VectorNode::opcode(opc, elem_bt);
    if (sopc == 0) {
      if (C->print_intrinsics()) {
        tty->print_cr("  ** operation not supported: opc=%s bt=%s", NodeClassNames[opc], type2name(elem_bt));
      }
      return false; // operation not supported
    }
!   Node* cnt  = argument(5);
    ciKlass* vbox_klass = vector_klass->const_oop()->as_instance()->java_lang_Class_klass();
    const TypeInstPtr* vbox_type = TypeInstPtr::make_exact(TypePtr::NotNull, vbox_klass);
    const TypeInt* cnt_type = cnt->bottom_type()->isa_int();
  
    // If CPU supports vector constant rotate instructions pass it directly
    bool is_const_rotate = is_rotate && cnt_type && cnt_type->is_con() &&
                           Matcher::supports_vector_constant_rotates(cnt_type->get_con());
    bool has_scalar_args = is_rotate ? !is_const_rotate : true;
!   if (!arch_supports_vector(sopc, num_elem, elem_bt, VecMaskNotUsed, has_scalar_args)) {
!     if (C->print_intrinsics()) {
!       tty->print_cr("  ** not supported: arity=0 op=int/%d vlen=%d etype=%s ismask=no",
!                     sopc, num_elem, type2name(elem_bt));
      }
-     return false; // not supported
    }
!   Node* opd1 = unbox_vector(argument(4), vbox_type, elem_bt, num_elem);
    Node* opd2 = NULL;
    if (is_shift) {
      opd2 = vector_shift_count(cnt, opc, elem_bt, num_elem);
    } else {
      assert(is_rotate, "unexpected operation");
--- 2124,127 ---
                                        opd2);
    return gvn().transform(new ProjNode(gvn().transform(operation), TypeFunc::Parms));
  }
  
  //  public static
! //  <V extends Vector<E>,
! //   M extends VectorMask<E>,
! //   E>
! //  V broadcastInt(int opr, Class<? extends V> vectorClass, Class<? extends M> maskClass,
! //                 Class<E> elementType, int length,
+ //                 V v, int n, M m,
+ //                 VectorBroadcastIntOp<V, M> defaultImpl)
  bool LibraryCallKit::inline_vector_broadcast_int() {
    const TypeInt*     opr          = gvn().type(argument(0))->isa_int();
    const TypeInstPtr* vector_klass = gvn().type(argument(1))->isa_instptr();
!   const TypeInstPtr* mask_klass   = gvn().type(argument(2))->isa_instptr();
!   const TypeInstPtr* elem_klass   = gvn().type(argument(3))->isa_instptr();
+   const TypeInt*     vlen         = gvn().type(argument(4))->isa_int();
  
    if (opr == NULL || vector_klass == NULL || elem_klass == NULL || vlen == NULL) {
      return false; // dead code
    }
    if (!opr->is_con() || vector_klass->const_oop() == NULL || elem_klass->const_oop() == NULL || !vlen->is_con()) {
      if (C->print_intrinsics()) {
        tty->print_cr("  ** missing constant: opr=%s vclass=%s etype=%s vlen=%s",
                      NodeClassNames[argument(0)->Opcode()],
                      NodeClassNames[argument(1)->Opcode()],
!                     NodeClassNames[argument(3)->Opcode()],
!                     NodeClassNames[argument(4)->Opcode()]);
      }
      return false; // not enough info for intrinsification
    }
    if (!is_klass_initialized(vector_klass)) {
      if (C->print_intrinsics()) {
        tty->print_cr("  ** klass argument not initialized");
      }
      return false;
    }
+ 
+   const Type* vmask_type = gvn().type(argument(7));
+   bool is_masked_op = vmask_type != TypePtr::NULL_PTR;
+   if (is_masked_op) {
+     if (mask_klass == NULL || mask_klass->const_oop() == NULL) {
+       if (C->print_intrinsics()) {
+         tty->print_cr("  ** missing constant: maskclass=%s", NodeClassNames[argument(2)->Opcode()]);
+       }
+       return false; // not enough info for intrinsification
+     }
+ 
+     if (!is_klass_initialized(mask_klass)) {
+       if (C->print_intrinsics()) {
+         tty->print_cr("  ** mask klass argument not initialized");
+       }
+       return false;
+     }
+ 
+     if (vmask_type->maybe_null()) {
+       if (C->print_intrinsics()) {
+         tty->print_cr("  ** null mask values are not allowed for masked op");
+       }
+       return false;
+     }
+   }
+ 
    ciType* elem_type = elem_klass->const_oop()->as_instance()->java_mirror_type();
    if (!elem_type->is_primitive_type()) {
      if (C->print_intrinsics()) {
        tty->print_cr("  ** not a primitive bt=%d", elem_type->basic_type());
      }
      return false; // should be primitive type
    }
! 
    int num_elem = vlen->get_con();
+   BasicType elem_bt = elem_type->basic_type();
    int opc = VectorSupport::vop2ideal(opr->get_con(), elem_bt);
+ 
    bool is_shift  = VectorNode::is_shift_opcode(opc);
    bool is_rotate = VectorNode::is_rotate_opcode(opc);
+ 
    if (opc == 0 || (!is_shift && !is_rotate)) {
      if (C->print_intrinsics()) {
        tty->print_cr("  ** operation not supported: op=%d bt=%s", opr->get_con(), type2name(elem_bt));
      }
      return false; // operation not supported
    }
+ 
    int sopc = VectorNode::opcode(opc, elem_bt);
    if (sopc == 0) {
      if (C->print_intrinsics()) {
        tty->print_cr("  ** operation not supported: opc=%s bt=%s", NodeClassNames[opc], type2name(elem_bt));
      }
      return false; // operation not supported
    }
! 
+   Node* cnt  = argument(6);
    ciKlass* vbox_klass = vector_klass->const_oop()->as_instance()->java_lang_Class_klass();
    const TypeInstPtr* vbox_type = TypeInstPtr::make_exact(TypePtr::NotNull, vbox_klass);
    const TypeInt* cnt_type = cnt->bottom_type()->isa_int();
  
    // If CPU supports vector constant rotate instructions pass it directly
    bool is_const_rotate = is_rotate && cnt_type && cnt_type->is_con() &&
                           Matcher::supports_vector_constant_rotates(cnt_type->get_con());
    bool has_scalar_args = is_rotate ? !is_const_rotate : true;
! 
!   VectorMaskUseType checkFlags = (VectorMaskUseType)(is_masked_op ? (VecMaskUseLoad | VecMaskUsePred) : VecMaskNotUsed);
!   bool use_predicate = is_masked_op;
! 
+   if (!arch_supports_vector(sopc, num_elem, elem_bt, checkFlags, has_scalar_args)) {
+     use_predicate = false;
+     if (!is_masked_op ||
+         (!arch_supports_vector(sopc, num_elem, elem_bt, VecMaskNotUsed, has_scalar_args) ||
+          !arch_supports_vector(Op_VectorBlend, num_elem, elem_bt, VecMaskUseLoad))) {
+ 
+       if (C->print_intrinsics()) {
+         tty->print_cr("  ** not supported: arity=0 op=int/%d vlen=%d etype=%s is_masked_op=%d",
+                       sopc, num_elem, type2name(elem_bt), is_masked_op ? 1 : 0);
+       }
+       return false; // not supported
      }
    }
! 
+   Node* opd1 = unbox_vector(argument(5), vbox_type, elem_bt, num_elem);
    Node* opd2 = NULL;
    if (is_shift) {
      opd2 = vector_shift_count(cnt, opc, elem_bt, num_elem);
    } else {
      assert(is_rotate, "unexpected operation");

*** 1598,15 ***
      } else {
        // Constant shift value.
        opd2 = cnt;
      }
    }
    if (opd1 == NULL || opd2 == NULL) {
      return false;
    }
-   Node* operation = gvn().transform(VectorNode::make(opc, opd1, opd2, num_elem, elem_bt));
  
    Node* vbox = box_vector(operation, vbox_type, elem_bt, num_elem);
    set_result(vbox);
    C->set_max_vector_size(MAX2(C->max_vector_size(), (uint)(num_elem * type2aelembytes(elem_bt))));
    return true;
  }
--- 2255,39 ---
      } else {
        // Constant shift value.
        opd2 = cnt;
      }
    }
+ 
    if (opd1 == NULL || opd2 == NULL) {
      return false;
    }
  
+   Node* mask = NULL;
+   if (is_masked_op) {
+     ciKlass* mbox_klass = mask_klass->const_oop()->as_instance()->java_lang_Class_klass();
+     const TypeInstPtr* mbox_type = TypeInstPtr::make_exact(TypePtr::NotNull, mbox_klass);
+     mask = unbox_vector(argument(7), mbox_type, elem_bt, num_elem);
+     if (mask == NULL) {
+       if (C->print_intrinsics()) {
+         tty->print_cr("  ** unbox failed mask=%s", NodeClassNames[argument(7)->Opcode()]);
+       }
+       return false;
+     }
+   }
+ 
+   Node* operation = VectorNode::make(opc, opd1, opd2, num_elem, elem_bt);
+   if (is_masked_op && mask != NULL) {
+     if (use_predicate) {
+       operation->add_req(mask);
+       operation->add_flag(Node::Flag_is_predicated_vector);
+     } else {
+       operation = gvn().transform(operation);
+       operation = new VectorBlendNode(opd1, operation, mask);
+     }
+   }
+   operation = gvn().transform(operation);
    Node* vbox = box_vector(operation, vbox_type, elem_bt, num_elem);
    set_result(vbox);
    C->set_max_vector_size(MAX2(C->max_vector_size(), (uint)(num_elem * type2aelembytes(elem_bt))));
    return true;
  }

*** 1616,11 ***
  //                   S extends VectorSpecies>
  // VOUT convert(int oprId,
  //           Class<?> fromVectorClass, Class<?> fromElementType, int fromVLen,
  //           Class<?>   toVectorClass, Class<?>   toElementType, int   toVLen,
  //           VIN v, S s,
! //           VectorConvertOp<VOUT, VIN, S> defaultImpl) {
  //
  bool LibraryCallKit::inline_vector_convert() {
    const TypeInt*     opr               = gvn().type(argument(0))->isa_int();
  
    const TypeInstPtr* vector_klass_from = gvn().type(argument(1))->isa_instptr();
--- 2297,11 ---
  //                   S extends VectorSpecies>
  // VOUT convert(int oprId,
  //           Class<?> fromVectorClass, Class<?> fromElementType, int fromVLen,
  //           Class<?>   toVectorClass, Class<?>   toElementType, int   toVLen,
  //           VIN v, S s,
! //           VectorConvertOp<VOUT, VIN, S> defaultImpl)
  //
  bool LibraryCallKit::inline_vector_convert() {
    const TypeInt*     opr               = gvn().type(argument(0))->isa_int();
  
    const TypeInstPtr* vector_klass_from = gvn().type(argument(1))->isa_instptr();

*** 1677,13 ***
    ciType* elem_type_to = elem_klass_to->const_oop()->as_instance()->java_mirror_type();
    if (!elem_type_to->is_primitive_type()) {
      return false; // should be primitive type
    }
    BasicType elem_bt_to = elem_type_to->basic_type();
-   if (is_mask && (type2aelembytes(elem_bt_from) != type2aelembytes(elem_bt_to))) {
-     return false; // elem size mismatch
-   }
  
    int num_elem_from = vlen_from->get_con();
    int num_elem_to = vlen_to->get_con();
  
    // Check whether we can unbox to appropriate size. Even with casting, checking for reinterpret is needed
--- 2358,10 ---

*** 1725,17 ***
    Node* opd1 = unbox_vector(argument(7), vbox_type_from, elem_bt_from, num_elem_from);
    if (opd1 == NULL) {
      return false;
    }
  
!   const TypeVect* src_type = TypeVect::make(elem_bt_from, num_elem_from);
!   const TypeVect* dst_type = TypeVect::make(elem_bt_to,   num_elem_to);
  
    Node* op = opd1;
    if (is_cast) {
!     assert(!is_mask, "masks cannot be casted");
!     int cast_vopc = VectorCastNode::opcode(elem_bt_from);
      // Make sure that cast is implemented to particular type/size combination.
      if (!arch_supports_vector(cast_vopc, num_elem_to, elem_bt_to, VecMaskNotUsed)) {
        if (C->print_intrinsics()) {
          tty->print_cr("  ** not supported: arity=1 op=cast#%d/3 vlen2=%d etype2=%s ismask=%d",
                        cast_vopc,
--- 2403,32 ---
    Node* opd1 = unbox_vector(argument(7), vbox_type_from, elem_bt_from, num_elem_from);
    if (opd1 == NULL) {
      return false;
    }
  
!   const TypeVect* src_type = TypeVect::make(elem_bt_from, num_elem_from, is_mask);
!   const TypeVect* dst_type = TypeVect::make(elem_bt_to, num_elem_to, is_mask);
+ 
+   // Safety check to prevent casting if source mask is of type vector
+   // and destination mask of type predicate vector and vice-versa.
+   // From X86 standpoint, this case will only arise over KNL target,
+   // where certain masks (depending on the species) are either propagated
+   // through a vector or predicate register.
+   if (is_mask &&
+       ((src_type->isa_vectmask() == NULL && dst_type->isa_vectmask()) ||
+        (dst_type->isa_vectmask() == NULL && src_type->isa_vectmask()))) {
+     return false;
+   }
  
    Node* op = opd1;
    if (is_cast) {
!     BasicType new_elem_bt_to = elem_bt_to;
!     BasicType new_elem_bt_from = elem_bt_from;
+     if (is_mask && is_floating_point_type(elem_bt_from)) {
+       new_elem_bt_from = elem_bt_from == T_FLOAT ? T_INT : T_LONG;
+     }
+     int cast_vopc = VectorCastNode::opcode(new_elem_bt_from);
      // Make sure that cast is implemented to particular type/size combination.
      if (!arch_supports_vector(cast_vopc, num_elem_to, elem_bt_to, VecMaskNotUsed)) {
        if (C->print_intrinsics()) {
          tty->print_cr("  ** not supported: arity=1 op=cast#%d/3 vlen2=%d etype2=%s ismask=%d",
                        cast_vopc,

*** 1785,13 ***
                                                       src_type,
                                                       TypeVect::make(elem_bt_from,
                                                                      num_elem_for_resize)));
        op = gvn().transform(VectorCastNode::make(cast_vopc, op, elem_bt_to, num_elem_to));
      } else {
!       // Since input and output number of elements match, and since we know this vector size is
!       // supported, simply do a cast with no resize needed.
!       op = gvn().transform(VectorCastNode::make(cast_vopc, op, elem_bt_to, num_elem_to));
      }
    } else if (Type::cmp(src_type, dst_type) != 0) {
      assert(!is_cast, "must be reinterpret");
      op = gvn().transform(new VectorReinterpretNode(op, src_type, dst_type));
    }
--- 2478,36 ---
                                                       src_type,
                                                       TypeVect::make(elem_bt_from,
                                                                      num_elem_for_resize)));
        op = gvn().transform(VectorCastNode::make(cast_vopc, op, elem_bt_to, num_elem_to));
      } else {
!       if (is_mask) {
!         if ((dst_type->isa_vectmask() && src_type->isa_vectmask()) ||
!             (type2aelembytes(elem_bt_from) == type2aelembytes(elem_bt_to))) {
+           op = gvn().transform(new VectorMaskCastNode(op, dst_type));
+         } else {
+           // Special handling for casting operation involving floating point types.
+           // Case A) F -> X :=  F -> VectorMaskCast (F->I/L [NOP]) -> VectorCast[I/L]2X
+           // Case B) X -> F :=  X -> VectorCastX2[I/L] -> VectorMaskCast ([I/L]->F [NOP])
!           // Case C) F -> F :=  VectorMaskCast (F->I/L [NOP]) -> VectorCast[I/L]2[L/I] -> VectorMaskCast (L/I->F [NOP])
+           if (is_floating_point_type(elem_bt_from)) {
+             const TypeVect* new_src_type = TypeVect::make(new_elem_bt_from, num_elem_to, is_mask);
+             op = gvn().transform(new VectorMaskCastNode(op, new_src_type));
+           }
+           if (is_floating_point_type(elem_bt_to)) {
+             new_elem_bt_to = elem_bt_to == T_FLOAT ? T_INT : T_LONG;
+           }
+           op = gvn().transform(VectorCastNode::make(cast_vopc, op, new_elem_bt_to, num_elem_to));
+           if (new_elem_bt_to != elem_bt_to) {
+             op = gvn().transform(new VectorMaskCastNode(op, dst_type));
+           }
+         }
+       } else {
+         // Since input and output number of elements match, and since we know this vector size is
+         // supported, simply do a cast with no resize needed.
+         op = gvn().transform(VectorCastNode::make(cast_vopc, op, elem_bt_to, num_elem_to));
+       }
      }
    } else if (Type::cmp(src_type, dst_type) != 0) {
      assert(!is_cast, "must be reinterpret");
      op = gvn().transform(new VectorReinterpretNode(op, src_type, dst_type));
    }

*** 1802,15 ***
    C->set_max_vector_size(MAX2(C->max_vector_size(), (uint)(num_elem_to * type2aelembytes(elem_bt_to))));
    return true;
  }
  
  //  public static
! //  <V extends Vector<?>>
! //  V insert(Class<? extends V> vectorClass, Class<?> elementType, int vlen,
  //           V vec, int ix, long val,
! //           VecInsertOp<V> defaultImpl) {
- //
  bool LibraryCallKit::inline_vector_insert() {
    const TypeInstPtr* vector_klass = gvn().type(argument(0))->isa_instptr();
    const TypeInstPtr* elem_klass   = gvn().type(argument(1))->isa_instptr();
    const TypeInt*     vlen         = gvn().type(argument(2))->isa_int();
    const TypeInt*     idx          = gvn().type(argument(4))->isa_int();
--- 2518,15 ---
    C->set_max_vector_size(MAX2(C->max_vector_size(), (uint)(num_elem_to * type2aelembytes(elem_bt_to))));
    return true;
  }
  
  //  public static
! //  <V extends Vector<E>,
! //   E>
+ //  V insert(Class<? extends V> vectorClass, Class<E> elementType, int vlen,
  //           V vec, int ix, long val,
! //           VecInsertOp<V> defaultImpl)
  bool LibraryCallKit::inline_vector_insert() {
    const TypeInstPtr* vector_klass = gvn().type(argument(0))->isa_instptr();
    const TypeInstPtr* elem_klass   = gvn().type(argument(1))->isa_instptr();
    const TypeInt*     vlen         = gvn().type(argument(2))->isa_int();
    const TypeInt*     idx          = gvn().type(argument(4))->isa_int();

*** 1895,15 ***
    C->set_max_vector_size(MAX2(C->max_vector_size(), (uint)(num_elem * type2aelembytes(elem_bt))));
    return true;
  }
  
  //  public static
! //  <V extends Vector<?>>
! //  long extract(Class<?> vectorClass, Class<?> elementType, int vlen,
  //               V vec, int ix,
! //               VecExtractOp<V> defaultImpl) {
- //
  bool LibraryCallKit::inline_vector_extract() {
    const TypeInstPtr* vector_klass = gvn().type(argument(0))->isa_instptr();
    const TypeInstPtr* elem_klass   = gvn().type(argument(1))->isa_instptr();
    const TypeInt*     vlen         = gvn().type(argument(2))->isa_int();
    const TypeInt*     idx          = gvn().type(argument(4))->isa_int();
--- 2611,15 ---
    C->set_max_vector_size(MAX2(C->max_vector_size(), (uint)(num_elem * type2aelembytes(elem_bt))));
    return true;
  }
  
  //  public static
! //  <V extends Vector<E>,
! //   E>
+ //  long extract(Class<? extends V> vectorClass, Class<E> elementType, int vlen,
  //               V vec, int ix,
! //               VecExtractOp<V> defaultImpl)
  bool LibraryCallKit::inline_vector_extract() {
    const TypeInstPtr* vector_klass = gvn().type(argument(0))->isa_instptr();
    const TypeInstPtr* elem_klass   = gvn().type(argument(1))->isa_instptr();
    const TypeInt*     vlen         = gvn().type(argument(2))->isa_int();
    const TypeInt*     idx          = gvn().type(argument(4))->isa_int();
< prev index next >